Peter M. Groen / openbr

Authored by DepthDeluxe 2016-02-10 17:22:43 -0500

1 parent e6a8cd8f

Showing 3 changed files with 88 additions and 0 deletions

openbr/plugins/cuda/cudacvtfloat.cpp 0 → 100644

		1	+#include <iostream>
		2	+#include <unistd.h>
		3	+using namespace std;
		4	+
		5	+#include <opencv2/opencv.hpp>
		6	+using namespace cv;
		7	+
		8	+#include <openbr/plugins/openbr_internal.h>
		9	+
		10	+#include "cudacvtfloat.hpp"
		11	+
		12	+namespace br
		13	+{
		14	+
		15	+/*!
		16	+ * \ingroup transforms
		17	+ * \brief Converts byte to floating point
		18	+ * \author Colin Heinzmann \cite DepthDeluxe
		19	+ */
		20	+class CUDACvtFloatTransform : public UntrainableTransform
		21	+{
		22	+ Q_OBJECT
		23	+
		24	+ public:
		25	+ void project(const Template &src, Template &dst) const
		26	+ {
		27	+ // assume the image type is 256-monochrome
		28	+ // TODO(colin): real exception handling
		29	+ if (src.m().type() != CV_8UC1) {
		30	+ cout << "ERR: Invalid memory format" << endl;
		31	+ return;
		32	+ }
		33	+
		34	+
		35	+ int rows = src.m().rows;
		36	+ int cols = src.m().cols;
		37	+
		38	+ dst = Mat(rows, cols, CV_32FC1);
		39	+
		40	+ br::cuda::cudacvtfloat::wrapper((const unsigned char*)src.m().ptr<unsigned char>(), dst.m().ptr<float>(), rows, cols);
		41	+ }
		42	+};
		43	+
		44	+BR_REGISTER(Transform, CUDACvtFloatTransform)
		45	+
		46	+} // namespace br
		47	+
		48	+#include "cuda/cudacvtfloat.moc"

openbr/plugins/cuda/cudacvtfloat.cu 0 → 100644

		1	+namespace br { namespace cuda { namespace cudacvtfloat {
		2	+
		3	+ __global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) {
		4	+ // get my index
		5	+ int rowInd = blockIdx.y*blockDim.y + threadIdx.y;
		6	+ int colInd = blockIdx.x*blockDim.x + threadIdx.x;
		7	+
		8	+ // bounds check
		9	+ if (rowInd >= rows \|\| colInd >= cols) {
		10	+ return;
		11	+ }
		12	+
		13	+ int index = rowInd*cols + colInd;
		14	+ dst[index] = (float)src[index];
		15	+ }
		16	+
		17	+ void wrapper(const unsigned char* src, float* dst, int rows, int cols) {
		18	+ unsigned char* cudaSrc;
		19	+ cudaMalloc(&cudaSrc, rowscolssizeof(unsigned char));
		20	+ cudaMemcpy(cudaSrc, src, rowscolssizeof(unsigned char), cudaMemcpyHostToDevice);
		21	+
		22	+ float* cudaDst;
		23	+ cudaMalloc(&cudaDst, rowscolssizeof(float));
		24	+
		25	+ dim3 threadsPerBlock(8, 8);
		26	+ dim3 blocks(
		27	+ cols / threadsPerBlock.x + 1,
		28	+ rows / threadsPerBlock.y + 1
		29	+ );
		30	+
		31	+ kernel<<<threadsPerBlock, blocks>>>(cudaSrc, cudaDst, rows, cols);
		32	+
		33	+ // copy the data back to the destination
		34	+ cudaMemcpy(dst, cudaDst, rowscolssizeof(float), cudaMemcpyDeviceToHost);
		35	+ }
		36	+
		37	+}}}

openbr/plugins/cuda/cudacvtfloat.hpp 0 → 100644

		1	+namespace br { namespace cuda { namespace cudacvtfloat {
		2	+ void wrapper(const unsigned char* src, float* dst, int rows, int cols);
		3	+}}}