diff --git a/openbr/plugins/cuda/cudacvtfloat.cpp b/openbr/plugins/cuda/cudacvtfloat.cpp
new file mode 100644
index 0000000..211fcfb
--- /dev/null
+++ b/openbr/plugins/cuda/cudacvtfloat.cpp
@@ -0,0 +1,61 @@
+#include <iostream>
+using namespace std;
+
+#include <opencv2/opencv.hpp>
+using namespace cv;
+
+#include <openbr/plugins/openbr_internal.h>
+
+#include "cudacvtfloat.hpp"
+
+// NOTE(review): the angle-bracket header names above were missing from the
+// reviewed copy of this patch and have been reconstructed from the symbols used
+// below (cout, cv::Mat, UntrainableTransform/BR_REGISTER). The original listed
+// one more system header next to <iostream>; restore it if something needs it.
+
+namespace br
+{
+
+/*!
+ * \ingroup transforms
+ * \brief Converts a single-channel 8-bit image to 32-bit floating point on the GPU
+ * \author Colin Heinzmann \cite DepthDeluxe
+ *
+ * Inputs that are not CV_8UC1 are rejected: an error is printed and \c dst is
+ * left untouched.
+ */
+class CUDACvtFloatTransform : public UntrainableTransform
+{
+    Q_OBJECT
+
+public:
+    void project(const Template &src, Template &dst) const
+    {
+        // assume the image type is 256-monochrome
+        // TODO(colin): real exception handling
+        if (src.m().type() != CV_8UC1) {
+            cout << "ERR: Invalid memory format" << endl;
+            return;
+        }
+
+        // The CUDA wrapper copies rows*cols tightly packed bytes starting at
+        // the first pixel, so a matrix with row padding (e.g. an ROI view)
+        // must be compacted first.
+        Mat input = src.m();
+        if (!input.isContinuous())
+            input = input.clone();
+
+        const int rows = input.rows;
+        const int cols = input.cols;
+
+        dst = Mat(rows, cols, CV_32FC1);
+
+        br::cuda::cudacvtfloat::wrapper(input.ptr<unsigned char>(), dst.m().ptr<float>(), rows, cols);
+    }
+};
+
+BR_REGISTER(Transform, CUDACvtFloatTransform)
+
+} // namespace br
+
+#include "cuda/cudacvtfloat.moc"
diff --git a/openbr/plugins/cuda/cudacvtfloat.cu b/openbr/plugins/cuda/cudacvtfloat.cu
new file mode 100644
index 0000000..4f675fb
--- /dev/null
+++ b/openbr/plugins/cuda/cudacvtfloat.cu
@@ -0,0 +1,76 @@
+#include <cstdio>
+
+namespace br { namespace cuda { namespace cudacvtfloat {
+
+    // Reports a failed CUDA runtime call on stderr; returns true on success.
+    static bool succeeded(cudaError_t status, const char* what) {
+        if (status == cudaSuccess) {
+            return true;
+        }
+        fprintf(stderr, "ERR: cudacvtfloat: %s failed: %s\n", what, cudaGetErrorString(status));
+        return false;
+    }
+
+    // Converts a tightly packed rows x cols 8-bit image to float, one thread
+    // per pixel. Expects a 2D launch whose x dimension spans columns and whose
+    // y dimension spans rows; threads outside the image return immediately.
+    __global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) {
+        // get my index
+        int rowInd = blockIdx.y*blockDim.y + threadIdx.y;
+        int colInd = blockIdx.x*blockDim.x + threadIdx.x;
+
+        // bounds check
+        if (rowInd >= rows || colInd >= cols) {
+            return;
+        }
+
+        // widen before multiplying so large images cannot overflow int
+        size_t index = (size_t)rowInd*cols + colInd;
+        dst[index] = (float)src[index];
+    }
+
+    // Host entry point: uploads src (rows*cols tightly packed bytes), converts
+    // it on the device and downloads rows*cols floats into dst. Does nothing
+    // for null pointers or an empty image. If any CUDA call fails, the error is
+    // printed to stderr and dst is not guaranteed to hold valid data.
+    void wrapper(const unsigned char* src, float* dst, int rows, int cols) {
+        if (src == NULL || dst == NULL || rows <= 0 || cols <= 0) {
+            return;
+        }
+
+        const size_t count = (size_t)rows * (size_t)cols;
+        const size_t srcBytes = count * sizeof(unsigned char);
+        const size_t dstBytes = count * sizeof(float);
+
+        unsigned char* cudaSrc = NULL;
+        float* cudaDst = NULL;
+
+        const bool uploaded = succeeded(cudaMalloc(&cudaSrc, srcBytes), "cudaMalloc(src)")
+                           && succeeded(cudaMalloc(&cudaDst, dstBytes), "cudaMalloc(dst)")
+                           && succeeded(cudaMemcpy(cudaSrc, src, srcBytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");
+
+        if (uploaded) {
+            // 32 threads along x so each warp touches one contiguous row segment
+            const dim3 threadsPerBlock(32, 8);
+            // ceil-div: just enough blocks to cover the image
+            const dim3 blocks(
+                (cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
+                (rows + threadsPerBlock.y - 1) / threadsPerBlock.y
+            );
+
+            kernel<<<blocks, threadsPerBlock>>>(cudaSrc, cudaDst, rows, cols);
+
+            // A bad launch configuration is only reported by cudaGetLastError();
+            // the blocking copy below waits for the kernel and surfaces any
+            // execution error.
+            if (succeeded(cudaGetLastError(), "kernel launch")) {
+                succeeded(cudaMemcpy(dst, cudaDst, dstBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");
+            }
+        }
+
+        // release the device buffers on every path; cudaFree(NULL) is a no-op
+        cudaFree(cudaDst);
+        cudaFree(cudaSrc);
+    }
+
+}}}
diff --git a/openbr/plugins/cuda/cudacvtfloat.hpp b/openbr/plugins/cuda/cudacvtfloat.hpp
new file mode 100644
index 0000000..d2fe82a
--- /dev/null
+++ b/openbr/plugins/cuda/cudacvtfloat.hpp
@@ -0,0 +1,10 @@
+#ifndef BR_CUDA_CUDACVTFLOAT_HPP
+#define BR_CUDA_CUDACVTFLOAT_HPP
+
+namespace br { namespace cuda { namespace cudacvtfloat {
+    // Converts a tightly packed rows x cols 8-bit single-channel host image
+    // (src) into rows*cols floats written to the host buffer dst.
+    void wrapper(const unsigned char* src, float* dst, int rows, int cols);
+}}}
+
+#endif // BR_CUDA_CUDACVTFLOAT_HPP