diff --git a/openbr/plugins/cuda/copyfrom.cpp b/openbr/plugins/cuda/copyfrom.cpp index 68b9507..682e9da 100644 --- a/openbr/plugins/cuda/copyfrom.cpp +++ b/openbr/plugins/cuda/copyfrom.cpp @@ -11,7 +11,7 @@ using namespace cv; // extern CUDA declaration namespace br { namespace cuda { namespace cudacopyfrom { //template void wrapper(void* src, T* out, int rows, int cols) { - void wrapper(void* src, unsigned char* out, const int rows, const int cols); + void wrapper(void* src, float* out, const int rows, const int cols); }}} namespace br @@ -23,7 +23,7 @@ namespace br private: void project(const Template &src, Template &dst) const { - cout << "CUDACopyFrom Start" << endl; + cout << "CUDACopyFrom Start" << endl << endl << endl; // pull the data back out of the Mat void* const* dataPtr = src.m().ptr(); @@ -32,16 +32,26 @@ private: int cols = *((int*)dataPtr[2]); int type = *((int*)dataPtr[3]); + if (type != CV_32FC1) { + cout << "ERR: Invalid data type!" << endl; + return; + } + cout << "cudaMemPtr: " << cudaMemPtr << endl; cout << "rows: " << rows << endl; cout << "cols: " << cols << endl; cout << "type: " << type << endl; - dst = Mat(rows, cols, type); - - br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dst.m().ptr(), rows, cols); + Mat dstMat = Mat(rows, cols, type); + br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dstMat.ptr(), rows, cols); + dst = dstMat; cout << "CUDACopyFrom End" << endl; + + cout << "DST Data" << endl; + cout << "rows: " << dstMat.rows << endl; + cout << "cols: " << dstMat.cols << endl; + cout << "type: " << dstMat.type() << endl; } }; diff --git a/openbr/plugins/cuda/copyfrom.cu b/openbr/plugins/cuda/copyfrom.cu index f6f988c..2b93f4b 100644 --- a/openbr/plugins/cuda/copyfrom.cu +++ b/openbr/plugins/cuda/copyfrom.cu @@ -1,7 +1,7 @@ namespace br { namespace cuda { namespace cudacopyfrom { //template void wrapper(void* src, T* out, int rows, int cols) { - void wrapper(void* src, unsigned char* out, const int rows, const int cols) { - cudaMemcpy(out, src, rows*cols*sizeof(unsigned char), cudaMemcpyDeviceToHost); + void wrapper(void* src, float* dst, const int rows, const int cols) { + cudaMemcpy(dst, src, rows*cols*sizeof(float), cudaMemcpyDeviceToHost); cudaFree(src); } }}} diff --git a/openbr/plugins/cuda/cudacvtfloat.cpp b/openbr/plugins/cuda/cudacvtfloat.cpp index 211fcfb..e8fcfa5 100644 --- a/openbr/plugins/cuda/cudacvtfloat.cpp +++ b/openbr/plugins/cuda/cudacvtfloat.cpp @@ -7,7 +7,10 @@ using namespace cv; #include -#include "cudacvtfloat.hpp" + +namespace br { namespace cuda { namespace cudacvtfloat { + void wrapper(const unsigned char* src, void** dst, int rows, int cols); +}}} namespace br { @@ -24,20 +27,32 @@ class CUDACvtFloatTransform : public UntrainableTransform public: void project(const Template &src, Template &dst) const { + cout << "CUDACvtFloat Start" << endl; + + void* const* srcDataPtr = src.m().ptr(); + void* srcMemPtr = srcDataPtr[0]; + int rows = *((int*)srcDataPtr[1]); + int cols = *((int*)srcDataPtr[2]); + int type = *((int*)srcDataPtr[3]); + // assume the image type is 256-monochrome // TODO(colin): real exception handling - if (src.m().type() != CV_8UC1) { + if (type != CV_8UC1) { cout << "ERR: Invalid memory format" << endl; return; } + // build the destination mat + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); + void** dstDataPtr = dstMat.ptr(); + dstDataPtr[1] = srcDataPtr[1]; + dstDataPtr[2] = srcDataPtr[2]; + dstDataPtr[3] = srcDataPtr[3]; *((int*)dstDataPtr[3]) = CV_32FC1; - int rows = src.m().rows; - int cols = src.m().cols; - - dst = Mat(rows, cols, CV_32FC1); + br::cuda::cudacvtfloat::wrapper((const unsigned char*)srcMemPtr, &dstDataPtr[0], rows, cols); + dst = dstMat; - br::cuda::cudacvtfloat::wrapper((const unsigned char*)src.m().ptr(), dst.m().ptr(), rows, cols); + cout << "CUDACvtFloat End" << endl; } }; diff --git a/openbr/plugins/cuda/cudacvtfloat.cu b/openbr/plugins/cuda/cudacvtfloat.cu index 4f675fb..3cc44ef 100644 --- a/openbr/plugins/cuda/cudacvtfloat.cu +++ b/openbr/plugins/cuda/cudacvtfloat.cu @@ -14,13 +14,15 @@ namespace br { namespace cuda { namespace cudacvtfloat { dst[index] = (float)src[index]; } - void wrapper(const unsigned char* src, float* dst, int rows, int cols) { - unsigned char* cudaSrc; - cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char)); - cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice); + void wrapper(const unsigned char* src, void** dst, int rows, int cols) { + //unsigned char* cudaSrc; + //cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char)); + //cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice); - float* cudaDst; - cudaMalloc(&cudaDst, rows*cols*sizeof(float)); + //float* cudaDst; + //cudaMalloc(&cudaDst, rows*cols*sizeof(float)); + + cudaMalloc(dst, rows*cols*sizeof(float)); dim3 threadsPerBlock(8, 8); dim3 blocks( @@ -28,10 +30,7 @@ namespace br { namespace cuda { namespace cudacvtfloat { rows / threadsPerBlock.y + 1 ); - kernel<<>>(cudaSrc, cudaDst, rows, cols); - - // copy the data back to the destination - cudaMemcpy(dst, cudaDst, rows*cols*sizeof(float), cudaMemcpyDeviceToHost); + kernel<<>>(src, (float*)(*dst), rows, cols); } }}} diff --git a/openbr/plugins/cuda/cudacvtfloat.hpp b/openbr/plugins/cuda/cudacvtfloat.hpp deleted file mode 100644 index d2fe82a..0000000 --- a/openbr/plugins/cuda/cudacvtfloat.hpp +++ /dev/null @@ -1,3 +0,0 @@ -namespace br { namespace cuda { namespace cudacvtfloat { - void wrapper(const unsigned char* src, float* dst, int rows, int cols); -}}}