Commit a2eadb178f1efa3714e2386f0b45490b0e4fc97b
1 parent
5e16788d
processing can now be done totally on the graphics card
Showing
5 changed files
with
48 additions
and
27 deletions
openbr/plugins/cuda/copyfrom.cpp
| @@ -11,7 +11,7 @@ using namespace cv; | @@ -11,7 +11,7 @@ using namespace cv; | ||
| 11 | // extern CUDA declaration | 11 | // extern CUDA declaration |
| 12 | namespace br { namespace cuda { namespace cudacopyfrom { | 12 | namespace br { namespace cuda { namespace cudacopyfrom { |
| 13 | //template <typename T> void wrapper(void* src, T* out, int rows, int cols) { | 13 | //template <typename T> void wrapper(void* src, T* out, int rows, int cols) { |
| 14 | - void wrapper(void* src, unsigned char* out, const int rows, const int cols); | 14 | + void wrapper(void* src, float* out, const int rows, const int cols); |
| 15 | }}} | 15 | }}} |
| 16 | 16 | ||
| 17 | namespace br | 17 | namespace br |
| @@ -23,7 +23,7 @@ namespace br | @@ -23,7 +23,7 @@ namespace br | ||
| 23 | private: | 23 | private: |
| 24 | void project(const Template &src, Template &dst) const | 24 | void project(const Template &src, Template &dst) const |
| 25 | { | 25 | { |
| 26 | - cout << "CUDACopyFrom Start" << endl; | 26 | + cout << "CUDACopyFrom Start" << endl << endl << endl; |
| 27 | 27 | ||
| 28 | // pull the data back out of the Mat | 28 | // pull the data back out of the Mat |
| 29 | void* const* dataPtr = src.m().ptr<void*>(); | 29 | void* const* dataPtr = src.m().ptr<void*>(); |
| @@ -32,16 +32,26 @@ private: | @@ -32,16 +32,26 @@ private: | ||
| 32 | int cols = *((int*)dataPtr[2]); | 32 | int cols = *((int*)dataPtr[2]); |
| 33 | int type = *((int*)dataPtr[3]); | 33 | int type = *((int*)dataPtr[3]); |
| 34 | 34 | ||
| 35 | + if (type != CV_32FC1) { | ||
| 36 | + cout << "ERR: Invalid data type!" << endl; | ||
| 37 | + return; | ||
| 38 | + } | ||
| 39 | + | ||
| 35 | cout << "cudaMemPtr: " << cudaMemPtr << endl; | 40 | cout << "cudaMemPtr: " << cudaMemPtr << endl; |
| 36 | cout << "rows: " << rows << endl; | 41 | cout << "rows: " << rows << endl; |
| 37 | cout << "cols: " << cols << endl; | 42 | cout << "cols: " << cols << endl; |
| 38 | cout << "type: " << type << endl; | 43 | cout << "type: " << type << endl; |
| 39 | 44 | ||
| 40 | - dst = Mat(rows, cols, type); | ||
| 41 | - | ||
| 42 | - br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dst.m().ptr<unsigned char>(), rows, cols); | 45 | + Mat dstMat = Mat(rows, cols, type); |
| 46 | + br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dstMat.ptr<float>(), rows, cols); | ||
| 47 | + dst = dstMat; | ||
| 43 | 48 | ||
| 44 | cout << "CUDACopyFrom End" << endl; | 49 | cout << "CUDACopyFrom End" << endl; |
| 50 | + | ||
| 51 | + cout << "DST Data" << endl; | ||
| 52 | + cout << "rows: " << dstMat.rows << endl; | ||
| 53 | + cout << "cols: " << dstMat.cols << endl; | ||
| 54 | + cout << "type: " << dstMat.type() << endl; | ||
| 45 | } | 55 | } |
| 46 | }; | 56 | }; |
| 47 | 57 |
openbr/plugins/cuda/copyfrom.cu
| 1 | namespace br { namespace cuda { namespace cudacopyfrom { | 1 | namespace br { namespace cuda { namespace cudacopyfrom { |
| 2 | //template <typename T> void wrapper(void* src, T* out, int rows, int cols) { | 2 | //template <typename T> void wrapper(void* src, T* out, int rows, int cols) { |
| 3 | - void wrapper(void* src, unsigned char* out, const int rows, const int cols) { | ||
| 4 | - cudaMemcpy(out, src, rows*cols*sizeof(unsigned char), cudaMemcpyDeviceToHost); | 3 | + void wrapper(void* src, float* dst, const int rows, const int cols) { |
| 4 | + cudaMemcpy(dst, src, rows*cols*sizeof(float), cudaMemcpyDeviceToHost); | ||
| 5 | cudaFree(src); | 5 | cudaFree(src); |
| 6 | } | 6 | } |
| 7 | }}} | 7 | }}} |
openbr/plugins/cuda/cudacvtfloat.cpp
| @@ -7,7 +7,10 @@ using namespace cv; | @@ -7,7 +7,10 @@ using namespace cv; | ||
| 7 | 7 | ||
| 8 | #include <openbr/plugins/openbr_internal.h> | 8 | #include <openbr/plugins/openbr_internal.h> |
| 9 | 9 | ||
| 10 | -#include "cudacvtfloat.hpp" | 10 | + |
| 11 | +namespace br { namespace cuda { namespace cudacvtfloat { | ||
| 12 | + void wrapper(const unsigned char* src, void** dst, int rows, int cols); | ||
| 13 | +}}} | ||
| 11 | 14 | ||
| 12 | namespace br | 15 | namespace br |
| 13 | { | 16 | { |
| @@ -24,20 +27,32 @@ class CUDACvtFloatTransform : public UntrainableTransform | @@ -24,20 +27,32 @@ class CUDACvtFloatTransform : public UntrainableTransform | ||
| 24 | public: | 27 | public: |
| 25 | void project(const Template &src, Template &dst) const | 28 | void project(const Template &src, Template &dst) const |
| 26 | { | 29 | { |
| 30 | + cout << "CUDACvtFloat Start" << endl; | ||
| 31 | + | ||
| 32 | + void* const* srcDataPtr = src.m().ptr<void*>(); | ||
| 33 | + void* srcMemPtr = srcDataPtr[0]; | ||
| 34 | + int rows = *((int*)srcDataPtr[1]); | ||
| 35 | + int cols = *((int*)srcDataPtr[2]); | ||
| 36 | + int type = *((int*)srcDataPtr[3]); | ||
| 37 | + | ||
| 27 | // assume the image type is 256-monochrome | 38 | // assume the image type is 256-monochrome |
| 28 | // TODO(colin): real exception handling | 39 | // TODO(colin): real exception handling |
| 29 | - if (src.m().type() != CV_8UC1) { | 40 | + if (type != CV_8UC1) { |
| 30 | cout << "ERR: Invalid memory format" << endl; | 41 | cout << "ERR: Invalid memory format" << endl; |
| 31 | return; | 42 | return; |
| 32 | } | 43 | } |
| 33 | 44 | ||
| 45 | + // build the destination mat | ||
| 46 | + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | ||
| 47 | + void** dstDataPtr = dstMat.ptr<void*>(); | ||
| 48 | + dstDataPtr[1] = srcDataPtr[1]; | ||
| 49 | + dstDataPtr[2] = srcDataPtr[2]; | ||
| 50 | + dstDataPtr[3] = srcDataPtr[3]; *((int*)dstDataPtr[3]) = CV_32FC1; | ||
| 34 | 51 | ||
| 35 | - int rows = src.m().rows; | ||
| 36 | - int cols = src.m().cols; | ||
| 37 | - | ||
| 38 | - dst = Mat(rows, cols, CV_32FC1); | 52 | + br::cuda::cudacvtfloat::wrapper((const unsigned char*)srcMemPtr, &dstDataPtr[0], rows, cols); |
| 53 | + dst = dstMat; | ||
| 39 | 54 | ||
| 40 | - br::cuda::cudacvtfloat::wrapper((const unsigned char*)src.m().ptr<unsigned char>(), dst.m().ptr<float>(), rows, cols); | 55 | + cout << "CUDACvtFloat End" << endl; |
| 41 | } | 56 | } |
| 42 | }; | 57 | }; |
| 43 | 58 |
openbr/plugins/cuda/cudacvtfloat.cu
| @@ -14,13 +14,15 @@ namespace br { namespace cuda { namespace cudacvtfloat { | @@ -14,13 +14,15 @@ namespace br { namespace cuda { namespace cudacvtfloat { | ||
| 14 | dst[index] = (float)src[index]; | 14 | dst[index] = (float)src[index]; |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | - void wrapper(const unsigned char* src, float* dst, int rows, int cols) { | ||
| 18 | - unsigned char* cudaSrc; | ||
| 19 | - cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char)); | ||
| 20 | - cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice); | 17 | + void wrapper(const unsigned char* src, void** dst, int rows, int cols) { |
| 18 | + //unsigned char* cudaSrc; | ||
| 19 | + //cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char)); | ||
| 20 | + //cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice); | ||
| 21 | 21 | ||
| 22 | - float* cudaDst; | ||
| 23 | - cudaMalloc(&cudaDst, rows*cols*sizeof(float)); | 22 | + //float* cudaDst; |
| 23 | + //cudaMalloc(&cudaDst, rows*cols*sizeof(float)); | ||
| 24 | + | ||
| 25 | + cudaMalloc(dst, rows*cols*sizeof(float)); | ||
| 24 | 26 | ||
| 25 | dim3 threadsPerBlock(8, 8); | 27 | dim3 threadsPerBlock(8, 8); |
| 26 | dim3 blocks( | 28 | dim3 blocks( |
| @@ -28,10 +30,7 @@ namespace br { namespace cuda { namespace cudacvtfloat { | @@ -28,10 +30,7 @@ namespace br { namespace cuda { namespace cudacvtfloat { | ||
| 28 | rows / threadsPerBlock.y + 1 | 30 | rows / threadsPerBlock.y + 1 |
| 29 | ); | 31 | ); |
| 30 | 32 | ||
| 31 | - kernel<<<threadsPerBlock, blocks>>>(cudaSrc, cudaDst, rows, cols); | ||
| 32 | - | ||
| 33 | - // copy the data back to the destination | ||
| 34 | - cudaMemcpy(dst, cudaDst, rows*cols*sizeof(float), cudaMemcpyDeviceToHost); | 33 | + kernel<<<threadsPerBlock, blocks>>>(src, (float*)(*dst), rows, cols); |
| 35 | } | 34 | } |
| 36 | 35 | ||
| 37 | }}} | 36 | }}} |
openbr/plugins/cuda/cudacvtfloat.hpp deleted