Commit a2eadb178f1efa3714e2386f0b45490b0e4fc97b

Authored by DepthDeluxe
1 parent 5e16788d

processing can now be done totally on the graphics card

openbr/plugins/cuda/copyfrom.cpp
@@ -11,7 +11,7 @@ using namespace cv; @@ -11,7 +11,7 @@ using namespace cv;
11 // extern CUDA declaration 11 // extern CUDA declaration
12 namespace br { namespace cuda { namespace cudacopyfrom { 12 namespace br { namespace cuda { namespace cudacopyfrom {
13 //template <typename T> void wrapper(void* src, T* out, int rows, int cols) { 13 //template <typename T> void wrapper(void* src, T* out, int rows, int cols) {
14 - void wrapper(void* src, unsigned char* out, const int rows, const int cols); 14 + void wrapper(void* src, float* out, const int rows, const int cols);
15 }}} 15 }}}
16 16
17 namespace br 17 namespace br
@@ -23,7 +23,7 @@ namespace br @@ -23,7 +23,7 @@ namespace br
23 private: 23 private:
24 void project(const Template &src, Template &dst) const 24 void project(const Template &src, Template &dst) const
25 { 25 {
26 - cout << "CUDACopyFrom Start" << endl; 26 + cout << "CUDACopyFrom Start" << endl << endl << endl;
27 27
28 // pull the data back out of the Mat 28 // pull the data back out of the Mat
29 void* const* dataPtr = src.m().ptr<void*>(); 29 void* const* dataPtr = src.m().ptr<void*>();
@@ -32,16 +32,26 @@ private: @@ -32,16 +32,26 @@ private:
32 int cols = *((int*)dataPtr[2]); 32 int cols = *((int*)dataPtr[2]);
33 int type = *((int*)dataPtr[3]); 33 int type = *((int*)dataPtr[3]);
34 34
  35 + if (type != CV_32FC1) {
  36 + cout << "ERR: Invalid data type!" << endl;
  37 + return;
  38 + }
  39 +
35 cout << "cudaMemPtr: " << cudaMemPtr << endl; 40 cout << "cudaMemPtr: " << cudaMemPtr << endl;
36 cout << "rows: " << rows << endl; 41 cout << "rows: " << rows << endl;
37 cout << "cols: " << cols << endl; 42 cout << "cols: " << cols << endl;
38 cout << "type: " << type << endl; 43 cout << "type: " << type << endl;
39 44
40 - dst = Mat(rows, cols, type);  
41 -  
42 - br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dst.m().ptr<unsigned char>(), rows, cols); 45 + Mat dstMat = Mat(rows, cols, type);
  46 + br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dstMat.ptr<float>(), rows, cols);
  47 + dst = dstMat;
43 48
44 cout << "CUDACopyFrom End" << endl; 49 cout << "CUDACopyFrom End" << endl;
  50 +
  51 + cout << "DST Data" << endl;
  52 + cout << "rows: " << dstMat.rows << endl;
  53 + cout << "cols: " << dstMat.cols << endl;
  54 + cout << "type: " << dstMat.type() << endl;
45 } 55 }
46 }; 56 };
47 57
openbr/plugins/cuda/copyfrom.cu
1 namespace br { namespace cuda { namespace cudacopyfrom { 1 namespace br { namespace cuda { namespace cudacopyfrom {
2 //template <typename T> void wrapper(void* src, T* out, int rows, int cols) { 2 //template <typename T> void wrapper(void* src, T* out, int rows, int cols) {
3 - void wrapper(void* src, unsigned char* out, const int rows, const int cols) {  
4 - cudaMemcpy(out, src, rows*cols*sizeof(unsigned char), cudaMemcpyDeviceToHost); 3 + void wrapper(void* src, float* dst, const int rows, const int cols) {
  4 + cudaMemcpy(dst, src, rows*cols*sizeof(float), cudaMemcpyDeviceToHost);
5 cudaFree(src); 5 cudaFree(src);
6 } 6 }
7 }}} 7 }}}
openbr/plugins/cuda/cudacvtfloat.cpp
@@ -7,7 +7,10 @@ using namespace cv; @@ -7,7 +7,10 @@ using namespace cv;
7 7
8 #include <openbr/plugins/openbr_internal.h> 8 #include <openbr/plugins/openbr_internal.h>
9 9
10 -#include "cudacvtfloat.hpp" 10 +
  11 +namespace br { namespace cuda { namespace cudacvtfloat {
  12 + void wrapper(const unsigned char* src, void** dst, int rows, int cols);
  13 +}}}
11 14
12 namespace br 15 namespace br
13 { 16 {
@@ -24,20 +27,32 @@ class CUDACvtFloatTransform : public UntrainableTransform @@ -24,20 +27,32 @@ class CUDACvtFloatTransform : public UntrainableTransform
24 public: 27 public:
25 void project(const Template &src, Template &dst) const 28 void project(const Template &src, Template &dst) const
26 { 29 {
  30 + cout << "CUDACvtFloat Start" << endl;
  31 +
  32 + void* const* srcDataPtr = src.m().ptr<void*>();
  33 + void* srcMemPtr = srcDataPtr[0];
  34 + int rows = *((int*)srcDataPtr[1]);
  35 + int cols = *((int*)srcDataPtr[2]);
  36 + int type = *((int*)srcDataPtr[3]);
  37 +
27 // assume the image type is 256-monochrome 38 // assume the image type is 256-monochrome
28 // TODO(colin): real exception handling 39 // TODO(colin): real exception handling
29 - if (src.m().type() != CV_8UC1) { 40 + if (type != CV_8UC1) {
30 cout << "ERR: Invalid memory format" << endl; 41 cout << "ERR: Invalid memory format" << endl;
31 return; 42 return;
32 } 43 }
33 44
  45 + // build the destination mat
  46 + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
  47 + void** dstDataPtr = dstMat.ptr<void*>();
  48 + dstDataPtr[1] = srcDataPtr[1];
  49 + dstDataPtr[2] = srcDataPtr[2];
  50 + dstDataPtr[3] = srcDataPtr[3]; *((int*)dstDataPtr[3]) = CV_32FC1;
34 51
35 - int rows = src.m().rows;  
36 - int cols = src.m().cols;  
37 -  
38 - dst = Mat(rows, cols, CV_32FC1); 52 + br::cuda::cudacvtfloat::wrapper((const unsigned char*)srcMemPtr, &dstDataPtr[0], rows, cols);
  53 + dst = dstMat;
39 54
40 - br::cuda::cudacvtfloat::wrapper((const unsigned char*)src.m().ptr<unsigned char>(), dst.m().ptr<float>(), rows, cols); 55 + cout << "CUDACvtFloat End" << endl;
41 } 56 }
42 }; 57 };
43 58
openbr/plugins/cuda/cudacvtfloat.cu
@@ -14,13 +14,15 @@ namespace br { namespace cuda { namespace cudacvtfloat { @@ -14,13 +14,15 @@ namespace br { namespace cuda { namespace cudacvtfloat {
14 dst[index] = (float)src[index]; 14 dst[index] = (float)src[index];
15 } 15 }
16 16
17 - void wrapper(const unsigned char* src, float* dst, int rows, int cols) {  
18 - unsigned char* cudaSrc;  
19 - cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char));  
20 - cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice); 17 + void wrapper(const unsigned char* src, void** dst, int rows, int cols) {
  18 + //unsigned char* cudaSrc;
  19 + //cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char));
  20 + //cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice);
21 21
22 - float* cudaDst;  
23 - cudaMalloc(&cudaDst, rows*cols*sizeof(float)); 22 + //float* cudaDst;
  23 + //cudaMalloc(&cudaDst, rows*cols*sizeof(float));
  24 +
  25 + cudaMalloc(dst, rows*cols*sizeof(float));
24 26
25 dim3 threadsPerBlock(8, 8); 27 dim3 threadsPerBlock(8, 8);
26 dim3 blocks( 28 dim3 blocks(
@@ -28,10 +30,7 @@ namespace br { namespace cuda { namespace cudacvtfloat { @@ -28,10 +30,7 @@ namespace br { namespace cuda { namespace cudacvtfloat {
28 rows / threadsPerBlock.y + 1 30 rows / threadsPerBlock.y + 1
29 ); 31 );
30 32
31 - kernel<<<threadsPerBlock, blocks>>>(cudaSrc, cudaDst, rows, cols);  
32 -  
33 - // copy the data back to the destination  
34 - cudaMemcpy(dst, cudaDst, rows*cols*sizeof(float), cudaMemcpyDeviceToHost); 33 + kernel<<<threadsPerBlock, blocks>>>(src, (float*)(*dst), rows, cols);
35 } 34 }
36 35
37 }}} 36 }}}
openbr/plugins/cuda/cudacvtfloat.hpp deleted
1 -namespace br { namespace cuda { namespace cudacvtfloat {  
2 - void wrapper(const unsigned char* src, float* dst, int rows, int cols);  
3 -}}}