Commit a2eadb178f1efa3714e2386f0b45490b0e4fc97b

Authored by DepthDeluxe
1 parent 5e16788d

processing can now be done totally on the graphics card

openbr/plugins/cuda/copyfrom.cpp
... ... @@ -11,7 +11,7 @@ using namespace cv;
11 11 // extern CUDA declaration
12 12 namespace br { namespace cuda { namespace cudacopyfrom {
13 13 //template <typename T> void wrapper(void* src, T* out, int rows, int cols) {
14   - void wrapper(void* src, unsigned char* out, const int rows, const int cols);
  14 + void wrapper(void* src, float* out, const int rows, const int cols);
15 15 }}}
16 16  
17 17 namespace br
... ... @@ -23,7 +23,7 @@ namespace br
23 23 private:
24 24 void project(const Template &src, Template &dst) const
25 25 {
26   - cout << "CUDACopyFrom Start" << endl;
  26 + cout << "CUDACopyFrom Start" << endl << endl << endl;
27 27  
28 28 // pull the data back out of the Mat
29 29 void* const* dataPtr = src.m().ptr<void*>();
... ... @@ -32,16 +32,26 @@ private:
32 32 int cols = *((int*)dataPtr[2]);
33 33 int type = *((int*)dataPtr[3]);
34 34  
  35 + if (type != CV_32FC1) {
  36 + cout << "ERR: Invalid data type!" << endl;
  37 + return;
  38 + }
  39 +
35 40 cout << "cudaMemPtr: " << cudaMemPtr << endl;
36 41 cout << "rows: " << rows << endl;
37 42 cout << "cols: " << cols << endl;
38 43 cout << "type: " << type << endl;
39 44  
40   - dst = Mat(rows, cols, type);
41   -
42   - br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dst.m().ptr<unsigned char>(), rows, cols);
  45 + Mat dstMat = Mat(rows, cols, type);
  46 + br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dstMat.ptr<float>(), rows, cols);
  47 + dst = dstMat;
43 48  
44 49 cout << "CUDACopyFrom End" << endl;
  50 +
  51 + cout << "DST Data" << endl;
  52 + cout << "rows: " << dstMat.rows << endl;
  53 + cout << "cols: " << dstMat.cols << endl;
  54 + cout << "type: " << dstMat.type() << endl;
45 55 }
46 56 };
47 57  
... ...
openbr/plugins/cuda/copyfrom.cu
1 1 namespace br { namespace cuda { namespace cudacopyfrom {
2 2 //template <typename T> void wrapper(void* src, T* out, int rows, int cols) {
3   - void wrapper(void* src, unsigned char* out, const int rows, const int cols) {
4   - cudaMemcpy(out, src, rows*cols*sizeof(unsigned char), cudaMemcpyDeviceToHost);
  3 + void wrapper(void* src, float* dst, const int rows, const int cols) {
  4 + cudaMemcpy(dst, src, rows*cols*sizeof(float), cudaMemcpyDeviceToHost);
5 5 cudaFree(src);
6 6 }
7 7 }}}
... ...
openbr/plugins/cuda/cudacvtfloat.cpp
... ... @@ -7,7 +7,10 @@ using namespace cv;
7 7  
8 8 #include <openbr/plugins/openbr_internal.h>
9 9  
10   -#include "cudacvtfloat.hpp"
  10 +
  11 +namespace br { namespace cuda { namespace cudacvtfloat {
  12 + void wrapper(const unsigned char* src, void** dst, int rows, int cols);
  13 +}}}
11 14  
12 15 namespace br
13 16 {
... ... @@ -24,20 +27,32 @@ class CUDACvtFloatTransform : public UntrainableTransform
24 27 public:
25 28 void project(const Template &src, Template &dst) const
26 29 {
  30 + cout << "CUDACvtFloat Start" << endl;
  31 +
  32 + void* const* srcDataPtr = src.m().ptr<void*>();
  33 + void* srcMemPtr = srcDataPtr[0];
  34 + int rows = *((int*)srcDataPtr[1]);
  35 + int cols = *((int*)srcDataPtr[2]);
  36 + int type = *((int*)srcDataPtr[3]);
  37 +
27 38 // assume the image type is 256-monochrome
28 39 // TODO(colin): real exception handling
29   - if (src.m().type() != CV_8UC1) {
  40 + if (type != CV_8UC1) {
30 41 cout << "ERR: Invalid memory format" << endl;
31 42 return;
32 43 }
33 44  
  45 + // build the destination mat
  46 + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
  47 + void** dstDataPtr = dstMat.ptr<void*>();
  48 + dstDataPtr[1] = srcDataPtr[1];
  49 + dstDataPtr[2] = srcDataPtr[2];
  50 + dstDataPtr[3] = srcDataPtr[3]; *((int*)dstDataPtr[3]) = CV_32FC1;
34 51  
35   - int rows = src.m().rows;
36   - int cols = src.m().cols;
37   -
38   - dst = Mat(rows, cols, CV_32FC1);
  52 + br::cuda::cudacvtfloat::wrapper((const unsigned char*)srcMemPtr, &dstDataPtr[0], rows, cols);
  53 + dst = dstMat;
39 54  
40   - br::cuda::cudacvtfloat::wrapper((const unsigned char*)src.m().ptr<unsigned char>(), dst.m().ptr<float>(), rows, cols);
  55 + cout << "CUDACvtFloat End" << endl;
41 56 }
42 57 };
43 58  
... ...
openbr/plugins/cuda/cudacvtfloat.cu
... ... @@ -14,13 +14,15 @@ namespace br { namespace cuda { namespace cudacvtfloat {
14 14 dst[index] = (float)src[index];
15 15 }
16 16  
17   - void wrapper(const unsigned char* src, float* dst, int rows, int cols) {
18   - unsigned char* cudaSrc;
19   - cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char));
20   - cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice);
  17 + void wrapper(const unsigned char* src, void** dst, int rows, int cols) {
  18 + //unsigned char* cudaSrc;
  19 + //cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char));
  20 + //cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice);
21 21  
22   - float* cudaDst;
23   - cudaMalloc(&cudaDst, rows*cols*sizeof(float));
  22 + //float* cudaDst;
  23 + //cudaMalloc(&cudaDst, rows*cols*sizeof(float));
  24 +
  25 + cudaMalloc(dst, rows*cols*sizeof(float));
24 26  
25 27 dim3 threadsPerBlock(8, 8);
26 28 dim3 blocks(
... ... @@ -28,10 +30,7 @@ namespace br { namespace cuda { namespace cudacvtfloat {
28 30 rows / threadsPerBlock.y + 1
29 31 );
30 32  
31   - kernel<<<threadsPerBlock, blocks>>>(cudaSrc, cudaDst, rows, cols);
32   -
33   - // copy the data back to the destination
34   - cudaMemcpy(dst, cudaDst, rows*cols*sizeof(float), cudaMemcpyDeviceToHost);
  33 + kernel<<<threadsPerBlock, blocks>>>(src, (float*)(*dst), rows, cols);
35 34 }
36 35  
37 36 }}}
... ...
openbr/plugins/cuda/cudacvtfloat.hpp deleted
1   -namespace br { namespace cuda { namespace cudacvtfloat {
2   - void wrapper(const unsigned char* src, float* dst, int rows, int cols);
3   -}}}