Commit 1dabf42dad2e33b7a9e3d11b4142a98fd53c5f9d

Authored by DepthDeluxe
1 parent e600a994

code refactoring for pull request

openbr/plugins/cuda/MatManager.cu renamed to openbr/core/cuda/MatManager.cu
openbr/plugins/cuda/MatManager.hpp renamed to openbr/core/cuda/MatManager.hpp
openbr/plugins/cuda/copyfrom.cpp
@@ -2,21 +2,25 @@ @@ -2,21 +2,25 @@
2 2
3 #include <opencv2/opencv.hpp> 3 #include <opencv2/opencv.hpp>
4 4
5 -//#include <gperftools/profiler.h>  
6 -  
7 #include <openbr/plugins/openbr_internal.h> 5 #include <openbr/plugins/openbr_internal.h>
8 6
9 using namespace std; 7 using namespace std;
10 8
11 using namespace cv; 9 using namespace cv;
12 10
13 -// extern CUDA declaration  
14 -namespace br { namespace cuda { namespace cudacopyfrom { 11 +// CUDA functions for this plugin
  12 +namespace br { namespace cuda { namespace copyfrom {
15 template <typename T> void wrapper(void* src, T* out, int rows, int cols); 13 template <typename T> void wrapper(void* src, T* out, int rows, int cols);
16 }}} 14 }}}
17 15
18 namespace br 16 namespace br
19 { 17 {
  18 + /*!
  19 + * \ingroup transforms
  20 + * \brief Copies a transform from the GPU to the CPU.
  21 + * \author Colin Heinzmann \cite DepthDeluxe
  22 + * \note Method: Automatically matches image dimensions, works for 32-bit single channel, 8-bit single channel, and 8-bit 3 channel
  23 + */
20 class CUDACopyFrom : public UntrainableTransform 24 class CUDACopyFrom : public UntrainableTransform
21 { 25 {
22 Q_OBJECT 26 Q_OBJECT
@@ -24,8 +28,6 @@ namespace br @@ -24,8 +28,6 @@ namespace br
24 private: 28 private:
25 void project(const Template &src, Template &dst) const 29 void project(const Template &src, Template &dst) const
26 { 30 {
27 -// ProfilerStart("PROFILEME.log");  
28 -  
29 // pull the data back out of the Mat 31 // pull the data back out of the Mat
30 void* const* dataPtr = src.m().ptr<void*>(); 32 void* const* dataPtr = src.m().ptr<void*>();
31 int rows = *((int*)dataPtr[1]); 33 int rows = *((int*)dataPtr[1]);
@@ -35,21 +37,19 @@ private: @@ -35,21 +37,19 @@ private:
35 Mat dstMat = Mat(rows, cols, type); 37 Mat dstMat = Mat(rows, cols, type);
36 switch(type) { 38 switch(type) {
37 case CV_32FC1: 39 case CV_32FC1:
38 - br::cuda::cudacopyfrom::wrapper(dataPtr[0], dstMat.ptr<float>(), rows, cols); 40 + cuda::copyfrom::wrapper(dataPtr[0], dstMat.ptr<float>(), rows, cols);
39 break; 41 break;
40 case CV_8UC1: 42 case CV_8UC1:
41 - br::cuda::cudacopyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols); 43 + cuda::copyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols);
42 break; 44 break;
43 case CV_8UC3: 45 case CV_8UC3:
44 - br::cuda::cudacopyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols * 3); 46 + cuda::copyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols * 3);
45 break; 47 break;
46 default: 48 default:
47 - cout << "ERR: Invalid image format" << endl; 49 + cout << "ERR: Invalid image type (" << type << ")" << endl;
48 break; 50 break;
49 } 51 }
50 dst = dstMat; 52 dst = dstMat;
51 -  
52 -// ProfilerStop();  
53 } 53 }
54 }; 54 };
55 55
openbr/plugins/cuda/copyfrom.cu
1 #include "cudadefines.hpp" 1 #include "cudadefines.hpp"
2 2
3 -namespace br { namespace cuda { namespace cudacopyfrom { 3 +namespace br { namespace cuda { namespace copyfrom {
4 template <typename T> void wrapper(void* src, T* dst, int rows, int cols) { 4 template <typename T> void wrapper(void* src, T* dst, int rows, int cols) {
5 cudaError_t err; 5 cudaError_t err;
6 CUDA_SAFE_MEMCPY(dst, src, rows*cols*sizeof(T), cudaMemcpyDeviceToHost, &err); 6 CUDA_SAFE_MEMCPY(dst, src, rows*cols*sizeof(T), cudaMemcpyDeviceToHost, &err);
openbr/plugins/cuda/copyto.cpp
@@ -8,14 +8,20 @@ using namespace std; @@ -8,14 +8,20 @@ using namespace std;
8 8
9 using namespace cv; 9 using namespace cv;
10 10
11 -extern string type2str(int type);  
12 -  
13 -namespace br { namespace cuda { namespace cudacopyto { 11 +// definitions from the CUDA source file
  12 +namespace br { namespace cuda { namespace copyto {
14 template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols); 13 template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols);
15 }}} 14 }}}
16 15
17 namespace br 16 namespace br
18 { 17 {
  18 +
  19 + /*!
  20 + * \ingroup transforms
  21 + * \brief Copies a transform to the GPU.
  22 + * \author Colin Heinzmann \cite DepthDeluxe
  23 + * \note Method: Automatically matches image dimensions, works for 32-bit single channel, 8-bit single channel, and 8-bit 3 channel
  24 + */
19 class CUDACopyTo : public UntrainableTransform 25 class CUDACopyTo : public UntrainableTransform
20 { 26 {
21 Q_OBJECT 27 Q_OBJECT
@@ -25,7 +31,7 @@ private: @@ -25,7 +31,7 @@ private:
25 { 31 {
26 const Mat& srcMat = src.m(); 32 const Mat& srcMat = src.m();
27 const int rows = srcMat.rows; 33 const int rows = srcMat.rows;
28 - const int cols = srcMat.cols; 34 + const int cols = srcMat.cols;
29 35
30 // output will be a single pointer to graphics card memory 36 // output will be a single pointer to graphics card memory
31 Mat dstMat = Mat(4, 1, DataType<void*>::type); 37 Mat dstMat = Mat(4, 1, DataType<void*>::type);
@@ -39,16 +45,16 @@ private: @@ -39,16 +45,16 @@ private:
39 void* cudaMemPtr; 45 void* cudaMemPtr;
40 switch(srcMat.type()) { 46 switch(srcMat.type()) {
41 case CV_32FC1: 47 case CV_32FC1:
42 - br::cuda::cudacopyto::wrapper(srcMat.ptr<float>(), &dstMatData[0], rows, cols); 48 + cuda::copyto::wrapper(srcMat.ptr<float>(), &dstMatData[0], rows, cols);
43 break; 49 break;
44 case CV_8UC1: 50 case CV_8UC1:
45 - br::cuda::cudacopyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, cols); 51 + cuda::copyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, cols);
46 break; 52 break;
47 case CV_8UC3: 53 case CV_8UC3:
48 - br::cuda::cudacopyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, 3*cols); 54 + cuda::copyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, 3*cols);
49 break; 55 break;
50 default: 56 default:
51 - cout << "ERR: Invalid image type! " << type2str(srcMat.type()) << endl; 57 + cout << "ERR: Invalid image type (" << srcMat.type() << ")" << endl;
52 return; 58 return;
53 } 59 }
54 60
openbr/plugins/cuda/copyto.cu
1 #include "cudadefines.hpp" 1 #include "cudadefines.hpp"
2 2
3 -namespace br { namespace cuda { namespace cudacopyto { 3 +namespace br { namespace cuda { namespace copyto {
  4 +
4 template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols) { 5 template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols) {
5 cudaError_t err; 6 cudaError_t err;
6 CUDA_SAFE_MALLOC(out, rows*cols*sizeof(T), &err); 7 CUDA_SAFE_MALLOC(out, rows*cols*sizeof(T), &err);
@@ -9,4 +10,5 @@ namespace br { namespace cuda { namespace cudacopyto { @@ -9,4 +10,5 @@ namespace br { namespace cuda { namespace cudacopyto {
9 10
10 template void wrapper(const float* in, void** out, const int rows, const int cols); 11 template void wrapper(const float* in, void** out, const int rows, const int cols);
11 template void wrapper(const unsigned char* in, void** out, const int rows, const int cols); 12 template void wrapper(const unsigned char* in, void** out, const int rows, const int cols);
  13 +
12 }}} 14 }}}
openbr/plugins/cuda/cudaaffine.cpp
@@ -33,159 +33,158 @@ using namespace std; @@ -33,159 +33,158 @@ using namespace std;
33 #include <openbr/plugins/openbr_internal.h> 33 #include <openbr/plugins/openbr_internal.h>
34 #include <openbr/core/opencvutils.h> 34 #include <openbr/core/opencvutils.h>
35 35
36 -#include "MatManager.hpp"  
37 -  
38 using namespace cv; 36 using namespace cv;
39 37
40 -namespace br { namespace cuda {  
41 - void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols);  
42 -}} 38 +// definitions from the CUDA source file
  39 +namespace br { namespace cuda { namespace affine {
  40 + void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols);
  41 +}}}
43 42
44 namespace br 43 namespace br
45 { 44 {
46 45
47 -/*!  
48 - * \ingroup transforms  
49 - * \brief Performs a two or three point registration.  
50 - * \author Josh Klontz \cite jklontz  
51 - * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargment, Bilin for fast enlargement.  
52 - */  
53 -class CUDAAffineTransform : public UntrainableTransform  
54 -{  
55 - Q_OBJECT  
56 - Q_ENUMS(Method)  
57 -  
58 -public:  
59 - /*!< */  
60 - enum Method { Near = INTER_NEAREST,  
61 - Area = INTER_AREA,  
62 - Bilin = INTER_LINEAR,  
63 - Cubic = INTER_CUBIC,  
64 - Lanczo = INTER_LANCZOS4};  
65 -  
66 -private:  
67 - Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false)  
68 - Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false)  
69 - Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false)  
70 - Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false)  
71 - Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false)  
72 - Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false)  
73 - Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false)  
74 - Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false)  
75 - Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false)  
76 - Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false)  
77 - Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false)  
78 - BR_PROPERTY(int, width, 64)  
79 - BR_PROPERTY(int, height, 64)  
80 - BR_PROPERTY(float, x1, 0)  
81 - BR_PROPERTY(float, y1, 0)  
82 - BR_PROPERTY(float, x2, -1)  
83 - BR_PROPERTY(float, y2, -1)  
84 - BR_PROPERTY(float, x3, -1)  
85 - BR_PROPERTY(float, y3, -1)  
86 - BR_PROPERTY(Method, method, Bilin)  
87 - BR_PROPERTY(bool, storeAffine, false)  
88 - BR_PROPERTY(bool, warpPoints, false)  
89 -  
90 - static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b)  
91 - {  
92 - float dx = b.x - a.x;  
93 - float dy = b.y - a.y;  
94 - return Point2f(a.x - dy, a.y + dx);  
95 - }  
96 -  
97 - void project(const Template &src, Template &dst) const  
98 - {  
99 - const bool twoPoints = ((x3 == -1) || (y3 == -1));  
100 -  
101 - Point2f dstPoints[3];  
102 - dstPoints[0] = Point2f(x1*width, y1*height);  
103 - dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height);  
104 - if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]);  
105 - else dstPoints[2] = Point2f(x3*width, y3*height);  
106 -  
107 - Point2f srcPoints[3];  
108 - if (src.file.contains("Affine_0") &&  
109 - src.file.contains("Affine_1") &&  
110 - (src.file.contains("Affine_2") || twoPoints)) {  
111 - srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0"));  
112 - srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1"));  
113 - if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2"));  
114 - } else {  
115 - const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points());  
116 -  
117 - if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) {  
118 - resize(src, dst, Size(width, height));  
119 - return;  
120 - } else {  
121 - srcPoints[0] = landmarks[0];  
122 - srcPoints[1] = landmarks[1];  
123 - if (!twoPoints) srcPoints[2] = landmarks[2];  
124 - }  
125 - }  
126 - if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]);  
127 -  
128 - // Code section being altered (original)  
129 - //  
130 - // Mat affineTransform = getAffineTransform(srcPoints, dstPoints);  
131 - // warpAffine(src, dst, affineTransform, Size(width, height), method);  
132 - //  
133 - // end original  
134 -  
135 - Mat affineTransform = getAffineTransform(srcPoints, dstPoints);  
136 -  
137 - void* const* srcDataPtr = src.m().ptr<void*>();  
138 - int rows = *((int*)srcDataPtr[1]);  
139 - int cols = *((int*)srcDataPtr[2]);  
140 - int type = *((int*)srcDataPtr[3]);  
141 -  
142 - Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());  
143 - void** dstDataPtr = dstMat.ptr<void*>();  
144 -  
145 - dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows  
146 - dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols  
147 - dstDataPtr[3] = srcDataPtr[3];  
148 -  
149 - // Print the transform  
150 - //for(int x = 0; x < affineTransform.rows; x++){  
151 - //for(int y = 0; y < affineTransform.cols; y++){  
152 - //printf("%8.3f\t", affineTransform.at<double>(x, y));  
153 - //}  
154 - //printf("\n");  
155 - //}  
156 -  
157 - br::cuda::cudaaffine_wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width);  
158 -  
159 - // end altered code  
160 -  
161 - if (warpPoints) {  
162 - QList<QPointF> points = src.file.points();  
163 - QList<QPointF> rotatedPoints;  
164 - for (int i=0; i<points.size(); i++) {  
165 - rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+  
166 - points.at(i).y()*affineTransform.at<double>(0,1)+  
167 - affineTransform.at<double>(0,2),  
168 - points.at(i).x()*affineTransform.at<double>(1,0)+  
169 - points.at(i).y()*affineTransform.at<double>(1,1)+  
170 - affineTransform.at<double>(1,2)));  
171 - }  
172 -  
173 - dst.file.setPoints(rotatedPoints);  
174 - }  
175 -  
176 - if (storeAffine) {  
177 - QList<float> affineParams;  
178 - for (int i = 0 ; i < 2; i++)  
179 - for (int j = 0; j < 3; j++)  
180 - affineParams.append(affineTransform.at<double>(i, j));  
181 - dst.file.setList("affineParameters", affineParams);  
182 - }  
183 -  
184 - dst = dstMat;  
185 - }  
186 -};  
187 -  
188 -BR_REGISTER(Transform, CUDAAffineTransform) 46 + /*!
  47 + * \ingroup transforms
  48 + * \brief Performs a two or three point registration on the GPU. Modified from stock OpenBR implementation
  49 + * \author Greg Schrock \cite gls022
  50 + * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargment, Bilin for fast enlargement.
  51 + */
  52 + class CUDAAffineTransform : public UntrainableTransform
  53 + {
  54 + Q_OBJECT
  55 + Q_ENUMS(Method)
  56 +
  57 + public:
  58 + /*!< */
  59 + enum Method { Near = INTER_NEAREST,
  60 + Area = INTER_AREA,
  61 + Bilin = INTER_LINEAR,
  62 + Cubic = INTER_CUBIC,
  63 + Lanczo = INTER_LANCZOS4};
  64 +
  65 + private:
  66 + Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false)
  67 + Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false)
  68 + Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false)
  69 + Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false)
  70 + Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false)
  71 + Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false)
  72 + Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false)
  73 + Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false)
  74 + Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false)
  75 + Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false)
  76 + Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false)
  77 + BR_PROPERTY(int, width, 64)
  78 + BR_PROPERTY(int, height, 64)
  79 + BR_PROPERTY(float, x1, 0)
  80 + BR_PROPERTY(float, y1, 0)
  81 + BR_PROPERTY(float, x2, -1)
  82 + BR_PROPERTY(float, y2, -1)
  83 + BR_PROPERTY(float, x3, -1)
  84 + BR_PROPERTY(float, y3, -1)
  85 + BR_PROPERTY(Method, method, Bilin)
  86 + BR_PROPERTY(bool, storeAffine, false)
  87 + BR_PROPERTY(bool, warpPoints, false)
  88 +
  89 + static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b)
  90 + {
  91 + float dx = b.x - a.x;
  92 + float dy = b.y - a.y;
  93 + return Point2f(a.x - dy, a.y + dx);
  94 + }
  95 +
  96 + void project(const Template &src, Template &dst) const
  97 + {
  98 + const bool twoPoints = ((x3 == -1) || (y3 == -1));
  99 +
  100 + Point2f dstPoints[3];
  101 + dstPoints[0] = Point2f(x1*width, y1*height);
  102 + dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height);
  103 + if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]);
  104 + else dstPoints[2] = Point2f(x3*width, y3*height);
  105 +
  106 + Point2f srcPoints[3];
  107 + if (src.file.contains("Affine_0") &&
  108 + src.file.contains("Affine_1") &&
  109 + (src.file.contains("Affine_2") || twoPoints)) {
  110 + srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0"));
  111 + srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1"));
  112 + if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2"));
  113 + } else {
  114 + const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points());
  115 +
  116 + if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) {
  117 + resize(src, dst, Size(width, height));
  118 + return;
  119 + } else {
  120 + srcPoints[0] = landmarks[0];
  121 + srcPoints[1] = landmarks[1];
  122 + if (!twoPoints) srcPoints[2] = landmarks[2];
  123 + }
  124 + }
  125 + if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]);
  126 +
  127 + // Code section being altered (original)
  128 + //
  129 + // Mat affineTransform = getAffineTransform(srcPoints, dstPoints);
  130 + // warpAffine(src, dst, affineTransform, Size(width, height), method);
  131 + //
  132 + // end original
  133 +
  134 + Mat affineTransform = getAffineTransform(srcPoints, dstPoints);
  135 +
  136 + void* const* srcDataPtr = src.m().ptr<void*>();
  137 + int rows = *((int*)srcDataPtr[1]);
  138 + int cols = *((int*)srcDataPtr[2]);
  139 + int type = *((int*)srcDataPtr[3]);
  140 +
  141 + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
  142 + void** dstDataPtr = dstMat.ptr<void*>();
  143 +
  144 + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows
  145 + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols
  146 + dstDataPtr[3] = srcDataPtr[3];
  147 +
  148 + // Print the transform
  149 + //for(int x = 0; x < affineTransform.rows; x++){
  150 + //for(int y = 0; y < affineTransform.cols; y++){
  151 + //printf("%8.3f\t", affineTransform.at<double>(x, y));
  152 + //}
  153 + //printf("\n");
  154 + //}
  155 +
  156 + cuda::affine::wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width);
  157 +
  158 + // end altered code
  159 +
  160 + if (warpPoints) {
  161 + QList<QPointF> points = src.file.points();
  162 + QList<QPointF> rotatedPoints;
  163 + for (int i=0; i<points.size(); i++) {
  164 + rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+
  165 + points.at(i).y()*affineTransform.at<double>(0,1)+
  166 + affineTransform.at<double>(0,2),
  167 + points.at(i).x()*affineTransform.at<double>(1,0)+
  168 + points.at(i).y()*affineTransform.at<double>(1,1)+
  169 + affineTransform.at<double>(1,2)));
  170 + }
  171 +
  172 + dst.file.setPoints(rotatedPoints);
  173 + }
  174 +
  175 + if (storeAffine) {
  176 + QList<float> affineParams;
  177 + for (int i = 0 ; i < 2; i++)
  178 + for (int j = 0; j < 3; j++)
  179 + affineParams.append(affineTransform.at<double>(i, j));
  180 + dst.file.setList("affineParameters", affineParams);
  181 + }
  182 +
  183 + dst = dstMat;
  184 + }
  185 + };
  186 +
  187 + BR_REGISTER(Transform, CUDAAffineTransform)
189 188
190 } // namespace br 189 } // namespace br
191 190
openbr/plugins/cuda/cudaaffine.cu
@@ -11,30 +11,30 @@ using namespace std; @@ -11,30 +11,30 @@ using namespace std;
11 using namespace cv; 11 using namespace cv;
12 using namespace cv::gpu; 12 using namespace cv::gpu;
13 13
14 -namespace br { namespace cuda { 14 +namespace br { namespace cuda { namespace affine {
15 15
16 - __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { 16 + __device__ __forceinline__ uint8_t getPixelValueDevice(int row, int col, uint8_t* srcPtr, int rows, int cols) {
17 if (row < 0 || row > rows || col < 0 || col > cols) { 17 if (row < 0 || row > rows || col < 0 || col > cols) {
18 if (row > rows || col > cols) { 18 if (row > rows || col > cols) {
19 return 0; 19 return 0;
20 } else{ 20 } else{
21 - return 0; 21 + return 0;
22 } 22 }
23 } 23 }
24 return (srcPtr + row*cols)[col]; 24 return (srcPtr + row*cols)[col];
25 } 25 }
26 26
27 27
28 - __device__ __forceinline__ uint8_t cudaaffine_kernel_get_bilinear_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) { 28 + __device__ __forceinline__ uint8_t getBilinearPixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) {
29 // don't do anything if the index is out of bounds 29 // don't do anything if the index is out of bounds
30 if (row < 0 || row > rows || col < 0 || col > cols) { 30 if (row < 0 || row > rows || col < 0 || col > cols) {
31 if (row > rows || col > cols) { 31 if (row > rows || col > cols) {
32 return 0; 32 return 0;
33 } else{ 33 } else{
34 - return 0; 34 + return 0;
35 } 35 }
36 } 36 }
37 - 37 +
38 // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf 38 // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf
39 // Bilinear Transformation 39 // Bilinear Transformation
40 // f(Px, Py) = f(Q11)ร—(1โˆ’Rx)ร—(1โˆ’Sy)+f(Q21)ร—(Rx)ร—(1โˆ’Sy)+f(Q12)ร—(1โˆ’Rx)ร—(Sy)+f(Q22)ร—(Rx)ร—(Sy) 40 // f(Px, Py) = f(Q11)ร—(1โˆ’Rx)ร—(1โˆ’Sy)+f(Q21)ร—(Rx)ร—(1โˆ’Sy)+f(Q12)ร—(1โˆ’Rx)ร—(Sy)+f(Q22)ร—(Rx)ร—(Sy)
@@ -48,22 +48,22 @@ namespace br { namespace cuda { @@ -48,22 +48,22 @@ namespace br { namespace cuda {
48 double d_row = row - row1; 48 double d_row = row - row1;
49 double d_col = col - col1; 49 double d_col = col - col1;
50 50
51 - int Q11 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols);  
52 - int Q21 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols);  
53 - int Q12 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols);  
54 - int Q22 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols); 51 + int Q11 = getPixelValueDevice(row1, col1, srcPtr, rows, cols);
  52 + int Q21 = getPixelValueDevice(row2, col1, srcPtr, rows, cols);
  53 + int Q12 = getPixelValueDevice(row1, col2, srcPtr, rows, cols);
  54 + int Q22 = getPixelValueDevice(row2, col2, srcPtr, rows, cols);
55 55
56 double val = Q22*(d_row*d_col) + Q12*((1-d_row)*d_col) + Q21*(d_row*(1-d_col)) + Q11*((1-d_row)*(1-d_col)); 56 double val = Q22*(d_row*d_col) + Q12*((1-d_row)*d_col) + Q21*(d_row*(1-d_col)) + Q11*((1-d_row)*(1-d_col));
57 return ((uint8_t) round(val)); 57 return ((uint8_t) round(val));
58 } 58 }
59 59
60 - __device__ __forceinline__ uint8_t cudaaffine_kernel_get_distance_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) { 60 + __device__ __forceinline__ uint8_t getDistancePixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) {
61 // don't do anything if the index is out of bounds 61 // don't do anything if the index is out of bounds
62 if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { 62 if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) {
63 if (row >= rows || col >= cols) { 63 if (row >= rows || col >= cols) {
64 return 0; 64 return 0;
65 } else{ 65 } else{
66 - return 0; 66 + return 0;
67 } 67 }
68 } 68 }
69 69
@@ -90,10 +90,10 @@ namespace br { namespace cuda { @@ -90,10 +90,10 @@ namespace br { namespace cuda {
90 double w3 = d3/sum; 90 double w3 = d3/sum;
91 double w4 = d4/sum; 91 double w4 = d4/sum;
92 92
93 - uint8_t v1 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols);  
94 - uint8_t v2 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols);  
95 - uint8_t v3 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols);  
96 - uint8_t v4 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols); 93 + uint8_t v1 = getPixelValueDevice(row1, col1, srcPtr, rows, cols);
  94 + uint8_t v2 = getPixelValueDevice(row2, col1, srcPtr, rows, cols);
  95 + uint8_t v3 = getPixelValueDevice(row1, col2, srcPtr, rows, cols);
  96 + uint8_t v4 = getPixelValueDevice(row2, col2, srcPtr, rows, cols);
97 97
98 return round(w1*v1 + w2*v2 + w3*v3 + w4*v4); 98 return round(w1*v1 + w2*v2 + w3*v3 + w4*v4);
99 } 99 }
@@ -105,16 +105,16 @@ namespace br { namespace cuda { @@ -105,16 +105,16 @@ namespace br { namespace cuda {
105 * src_row - The computed source pixel row (mapping from this row) 105 * src_row - The computed source pixel row (mapping from this row)
106 * src_col - The computed source pixel column (mapping from this col) 106 * src_col - The computed source pixel column (mapping from this col)
107 */ 107 */
108 - __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){ 108 + __device__ __forceinline__ void getSrcCoordDevice(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){
109 *src_col_pnt = dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]; 109 *src_col_pnt = dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6];
110 *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; 110 *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7];
111 111
112 //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); 112 //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]);
113 113
114 } 114 }
115 -  
116 115
117 - __global__ void cudaaffine_kernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ 116 +
  117 + __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){
118 int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; 118 int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;
119 int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; 119 int dstColInd = blockIdx.x*blockDim.x+threadIdx.x;
120 int dstIndex = dstRowInd*dst_cols + dstColInd; 120 int dstIndex = dstRowInd*dst_cols + dstColInd;
@@ -134,15 +134,15 @@ namespace br { namespace cuda { @@ -134,15 +134,15 @@ namespace br { namespace cuda {
134 } 134 }
135 } 135 }
136 136
137 - cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt);  
138 - //const uint8_t cval = cudaaffine_kernel_get_distance_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value  
139 - const uint8_t cval = cudaaffine_kernel_get_bilinear_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value  
140 - //const uint8_t cval = cudaaffine_kernel_get_pixel_value(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value 137 + getSrcCoordDevice(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt);
  138 + //const uint8_t cval = getDistancePixelValueDevice(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value
  139 + const uint8_t cval = getBilinearPixelValueDevice(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value
  140 + //const uint8_t cval = getPixelValueDevice(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value
141 141
142 dstPtr[dstIndex] = cval; 142 dstPtr[dstIndex] = cval;
143 } 143 }
144 144
145 - void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { 145 + void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) {
146 cudaError_t err; 146 cudaError_t err;
147 double* gpuInverse; 147 double* gpuInverse;
148 148
@@ -152,7 +152,7 @@ namespace br { namespace cuda { @@ -152,7 +152,7 @@ namespace br { namespace cuda {
152 152
153 //************************************************************************ 153 //************************************************************************
154 // Input affine is a 2x3 Mat whose transpose is used in the computations 154 // Input affine is a 2x3 Mat whose transpose is used in the computations
155 - // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ] 155 + // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ]
156 // See "Digital Image Warping" by George Wolburg (p. 50) 156 // See "Digital Image Warping" by George Wolburg (p. 50)
157 //************************************************************************ 157 //************************************************************************
158 158
@@ -210,7 +210,7 @@ namespace br { namespace cuda { @@ -210,7 +210,7 @@ namespace br { namespace cuda {
210 210
211 CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err); 211 CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err);
212 212
213 - cudaaffine_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols); 213 + affineKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols);
214 CUDA_KERNEL_ERR_CHK(&err); 214 CUDA_KERNEL_ERR_CHK(&err);
215 215
216 CUDA_SAFE_FREE(srcPtr, &err); 216 CUDA_SAFE_FREE(srcPtr, &err);
@@ -225,5 +225,4 @@ namespace br { namespace cuda { @@ -225,5 +225,4 @@ namespace br { namespace cuda {
225 // } 225 // }
226 // printf("\n"); 226 // printf("\n");
227 } 227 }
228 -} // end cuda  
229 -} // end br 228 +}}}
openbr/plugins/cuda/cudacvtfloat.cpp
@@ -7,7 +7,8 @@ using namespace cv; @@ -7,7 +7,8 @@ using namespace cv;
7 7
8 #include <openbr/plugins/openbr_internal.h> 8 #include <openbr/plugins/openbr_internal.h>
9 9
10 -namespace br { namespace cuda { namespace cudacvtfloat { 10 +// definitions from the CUDA source file
  11 +namespace br { namespace cuda { namespace cvtfloat {
11 void wrapper(void* src, void** dst, int rows, int cols); 12 void wrapper(void* src, void** dst, int rows, int cols);
12 }}} 13 }}}
13 14
@@ -16,7 +17,7 @@ namespace br @@ -16,7 +17,7 @@ namespace br
16 17
17 /*! 18 /*!
18 * \ingroup transforms 19 * \ingroup transforms
19 - * \brief Converts byte to floating point 20 + * \brief Converts 8-bit images currently on GPU into 32-bit floating point equivalent.
20 * \author Colin Heinzmann \cite DepthDeluxe 21 * \author Colin Heinzmann \cite DepthDeluxe
21 */ 22 */
22 class CUDACvtFloatTransform : public UntrainableTransform 23 class CUDACvtFloatTransform : public UntrainableTransform
@@ -45,7 +46,7 @@ class CUDACvtFloatTransform : public UntrainableTransform @@ -45,7 +46,7 @@ class CUDACvtFloatTransform : public UntrainableTransform
45 dstDataPtr[2] = srcDataPtr[2]; 46 dstDataPtr[2] = srcDataPtr[2];
46 dstDataPtr[3] = srcDataPtr[3]; *((int*)dstDataPtr[3]) = CV_32FC1; 47 dstDataPtr[3] = srcDataPtr[3]; *((int*)dstDataPtr[3]) = CV_32FC1;
47 48
48 - br::cuda::cudacvtfloat::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); 49 + cuda::cvtfloat::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols);
49 dst = dstMat; 50 dst = dstMat;
50 } 51 }
51 }; 52 };
openbr/plugins/cuda/cudacvtfloat.cu
@@ -3,7 +3,7 @@ using namespace std; @@ -3,7 +3,7 @@ using namespace std;
3 3
4 #include "cudadefines.hpp" 4 #include "cudadefines.hpp"
5 5
6 -namespace br { namespace cuda { namespace cudacvtfloat { 6 +namespace br { namespace cuda { namespace cvtfloat {
7 7
8 __global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) { 8 __global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) {
9 // get my index 9 // get my index
@@ -20,12 +20,6 @@ namespace br { namespace cuda { namespace cudacvtfloat { @@ -20,12 +20,6 @@ namespace br { namespace cuda { namespace cudacvtfloat {
20 } 20 }
21 21
22 void wrapper(void* src, void** dst, int rows, int cols) { 22 void wrapper(void* src, void** dst, int rows, int cols) {
23 - //unsigned char* cudaSrc;  
24 - //cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char));  
25 - //cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice);  
26 -  
27 - //float* cudaDst;  
28 - //cudaMalloc(&cudaDst, rows*cols*sizeof(float));  
29 cudaError_t err; 23 cudaError_t err;
30 CUDA_SAFE_MALLOC(dst, rows*cols*sizeof(float), &err); 24 CUDA_SAFE_MALLOC(dst, rows*cols*sizeof(float), &err);
31 25
openbr/plugins/cuda/CUDAL2.cpp renamed to openbr/plugins/cuda/cudal2.cpp
@@ -19,7 +19,8 @@ using namespace std; @@ -19,7 +19,8 @@ using namespace std;
19 19
20 #include <openbr/plugins/openbr_internal.h> 20 #include <openbr/plugins/openbr_internal.h>
21 21
22 -namespace br { namespace cuda { namespace L2{ 22 +// definitions from the CUDA source file
  23 +namespace br { namespace cuda { namespace L2 {
23 void wrapper(float* cudaAPtr, float* cudaBPtr, int length, float* outPtr); 24 void wrapper(float* cudaAPtr, float* cudaBPtr, int length, float* outPtr);
24 }}} 25 }}}
25 26
@@ -29,7 +30,7 @@ namespace br @@ -29,7 +30,7 @@ namespace br
29 /*! 30 /*!
30 * \ingroup distances 31 * \ingroup distances
31 * \brief L2 distance computed using eigen. 32 * \brief L2 distance computed using eigen.
32 - * \author Josh Klontz \cite jklontz 33 + * \author Colin Heinzmann \cite DepthDeluxe
33 */ 34 */
34 class CUDAL2Distance : public UntrainableDistance 35 class CUDAL2Distance : public UntrainableDistance
35 { 36 {
@@ -45,7 +46,7 @@ class CUDAL2Distance : public UntrainableDistance @@ -45,7 +46,7 @@ class CUDAL2Distance : public UntrainableDistance
45 float* cudaBPtr = (float*)b.ptr<void*>()[0]; 46 float* cudaBPtr = (float*)b.ptr<void*>()[0];
46 47
47 float out; 48 float out;
48 - br::cuda::L2::wrapper(cudaAPtr, cudaBPtr, rows*cols, &out); 49 + cuda::L2::wrapper(cudaAPtr, cudaBPtr, rows*cols, &out);
49 50
50 return out; 51 return out;
51 } 52 }
@@ -55,4 +56,4 @@ BR_REGISTER(Distance, CUDAL2Distance) @@ -55,4 +56,4 @@ BR_REGISTER(Distance, CUDAL2Distance)
55 56
56 } // namespace br 57 } // namespace br
57 58
58 -#include "cuda/CUDAL2.moc" 59 +#include "cuda/cudal2.moc"
openbr/plugins/cuda/CUDAL2.cu renamed to openbr/plugins/cuda/cudal2.cu
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 4
5 namespace br { namespace cuda { namespace L2 { 5 namespace br { namespace cuda { namespace L2 {
6 6
7 - __global__ void my_subtract_kernel(float* aPtr, float* bPtr, float* workPtr, int length) { 7 + __global__ void subtractKernel(float* aPtr, float* bPtr, float* workPtr, int length) {
8 int index = blockIdx.x*blockDim.x+threadIdx.x; 8 int index = blockIdx.x*blockDim.x+threadIdx.x;
9 9
10 if (index >= length) { 10 if (index >= length) {
@@ -18,7 +18,7 @@ namespace br { namespace cuda { namespace L2 { @@ -18,7 +18,7 @@ namespace br { namespace cuda { namespace L2 {
18 workPtr[index] = workPtr[index] * workPtr[index]; 18 workPtr[index] = workPtr[index] * workPtr[index];
19 } 19 }
20 20
21 - __global__ void collapse_kernel(float* inPtr, float* outPtr, int length) { 21 + __global__ void collapseKernel(float* inPtr, float* outPtr, int length) {
22 // make sure there is only one thread that we are calling 22 // make sure there is only one thread that we are calling
23 if (blockIdx.x != 0 || threadIdx.x != 0) { 23 if (blockIdx.x != 0 || threadIdx.x != 0) {
24 return; 24 return;
@@ -45,11 +45,11 @@ namespace br { namespace cuda { namespace L2 { @@ -45,11 +45,11 @@ namespace br { namespace cuda { namespace L2 {
45 // perform the subtraction 45 // perform the subtraction
46 int threadsPerBlock = 64; 46 int threadsPerBlock = 64;
47 int numBlocks = length / threadsPerBlock + 1; 47 int numBlocks = length / threadsPerBlock + 1;
48 - my_subtract_kernel<<<threadsPerBlock, numBlocks>>>(cudaAPtr, cudaBPtr, cudaWorkBufferPtr, length); 48 + subtractKernel<<<threadsPerBlock, numBlocks>>>(cudaAPtr, cudaBPtr, cudaWorkBufferPtr, length);
49 CUDA_KERNEL_ERR_CHK(&err); 49 CUDA_KERNEL_ERR_CHK(&err);
50 50
51 // perform the collapse 51 // perform the collapse
52 - collapse_kernel<<<1,1>>>(cudaWorkBufferPtr, cudaOutPtr, length); 52 + collapseKernel<<<1,1>>>(cudaWorkBufferPtr, cudaOutPtr, length);
53 CUDA_KERNEL_ERR_CHK(&err); 53 CUDA_KERNEL_ERR_CHK(&err);
54 54
55 // copy the single value back to the destinsion 55 // copy the single value back to the destinsion
@@ -63,6 +63,3 @@ namespace br { namespace cuda { namespace L2 { @@ -63,6 +63,3 @@ namespace br { namespace cuda { namespace L2 {
63 CUDA_SAFE_FREE(cudaWorkBufferPtr, &err); 63 CUDA_SAFE_FREE(cudaWorkBufferPtr, &err);
64 } 64 }
65 }}} 65 }}}
66 -  
67 -// 128CUDAEigenfaces on 6400 ATT: 54.367s  
68 -// 128CUDAEigenfacesL2 on 6400 ATT:  
openbr/plugins/cuda/cudalbp.cpp
@@ -31,44 +31,21 @@ using namespace std; @@ -31,44 +31,21 @@ using namespace std;
31 31
32 #include <openbr/plugins/openbr_internal.h> 32 #include <openbr/plugins/openbr_internal.h>
33 33
34 -#include "MatManager.hpp"  
35 -  
36 using namespace cv; 34 using namespace cv;
37 35
38 -string type2str(int type) {  
39 - string r;  
40 -  
41 - uchar depth = type & CV_MAT_DEPTH_MASK;  
42 - uchar chans = 1 + (type >> CV_CN_SHIFT);  
43 -  
44 - switch ( depth ) {  
45 - case CV_8U: r = "8U"; break;  
46 - case CV_8S: r = "8S"; break;  
47 - case CV_16U: r = "16U"; break;  
48 - case CV_16S: r = "16S"; break;  
49 - case CV_32S: r = "32S"; break;  
50 - case CV_32F: r = "32F"; break;  
51 - case CV_64F: r = "64F"; break;  
52 - default: r = "User"; break;  
53 - }  
54 -  
55 - r += "C";  
56 - r += (chans+'0');  
57 -  
58 - return r;  
59 -}  
60 -  
61 -namespace br { namespace cuda {  
62 - void cudalbp_wrapper(void* srcPtr, void** dstPtr, int rows, int cols);  
63 - void cudalbp_init_wrapper(uint8_t* lut);  
64 -}} 36 +// definitions from the CUDA source file
  37 +namespace br { namespace cuda { namespace lbp {
  38 + void wrapper(void* srcPtr, void** dstPtr, int rows, int cols);
  39 + void initializeWrapper(uint8_t* lut);
  40 +}}}
65 41
66 namespace br 42 namespace br
67 { 43 {
68 /*! 44 /*!
69 * \ingroup transforms 45 * \ingroup transforms
70 - * \brief Convert the image into a feature vector using Local Binary Patterns in CUDA  
71 - * \author Colin Heinzmann, Li Li \cite DepthDeluxe, booli 46 + * \brief Convert the image into a feature vector using Local Binary Patterns in CUDA. Modified from stock OpenBR plugin.
  47 + * \author Colin Heinzmann \cite DepthDeluxe
  48 + * \author Li Li \cite booli
72 */ 49 */
73 class CUDALBPTransform : public UntrainableTransform 50 class CUDALBPTransform : public UntrainableTransform
74 { 51 {
@@ -84,8 +61,6 @@ class CUDALBPTransform : public UntrainableTransform @@ -84,8 +61,6 @@ class CUDALBPTransform : public UntrainableTransform
84 uchar lut[256]; 61 uchar lut[256];
85 uchar null; 62 uchar null;
86 63
87 - //cuda::MatManager* matManager;  
88 -  
89 public: 64 public:
90 /* Returns the number of 0->1 or 1->0 transitions in i */ 65 /* Returns the number of 0->1 or 1->0 transitions in i */
91 static int numTransitions(int i) 66 static int numTransitions(int i)
@@ -136,36 +111,14 @@ class CUDALBPTransform : public UntrainableTransform @@ -136,36 +111,14 @@ class CUDALBPTransform : public UntrainableTransform
136 if (!set[i]) 111 if (!set[i])
137 lut[i] = null; // Set to null id 112 lut[i] = null; // Set to null id
138 113
139 - // init the mat manager for managing 10 mats  
140 - //matManager = new cuda::MatManager(10);  
141 -  
142 // copy lut over to the GPU 114 // copy lut over to the GPU
143 - br::cuda::cudalbp_init_wrapper(lut); 115 + cuda::lbp::initializeWrapper(lut);
144 116
145 std::cout << "Initialized CUDALBP" << std::endl; 117 std::cout << "Initialized CUDALBP" << std::endl;
146 } 118 }
147 119
148 void project(const Template &src, Template &dst) const 120 void project(const Template &src, Template &dst) const
149 { 121 {
150 - //Mat& m = (Mat&)src.m();  
151 - //cuda::MatManager::matindex a;  
152 - //cuda::MatManager::matindex b;  
153 - //a = matManager->reserve(m);  
154 - //matManager->upload(a, m);  
155 -  
156 - // reserve the second mat and check the dimensiosn  
157 - //b = matManager->reserve(m);  
158 -  
159 - //uint8_t* srcMatPtr = matManager->get_mat_pointer_from_index(a);  
160 - //uint8_t* dstMatPtr = matManager->get_mat_pointer_from_index(b);  
161 - //br::cuda::cudalbp_wrapper(srcMatPtr, dstMatPtr, lutGpuPtr, m.cols, m.rows, m.step1());  
162 -  
163 - //matManager->download(b, dst);  
164 -  
165 - // release both the mats  
166 - //matManager->release(a);  
167 - //matManager->release(b);  
168 -  
169 void* const* srcDataPtr = src.m().ptr<void*>(); 122 void* const* srcDataPtr = src.m().ptr<void*>();
170 int rows = *((int*)srcDataPtr[1]); 123 int rows = *((int*)srcDataPtr[1]);
171 int cols = *((int*)srcDataPtr[2]); 124 int cols = *((int*)srcDataPtr[2]);
@@ -177,13 +130,13 @@ class CUDALBPTransform : public UntrainableTransform @@ -177,13 +130,13 @@ class CUDALBPTransform : public UntrainableTransform
177 dstDataPtr[2] = srcDataPtr[2]; 130 dstDataPtr[2] = srcDataPtr[2];
178 dstDataPtr[3] = srcDataPtr[3]; 131 dstDataPtr[3] = srcDataPtr[3];
179 132
180 - br::cuda::cudalbp_wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); 133 + cuda::lbp::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols);
181 dst = dstMat; 134 dst = dstMat;
182 } 135 }
183 }; 136 };
184 137
185 BR_REGISTER(Transform, CUDALBPTransform) 138 BR_REGISTER(Transform, CUDALBPTransform)
186 139
187 -} // namespace br 140 +}
188 141
189 #include "cuda/cudalbp.moc" 142 #include "cuda/cudalbp.moc"
openbr/plugins/cuda/cudalbp.cu
@@ -9,14 +9,18 @@ using namespace std; @@ -9,14 +9,18 @@ using namespace std;
9 using namespace cv; 9 using namespace cv;
10 using namespace cv::gpu; 10 using namespace cv::gpu;
11 11
12 -namespace br { namespace cuda { 12 +/*
  13 + * These are the CUDA functions for CUDALBP. See cudapca.cpp for more details
  14 + */
  15 +
  16 +namespace br { namespace cuda { namespace lbp {
13 uint8_t* lut; 17 uint8_t* lut;
14 18
15 - __device__ __forceinline__ uint8_t cudalbp_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { 19 + __device__ __forceinline__ uint8_t getPixelValueKernel(int row, int col, uint8_t* srcPtr, int rows, int cols) {
16 return (srcPtr + row*cols)[col]; 20 return (srcPtr + row*cols)[col];
17 } 21 }
18 22
19 - __global__ void cudalbp_kernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols, uint8_t* lut) 23 + __global__ void lutKernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols, uint8_t* lut)
20 { 24 {
21 int rowInd = blockIdx.y*blockDim.y+threadIdx.y; 25 int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
22 int colInd = blockIdx.x*blockDim.x+threadIdx.x; 26 int colInd = blockIdx.x*blockDim.x+threadIdx.x;
@@ -34,22 +38,22 @@ namespace br { namespace cuda { @@ -34,22 +38,22 @@ namespace br { namespace cuda {
34 } 38 }
35 } 39 }
36 40
37 - const uint8_t cval = cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+0*radius, srcPtr, rows, cols);//(srcPtr[(rowInd*srcStep+0*radius)*m.cols+colInd+0*radius]); // center value  
38 - uint8_t val = lut[(cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 128 : 0) |  
39 - (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 64 : 0) |  
40 - (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 32 : 0) |  
41 - (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 16 : 0) |  
42 - (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 8 : 0) |  
43 - (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 4 : 0) |  
44 - (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 2 : 0) |  
45 - (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 1 : 0)]; 41 + const uint8_t cval = getPixelValueKernel(rowInd+0*radius, colInd+0*radius, srcPtr, rows, cols);//(srcPtr[(rowInd*srcStep+0*radius)*m.cols+colInd+0*radius]); // center value
  42 + uint8_t val = lut[(getPixelValueKernel(rowInd-1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 128 : 0) |
  43 + (getPixelValueKernel(rowInd-1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 64 : 0) |
  44 + (getPixelValueKernel(rowInd-1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 32 : 0) |
  45 + (getPixelValueKernel(rowInd+0*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 16 : 0) |
  46 + (getPixelValueKernel(rowInd+1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 8 : 0) |
  47 + (getPixelValueKernel(rowInd+1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 4 : 0) |
  48 + (getPixelValueKernel(rowInd+1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 2 : 0) |
  49 + (getPixelValueKernel(rowInd+0*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 1 : 0)];
46 50
47 // store calculated value away in the right place 51 // store calculated value away in the right place
48 dstPtr[index] = val; 52 dstPtr[index] = val;
49 } 53 }
50 54
51 //void cudalbp_wrapper(uint8_t* srcPtr, uint8_t* dstPtr, uint8_t* lut, int imageWidth, int imageHeight, size_t step) 55 //void cudalbp_wrapper(uint8_t* srcPtr, uint8_t* dstPtr, uint8_t* lut, int imageWidth, int imageHeight, size_t step)
52 - void cudalbp_wrapper(void* srcPtr, void** dstPtr, int rows, int cols) 56 + void wrapper(void* srcPtr, void** dstPtr, int rows, int cols)
53 { 57 {
54 cudaError_t err; 58 cudaError_t err;
55 59
@@ -59,15 +63,15 @@ namespace br { namespace cuda { @@ -59,15 +63,15 @@ namespace br { namespace cuda {
59 rows/threadsPerBlock.y + 1); 63 rows/threadsPerBlock.y + 1);
60 64
61 CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err); 65 CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err);
62 - cudalbp_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), rows, cols, lut); 66 + lutKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), rows, cols, lut);
63 CUDA_KERNEL_ERR_CHK(&err); 67 CUDA_KERNEL_ERR_CHK(&err);
64 68
65 CUDA_SAFE_FREE(srcPtr, &err); 69 CUDA_SAFE_FREE(srcPtr, &err);
66 } 70 }
67 71
68 - void cudalbp_init_wrapper(uint8_t* cpuLut) { 72 + void initializeWrapper(uint8_t* cpuLut) {
69 cudaError_t err; 73 cudaError_t err;
70 CUDA_SAFE_MALLOC(&lut, 256*sizeof(uint8_t), &err); 74 CUDA_SAFE_MALLOC(&lut, 256*sizeof(uint8_t), &err);
71 CUDA_SAFE_MEMCPY(lut, cpuLut, 256*sizeof(uint8_t), cudaMemcpyHostToDevice, &err); 75 CUDA_SAFE_MEMCPY(lut, cpuLut, 256*sizeof(uint8_t), cudaMemcpyHostToDevice, &err);
72 } 76 }
73 -}} 77 +}}}
openbr/plugins/cuda/cudapca.cpp
@@ -29,8 +29,9 @@ using namespace cv; @@ -29,8 +29,9 @@ using namespace cv;
29 #include <openbr/core/eigenutils.h> 29 #include <openbr/core/eigenutils.h>
30 #include <openbr/core/opencvutils.h> 30 #include <openbr/core/opencvutils.h>
31 31
  32 +// definitions from the CUDA source file
32 namespace br { namespace cuda { namespace pca { 33 namespace br { namespace cuda { namespace pca {
33 - void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); 34 + void initializeWrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems);
34 void wrapper(void* src, void** dst); 35 void wrapper(void* src, void** dst);
35 }}} 36 }}}
36 37
@@ -38,9 +39,7 @@ namespace br @@ -38,9 +39,7 @@ namespace br
38 { 39 {
39 /*! 40 /*!
40 * \ingroup transforms 41 * \ingroup transforms
41 - * \brief Projects input into learned Principal Component Analysis subspace using CUDA.  
42 - * \author Brendan Klare \cite bklare  
43 - * \author Josh Klontz \cite jklontz 42 + * \brief Projects input into learned Principal Component Analysis subspace using CUDA. Modified from original PCA plugin.
44 * \author Colin Heinzmann \cite DepthDeluxe 43 * \author Colin Heinzmann \cite DepthDeluxe
45 * 44 *
46 * \br_property float keep Options are: [keep < 0 - All eigenvalues are retained, keep == 0 - No PCA is performed and the eigenvectors form an identity matrix, 0 < keep < 1 - Keep is the fraction of the variance to retain, keep >= 1 - keep is the number of leading eigenvectors to retain] Default is 0.95. 45 * \br_property float keep Options are: [keep < 0 - All eigenvalues are retained, keep == 0 - No PCA is performed and the eigenvectors form an identity matrix, 0 < keep < 1 - Keep is the fraction of the variance to retain, keep >= 1 - keep is the number of leading eigenvectors to retain] Default is 0.95.
@@ -134,22 +133,9 @@ private: @@ -134,22 +133,9 @@ private:
134 dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep; 133 dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep;
135 dstDataPtr[3] = srcDataPtr[3]; 134 dstDataPtr[3] = srcDataPtr[3];
136 135
137 - br::cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]); 136 + cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]);
138 137
139 dst = dstMat; 138 dst = dstMat;
140 -  
141 - //dst = cv::Mat(1, keep, CV_32FC1);  
142 -  
143 - // perform the operation on the graphics card  
144 - //cuda::cudapca_projectwrapper((float*)src.m().ptr<float>(), (float*)dst.m().ptr<float>());  
145 -  
146 - // Map Eigen into OpenCV  
147 - //Mat cpuDst = cv::Mat(1, keep, CV_32FC1);  
148 - //Eigen::Map<const Eigen::MatrixXf> inMap(src.m().ptr<float>(), src.m().rows*src.m().cols, 1);  
149 - //Eigen::Map<Eigen::MatrixXf> outMap(cpuDst.ptr<float>(), keep, 1);  
150 -  
151 - // Do projection  
152 - //outMap = eVecs.transpose() * (inMap - mean);  
153 } 139 }
154 140
155 void store(QDataStream &stream) const 141 void store(QDataStream &stream) const
@@ -161,14 +147,6 @@ private: @@ -161,14 +147,6 @@ private:
161 { 147 {
162 stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs; 148 stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs;
163 149
164 - cout << "Mean Dimensions" << endl;  
165 - cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl;  
166 - cout << "eVecs Dimensions" << endl;  
167 - cout << "\tRows: " << eVecs.rows() << " Cols: " << eVecs.cols() << endl;  
168 - cout << "eVals Dimensions" << endl;  
169 - cout << "\tRows: " << eVals.rows() << " Cols: " << eVals.cols() << endl;  
170 - cout << "Keep: " << keep << endl;  
171 -  
172 // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data 150 // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data
173 // serialize the eigenvectors 151 // serialize the eigenvectors
174 float* evBuffer = new float[eVecs.rows() * eVecs.cols()]; 152 float* evBuffer = new float[eVecs.rows() * eVecs.cols()];
@@ -187,7 +165,7 @@ private: @@ -187,7 +165,7 @@ private:
187 } 165 }
188 166
189 // call the wrapper function 167 // call the wrapper function
190 - br::cuda::pca::loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); 168 + cuda::pca::initializeWrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols());
191 169
192 delete evBuffer; 170 delete evBuffer;
193 delete meanBuffer; 171 delete meanBuffer;
openbr/plugins/cuda/cudapca.cu
@@ -9,6 +9,10 @@ using namespace std; @@ -9,6 +9,10 @@ using namespace std;
9 using namespace cv; 9 using namespace cv;
10 using namespace cv::gpu; 10 using namespace cv::gpu;
11 11
  12 +/*
  13 + * These are the CUDA functions for CUDAPCA. See cudapca.cpp for more details
  14 + */
  15 +
12 namespace br { namespace cuda { namespace pca { 16 namespace br { namespace cuda { namespace pca {
13 __global__ void multiplyKernel(float* src, float* intermediaryBuffer, float* evPtr, int evRows, int evCols, int stepSize) { 17 __global__ void multiplyKernel(float* src, float* intermediaryBuffer, float* evPtr, int evRows, int evCols, int stepSize) {
14 int colInd = blockIdx.x*blockDim.x+threadIdx.x; 18 int colInd = blockIdx.x*blockDim.x+threadIdx.x;
@@ -68,7 +72,7 @@ namespace br { namespace cuda { namespace pca { @@ -68,7 +72,7 @@ namespace br { namespace cuda { namespace pca {
68 int _numSteps; int _stepSize; 72 int _numSteps; int _stepSize;
69 float* intermediaryBuffer; 73 float* intermediaryBuffer;
70 74
71 - void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { 75 + void initializeWrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) {
72 _evRows = evRows; _evCols = evCols; 76 _evRows = evRows; _evCols = evCols;
73 _meanElems = meanElems; 77 _meanElems = meanElems;
74 78
@@ -95,7 +99,6 @@ namespace br { namespace cuda { namespace pca { @@ -95,7 +99,6 @@ namespace br { namespace cuda { namespace pca {
95 cudaError_t err; 99 cudaError_t err;
96 CUDA_SAFE_MALLOC(dst, _evCols*sizeof(float), &err); 100 CUDA_SAFE_MALLOC(dst, _evCols*sizeof(float), &err);
97 101
98 -  
99 // subtract out the mean of the image (mean is 1xpixels in size) 102 // subtract out the mean of the image (mean is 1xpixels in size)
100 int threadsPerBlock = 64; 103 int threadsPerBlock = 64;
101 int numBlocks = _meanElems / threadsPerBlock + 1; 104 int numBlocks = _meanElems / threadsPerBlock + 1;
@@ -114,8 +117,5 @@ namespace br { namespace cuda { namespace pca { @@ -114,8 +117,5 @@ namespace br { namespace cuda { namespace pca {
114 CUDA_KERNEL_ERR_CHK(&err); 117 CUDA_KERNEL_ERR_CHK(&err);
115 118
116 CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption... 119 CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption...
117 -  
118 - // copy the data back to the CPU  
119 - //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost);  
120 } 120 }
121 }}} 121 }}}
openbr/plugins/cuda/cudargb2grayscale.cpp
@@ -25,17 +25,18 @@ @@ -25,17 +25,18 @@
25 25
26 using namespace cv; 26 using namespace cv;
27 27
28 -namespace br { namespace cuda{  
29 - void cudargb2grayscale_wrapper(void* srcPtr, void**dstPtr, int rows, int cols);  
30 -}} 28 +// definitions from the CUDA source file
  29 +namespace br { namespace cuda { namespace rgb2grayscale {
  30 + void wrapper(void* srcPtr, void**dstPtr, int rows, int cols);
  31 +}}}
31 32
32 namespace br 33 namespace br
33 { 34 {
34 35
35 /*! 36 /*!
36 * \ingroup transforms 37 * \ingroup transforms
37 - * \brief Colorspace conversion.  
38 - * \author Li Li \cite Josh Klontz \cite jklontz 38 + * \brief Converts 3-channel images to grayscale
  39 + * \author Li Li \cite booli
39 */ 40 */
40 class CUDARGB2GrayScaleTransform : public UntrainableTransform 41 class CUDARGB2GrayScaleTransform : public UntrainableTransform
41 { 42 {
@@ -57,8 +58,8 @@ private: @@ -57,8 +58,8 @@ private:
57 dstDataPtr[2] = srcDataPtr[2]; 58 dstDataPtr[2] = srcDataPtr[2];
58 dstDataPtr[3] = srcDataPtr[3]; 59 dstDataPtr[3] = srcDataPtr[3];
59 *((int*)dstDataPtr[3]) = CV_8UC1; // not sure if the type of the new mat is the same 60 *((int*)dstDataPtr[3]) = CV_8UC1; // not sure if the type of the new mat is the same
60 -  
61 - br::cuda::cudargb2grayscale_wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); 61 +
  62 + cuda::rgb2grayscale::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols);
62 dst = dstMat; 63 dst = dstMat;
63 64
64 /* 65 /*
openbr/plugins/cuda/cudargb2grayscale.cu
@@ -12,9 +12,9 @@ using namespace std; @@ -12,9 +12,9 @@ using namespace std;
12 using namespace cv; 12 using namespace cv;
13 using namespace cv::gpu; 13 using namespace cv::gpu;
14 14
15 -namespace br{ namespace cuda { 15 +namespace br { namespace cuda { namespace rgb2grayscale {
16 16
17 - __global__ void cudargb2grayscale_kernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols) 17 + __global__ void kernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols)
18 { 18 {
19 int rowInd = blockIdx.y*blockDim.y+threadIdx.y; 19 int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
20 int colInd = blockIdx.x*blockDim.x+threadIdx.x; 20 int colInd = blockIdx.x*blockDim.x+threadIdx.x;
@@ -31,7 +31,7 @@ namespace br{ namespace cuda { @@ -31,7 +31,7 @@ namespace br{ namespace cuda {
31 return; 31 return;
32 } 32 }
33 33
34 - void cudargb2grayscale_wrapper(void* srcPtr, void** dstPtr, int rows, int cols) 34 + void wrapper(void* srcPtr, void** dstPtr, int rows, int cols)
35 { 35 {
36 cudaError_t err; 36 cudaError_t err;
37 dim3 threadsPerBlock(9, 9); 37 dim3 threadsPerBlock(9, 9);
@@ -39,9 +39,9 @@ namespace br{ namespace cuda { @@ -39,9 +39,9 @@ namespace br{ namespace cuda {
39 rows/threadsPerBlock.y + 1); 39 rows/threadsPerBlock.y + 1);
40 CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err); 40 CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err);
41 41
42 - cudargb2grayscale_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*) (*dstPtr), rows, cols); 42 + kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*) (*dstPtr), rows, cols);
43 CUDA_KERNEL_ERR_CHK(&err); 43 CUDA_KERNEL_ERR_CHK(&err);
44 CUDA_SAFE_FREE(srcPtr, &err); 44 CUDA_SAFE_FREE(srcPtr, &err);
45 - } 45 + }
46 46
47 -}} 47 +}}}
openbr/plugins/cuda/passthrough.cpp deleted
1 -#include <openbr/plugins/openbr_internal.h>  
2 -  
3 -#include <opencv2/imgproc/imgproc.hpp>  
4 -#include <opencv2/gpu/gpu.hpp>  
5 -  
6 -using namespace cv;  
7 -using namespace cv::gpu;  
8 -  
9 -#include "passthrough.hpp"  
10 -  
11 -#include <iostream>  
12 -  
13 -  
14 -namespace br  
15 -{  
16 - class CUDAPassthroughTransform : public UntrainableTransform  
17 - {  
18 - Q_OBJECT  
19 -  
20 -private:  
21 - void project(const Template &src, Template &dst) const  
22 - {  
23 - // note: if you convert the image to grayscale, you get 8UC1  
24 -  
25 - // upload the src mat to the GPU  
26 - GpuMat srcGpuMat, dstGpuMat;  
27 - srcGpuMat.upload(src.m());  
28 - dstGpuMat.upload(src.m());  
29 -  
30 - br::cuda::passthrough_wrapper(srcGpuMat, dstGpuMat);  
31 -  
32 - dstGpuMat.download(dst.m());  
33 -  
34 - // TODO(colin): add delete code  
35 - srcGpuMat.release();  
36 - dstGpuMat.release();  
37 -  
38 - printf("srcGpuMat empty: %d\n", (int)srcGpuMat.empty());  
39 - printf("dstGpuMat empty: %d\n", (int)srcGpuMat.empty());  
40 - }  
41 - };  
42 -  
43 - BR_REGISTER(Transform, CUDAPassthroughTransform);  
44 -}  
45 -  
46 -#include "cuda/passthrough.moc"  
openbr/plugins/cuda/passthrough.cu deleted
1 -// note: Using 8-bit unsigned 1 channel images  
2 -  
3 -#include <opencv2/gpu/gpu.hpp>  
4 -  
5 -using namespace cv;  
6 -using namespace cv::gpu;  
7 -  
8 -#include "passthrough.hpp"  
9 -  
10 -namespace br { namespace cuda {  
11 - __global__ void passthrough_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) {  
12 - int rowInd = blockIdx.y*blockDim.y+threadIdx.y;  
13 - int colInd = blockIdx.x*blockDim.x+threadIdx.x;  
14 -  
15 - // don't do anything if we are outside the allowable positions  
16 - if (rowInd >= rows || colInd >= cols)  
17 - return;  
18 -  
19 - uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd];  
20 - uint8_t* rowDstPtr = dstPtr + rowInd*dstStep;  
21 -  
22 - rowDstPtr[colInd] = srcVal;  
23 - }  
24 -  
25 - void passthrough_wrapper(GpuMat& src, GpuMat& dst) {  
26 - // convert the GpuMats to pointers  
27 - uint8_t* srcPtr = (uint8_t*)src.data;  
28 - uint8_t* dstPtr = (uint8_t*)dst.data;  
29 -  
30 - int imageWidth = src.cols;  
31 - int imageHeight = src.rows;  
32 -  
33 - // make 8 * 8 = 64 square block  
34 - dim3 threadsPerBlock(8, 8);  
35 - dim3 numBlocks(imageWidth / threadsPerBlock.x + 1,  
36 - imageHeight / threadsPerBlock.y + 1);  
37 -  
38 - passthrough_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight);  
39 - }  
40 -}}  
41 -  
42 -  
43 -// read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel  
openbr/plugins/cuda/passthrough.hpp deleted
1 -namespace br { namespace cuda {  
2 - void passthrough_wrapper(GpuMat& src, GpuMat& dst);  
3 -}}