Commit 1dabf42dad2e33b7a9e3d11b4142a98fd53c5f9d
1 parent
e600a994
code refactoring for pull request
Showing
21 changed files
with
272 additions
and
429 deletions
openbr/plugins/cuda/MatManager.cu renamed to openbr/core/cuda/MatManager.cu
openbr/plugins/cuda/MatManager.hpp renamed to openbr/core/cuda/MatManager.hpp
openbr/plugins/cuda/copyfrom.cpp
| @@ -2,21 +2,25 @@ | @@ -2,21 +2,25 @@ | ||
| 2 | 2 | ||
| 3 | #include <opencv2/opencv.hpp> | 3 | #include <opencv2/opencv.hpp> |
| 4 | 4 | ||
| 5 | -//#include <gperftools/profiler.h> | ||
| 6 | - | ||
| 7 | #include <openbr/plugins/openbr_internal.h> | 5 | #include <openbr/plugins/openbr_internal.h> |
| 8 | 6 | ||
| 9 | using namespace std; | 7 | using namespace std; |
| 10 | 8 | ||
| 11 | using namespace cv; | 9 | using namespace cv; |
| 12 | 10 | ||
| 13 | -// extern CUDA declaration | ||
| 14 | -namespace br { namespace cuda { namespace cudacopyfrom { | 11 | +// CUDA functions for this plugin |
| 12 | +namespace br { namespace cuda { namespace copyfrom { | ||
| 15 | template <typename T> void wrapper(void* src, T* out, int rows, int cols); | 13 | template <typename T> void wrapper(void* src, T* out, int rows, int cols); |
| 16 | }}} | 14 | }}} |
| 17 | 15 | ||
| 18 | namespace br | 16 | namespace br |
| 19 | { | 17 | { |
| 18 | + /*! | ||
| 19 | + * \ingroup transforms | ||
| 20 | + * \brief Copies a transform from the GPU to the CPU. | ||
| 21 | + * \author Colin Heinzmann \cite DepthDeluxe | ||
| 22 | + * \note Method: Automatically matches image dimensions, works for 32-bit single channel, 8-bit single channel, and 8-bit 3 channel | ||
| 23 | + */ | ||
| 20 | class CUDACopyFrom : public UntrainableTransform | 24 | class CUDACopyFrom : public UntrainableTransform |
| 21 | { | 25 | { |
| 22 | Q_OBJECT | 26 | Q_OBJECT |
| @@ -24,8 +28,6 @@ namespace br | @@ -24,8 +28,6 @@ namespace br | ||
| 24 | private: | 28 | private: |
| 25 | void project(const Template &src, Template &dst) const | 29 | void project(const Template &src, Template &dst) const |
| 26 | { | 30 | { |
| 27 | -// ProfilerStart("PROFILEME.log"); | ||
| 28 | - | ||
| 29 | // pull the data back out of the Mat | 31 | // pull the data back out of the Mat |
| 30 | void* const* dataPtr = src.m().ptr<void*>(); | 32 | void* const* dataPtr = src.m().ptr<void*>(); |
| 31 | int rows = *((int*)dataPtr[1]); | 33 | int rows = *((int*)dataPtr[1]); |
| @@ -35,21 +37,19 @@ private: | @@ -35,21 +37,19 @@ private: | ||
| 35 | Mat dstMat = Mat(rows, cols, type); | 37 | Mat dstMat = Mat(rows, cols, type); |
| 36 | switch(type) { | 38 | switch(type) { |
| 37 | case CV_32FC1: | 39 | case CV_32FC1: |
| 38 | - br::cuda::cudacopyfrom::wrapper(dataPtr[0], dstMat.ptr<float>(), rows, cols); | 40 | + cuda::copyfrom::wrapper(dataPtr[0], dstMat.ptr<float>(), rows, cols); |
| 39 | break; | 41 | break; |
| 40 | case CV_8UC1: | 42 | case CV_8UC1: |
| 41 | - br::cuda::cudacopyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols); | 43 | + cuda::copyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols); |
| 42 | break; | 44 | break; |
| 43 | case CV_8UC3: | 45 | case CV_8UC3: |
| 44 | - br::cuda::cudacopyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols * 3); | 46 | + cuda::copyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols * 3); |
| 45 | break; | 47 | break; |
| 46 | default: | 48 | default: |
| 47 | - cout << "ERR: Invalid image format" << endl; | 49 | + cout << "ERR: Invalid image type (" << type << ")" << endl; |
| 48 | break; | 50 | break; |
| 49 | } | 51 | } |
| 50 | dst = dstMat; | 52 | dst = dstMat; |
| 51 | - | ||
| 52 | -// ProfilerStop(); | ||
| 53 | } | 53 | } |
| 54 | }; | 54 | }; |
| 55 | 55 |
openbr/plugins/cuda/copyfrom.cu
| 1 | #include "cudadefines.hpp" | 1 | #include "cudadefines.hpp" |
| 2 | 2 | ||
| 3 | -namespace br { namespace cuda { namespace cudacopyfrom { | 3 | +namespace br { namespace cuda { namespace copyfrom { |
| 4 | template <typename T> void wrapper(void* src, T* dst, int rows, int cols) { | 4 | template <typename T> void wrapper(void* src, T* dst, int rows, int cols) { |
| 5 | cudaError_t err; | 5 | cudaError_t err; |
| 6 | CUDA_SAFE_MEMCPY(dst, src, rows*cols*sizeof(T), cudaMemcpyDeviceToHost, &err); | 6 | CUDA_SAFE_MEMCPY(dst, src, rows*cols*sizeof(T), cudaMemcpyDeviceToHost, &err); |
openbr/plugins/cuda/copyto.cpp
| @@ -8,14 +8,20 @@ using namespace std; | @@ -8,14 +8,20 @@ using namespace std; | ||
| 8 | 8 | ||
| 9 | using namespace cv; | 9 | using namespace cv; |
| 10 | 10 | ||
| 11 | -extern string type2str(int type); | ||
| 12 | - | ||
| 13 | -namespace br { namespace cuda { namespace cudacopyto { | 11 | +// definitions from the CUDA source file |
| 12 | +namespace br { namespace cuda { namespace copyto { | ||
| 14 | template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols); | 13 | template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols); |
| 15 | }}} | 14 | }}} |
| 16 | 15 | ||
| 17 | namespace br | 16 | namespace br |
| 18 | { | 17 | { |
| 18 | + | ||
| 19 | + /*! | ||
| 20 | + * \ingroup transforms | ||
| 21 | + * \brief Copies a transform to the GPU. | ||
| 22 | + * \author Colin Heinzmann \cite DepthDeluxe | ||
| 23 | + * \note Method: Automatically matches image dimensions, works for 32-bit single channel, 8-bit single channel, and 8-bit 3 channel | ||
| 24 | + */ | ||
| 19 | class CUDACopyTo : public UntrainableTransform | 25 | class CUDACopyTo : public UntrainableTransform |
| 20 | { | 26 | { |
| 21 | Q_OBJECT | 27 | Q_OBJECT |
| @@ -25,7 +31,7 @@ private: | @@ -25,7 +31,7 @@ private: | ||
| 25 | { | 31 | { |
| 26 | const Mat& srcMat = src.m(); | 32 | const Mat& srcMat = src.m(); |
| 27 | const int rows = srcMat.rows; | 33 | const int rows = srcMat.rows; |
| 28 | - const int cols = srcMat.cols; | 34 | + const int cols = srcMat.cols; |
| 29 | 35 | ||
| 30 | // output will be a single pointer to graphics card memory | 36 | // output will be a single pointer to graphics card memory |
| 31 | Mat dstMat = Mat(4, 1, DataType<void*>::type); | 37 | Mat dstMat = Mat(4, 1, DataType<void*>::type); |
| @@ -39,16 +45,16 @@ private: | @@ -39,16 +45,16 @@ private: | ||
| 39 | void* cudaMemPtr; | 45 | void* cudaMemPtr; |
| 40 | switch(srcMat.type()) { | 46 | switch(srcMat.type()) { |
| 41 | case CV_32FC1: | 47 | case CV_32FC1: |
| 42 | - br::cuda::cudacopyto::wrapper(srcMat.ptr<float>(), &dstMatData[0], rows, cols); | 48 | + cuda::copyto::wrapper(srcMat.ptr<float>(), &dstMatData[0], rows, cols); |
| 43 | break; | 49 | break; |
| 44 | case CV_8UC1: | 50 | case CV_8UC1: |
| 45 | - br::cuda::cudacopyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, cols); | 51 | + cuda::copyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, cols); |
| 46 | break; | 52 | break; |
| 47 | case CV_8UC3: | 53 | case CV_8UC3: |
| 48 | - br::cuda::cudacopyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, 3*cols); | 54 | + cuda::copyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, 3*cols); |
| 49 | break; | 55 | break; |
| 50 | default: | 56 | default: |
| 51 | - cout << "ERR: Invalid image type! " << type2str(srcMat.type()) << endl; | 57 | + cout << "ERR: Invalid image type (" << srcMat.type() << ")" << endl; |
| 52 | return; | 58 | return; |
| 53 | } | 59 | } |
| 54 | 60 |
openbr/plugins/cuda/copyto.cu
| 1 | #include "cudadefines.hpp" | 1 | #include "cudadefines.hpp" |
| 2 | 2 | ||
| 3 | -namespace br { namespace cuda { namespace cudacopyto { | 3 | +namespace br { namespace cuda { namespace copyto { |
| 4 | + | ||
| 4 | template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols) { | 5 | template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols) { |
| 5 | cudaError_t err; | 6 | cudaError_t err; |
| 6 | CUDA_SAFE_MALLOC(out, rows*cols*sizeof(T), &err); | 7 | CUDA_SAFE_MALLOC(out, rows*cols*sizeof(T), &err); |
| @@ -9,4 +10,5 @@ namespace br { namespace cuda { namespace cudacopyto { | @@ -9,4 +10,5 @@ namespace br { namespace cuda { namespace cudacopyto { | ||
| 9 | 10 | ||
| 10 | template void wrapper(const float* in, void** out, const int rows, const int cols); | 11 | template void wrapper(const float* in, void** out, const int rows, const int cols); |
| 11 | template void wrapper(const unsigned char* in, void** out, const int rows, const int cols); | 12 | template void wrapper(const unsigned char* in, void** out, const int rows, const int cols); |
| 13 | + | ||
| 12 | }}} | 14 | }}} |
openbr/plugins/cuda/cudaaffine.cpp
| @@ -33,159 +33,158 @@ using namespace std; | @@ -33,159 +33,158 @@ using namespace std; | ||
| 33 | #include <openbr/plugins/openbr_internal.h> | 33 | #include <openbr/plugins/openbr_internal.h> |
| 34 | #include <openbr/core/opencvutils.h> | 34 | #include <openbr/core/opencvutils.h> |
| 35 | 35 | ||
| 36 | -#include "MatManager.hpp" | ||
| 37 | - | ||
| 38 | using namespace cv; | 36 | using namespace cv; |
| 39 | 37 | ||
| 40 | -namespace br { namespace cuda { | ||
| 41 | - void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); | ||
| 42 | -}} | 38 | +// definitions from the CUDA source file |
| 39 | +namespace br { namespace cuda { namespace affine { | ||
| 40 | + void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); | ||
| 41 | +}}} | ||
| 43 | 42 | ||
| 44 | namespace br | 43 | namespace br |
| 45 | { | 44 | { |
| 46 | 45 | ||
| 47 | -/*! | ||
| 48 | - * \ingroup transforms | ||
| 49 | - * \brief Performs a two or three point registration. | ||
| 50 | - * \author Josh Klontz \cite jklontz | ||
| 51 | - * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargement, Bilin for fast enlargement. | ||
| 52 | - */ | ||
| 53 | -class CUDAAffineTransform : public UntrainableTransform | ||
| 54 | -{ | ||
| 55 | - Q_OBJECT | ||
| 56 | - Q_ENUMS(Method) | ||
| 57 | - | ||
| 58 | -public: | ||
| 59 | - /*!< */ | ||
| 60 | - enum Method { Near = INTER_NEAREST, | ||
| 61 | - Area = INTER_AREA, | ||
| 62 | - Bilin = INTER_LINEAR, | ||
| 63 | - Cubic = INTER_CUBIC, | ||
| 64 | - Lanczo = INTER_LANCZOS4}; | ||
| 65 | - | ||
| 66 | -private: | ||
| 67 | - Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false) | ||
| 68 | - Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false) | ||
| 69 | - Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false) | ||
| 70 | - Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false) | ||
| 71 | - Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false) | ||
| 72 | - Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false) | ||
| 73 | - Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false) | ||
| 74 | - Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false) | ||
| 75 | - Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false) | ||
| 76 | - Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false) | ||
| 77 | - Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false) | ||
| 78 | - BR_PROPERTY(int, width, 64) | ||
| 79 | - BR_PROPERTY(int, height, 64) | ||
| 80 | - BR_PROPERTY(float, x1, 0) | ||
| 81 | - BR_PROPERTY(float, y1, 0) | ||
| 82 | - BR_PROPERTY(float, x2, -1) | ||
| 83 | - BR_PROPERTY(float, y2, -1) | ||
| 84 | - BR_PROPERTY(float, x3, -1) | ||
| 85 | - BR_PROPERTY(float, y3, -1) | ||
| 86 | - BR_PROPERTY(Method, method, Bilin) | ||
| 87 | - BR_PROPERTY(bool, storeAffine, false) | ||
| 88 | - BR_PROPERTY(bool, warpPoints, false) | ||
| 89 | - | ||
| 90 | - static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b) | ||
| 91 | - { | ||
| 92 | - float dx = b.x - a.x; | ||
| 93 | - float dy = b.y - a.y; | ||
| 94 | - return Point2f(a.x - dy, a.y + dx); | ||
| 95 | - } | ||
| 96 | - | ||
| 97 | - void project(const Template &src, Template &dst) const | ||
| 98 | - { | ||
| 99 | - const bool twoPoints = ((x3 == -1) || (y3 == -1)); | ||
| 100 | - | ||
| 101 | - Point2f dstPoints[3]; | ||
| 102 | - dstPoints[0] = Point2f(x1*width, y1*height); | ||
| 103 | - dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height); | ||
| 104 | - if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]); | ||
| 105 | - else dstPoints[2] = Point2f(x3*width, y3*height); | ||
| 106 | - | ||
| 107 | - Point2f srcPoints[3]; | ||
| 108 | - if (src.file.contains("Affine_0") && | ||
| 109 | - src.file.contains("Affine_1") && | ||
| 110 | - (src.file.contains("Affine_2") || twoPoints)) { | ||
| 111 | - srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0")); | ||
| 112 | - srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1")); | ||
| 113 | - if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2")); | ||
| 114 | - } else { | ||
| 115 | - const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points()); | ||
| 116 | - | ||
| 117 | - if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { | ||
| 118 | - resize(src, dst, Size(width, height)); | ||
| 119 | - return; | ||
| 120 | - } else { | ||
| 121 | - srcPoints[0] = landmarks[0]; | ||
| 122 | - srcPoints[1] = landmarks[1]; | ||
| 123 | - if (!twoPoints) srcPoints[2] = landmarks[2]; | ||
| 124 | - } | ||
| 125 | - } | ||
| 126 | - if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]); | ||
| 127 | - | ||
| 128 | - // Code section being altered (original) | ||
| 129 | - // | ||
| 130 | - // Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | ||
| 131 | - // warpAffine(src, dst, affineTransform, Size(width, height), method); | ||
| 132 | - // | ||
| 133 | - // end original | ||
| 134 | - | ||
| 135 | - Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | ||
| 136 | - | ||
| 137 | - void* const* srcDataPtr = src.m().ptr<void*>(); | ||
| 138 | - int rows = *((int*)srcDataPtr[1]); | ||
| 139 | - int cols = *((int*)srcDataPtr[2]); | ||
| 140 | - int type = *((int*)srcDataPtr[3]); | ||
| 141 | - | ||
| 142 | - Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | ||
| 143 | - void** dstDataPtr = dstMat.ptr<void*>(); | ||
| 144 | - | ||
| 145 | - dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows | ||
| 146 | - dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols | ||
| 147 | - dstDataPtr[3] = srcDataPtr[3]; | ||
| 148 | - | ||
| 149 | - // Print the transform | ||
| 150 | - //for(int x = 0; x < affineTransform.rows; x++){ | ||
| 151 | - //for(int y = 0; y < affineTransform.cols; y++){ | ||
| 152 | - //printf("%8.3f\t", affineTransform.at<double>(x, y)); | ||
| 153 | - //} | ||
| 154 | - //printf("\n"); | ||
| 155 | - //} | ||
| 156 | - | ||
| 157 | - br::cuda::cudaaffine_wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width); | ||
| 158 | - | ||
| 159 | - // end altered code | ||
| 160 | - | ||
| 161 | - if (warpPoints) { | ||
| 162 | - QList<QPointF> points = src.file.points(); | ||
| 163 | - QList<QPointF> rotatedPoints; | ||
| 164 | - for (int i=0; i<points.size(); i++) { | ||
| 165 | - rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+ | ||
| 166 | - points.at(i).y()*affineTransform.at<double>(0,1)+ | ||
| 167 | - affineTransform.at<double>(0,2), | ||
| 168 | - points.at(i).x()*affineTransform.at<double>(1,0)+ | ||
| 169 | - points.at(i).y()*affineTransform.at<double>(1,1)+ | ||
| 170 | - affineTransform.at<double>(1,2))); | ||
| 171 | - } | ||
| 172 | - | ||
| 173 | - dst.file.setPoints(rotatedPoints); | ||
| 174 | - } | ||
| 175 | - | ||
| 176 | - if (storeAffine) { | ||
| 177 | - QList<float> affineParams; | ||
| 178 | - for (int i = 0 ; i < 2; i++) | ||
| 179 | - for (int j = 0; j < 3; j++) | ||
| 180 | - affineParams.append(affineTransform.at<double>(i, j)); | ||
| 181 | - dst.file.setList("affineParameters", affineParams); | ||
| 182 | - } | ||
| 183 | - | ||
| 184 | - dst = dstMat; | ||
| 185 | - } | ||
| 186 | -}; | ||
| 187 | - | ||
| 188 | -BR_REGISTER(Transform, CUDAAffineTransform) | 46 | + /*! |
| 47 | + * \ingroup transforms | ||
| 48 | + * \brief Performs a two or three point registration on the GPU. Modified from stock OpenBR implementation | ||
| 49 | + * \author Greg Schrock \cite gls022 | ||
| 50 | + * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargement, Bilin for fast enlargement. | ||
| 51 | + */ | ||
| 52 | + class CUDAAffineTransform : public UntrainableTransform | ||
| 53 | + { | ||
| 54 | + Q_OBJECT | ||
| 55 | + Q_ENUMS(Method) | ||
| 56 | + | ||
| 57 | + public: | ||
| 58 | + /*!< */ | ||
| 59 | + enum Method { Near = INTER_NEAREST, | ||
| 60 | + Area = INTER_AREA, | ||
| 61 | + Bilin = INTER_LINEAR, | ||
| 62 | + Cubic = INTER_CUBIC, | ||
| 63 | + Lanczo = INTER_LANCZOS4}; | ||
| 64 | + | ||
| 65 | + private: | ||
| 66 | + Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false) | ||
| 67 | + Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false) | ||
| 68 | + Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false) | ||
| 69 | + Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false) | ||
| 70 | + Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false) | ||
| 71 | + Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false) | ||
| 72 | + Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false) | ||
| 73 | + Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false) | ||
| 74 | + Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false) | ||
| 75 | + Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false) | ||
| 76 | + Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false) | ||
| 77 | + BR_PROPERTY(int, width, 64) | ||
| 78 | + BR_PROPERTY(int, height, 64) | ||
| 79 | + BR_PROPERTY(float, x1, 0) | ||
| 80 | + BR_PROPERTY(float, y1, 0) | ||
| 81 | + BR_PROPERTY(float, x2, -1) | ||
| 82 | + BR_PROPERTY(float, y2, -1) | ||
| 83 | + BR_PROPERTY(float, x3, -1) | ||
| 84 | + BR_PROPERTY(float, y3, -1) | ||
| 85 | + BR_PROPERTY(Method, method, Bilin) | ||
| 86 | + BR_PROPERTY(bool, storeAffine, false) | ||
| 87 | + BR_PROPERTY(bool, warpPoints, false) | ||
| 88 | + | ||
| 89 | + static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b) | ||
| 90 | + { | ||
| 91 | + float dx = b.x - a.x; | ||
| 92 | + float dy = b.y - a.y; | ||
| 93 | + return Point2f(a.x - dy, a.y + dx); | ||
| 94 | + } | ||
| 95 | + | ||
| 96 | + void project(const Template &src, Template &dst) const | ||
| 97 | + { | ||
| 98 | + const bool twoPoints = ((x3 == -1) || (y3 == -1)); | ||
| 99 | + | ||
| 100 | + Point2f dstPoints[3]; | ||
| 101 | + dstPoints[0] = Point2f(x1*width, y1*height); | ||
| 102 | + dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height); | ||
| 103 | + if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]); | ||
| 104 | + else dstPoints[2] = Point2f(x3*width, y3*height); | ||
| 105 | + | ||
| 106 | + Point2f srcPoints[3]; | ||
| 107 | + if (src.file.contains("Affine_0") && | ||
| 108 | + src.file.contains("Affine_1") && | ||
| 109 | + (src.file.contains("Affine_2") || twoPoints)) { | ||
| 110 | + srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0")); | ||
| 111 | + srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1")); | ||
| 112 | + if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2")); | ||
| 113 | + } else { | ||
| 114 | + const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points()); | ||
| 115 | + | ||
| 116 | + if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { | ||
| 117 | + resize(src, dst, Size(width, height)); | ||
| 118 | + return; | ||
| 119 | + } else { | ||
| 120 | + srcPoints[0] = landmarks[0]; | ||
| 121 | + srcPoints[1] = landmarks[1]; | ||
| 122 | + if (!twoPoints) srcPoints[2] = landmarks[2]; | ||
| 123 | + } | ||
| 124 | + } | ||
| 125 | + if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]); | ||
| 126 | + | ||
| 127 | + // Code section being altered (original) | ||
| 128 | + // | ||
| 129 | + // Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | ||
| 130 | + // warpAffine(src, dst, affineTransform, Size(width, height), method); | ||
| 131 | + // | ||
| 132 | + // end original | ||
| 133 | + | ||
| 134 | + Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | ||
| 135 | + | ||
| 136 | + void* const* srcDataPtr = src.m().ptr<void*>(); | ||
| 137 | + int rows = *((int*)srcDataPtr[1]); | ||
| 138 | + int cols = *((int*)srcDataPtr[2]); | ||
| 139 | + int type = *((int*)srcDataPtr[3]); | ||
| 140 | + | ||
| 141 | + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | ||
| 142 | + void** dstDataPtr = dstMat.ptr<void*>(); | ||
| 143 | + | ||
| 144 | + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows | ||
| 145 | + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols | ||
| 146 | + dstDataPtr[3] = srcDataPtr[3]; | ||
| 147 | + | ||
| 148 | + // Print the transform | ||
| 149 | + //for(int x = 0; x < affineTransform.rows; x++){ | ||
| 150 | + //for(int y = 0; y < affineTransform.cols; y++){ | ||
| 151 | + //printf("%8.3f\t", affineTransform.at<double>(x, y)); | ||
| 152 | + //} | ||
| 153 | + //printf("\n"); | ||
| 154 | + //} | ||
| 155 | + | ||
| 156 | + cuda::affine::wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width); | ||
| 157 | + | ||
| 158 | + // end altered code | ||
| 159 | + | ||
| 160 | + if (warpPoints) { | ||
| 161 | + QList<QPointF> points = src.file.points(); | ||
| 162 | + QList<QPointF> rotatedPoints; | ||
| 163 | + for (int i=0; i<points.size(); i++) { | ||
| 164 | + rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+ | ||
| 165 | + points.at(i).y()*affineTransform.at<double>(0,1)+ | ||
| 166 | + affineTransform.at<double>(0,2), | ||
| 167 | + points.at(i).x()*affineTransform.at<double>(1,0)+ | ||
| 168 | + points.at(i).y()*affineTransform.at<double>(1,1)+ | ||
| 169 | + affineTransform.at<double>(1,2))); | ||
| 170 | + } | ||
| 171 | + | ||
| 172 | + dst.file.setPoints(rotatedPoints); | ||
| 173 | + } | ||
| 174 | + | ||
| 175 | + if (storeAffine) { | ||
| 176 | + QList<float> affineParams; | ||
| 177 | + for (int i = 0 ; i < 2; i++) | ||
| 178 | + for (int j = 0; j < 3; j++) | ||
| 179 | + affineParams.append(affineTransform.at<double>(i, j)); | ||
| 180 | + dst.file.setList("affineParameters", affineParams); | ||
| 181 | + } | ||
| 182 | + | ||
| 183 | + dst = dstMat; | ||
| 184 | + } | ||
| 185 | + }; | ||
| 186 | + | ||
| 187 | + BR_REGISTER(Transform, CUDAAffineTransform) | ||
| 189 | 188 | ||
| 190 | } // namespace br | 189 | } // namespace br |
| 191 | 190 |
openbr/plugins/cuda/cudaaffine.cu
| @@ -11,30 +11,30 @@ using namespace std; | @@ -11,30 +11,30 @@ using namespace std; | ||
| 11 | using namespace cv; | 11 | using namespace cv; |
| 12 | using namespace cv::gpu; | 12 | using namespace cv::gpu; |
| 13 | 13 | ||
| 14 | -namespace br { namespace cuda { | 14 | +namespace br { namespace cuda { namespace affine { |
| 15 | 15 | ||
| 16 | - __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { | 16 | + __device__ __forceinline__ uint8_t getPixelValueDevice(int row, int col, uint8_t* srcPtr, int rows, int cols) { |
| 17 | if (row < 0 || row > rows || col < 0 || col > cols) { | 17 | if (row < 0 || row > rows || col < 0 || col > cols) { |
| 18 | if (row > rows || col > cols) { | 18 | if (row > rows || col > cols) { |
| 19 | return 0; | 19 | return 0; |
| 20 | } else{ | 20 | } else{ |
| 21 | - return 0; | 21 | + return 0; |
| 22 | } | 22 | } |
| 23 | } | 23 | } |
| 24 | return (srcPtr + row*cols)[col]; | 24 | return (srcPtr + row*cols)[col]; |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | 27 | ||
| 28 | - __device__ __forceinline__ uint8_t cudaaffine_kernel_get_bilinear_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) { | 28 | + __device__ __forceinline__ uint8_t getBilinearPixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { |
| 29 | // don't do anything if the index is out of bounds | 29 | // don't do anything if the index is out of bounds |
| 30 | if (row < 0 || row > rows || col < 0 || col > cols) { | 30 | if (row < 0 || row > rows || col < 0 || col > cols) { |
| 31 | if (row > rows || col > cols) { | 31 | if (row > rows || col > cols) { |
| 32 | return 0; | 32 | return 0; |
| 33 | } else{ | 33 | } else{ |
| 34 | - return 0; | 34 | + return 0; |
| 35 | } | 35 | } |
| 36 | } | 36 | } |
| 37 | - | 37 | + |
| 38 | // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf | 38 | // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf |
| 39 | // Bilinear Transformation | 39 | // Bilinear Transformation |
| 40 | // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy) | 40 | // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy)
| @@ -48,22 +48,22 @@ namespace br { namespace cuda { | @@ -48,22 +48,22 @@ namespace br { namespace cuda { | ||
| 48 | double d_row = row - row1; | 48 | double d_row = row - row1; |
| 49 | double d_col = col - col1; | 49 | double d_col = col - col1; |
| 50 | 50 | ||
| 51 | - int Q11 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols); | ||
| 52 | - int Q21 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols); | ||
| 53 | - int Q12 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols); | ||
| 54 | - int Q22 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols); | 51 | + int Q11 = getPixelValueDevice(row1, col1, srcPtr, rows, cols); |
| 52 | + int Q21 = getPixelValueDevice(row2, col1, srcPtr, rows, cols); | ||
| 53 | + int Q12 = getPixelValueDevice(row1, col2, srcPtr, rows, cols); | ||
| 54 | + int Q22 = getPixelValueDevice(row2, col2, srcPtr, rows, cols); | ||
| 55 | 55 | ||
| 56 | double val = Q22*(d_row*d_col) + Q12*((1-d_row)*d_col) + Q21*(d_row*(1-d_col)) + Q11*((1-d_row)*(1-d_col)); | 56 | double val = Q22*(d_row*d_col) + Q12*((1-d_row)*d_col) + Q21*(d_row*(1-d_col)) + Q11*((1-d_row)*(1-d_col)); |
| 57 | return ((uint8_t) round(val)); | 57 | return ((uint8_t) round(val)); |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | - __device__ __forceinline__ uint8_t cudaaffine_kernel_get_distance_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) { | 60 | + __device__ __forceinline__ uint8_t getDistancePixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { |
| 61 | // don't do anything if the index is out of bounds | 61 | // don't do anything if the index is out of bounds |
| 62 | if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { | 62 | if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { |
| 63 | if (row >= rows || col >= cols) { | 63 | if (row >= rows || col >= cols) { |
| 64 | return 0; | 64 | return 0; |
| 65 | } else{ | 65 | } else{ |
| 66 | - return 0; | 66 | + return 0; |
| 67 | } | 67 | } |
| 68 | } | 68 | } |
| 69 | 69 | ||
| @@ -90,10 +90,10 @@ namespace br { namespace cuda { | @@ -90,10 +90,10 @@ namespace br { namespace cuda { | ||
| 90 | double w3 = d3/sum; | 90 | double w3 = d3/sum; |
| 91 | double w4 = d4/sum; | 91 | double w4 = d4/sum; |
| 92 | 92 | ||
| 93 | - uint8_t v1 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols); | ||
| 94 | - uint8_t v2 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols); | ||
| 95 | - uint8_t v3 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols); | ||
| 96 | - uint8_t v4 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols); | 93 | + uint8_t v1 = getPixelValueDevice(row1, col1, srcPtr, rows, cols); |
| 94 | + uint8_t v2 = getPixelValueDevice(row2, col1, srcPtr, rows, cols); | ||
| 95 | + uint8_t v3 = getPixelValueDevice(row1, col2, srcPtr, rows, cols); | ||
| 96 | + uint8_t v4 = getPixelValueDevice(row2, col2, srcPtr, rows, cols); | ||
| 97 | 97 | ||
| 98 | return round(w1*v1 + w2*v2 + w3*v3 + w4*v4); | 98 | return round(w1*v1 + w2*v2 + w3*v3 + w4*v4); |
| 99 | } | 99 | } |
| @@ -105,16 +105,16 @@ namespace br { namespace cuda { | @@ -105,16 +105,16 @@ namespace br { namespace cuda { | ||
| 105 | * src_row - The computed source pixel row (mapping from this row) | 105 | * src_row - The computed source pixel row (mapping from this row) |
| 106 | * src_col - The computed source pixel column (mapping from this col) | 106 | * src_col - The computed source pixel column (mapping from this col) |
| 107 | */ | 107 | */ |
| 108 | - __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){ | 108 | + __device__ __forceinline__ void getSrcCoordDevice(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){ |
| 109 | *src_col_pnt = dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]; | 109 | *src_col_pnt = dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]; |
| 110 | *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; | 110 | *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; |
| 111 | 111 | ||
| 112 | //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); | 112 | //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); |
| 113 | 113 | ||
| 114 | } | 114 | } |
| 115 | - | ||
| 116 | 115 | ||
| 117 | - __global__ void cudaaffine_kernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ | 116 | + |
| 117 | + __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ | ||
| 118 | int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; | 118 | int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| 119 | int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; | 119 | int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; |
| 120 | int dstIndex = dstRowInd*dst_cols + dstColInd; | 120 | int dstIndex = dstRowInd*dst_cols + dstColInd; |
| @@ -134,15 +134,15 @@ namespace br { namespace cuda { | @@ -134,15 +134,15 @@ namespace br { namespace cuda { | ||
| 134 | } | 134 | } |
| 135 | } | 135 | } |
| 136 | 136 | ||
| 137 | - cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); | ||
| 138 | - //const uint8_t cval = cudaaffine_kernel_get_distance_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | ||
| 139 | - const uint8_t cval = cudaaffine_kernel_get_bilinear_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | ||
| 140 | - //const uint8_t cval = cudaaffine_kernel_get_pixel_value(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value | 137 | + getSrcCoordDevice(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); |
| 138 | + //const uint8_t cval = getDistancePixelValueDevice(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | ||
| 139 | + const uint8_t cval = getBilinearPixelValueDevice(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | ||
| 140 | + //const uint8_t cval = getPixelValueDevice(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value | ||
| 141 | 141 | ||
| 142 | dstPtr[dstIndex] = cval; | 142 | dstPtr[dstIndex] = cval; |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | - void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { | 145 | + void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { |
| 146 | cudaError_t err; | 146 | cudaError_t err; |
| 147 | double* gpuInverse; | 147 | double* gpuInverse; |
| 148 | 148 | ||
| @@ -152,7 +152,7 @@ namespace br { namespace cuda { | @@ -152,7 +152,7 @@ namespace br { namespace cuda { | ||
| 152 | 152 | ||
| 153 | //************************************************************************ | 153 | //************************************************************************ |
| 154 | // Input affine is a 2x3 Mat whose transpose is used in the computations | 154 | // Input affine is a 2x3 Mat whose transpose is used in the computations |
| 155 | - // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ] | 155 | + // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ] |
| 156 | // See "Digital Image Warping" by George Wolburg (p. 50) | 156 | // See "Digital Image Warping" by George Wolburg (p. 50) |
| 157 | //************************************************************************ | 157 | //************************************************************************ |
| 158 | 158 | ||
| @@ -210,7 +210,7 @@ namespace br { namespace cuda { | @@ -210,7 +210,7 @@ namespace br { namespace cuda { | ||
| 210 | 210 | ||
| 211 | CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err); | 211 | CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err); |
| 212 | 212 | ||
| 213 | - cudaaffine_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols); | 213 | + affineKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols); |
| 214 | CUDA_KERNEL_ERR_CHK(&err); | 214 | CUDA_KERNEL_ERR_CHK(&err); |
| 215 | 215 | ||
| 216 | CUDA_SAFE_FREE(srcPtr, &err); | 216 | CUDA_SAFE_FREE(srcPtr, &err); |
| @@ -225,5 +225,4 @@ namespace br { namespace cuda { | @@ -225,5 +225,4 @@ namespace br { namespace cuda { | ||
| 225 | // } | 225 | // } |
| 226 | // printf("\n"); | 226 | // printf("\n"); |
| 227 | } | 227 | } |
| 228 | -} // end cuda | ||
| 229 | -} // end br | 228 | +}}} |
openbr/plugins/cuda/cudacvtfloat.cpp
| @@ -7,7 +7,8 @@ using namespace cv; | @@ -7,7 +7,8 @@ using namespace cv; | ||
| 7 | 7 | ||
| 8 | #include <openbr/plugins/openbr_internal.h> | 8 | #include <openbr/plugins/openbr_internal.h> |
| 9 | 9 | ||
| 10 | -namespace br { namespace cuda { namespace cudacvtfloat { | 10 | +// definitions from the CUDA source file |
| 11 | +namespace br { namespace cuda { namespace cvtfloat { | ||
| 11 | void wrapper(void* src, void** dst, int rows, int cols); | 12 | void wrapper(void* src, void** dst, int rows, int cols); |
| 12 | }}} | 13 | }}} |
| 13 | 14 | ||
| @@ -16,7 +17,7 @@ namespace br | @@ -16,7 +17,7 @@ namespace br | ||
| 16 | 17 | ||
| 17 | /*! | 18 | /*! |
| 18 | * \ingroup transforms | 19 | * \ingroup transforms |
| 19 | - * \brief Converts byte to floating point | 20 | + * \brief Converts 8-bit images currently on GPU into 32-bit floating point equivalent. |
| 20 | * \author Colin Heinzmann \cite DepthDeluxe | 21 | * \author Colin Heinzmann \cite DepthDeluxe |
| 21 | */ | 22 | */ |
| 22 | class CUDACvtFloatTransform : public UntrainableTransform | 23 | class CUDACvtFloatTransform : public UntrainableTransform |
| @@ -45,7 +46,7 @@ class CUDACvtFloatTransform : public UntrainableTransform | @@ -45,7 +46,7 @@ class CUDACvtFloatTransform : public UntrainableTransform | ||
| 45 | dstDataPtr[2] = srcDataPtr[2]; | 46 | dstDataPtr[2] = srcDataPtr[2]; |
| 46 | dstDataPtr[3] = srcDataPtr[3]; *((int*)dstDataPtr[3]) = CV_32FC1; | 47 | dstDataPtr[3] = srcDataPtr[3]; *((int*)dstDataPtr[3]) = CV_32FC1; |
| 47 | 48 | ||
| 48 | - br::cuda::cudacvtfloat::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | 49 | + cuda::cvtfloat::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); |
| 49 | dst = dstMat; | 50 | dst = dstMat; |
| 50 | } | 51 | } |
| 51 | }; | 52 | }; |
openbr/plugins/cuda/cudacvtfloat.cu
| @@ -3,7 +3,7 @@ using namespace std; | @@ -3,7 +3,7 @@ using namespace std; | ||
| 3 | 3 | ||
| 4 | #include "cudadefines.hpp" | 4 | #include "cudadefines.hpp" |
| 5 | 5 | ||
| 6 | -namespace br { namespace cuda { namespace cudacvtfloat { | 6 | +namespace br { namespace cuda { namespace cvtfloat { |
| 7 | 7 | ||
| 8 | __global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) { | 8 | __global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) { |
| 9 | // get my index | 9 | // get my index |
| @@ -20,12 +20,6 @@ namespace br { namespace cuda { namespace cudacvtfloat { | @@ -20,12 +20,6 @@ namespace br { namespace cuda { namespace cudacvtfloat { | ||
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | void wrapper(void* src, void** dst, int rows, int cols) { | 22 | void wrapper(void* src, void** dst, int rows, int cols) { |
| 23 | - //unsigned char* cudaSrc; | ||
| 24 | - //cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char)); | ||
| 25 | - //cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice); | ||
| 26 | - | ||
| 27 | - //float* cudaDst; | ||
| 28 | - //cudaMalloc(&cudaDst, rows*cols*sizeof(float)); | ||
| 29 | cudaError_t err; | 23 | cudaError_t err; |
| 30 | CUDA_SAFE_MALLOC(dst, rows*cols*sizeof(float), &err); | 24 | CUDA_SAFE_MALLOC(dst, rows*cols*sizeof(float), &err); |
| 31 | 25 |
openbr/plugins/cuda/CUDAL2.cpp renamed to openbr/plugins/cuda/cudal2.cpp
| @@ -19,7 +19,8 @@ using namespace std; | @@ -19,7 +19,8 @@ using namespace std; | ||
| 19 | 19 | ||
| 20 | #include <openbr/plugins/openbr_internal.h> | 20 | #include <openbr/plugins/openbr_internal.h> |
| 21 | 21 | ||
| 22 | -namespace br { namespace cuda { namespace L2{ | 22 | +// definitions from the CUDA source file |
| 23 | +namespace br { namespace cuda { namespace L2 { | ||
| 23 | void wrapper(float* cudaAPtr, float* cudaBPtr, int length, float* outPtr); | 24 | void wrapper(float* cudaAPtr, float* cudaBPtr, int length, float* outPtr); |
| 24 | }}} | 25 | }}} |
| 25 | 26 | ||
| @@ -29,7 +30,7 @@ namespace br | @@ -29,7 +30,7 @@ namespace br | ||
| 29 | /*! | 30 | /*! |
| 30 | * \ingroup distances | 31 | * \ingroup distances |
| 31 | * \brief L2 distance computed using eigen. | 32 | * \brief L2 distance computed using eigen. |
| 32 | - * \author Josh Klontz \cite jklontz | 33 | + * \author Colin Heinzmann \cite DepthDeluxe |
| 33 | */ | 34 | */ |
| 34 | class CUDAL2Distance : public UntrainableDistance | 35 | class CUDAL2Distance : public UntrainableDistance |
| 35 | { | 36 | { |
| @@ -45,7 +46,7 @@ class CUDAL2Distance : public UntrainableDistance | @@ -45,7 +46,7 @@ class CUDAL2Distance : public UntrainableDistance | ||
| 45 | float* cudaBPtr = (float*)b.ptr<void*>()[0]; | 46 | float* cudaBPtr = (float*)b.ptr<void*>()[0]; |
| 46 | 47 | ||
| 47 | float out; | 48 | float out; |
| 48 | - br::cuda::L2::wrapper(cudaAPtr, cudaBPtr, rows*cols, &out); | 49 | + cuda::L2::wrapper(cudaAPtr, cudaBPtr, rows*cols, &out); |
| 49 | 50 | ||
| 50 | return out; | 51 | return out; |
| 51 | } | 52 | } |
| @@ -55,4 +56,4 @@ BR_REGISTER(Distance, CUDAL2Distance) | @@ -55,4 +56,4 @@ BR_REGISTER(Distance, CUDAL2Distance) | ||
| 55 | 56 | ||
| 56 | } // namespace br | 57 | } // namespace br |
| 57 | 58 | ||
| 58 | -#include "cuda/CUDAL2.moc" | 59 | +#include "cuda/cudal2.moc" |
openbr/plugins/cuda/CUDAL2.cu renamed to openbr/plugins/cuda/cudal2.cu
| @@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
| 4 | 4 | ||
| 5 | namespace br { namespace cuda { namespace L2 { | 5 | namespace br { namespace cuda { namespace L2 { |
| 6 | 6 | ||
| 7 | - __global__ void my_subtract_kernel(float* aPtr, float* bPtr, float* workPtr, int length) { | 7 | + __global__ void subtractKernel(float* aPtr, float* bPtr, float* workPtr, int length) { |
| 8 | int index = blockIdx.x*blockDim.x+threadIdx.x; | 8 | int index = blockIdx.x*blockDim.x+threadIdx.x; |
| 9 | 9 | ||
| 10 | if (index >= length) { | 10 | if (index >= length) { |
| @@ -18,7 +18,7 @@ namespace br { namespace cuda { namespace L2 { | @@ -18,7 +18,7 @@ namespace br { namespace cuda { namespace L2 { | ||
| 18 | workPtr[index] = workPtr[index] * workPtr[index]; | 18 | workPtr[index] = workPtr[index] * workPtr[index]; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | - __global__ void collapse_kernel(float* inPtr, float* outPtr, int length) { | 21 | + __global__ void collapseKernel(float* inPtr, float* outPtr, int length) { |
| 22 | // make sure there is only one thread that we are calling | 22 | // make sure there is only one thread that we are calling |
| 23 | if (blockIdx.x != 0 || threadIdx.x != 0) { | 23 | if (blockIdx.x != 0 || threadIdx.x != 0) { |
| 24 | return; | 24 | return; |
| @@ -45,11 +45,11 @@ namespace br { namespace cuda { namespace L2 { | @@ -45,11 +45,11 @@ namespace br { namespace cuda { namespace L2 { | ||
| 45 | // perform the subtraction | 45 | // perform the subtraction |
| 46 | int threadsPerBlock = 64; | 46 | int threadsPerBlock = 64; |
| 47 | int numBlocks = length / threadsPerBlock + 1; | 47 | int numBlocks = length / threadsPerBlock + 1; |
| 48 | - my_subtract_kernel<<<threadsPerBlock, numBlocks>>>(cudaAPtr, cudaBPtr, cudaWorkBufferPtr, length); | 48 | + subtractKernel<<<threadsPerBlock, numBlocks>>>(cudaAPtr, cudaBPtr, cudaWorkBufferPtr, length); |
| 49 | CUDA_KERNEL_ERR_CHK(&err); | 49 | CUDA_KERNEL_ERR_CHK(&err); |
| 50 | 50 | ||
| 51 | // perform the collapse | 51 | // perform the collapse |
| 52 | - collapse_kernel<<<1,1>>>(cudaWorkBufferPtr, cudaOutPtr, length); | 52 | + collapseKernel<<<1,1>>>(cudaWorkBufferPtr, cudaOutPtr, length); |
| 53 | CUDA_KERNEL_ERR_CHK(&err); | 53 | CUDA_KERNEL_ERR_CHK(&err); |
| 54 | 54 | ||
| 55 | // copy the single value back to the destinsion | 55 | // copy the single value back to the destinsion |
| @@ -63,6 +63,3 @@ namespace br { namespace cuda { namespace L2 { | @@ -63,6 +63,3 @@ namespace br { namespace cuda { namespace L2 { | ||
| 63 | CUDA_SAFE_FREE(cudaWorkBufferPtr, &err); | 63 | CUDA_SAFE_FREE(cudaWorkBufferPtr, &err); |
| 64 | } | 64 | } |
| 65 | }}} | 65 | }}} |
| 66 | - | ||
| 67 | -// 128CUDAEigenfaces on 6400 ATT: 54.367s | ||
| 68 | -// 128CUDAEigenfacesL2 on 6400 ATT: |
openbr/plugins/cuda/cudalbp.cpp
| @@ -31,44 +31,21 @@ using namespace std; | @@ -31,44 +31,21 @@ using namespace std; | ||
| 31 | 31 | ||
| 32 | #include <openbr/plugins/openbr_internal.h> | 32 | #include <openbr/plugins/openbr_internal.h> |
| 33 | 33 | ||
| 34 | -#include "MatManager.hpp" | ||
| 35 | - | ||
| 36 | using namespace cv; | 34 | using namespace cv; |
| 37 | 35 | ||
| 38 | -string type2str(int type) { | ||
| 39 | - string r; | ||
| 40 | - | ||
| 41 | - uchar depth = type & CV_MAT_DEPTH_MASK; | ||
| 42 | - uchar chans = 1 + (type >> CV_CN_SHIFT); | ||
| 43 | - | ||
| 44 | - switch ( depth ) { | ||
| 45 | - case CV_8U: r = "8U"; break; | ||
| 46 | - case CV_8S: r = "8S"; break; | ||
| 47 | - case CV_16U: r = "16U"; break; | ||
| 48 | - case CV_16S: r = "16S"; break; | ||
| 49 | - case CV_32S: r = "32S"; break; | ||
| 50 | - case CV_32F: r = "32F"; break; | ||
| 51 | - case CV_64F: r = "64F"; break; | ||
| 52 | - default: r = "User"; break; | ||
| 53 | - } | ||
| 54 | - | ||
| 55 | - r += "C"; | ||
| 56 | - r += (chans+'0'); | ||
| 57 | - | ||
| 58 | - return r; | ||
| 59 | -} | ||
| 60 | - | ||
| 61 | -namespace br { namespace cuda { | ||
| 62 | - void cudalbp_wrapper(void* srcPtr, void** dstPtr, int rows, int cols); | ||
| 63 | - void cudalbp_init_wrapper(uint8_t* lut); | ||
| 64 | -}} | 36 | +// definitions from the CUDA source file |
| 37 | +namespace br { namespace cuda { namespace lbp { | ||
| 38 | + void wrapper(void* srcPtr, void** dstPtr, int rows, int cols); | ||
| 39 | + void initializeWrapper(uint8_t* lut); | ||
| 40 | +}}} | ||
| 65 | 41 | ||
| 66 | namespace br | 42 | namespace br |
| 67 | { | 43 | { |
| 68 | /*! | 44 | /*! |
| 69 | * \ingroup transforms | 45 | * \ingroup transforms |
| 70 | - * \brief Convert the image into a feature vector using Local Binary Patterns in CUDA | ||
| 71 | - * \author Colin Heinzmann, Li Li \cite DepthDeluxe, booli | 46 | + * \brief Convert the image into a feature vector using Local Binary Patterns in CUDA. Modified from stock OpenBR plugin. |
| 47 | + * \author Colin Heinzmann \cite DepthDeluxe | ||
| 48 | + * \author Li Li \cite booli | ||
| 72 | */ | 49 | */ |
| 73 | class CUDALBPTransform : public UntrainableTransform | 50 | class CUDALBPTransform : public UntrainableTransform |
| 74 | { | 51 | { |
| @@ -84,8 +61,6 @@ class CUDALBPTransform : public UntrainableTransform | @@ -84,8 +61,6 @@ class CUDALBPTransform : public UntrainableTransform | ||
| 84 | uchar lut[256]; | 61 | uchar lut[256]; |
| 85 | uchar null; | 62 | uchar null; |
| 86 | 63 | ||
| 87 | - //cuda::MatManager* matManager; | ||
| 88 | - | ||
| 89 | public: | 64 | public: |
| 90 | /* Returns the number of 0->1 or 1->0 transitions in i */ | 65 | /* Returns the number of 0->1 or 1->0 transitions in i */ |
| 91 | static int numTransitions(int i) | 66 | static int numTransitions(int i) |
| @@ -136,36 +111,14 @@ class CUDALBPTransform : public UntrainableTransform | @@ -136,36 +111,14 @@ class CUDALBPTransform : public UntrainableTransform | ||
| 136 | if (!set[i]) | 111 | if (!set[i]) |
| 137 | lut[i] = null; // Set to null id | 112 | lut[i] = null; // Set to null id |
| 138 | 113 | ||
| 139 | - // init the mat manager for managing 10 mats | ||
| 140 | - //matManager = new cuda::MatManager(10); | ||
| 141 | - | ||
| 142 | // copy lut over to the GPU | 114 | // copy lut over to the GPU |
| 143 | - br::cuda::cudalbp_init_wrapper(lut); | 115 | + cuda::lbp::initializeWrapper(lut); |
| 144 | 116 | ||
| 145 | std::cout << "Initialized CUDALBP" << std::endl; | 117 | std::cout << "Initialized CUDALBP" << std::endl; |
| 146 | } | 118 | } |
| 147 | 119 | ||
| 148 | void project(const Template &src, Template &dst) const | 120 | void project(const Template &src, Template &dst) const |
| 149 | { | 121 | { |
| 150 | - //Mat& m = (Mat&)src.m(); | ||
| 151 | - //cuda::MatManager::matindex a; | ||
| 152 | - //cuda::MatManager::matindex b; | ||
| 153 | - //a = matManager->reserve(m); | ||
| 154 | - //matManager->upload(a, m); | ||
| 155 | - | ||
| 156 | - // reserve the second mat and check the dimensiosn | ||
| 157 | - //b = matManager->reserve(m); | ||
| 158 | - | ||
| 159 | - //uint8_t* srcMatPtr = matManager->get_mat_pointer_from_index(a); | ||
| 160 | - //uint8_t* dstMatPtr = matManager->get_mat_pointer_from_index(b); | ||
| 161 | - //br::cuda::cudalbp_wrapper(srcMatPtr, dstMatPtr, lutGpuPtr, m.cols, m.rows, m.step1()); | ||
| 162 | - | ||
| 163 | - //matManager->download(b, dst); | ||
| 164 | - | ||
| 165 | - // release both the mats | ||
| 166 | - //matManager->release(a); | ||
| 167 | - //matManager->release(b); | ||
| 168 | - | ||
| 169 | void* const* srcDataPtr = src.m().ptr<void*>(); | 122 | void* const* srcDataPtr = src.m().ptr<void*>(); |
| 170 | int rows = *((int*)srcDataPtr[1]); | 123 | int rows = *((int*)srcDataPtr[1]); |
| 171 | int cols = *((int*)srcDataPtr[2]); | 124 | int cols = *((int*)srcDataPtr[2]); |
| @@ -177,13 +130,13 @@ class CUDALBPTransform : public UntrainableTransform | @@ -177,13 +130,13 @@ class CUDALBPTransform : public UntrainableTransform | ||
| 177 | dstDataPtr[2] = srcDataPtr[2]; | 130 | dstDataPtr[2] = srcDataPtr[2]; |
| 178 | dstDataPtr[3] = srcDataPtr[3]; | 131 | dstDataPtr[3] = srcDataPtr[3]; |
| 179 | 132 | ||
| 180 | - br::cuda::cudalbp_wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | 133 | + cuda::lbp::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); |
| 181 | dst = dstMat; | 134 | dst = dstMat; |
| 182 | } | 135 | } |
| 183 | }; | 136 | }; |
| 184 | 137 | ||
| 185 | BR_REGISTER(Transform, CUDALBPTransform) | 138 | BR_REGISTER(Transform, CUDALBPTransform) |
| 186 | 139 | ||
| 187 | -} // namespace br | 140 | +} |
| 188 | 141 | ||
| 189 | #include "cuda/cudalbp.moc" | 142 | #include "cuda/cudalbp.moc" |
openbr/plugins/cuda/cudalbp.cu
| @@ -9,14 +9,18 @@ using namespace std; | @@ -9,14 +9,18 @@ using namespace std; | ||
| 9 | using namespace cv; | 9 | using namespace cv; |
| 10 | using namespace cv::gpu; | 10 | using namespace cv::gpu; |
| 11 | 11 | ||
| 12 | -namespace br { namespace cuda { | 12 | +/* |
| 13 | + * These are the CUDA functions for CUDALBP. See cudapca.cpp for more details | ||
| 14 | + */ | ||
| 15 | + | ||
| 16 | +namespace br { namespace cuda { namespace lbp { | ||
| 13 | uint8_t* lut; | 17 | uint8_t* lut; |
| 14 | 18 | ||
| 15 | - __device__ __forceinline__ uint8_t cudalbp_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { | 19 | + __device__ __forceinline__ uint8_t getPixelValueKernel(int row, int col, uint8_t* srcPtr, int rows, int cols) { |
| 16 | return (srcPtr + row*cols)[col]; | 20 | return (srcPtr + row*cols)[col]; |
| 17 | } | 21 | } |
| 18 | 22 | ||
| 19 | - __global__ void cudalbp_kernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols, uint8_t* lut) | 23 | + __global__ void lutKernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols, uint8_t* lut) |
| 20 | { | 24 | { |
| 21 | int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | 25 | int rowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| 22 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; | 26 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| @@ -34,22 +38,22 @@ namespace br { namespace cuda { | @@ -34,22 +38,22 @@ namespace br { namespace cuda { | ||
| 34 | } | 38 | } |
| 35 | } | 39 | } |
| 36 | 40 | ||
| 37 | - const uint8_t cval = cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+0*radius, srcPtr, rows, cols);//(srcPtr[(rowInd*srcStep+0*radius)*m.cols+colInd+0*radius]); // center value | ||
| 38 | - uint8_t val = lut[(cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 128 : 0) | | ||
| 39 | - (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 64 : 0) | | ||
| 40 | - (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 32 : 0) | | ||
| 41 | - (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 16 : 0) | | ||
| 42 | - (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 8 : 0) | | ||
| 43 | - (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 4 : 0) | | ||
| 44 | - (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 2 : 0) | | ||
| 45 | - (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 1 : 0)]; | 41 | + const uint8_t cval = getPixelValueKernel(rowInd+0*radius, colInd+0*radius, srcPtr, rows, cols);//(srcPtr[(rowInd*srcStep+0*radius)*m.cols+colInd+0*radius]); // center value |
| 42 | + uint8_t val = lut[(getPixelValueKernel(rowInd-1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 128 : 0) | | ||
| 43 | + (getPixelValueKernel(rowInd-1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 64 : 0) | | ||
| 44 | + (getPixelValueKernel(rowInd-1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 32 : 0) | | ||
| 45 | + (getPixelValueKernel(rowInd+0*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 16 : 0) | | ||
| 46 | + (getPixelValueKernel(rowInd+1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 8 : 0) | | ||
| 47 | + (getPixelValueKernel(rowInd+1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 4 : 0) | | ||
| 48 | + (getPixelValueKernel(rowInd+1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 2 : 0) | | ||
| 49 | + (getPixelValueKernel(rowInd+0*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 1 : 0)]; | ||
| 46 | 50 | ||
| 47 | // store calculated value away in the right place | 51 | // store calculated value away in the right place |
| 48 | dstPtr[index] = val; | 52 | dstPtr[index] = val; |
| 49 | } | 53 | } |
| 50 | 54 | ||
| 51 | //void cudalbp_wrapper(uint8_t* srcPtr, uint8_t* dstPtr, uint8_t* lut, int imageWidth, int imageHeight, size_t step) | 55 | //void cudalbp_wrapper(uint8_t* srcPtr, uint8_t* dstPtr, uint8_t* lut, int imageWidth, int imageHeight, size_t step) |
| 52 | - void cudalbp_wrapper(void* srcPtr, void** dstPtr, int rows, int cols) | 56 | + void wrapper(void* srcPtr, void** dstPtr, int rows, int cols) |
| 53 | { | 57 | { |
| 54 | cudaError_t err; | 58 | cudaError_t err; |
| 55 | 59 | ||
| @@ -59,15 +63,15 @@ namespace br { namespace cuda { | @@ -59,15 +63,15 @@ namespace br { namespace cuda { | ||
| 59 | rows/threadsPerBlock.y + 1); | 63 | rows/threadsPerBlock.y + 1); |
| 60 | 64 | ||
| 61 | CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err); | 65 | CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err); |
| 62 | - cudalbp_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), rows, cols, lut); | 66 | + lutKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), rows, cols, lut); |
| 63 | CUDA_KERNEL_ERR_CHK(&err); | 67 | CUDA_KERNEL_ERR_CHK(&err); |
| 64 | 68 | ||
| 65 | CUDA_SAFE_FREE(srcPtr, &err); | 69 | CUDA_SAFE_FREE(srcPtr, &err); |
| 66 | } | 70 | } |
| 67 | 71 | ||
| 68 | - void cudalbp_init_wrapper(uint8_t* cpuLut) { | 72 | + void initializeWrapper(uint8_t* cpuLut) { |
| 69 | cudaError_t err; | 73 | cudaError_t err; |
| 70 | CUDA_SAFE_MALLOC(&lut, 256*sizeof(uint8_t), &err); | 74 | CUDA_SAFE_MALLOC(&lut, 256*sizeof(uint8_t), &err); |
| 71 | CUDA_SAFE_MEMCPY(lut, cpuLut, 256*sizeof(uint8_t), cudaMemcpyHostToDevice, &err); | 75 | CUDA_SAFE_MEMCPY(lut, cpuLut, 256*sizeof(uint8_t), cudaMemcpyHostToDevice, &err); |
| 72 | } | 76 | } |
| 73 | -}} | 77 | +}}} |
openbr/plugins/cuda/cudapca.cpp
| @@ -29,8 +29,9 @@ using namespace cv; | @@ -29,8 +29,9 @@ using namespace cv; | ||
| 29 | #include <openbr/core/eigenutils.h> | 29 | #include <openbr/core/eigenutils.h> |
| 30 | #include <openbr/core/opencvutils.h> | 30 | #include <openbr/core/opencvutils.h> |
| 31 | 31 | ||
| 32 | +// definitions from the CUDA source file | ||
| 32 | namespace br { namespace cuda { namespace pca { | 33 | namespace br { namespace cuda { namespace pca { |
| 33 | - void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); | 34 | + void initializeWrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); |
| 34 | void wrapper(void* src, void** dst); | 35 | void wrapper(void* src, void** dst); |
| 35 | }}} | 36 | }}} |
| 36 | 37 | ||
| @@ -38,9 +39,7 @@ namespace br | @@ -38,9 +39,7 @@ namespace br | ||
| 38 | { | 39 | { |
| 39 | /*! | 40 | /*! |
| 40 | * \ingroup transforms | 41 | * \ingroup transforms |
| 41 | - * \brief Projects input into learned Principal Component Analysis subspace using CUDA. | ||
| 42 | - * \author Brendan Klare \cite bklare | ||
| 43 | - * \author Josh Klontz \cite jklontz | 42 | + * \brief Projects input into learned Principal Component Analysis subspace using CUDA. Modified from original PCA plugin. |
| 44 | * \author Colin Heinzmann \cite DepthDeluxe | 43 | * \author Colin Heinzmann \cite DepthDeluxe |
| 45 | * | 44 | * |
| 46 | * \br_property float keep Options are: [keep < 0 - All eigenvalues are retained, keep == 0 - No PCA is performed and the eigenvectors form an identity matrix, 0 < keep < 1 - Keep is the fraction of the variance to retain, keep >= 1 - keep is the number of leading eigenvectors to retain] Default is 0.95. | 45 | * \br_property float keep Options are: [keep < 0 - All eigenvalues are retained, keep == 0 - No PCA is performed and the eigenvectors form an identity matrix, 0 < keep < 1 - Keep is the fraction of the variance to retain, keep >= 1 - keep is the number of leading eigenvectors to retain] Default is 0.95. |
| @@ -134,22 +133,9 @@ private: | @@ -134,22 +133,9 @@ private: | ||
| 134 | dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep; | 133 | dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep; |
| 135 | dstDataPtr[3] = srcDataPtr[3]; | 134 | dstDataPtr[3] = srcDataPtr[3]; |
| 136 | 135 | ||
| 137 | - br::cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]); | 136 | + cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]); |
| 138 | 137 | ||
| 139 | dst = dstMat; | 138 | dst = dstMat; |
| 140 | - | ||
| 141 | - //dst = cv::Mat(1, keep, CV_32FC1); | ||
| 142 | - | ||
| 143 | - // perform the operation on the graphics card | ||
| 144 | - //cuda::cudapca_projectwrapper((float*)src.m().ptr<float>(), (float*)dst.m().ptr<float>()); | ||
| 145 | - | ||
| 146 | - // Map Eigen into OpenCV | ||
| 147 | - //Mat cpuDst = cv::Mat(1, keep, CV_32FC1); | ||
| 148 | - //Eigen::Map<const Eigen::MatrixXf> inMap(src.m().ptr<float>(), src.m().rows*src.m().cols, 1); | ||
| 149 | - //Eigen::Map<Eigen::MatrixXf> outMap(cpuDst.ptr<float>(), keep, 1); | ||
| 150 | - | ||
| 151 | - // Do projection | ||
| 152 | - //outMap = eVecs.transpose() * (inMap - mean); | ||
| 153 | } | 139 | } |
| 154 | 140 | ||
| 155 | void store(QDataStream &stream) const | 141 | void store(QDataStream &stream) const |
| @@ -161,14 +147,6 @@ private: | @@ -161,14 +147,6 @@ private: | ||
| 161 | { | 147 | { |
| 162 | stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs; | 148 | stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs; |
| 163 | 149 | ||
| 164 | - cout << "Mean Dimensions" << endl; | ||
| 165 | - cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl; | ||
| 166 | - cout << "eVecs Dimensions" << endl; | ||
| 167 | - cout << "\tRows: " << eVecs.rows() << " Cols: " << eVecs.cols() << endl; | ||
| 168 | - cout << "eVals Dimensions" << endl; | ||
| 169 | - cout << "\tRows: " << eVals.rows() << " Cols: " << eVals.cols() << endl; | ||
| 170 | - cout << "Keep: " << keep << endl; | ||
| 171 | - | ||
| 172 | // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data | 150 | // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data |
| 173 | // serialize the eigenvectors | 151 | // serialize the eigenvectors |
| 174 | float* evBuffer = new float[eVecs.rows() * eVecs.cols()]; | 152 | float* evBuffer = new float[eVecs.rows() * eVecs.cols()]; |
| @@ -187,7 +165,7 @@ private: | @@ -187,7 +165,7 @@ private: | ||
| 187 | } | 165 | } |
| 188 | 166 | ||
| 189 | // call the wrapper function | 167 | // call the wrapper function |
| 190 | - br::cuda::pca::loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); | 168 | + cuda::pca::initializeWrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); |
| 191 | 169 | ||
| 192 | delete evBuffer; | 170 | delete evBuffer; |
| 193 | delete meanBuffer; | 171 | delete meanBuffer; |
openbr/plugins/cuda/cudapca.cu
| @@ -9,6 +9,10 @@ using namespace std; | @@ -9,6 +9,10 @@ using namespace std; | ||
| 9 | using namespace cv; | 9 | using namespace cv; |
| 10 | using namespace cv::gpu; | 10 | using namespace cv::gpu; |
| 11 | 11 | ||
| 12 | +/* | ||
| 13 | + * These are the CUDA functions for CUDAPCA. See cudapca.cpp for more details | ||
| 14 | + */ | ||
| 15 | + | ||
| 12 | namespace br { namespace cuda { namespace pca { | 16 | namespace br { namespace cuda { namespace pca { |
| 13 | __global__ void multiplyKernel(float* src, float* intermediaryBuffer, float* evPtr, int evRows, int evCols, int stepSize) { | 17 | __global__ void multiplyKernel(float* src, float* intermediaryBuffer, float* evPtr, int evRows, int evCols, int stepSize) { |
| 14 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; | 18 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| @@ -68,7 +72,7 @@ namespace br { namespace cuda { namespace pca { | @@ -68,7 +72,7 @@ namespace br { namespace cuda { namespace pca { | ||
| 68 | int _numSteps; int _stepSize; | 72 | int _numSteps; int _stepSize; |
| 69 | float* intermediaryBuffer; | 73 | float* intermediaryBuffer; |
| 70 | 74 | ||
| 71 | - void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { | 75 | + void initializeWrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { |
| 72 | _evRows = evRows; _evCols = evCols; | 76 | _evRows = evRows; _evCols = evCols; |
| 73 | _meanElems = meanElems; | 77 | _meanElems = meanElems; |
| 74 | 78 | ||
| @@ -95,7 +99,6 @@ namespace br { namespace cuda { namespace pca { | @@ -95,7 +99,6 @@ namespace br { namespace cuda { namespace pca { | ||
| 95 | cudaError_t err; | 99 | cudaError_t err; |
| 96 | CUDA_SAFE_MALLOC(dst, _evCols*sizeof(float), &err); | 100 | CUDA_SAFE_MALLOC(dst, _evCols*sizeof(float), &err); |
| 97 | 101 | ||
| 98 | - | ||
| 99 | // subtract out the mean of the image (mean is 1xpixels in size) | 102 | // subtract out the mean of the image (mean is 1xpixels in size) |
| 100 | int threadsPerBlock = 64; | 103 | int threadsPerBlock = 64; |
| 101 | int numBlocks = _meanElems / threadsPerBlock + 1; | 104 | int numBlocks = _meanElems / threadsPerBlock + 1; |
| @@ -114,8 +117,5 @@ namespace br { namespace cuda { namespace pca { | @@ -114,8 +117,5 @@ namespace br { namespace cuda { namespace pca { | ||
| 114 | CUDA_KERNEL_ERR_CHK(&err); | 117 | CUDA_KERNEL_ERR_CHK(&err); |
| 115 | 118 | ||
| 116 | CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption... | 119 | CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption... |
| 117 | - | ||
| 118 | - // copy the data back to the CPU | ||
| 119 | - //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); | ||
| 120 | } | 120 | } |
| 121 | }}} | 121 | }}} |
openbr/plugins/cuda/cudargb2grayscale.cpp
| @@ -25,17 +25,18 @@ | @@ -25,17 +25,18 @@ | ||
| 25 | 25 | ||
| 26 | using namespace cv; | 26 | using namespace cv; |
| 27 | 27 | ||
| 28 | -namespace br { namespace cuda{ | ||
| 29 | - void cudargb2grayscale_wrapper(void* srcPtr, void**dstPtr, int rows, int cols); | ||
| 30 | -}} | 28 | +// definitions from the CUDA source file |
| 29 | +namespace br { namespace cuda { namespace rgb2grayscale { | ||
| 30 | + void wrapper(void* srcPtr, void**dstPtr, int rows, int cols); | ||
| 31 | +}}} | ||
| 31 | 32 | ||
| 32 | namespace br | 33 | namespace br |
| 33 | { | 34 | { |
| 34 | 35 | ||
| 35 | /*! | 36 | /*! |
| 36 | * \ingroup transforms | 37 | * \ingroup transforms |
| 37 | - * \brief Colorspace conversion. | ||
| 38 | - * \author Li Li \cite Josh Klontz \cite jklontz | 38 | + * \brief Converts 3-channel images to grayscale |
| 39 | + * \author Li Li \cite booli | ||
| 39 | */ | 40 | */ |
| 40 | class CUDARGB2GrayScaleTransform : public UntrainableTransform | 41 | class CUDARGB2GrayScaleTransform : public UntrainableTransform |
| 41 | { | 42 | { |
| @@ -57,8 +58,8 @@ private: | @@ -57,8 +58,8 @@ private: | ||
| 57 | dstDataPtr[2] = srcDataPtr[2]; | 58 | dstDataPtr[2] = srcDataPtr[2]; |
| 58 | dstDataPtr[3] = srcDataPtr[3]; | 59 | dstDataPtr[3] = srcDataPtr[3]; |
| 59 | *((int*)dstDataPtr[3]) = CV_8UC1; // not sure if the type of the new mat is the same | 60 | *((int*)dstDataPtr[3]) = CV_8UC1; // not sure if the type of the new mat is the same |
| 60 | - | ||
| 61 | - br::cuda::cudargb2grayscale_wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | 61 | + |
| 62 | + cuda::rgb2grayscale::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | ||
| 62 | dst = dstMat; | 63 | dst = dstMat; |
| 63 | 64 | ||
| 64 | /* | 65 | /* |
openbr/plugins/cuda/cudargb2grayscale.cu
| @@ -12,9 +12,9 @@ using namespace std; | @@ -12,9 +12,9 @@ using namespace std; | ||
| 12 | using namespace cv; | 12 | using namespace cv; |
| 13 | using namespace cv::gpu; | 13 | using namespace cv::gpu; |
| 14 | 14 | ||
| 15 | -namespace br{ namespace cuda { | 15 | +namespace br { namespace cuda { namespace rgb2grayscale { |
| 16 | 16 | ||
| 17 | - __global__ void cudargb2grayscale_kernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols) | 17 | + __global__ void kernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols) |
| 18 | { | 18 | { |
| 19 | int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | 19 | int rowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| 20 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; | 20 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| @@ -31,7 +31,7 @@ namespace br{ namespace cuda { | @@ -31,7 +31,7 @@ namespace br{ namespace cuda { | ||
| 31 | return; | 31 | return; |
| 32 | } | 32 | } |
| 33 | 33 | ||
| 34 | - void cudargb2grayscale_wrapper(void* srcPtr, void** dstPtr, int rows, int cols) | 34 | + void wrapper(void* srcPtr, void** dstPtr, int rows, int cols) |
| 35 | { | 35 | { |
| 36 | cudaError_t err; | 36 | cudaError_t err; |
| 37 | dim3 threadsPerBlock(9, 9); | 37 | dim3 threadsPerBlock(9, 9); |
| @@ -39,9 +39,9 @@ namespace br{ namespace cuda { | @@ -39,9 +39,9 @@ namespace br{ namespace cuda { | ||
| 39 | rows/threadsPerBlock.y + 1); | 39 | rows/threadsPerBlock.y + 1); |
| 40 | CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err); | 40 | CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err); |
| 41 | 41 | ||
| 42 | - cudargb2grayscale_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*) (*dstPtr), rows, cols); | 42 | + kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*) (*dstPtr), rows, cols); |
| 43 | CUDA_KERNEL_ERR_CHK(&err); | 43 | CUDA_KERNEL_ERR_CHK(&err); |
| 44 | CUDA_SAFE_FREE(srcPtr, &err); | 44 | CUDA_SAFE_FREE(srcPtr, &err); |
| 45 | - } | 45 | + } |
| 46 | 46 | ||
| 47 | -}} | 47 | +}}} |
openbr/plugins/cuda/passthrough.cpp deleted
| 1 | -#include <openbr/plugins/openbr_internal.h> | ||
| 2 | - | ||
| 3 | -#include <opencv2/imgproc/imgproc.hpp> | ||
| 4 | -#include <opencv2/gpu/gpu.hpp> | ||
| 5 | - | ||
| 6 | -using namespace cv; | ||
| 7 | -using namespace cv::gpu; | ||
| 8 | - | ||
| 9 | -#include "passthrough.hpp" | ||
| 10 | - | ||
| 11 | -#include <iostream> | ||
| 12 | - | ||
| 13 | - | ||
| 14 | -namespace br | ||
| 15 | -{ | ||
| 16 | - class CUDAPassthroughTransform : public UntrainableTransform | ||
| 17 | - { | ||
| 18 | - Q_OBJECT | ||
| 19 | - | ||
| 20 | -private: | ||
| 21 | - void project(const Template &src, Template &dst) const | ||
| 22 | - { | ||
| 23 | - // note: if you convert the image to grayscale, you get 8UC1 | ||
| 24 | - | ||
| 25 | - // upload the src mat to the GPU | ||
| 26 | - GpuMat srcGpuMat, dstGpuMat; | ||
| 27 | - srcGpuMat.upload(src.m()); | ||
| 28 | - dstGpuMat.upload(src.m()); | ||
| 29 | - | ||
| 30 | - br::cuda::passthrough_wrapper(srcGpuMat, dstGpuMat); | ||
| 31 | - | ||
| 32 | - dstGpuMat.download(dst.m()); | ||
| 33 | - | ||
| 34 | - // TODO(colin): add delete code | ||
| 35 | - srcGpuMat.release(); | ||
| 36 | - dstGpuMat.release(); | ||
| 37 | - | ||
| 38 | - printf("srcGpuMat empty: %d\n", (int)srcGpuMat.empty()); | ||
| 39 | - printf("dstGpuMat empty: %d\n", (int)srcGpuMat.empty()); | ||
| 40 | - } | ||
| 41 | - }; | ||
| 42 | - | ||
| 43 | - BR_REGISTER(Transform, CUDAPassthroughTransform); | ||
| 44 | -} | ||
| 45 | - | ||
| 46 | -#include "cuda/passthrough.moc" |
openbr/plugins/cuda/passthrough.cu deleted
| 1 | -// note: Using 8-bit unsigned 1 channel images | ||
| 2 | - | ||
| 3 | -#include <opencv2/gpu/gpu.hpp> | ||
| 4 | - | ||
| 5 | -using namespace cv; | ||
| 6 | -using namespace cv::gpu; | ||
| 7 | - | ||
| 8 | -#include "passthrough.hpp" | ||
| 9 | - | ||
| 10 | -namespace br { namespace cuda { | ||
| 11 | - __global__ void passthrough_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) { | ||
| 12 | - int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | ||
| 13 | - int colInd = blockIdx.x*blockDim.x+threadIdx.x; | ||
| 14 | - | ||
| 15 | - // don't do anything if we are outside the allowable positions | ||
| 16 | - if (rowInd >= rows || colInd >= cols) | ||
| 17 | - return; | ||
| 18 | - | ||
| 19 | - uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd]; | ||
| 20 | - uint8_t* rowDstPtr = dstPtr + rowInd*dstStep; | ||
| 21 | - | ||
| 22 | - rowDstPtr[colInd] = srcVal; | ||
| 23 | - } | ||
| 24 | - | ||
| 25 | - void passthrough_wrapper(GpuMat& src, GpuMat& dst) { | ||
| 26 | - // convert the GpuMats to pointers | ||
| 27 | - uint8_t* srcPtr = (uint8_t*)src.data; | ||
| 28 | - uint8_t* dstPtr = (uint8_t*)dst.data; | ||
| 29 | - | ||
| 30 | - int imageWidth = src.cols; | ||
| 31 | - int imageHeight = src.rows; | ||
| 32 | - | ||
| 33 | - // make 8 * 8 = 64 square block | ||
| 34 | - dim3 threadsPerBlock(8, 8); | ||
| 35 | - dim3 numBlocks(imageWidth / threadsPerBlock.x + 1, | ||
| 36 | - imageHeight / threadsPerBlock.y + 1); | ||
| 37 | - | ||
| 38 | - passthrough_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight); | ||
| 39 | - } | ||
| 40 | -}} | ||
| 41 | - | ||
| 42 | - | ||
| 43 | -// read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel |
openbr/plugins/cuda/passthrough.hpp deleted