Commit 1dabf42dad2e33b7a9e3d11b4142a98fd53c5f9d
1 parent
e600a994
code refactoring for pull request
Showing
21 changed files
with
272 additions
and
429 deletions
openbr/plugins/cuda/MatManager.cu renamed to openbr/core/cuda/MatManager.cu
openbr/plugins/cuda/MatManager.hpp renamed to openbr/core/cuda/MatManager.hpp
openbr/plugins/cuda/copyfrom.cpp
| ... | ... | @@ -2,21 +2,25 @@ |
| 2 | 2 | |
| 3 | 3 | #include <opencv2/opencv.hpp> |
| 4 | 4 | |
| 5 | -//#include <gperftools/profiler.h> | |
| 6 | - | |
| 7 | 5 | #include <openbr/plugins/openbr_internal.h> |
| 8 | 6 | |
| 9 | 7 | using namespace std; |
| 10 | 8 | |
| 11 | 9 | using namespace cv; |
| 12 | 10 | |
| 13 | -// extern CUDA declaration | |
| 14 | -namespace br { namespace cuda { namespace cudacopyfrom { | |
| 11 | +// CUDA functions for this plugin | |
| 12 | +namespace br { namespace cuda { namespace copyfrom { | |
| 15 | 13 | template <typename T> void wrapper(void* src, T* out, int rows, int cols); |
| 16 | 14 | }}} |
| 17 | 15 | |
| 18 | 16 | namespace br |
| 19 | 17 | { |
| 18 | + /*! | |
| 19 | + * \ingroup transforms | |
| 20 | + * \brief Copies a transform from the GPU to the CPU. | |
| 21 | + * \author Colin Heinzmann \cite DepthDeluxe | |
| 22 | + * \note Method: Automatically matches image dimensions, works for 32-bit single channel, 8-bit single channel, and 8-bit 3 channel | |
| 23 | + */ | |
| 20 | 24 | class CUDACopyFrom : public UntrainableTransform |
| 21 | 25 | { |
| 22 | 26 | Q_OBJECT |
| ... | ... | @@ -24,8 +28,6 @@ namespace br |
| 24 | 28 | private: |
| 25 | 29 | void project(const Template &src, Template &dst) const |
| 26 | 30 | { |
| 27 | -// ProfilerStart("PROFILEME.log"); | |
| 28 | - | |
| 29 | 31 | // pull the data back out of the Mat |
| 30 | 32 | void* const* dataPtr = src.m().ptr<void*>(); |
| 31 | 33 | int rows = *((int*)dataPtr[1]); |
| ... | ... | @@ -35,21 +37,19 @@ private: |
| 35 | 37 | Mat dstMat = Mat(rows, cols, type); |
| 36 | 38 | switch(type) { |
| 37 | 39 | case CV_32FC1: |
| 38 | - br::cuda::cudacopyfrom::wrapper(dataPtr[0], dstMat.ptr<float>(), rows, cols); | |
| 40 | + cuda::copyfrom::wrapper(dataPtr[0], dstMat.ptr<float>(), rows, cols); | |
| 39 | 41 | break; |
| 40 | 42 | case CV_8UC1: |
| 41 | - br::cuda::cudacopyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols); | |
| 43 | + cuda::copyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols); | |
| 42 | 44 | break; |
| 43 | 45 | case CV_8UC3: |
| 44 | - br::cuda::cudacopyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols * 3); | |
| 46 | + cuda::copyfrom::wrapper(dataPtr[0], dstMat.ptr<unsigned char>(), rows, cols * 3); | |
| 45 | 47 | break; |
| 46 | 48 | default: |
| 47 | - cout << "ERR: Invalid image format" << endl; | |
| 49 | + cout << "ERR: Invalid image type (" << type << ")" << endl; | |
| 48 | 50 | break; |
| 49 | 51 | } |
| 50 | 52 | dst = dstMat; |
| 51 | - | |
| 52 | -// ProfilerStop(); | |
| 53 | 53 | } |
| 54 | 54 | }; |
| 55 | 55 | ... | ... |
openbr/plugins/cuda/copyfrom.cu
| 1 | 1 | #include "cudadefines.hpp" |
| 2 | 2 | |
| 3 | -namespace br { namespace cuda { namespace cudacopyfrom { | |
| 3 | +namespace br { namespace cuda { namespace copyfrom { | |
| 4 | 4 | template <typename T> void wrapper(void* src, T* dst, int rows, int cols) { |
| 5 | 5 | cudaError_t err; |
| 6 | 6 | CUDA_SAFE_MEMCPY(dst, src, rows*cols*sizeof(T), cudaMemcpyDeviceToHost, &err); | ... | ... |
openbr/plugins/cuda/copyto.cpp
| ... | ... | @@ -8,14 +8,20 @@ using namespace std; |
| 8 | 8 | |
| 9 | 9 | using namespace cv; |
| 10 | 10 | |
| 11 | -extern string type2str(int type); | |
| 12 | - | |
| 13 | -namespace br { namespace cuda { namespace cudacopyto { | |
| 11 | +// definitions from the CUDA source file | |
| 12 | +namespace br { namespace cuda { namespace copyto { | |
| 14 | 13 | template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols); |
| 15 | 14 | }}} |
| 16 | 15 | |
| 17 | 16 | namespace br |
| 18 | 17 | { |
| 18 | + | |
| 19 | + /*! | |
| 20 | + * \ingroup transforms | |
| 21 | + * \brief Copies a transform to the GPU. | |
| 22 | + * \author Colin Heinzmann \cite DepthDeluxe | |
| 23 | + * \note Method: Automatically matches image dimensions, works for 32-bit single channel, 8-bit single channel, and 8-bit 3 channel | |
| 24 | + */ | |
| 19 | 25 | class CUDACopyTo : public UntrainableTransform |
| 20 | 26 | { |
| 21 | 27 | Q_OBJECT |
| ... | ... | @@ -25,7 +31,7 @@ private: |
| 25 | 31 | { |
| 26 | 32 | const Mat& srcMat = src.m(); |
| 27 | 33 | const int rows = srcMat.rows; |
| 28 | - const int cols = srcMat.cols; | |
| 34 | + const int cols = srcMat.cols; | |
| 29 | 35 | |
| 30 | 36 | // output will be a single pointer to graphics card memory |
| 31 | 37 | Mat dstMat = Mat(4, 1, DataType<void*>::type); |
| ... | ... | @@ -39,16 +45,16 @@ private: |
| 39 | 45 | void* cudaMemPtr; |
| 40 | 46 | switch(srcMat.type()) { |
| 41 | 47 | case CV_32FC1: |
| 42 | - br::cuda::cudacopyto::wrapper(srcMat.ptr<float>(), &dstMatData[0], rows, cols); | |
| 48 | + cuda::copyto::wrapper(srcMat.ptr<float>(), &dstMatData[0], rows, cols); | |
| 43 | 49 | break; |
| 44 | 50 | case CV_8UC1: |
| 45 | - br::cuda::cudacopyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, cols); | |
| 51 | + cuda::copyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, cols); | |
| 46 | 52 | break; |
| 47 | 53 | case CV_8UC3: |
| 48 | - br::cuda::cudacopyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, 3*cols); | |
| 54 | + cuda::copyto::wrapper(srcMat.ptr<unsigned char>(), &dstMatData[0], rows, 3*cols); | |
| 49 | 55 | break; |
| 50 | 56 | default: |
| 51 | - cout << "ERR: Invalid image type! " << type2str(srcMat.type()) << endl; | |
| 57 | + cout << "ERR: Invalid image type (" << srcMat.type() << ")" << endl; | |
| 52 | 58 | return; |
| 53 | 59 | } |
| 54 | 60 | ... | ... |
openbr/plugins/cuda/copyto.cu
| 1 | 1 | #include "cudadefines.hpp" |
| 2 | 2 | |
| 3 | -namespace br { namespace cuda { namespace cudacopyto { | |
| 3 | +namespace br { namespace cuda { namespace copyto { | |
| 4 | + | |
| 4 | 5 | template <typename T> void wrapper(const T* in, void** out, const int rows, const int cols) { |
| 5 | 6 | cudaError_t err; |
| 6 | 7 | CUDA_SAFE_MALLOC(out, rows*cols*sizeof(T), &err); |
| ... | ... | @@ -9,4 +10,5 @@ namespace br { namespace cuda { namespace cudacopyto { |
| 9 | 10 | |
| 10 | 11 | template void wrapper(const float* in, void** out, const int rows, const int cols); |
| 11 | 12 | template void wrapper(const unsigned char* in, void** out, const int rows, const int cols); |
| 13 | + | |
| 12 | 14 | }}} | ... | ... |
openbr/plugins/cuda/cudaaffine.cpp
| ... | ... | @@ -33,159 +33,158 @@ using namespace std; |
| 33 | 33 | #include <openbr/plugins/openbr_internal.h> |
| 34 | 34 | #include <openbr/core/opencvutils.h> |
| 35 | 35 | |
| 36 | -#include "MatManager.hpp" | |
| 37 | - | |
| 38 | 36 | using namespace cv; |
| 39 | 37 | |
| 40 | -namespace br { namespace cuda { | |
| 41 | - void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); | |
| 42 | -}} | |
| 38 | +// definitions from the CUDA source file | |
| 39 | +namespace br { namespace cuda { namespace affine { | |
| 40 | + void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); | |
| 41 | +}}} | |
| 43 | 42 | |
| 44 | 43 | namespace br |
| 45 | 44 | { |
| 46 | 45 | |
| 47 | -/*! | |
| 48 | - * \ingroup transforms | |
| 49 | - * \brief Performs a two or three point registration. | |
| 50 | - * \author Josh Klontz \cite jklontz | |
| 51 | - * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargment, Bilin for fast enlargement. | |
| 52 | - */ | |
| 53 | -class CUDAAffineTransform : public UntrainableTransform | |
| 54 | -{ | |
| 55 | - Q_OBJECT | |
| 56 | - Q_ENUMS(Method) | |
| 57 | - | |
| 58 | -public: | |
| 59 | - /*!< */ | |
| 60 | - enum Method { Near = INTER_NEAREST, | |
| 61 | - Area = INTER_AREA, | |
| 62 | - Bilin = INTER_LINEAR, | |
| 63 | - Cubic = INTER_CUBIC, | |
| 64 | - Lanczo = INTER_LANCZOS4}; | |
| 65 | - | |
| 66 | -private: | |
| 67 | - Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false) | |
| 68 | - Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false) | |
| 69 | - Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false) | |
| 70 | - Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false) | |
| 71 | - Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false) | |
| 72 | - Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false) | |
| 73 | - Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false) | |
| 74 | - Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false) | |
| 75 | - Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false) | |
| 76 | - Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false) | |
| 77 | - Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false) | |
| 78 | - BR_PROPERTY(int, width, 64) | |
| 79 | - BR_PROPERTY(int, height, 64) | |
| 80 | - BR_PROPERTY(float, x1, 0) | |
| 81 | - BR_PROPERTY(float, y1, 0) | |
| 82 | - BR_PROPERTY(float, x2, -1) | |
| 83 | - BR_PROPERTY(float, y2, -1) | |
| 84 | - BR_PROPERTY(float, x3, -1) | |
| 85 | - BR_PROPERTY(float, y3, -1) | |
| 86 | - BR_PROPERTY(Method, method, Bilin) | |
| 87 | - BR_PROPERTY(bool, storeAffine, false) | |
| 88 | - BR_PROPERTY(bool, warpPoints, false) | |
| 89 | - | |
| 90 | - static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b) | |
| 91 | - { | |
| 92 | - float dx = b.x - a.x; | |
| 93 | - float dy = b.y - a.y; | |
| 94 | - return Point2f(a.x - dy, a.y + dx); | |
| 95 | - } | |
| 96 | - | |
| 97 | - void project(const Template &src, Template &dst) const | |
| 98 | - { | |
| 99 | - const bool twoPoints = ((x3 == -1) || (y3 == -1)); | |
| 100 | - | |
| 101 | - Point2f dstPoints[3]; | |
| 102 | - dstPoints[0] = Point2f(x1*width, y1*height); | |
| 103 | - dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height); | |
| 104 | - if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]); | |
| 105 | - else dstPoints[2] = Point2f(x3*width, y3*height); | |
| 106 | - | |
| 107 | - Point2f srcPoints[3]; | |
| 108 | - if (src.file.contains("Affine_0") && | |
| 109 | - src.file.contains("Affine_1") && | |
| 110 | - (src.file.contains("Affine_2") || twoPoints)) { | |
| 111 | - srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0")); | |
| 112 | - srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1")); | |
| 113 | - if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2")); | |
| 114 | - } else { | |
| 115 | - const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points()); | |
| 116 | - | |
| 117 | - if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { | |
| 118 | - resize(src, dst, Size(width, height)); | |
| 119 | - return; | |
| 120 | - } else { | |
| 121 | - srcPoints[0] = landmarks[0]; | |
| 122 | - srcPoints[1] = landmarks[1]; | |
| 123 | - if (!twoPoints) srcPoints[2] = landmarks[2]; | |
| 124 | - } | |
| 125 | - } | |
| 126 | - if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]); | |
| 127 | - | |
| 128 | - // Code section being altered (original) | |
| 129 | - // | |
| 130 | - // Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | |
| 131 | - // warpAffine(src, dst, affineTransform, Size(width, height), method); | |
| 132 | - // | |
| 133 | - // end original | |
| 134 | - | |
| 135 | - Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | |
| 136 | - | |
| 137 | - void* const* srcDataPtr = src.m().ptr<void*>(); | |
| 138 | - int rows = *((int*)srcDataPtr[1]); | |
| 139 | - int cols = *((int*)srcDataPtr[2]); | |
| 140 | - int type = *((int*)srcDataPtr[3]); | |
| 141 | - | |
| 142 | - Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | |
| 143 | - void** dstDataPtr = dstMat.ptr<void*>(); | |
| 144 | - | |
| 145 | - dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows | |
| 146 | - dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols | |
| 147 | - dstDataPtr[3] = srcDataPtr[3]; | |
| 148 | - | |
| 149 | - // Print the transform | |
| 150 | - //for(int x = 0; x < affineTransform.rows; x++){ | |
| 151 | - //for(int y = 0; y < affineTransform.cols; y++){ | |
| 152 | - //printf("%8.3f\t", affineTransform.at<double>(x, y)); | |
| 153 | - //} | |
| 154 | - //printf("\n"); | |
| 155 | - //} | |
| 156 | - | |
| 157 | - br::cuda::cudaaffine_wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width); | |
| 158 | - | |
| 159 | - // end altered code | |
| 160 | - | |
| 161 | - if (warpPoints) { | |
| 162 | - QList<QPointF> points = src.file.points(); | |
| 163 | - QList<QPointF> rotatedPoints; | |
| 164 | - for (int i=0; i<points.size(); i++) { | |
| 165 | - rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+ | |
| 166 | - points.at(i).y()*affineTransform.at<double>(0,1)+ | |
| 167 | - affineTransform.at<double>(0,2), | |
| 168 | - points.at(i).x()*affineTransform.at<double>(1,0)+ | |
| 169 | - points.at(i).y()*affineTransform.at<double>(1,1)+ | |
| 170 | - affineTransform.at<double>(1,2))); | |
| 171 | - } | |
| 172 | - | |
| 173 | - dst.file.setPoints(rotatedPoints); | |
| 174 | - } | |
| 175 | - | |
| 176 | - if (storeAffine) { | |
| 177 | - QList<float> affineParams; | |
| 178 | - for (int i = 0 ; i < 2; i++) | |
| 179 | - for (int j = 0; j < 3; j++) | |
| 180 | - affineParams.append(affineTransform.at<double>(i, j)); | |
| 181 | - dst.file.setList("affineParameters", affineParams); | |
| 182 | - } | |
| 183 | - | |
| 184 | - dst = dstMat; | |
| 185 | - } | |
| 186 | -}; | |
| 187 | - | |
| 188 | -BR_REGISTER(Transform, CUDAAffineTransform) | |
| 46 | + /*! | |
| 47 | + * \ingroup transforms | |
| 48 | + * \brief Performs a two or three point registration on the GPU. Modified from stock OpenBR implementation | |
| 49 | + * \author Greg Schrock \cite gls022 | |
| 50 | + * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargement, Bilin for fast enlargement. | |
| 51 | + */ | |
| 52 | + class CUDAAffineTransform : public UntrainableTransform | |
| 53 | + { | |
| 54 | + Q_OBJECT | |
| 55 | + Q_ENUMS(Method) | |
| 56 | + | |
| 57 | + public: | |
| 58 | + /*!< */ | |
| 59 | + enum Method { Near = INTER_NEAREST, | |
| 60 | + Area = INTER_AREA, | |
| 61 | + Bilin = INTER_LINEAR, | |
| 62 | + Cubic = INTER_CUBIC, | |
| 63 | + Lanczo = INTER_LANCZOS4}; | |
| 64 | + | |
| 65 | + private: | |
| 66 | + Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false) | |
| 67 | + Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false) | |
| 68 | + Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false) | |
| 69 | + Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false) | |
| 70 | + Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false) | |
| 71 | + Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false) | |
| 72 | + Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false) | |
| 73 | + Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false) | |
| 74 | + Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false) | |
| 75 | + Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false) | |
| 76 | + Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false) | |
| 77 | + BR_PROPERTY(int, width, 64) | |
| 78 | + BR_PROPERTY(int, height, 64) | |
| 79 | + BR_PROPERTY(float, x1, 0) | |
| 80 | + BR_PROPERTY(float, y1, 0) | |
| 81 | + BR_PROPERTY(float, x2, -1) | |
| 82 | + BR_PROPERTY(float, y2, -1) | |
| 83 | + BR_PROPERTY(float, x3, -1) | |
| 84 | + BR_PROPERTY(float, y3, -1) | |
| 85 | + BR_PROPERTY(Method, method, Bilin) | |
| 86 | + BR_PROPERTY(bool, storeAffine, false) | |
| 87 | + BR_PROPERTY(bool, warpPoints, false) | |
| 88 | + | |
| 89 | + static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b) | |
| 90 | + { | |
| 91 | + float dx = b.x - a.x; | |
| 92 | + float dy = b.y - a.y; | |
| 93 | + return Point2f(a.x - dy, a.y + dx); | |
| 94 | + } | |
| 95 | + | |
| 96 | + void project(const Template &src, Template &dst) const | |
| 97 | + { | |
| 98 | + const bool twoPoints = ((x3 == -1) || (y3 == -1)); | |
| 99 | + | |
| 100 | + Point2f dstPoints[3]; | |
| 101 | + dstPoints[0] = Point2f(x1*width, y1*height); | |
| 102 | + dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height); | |
| 103 | + if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]); | |
| 104 | + else dstPoints[2] = Point2f(x3*width, y3*height); | |
| 105 | + | |
| 106 | + Point2f srcPoints[3]; | |
| 107 | + if (src.file.contains("Affine_0") && | |
| 108 | + src.file.contains("Affine_1") && | |
| 109 | + (src.file.contains("Affine_2") || twoPoints)) { | |
| 110 | + srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0")); | |
| 111 | + srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1")); | |
| 112 | + if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2")); | |
| 113 | + } else { | |
| 114 | + const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points()); | |
| 115 | + | |
| 116 | + if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { | |
| 117 | + resize(src, dst, Size(width, height)); | |
| 118 | + return; | |
| 119 | + } else { | |
| 120 | + srcPoints[0] = landmarks[0]; | |
| 121 | + srcPoints[1] = landmarks[1]; | |
| 122 | + if (!twoPoints) srcPoints[2] = landmarks[2]; | |
| 123 | + } | |
| 124 | + } | |
| 125 | + if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]); | |
| 126 | + | |
| 127 | + // Code section being altered (original) | |
| 128 | + // | |
| 129 | + // Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | |
| 130 | + // warpAffine(src, dst, affineTransform, Size(width, height), method); | |
| 131 | + // | |
| 132 | + // end original | |
| 133 | + | |
| 134 | + Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | |
| 135 | + | |
| 136 | + void* const* srcDataPtr = src.m().ptr<void*>(); | |
| 137 | + int rows = *((int*)srcDataPtr[1]); | |
| 138 | + int cols = *((int*)srcDataPtr[2]); | |
| 139 | + int type = *((int*)srcDataPtr[3]); | |
| 140 | + | |
| 141 | + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | |
| 142 | + void** dstDataPtr = dstMat.ptr<void*>(); | |
| 143 | + | |
| 144 | + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows | |
| 145 | + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols | |
| 146 | + dstDataPtr[3] = srcDataPtr[3]; | |
| 147 | + | |
| 148 | + // Print the transform | |
| 149 | + //for(int x = 0; x < affineTransform.rows; x++){ | |
| 150 | + //for(int y = 0; y < affineTransform.cols; y++){ | |
| 151 | + //printf("%8.3f\t", affineTransform.at<double>(x, y)); | |
| 152 | + //} | |
| 153 | + //printf("\n"); | |
| 154 | + //} | |
| 155 | + | |
| 156 | + cuda::affine::wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width); | |
| 157 | + | |
| 158 | + // end altered code | |
| 159 | + | |
| 160 | + if (warpPoints) { | |
| 161 | + QList<QPointF> points = src.file.points(); | |
| 162 | + QList<QPointF> rotatedPoints; | |
| 163 | + for (int i=0; i<points.size(); i++) { | |
| 164 | + rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+ | |
| 165 | + points.at(i).y()*affineTransform.at<double>(0,1)+ | |
| 166 | + affineTransform.at<double>(0,2), | |
| 167 | + points.at(i).x()*affineTransform.at<double>(1,0)+ | |
| 168 | + points.at(i).y()*affineTransform.at<double>(1,1)+ | |
| 169 | + affineTransform.at<double>(1,2))); | |
| 170 | + } | |
| 171 | + | |
| 172 | + dst.file.setPoints(rotatedPoints); | |
| 173 | + } | |
| 174 | + | |
| 175 | + if (storeAffine) { | |
| 176 | + QList<float> affineParams; | |
| 177 | + for (int i = 0 ; i < 2; i++) | |
| 178 | + for (int j = 0; j < 3; j++) | |
| 179 | + affineParams.append(affineTransform.at<double>(i, j)); | |
| 180 | + dst.file.setList("affineParameters", affineParams); | |
| 181 | + } | |
| 182 | + | |
| 183 | + dst = dstMat; | |
| 184 | + } | |
| 185 | + }; | |
| 186 | + | |
| 187 | + BR_REGISTER(Transform, CUDAAffineTransform) | |
| 189 | 188 | |
| 190 | 189 | } // namespace br |
| 191 | 190 | ... | ... |
openbr/plugins/cuda/cudaaffine.cu
| ... | ... | @@ -11,30 +11,30 @@ using namespace std; |
| 11 | 11 | using namespace cv; |
| 12 | 12 | using namespace cv::gpu; |
| 13 | 13 | |
| 14 | -namespace br { namespace cuda { | |
| 14 | +namespace br { namespace cuda { namespace affine { | |
| 15 | 15 | |
| 16 | - __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { | |
| 16 | + __device__ __forceinline__ uint8_t getPixelValueDevice(int row, int col, uint8_t* srcPtr, int rows, int cols) { | |
| 17 | 17 | if (row < 0 || row > rows || col < 0 || col > cols) { |
| 18 | 18 | if (row > rows || col > cols) { |
| 19 | 19 | return 0; |
| 20 | 20 | } else{ |
| 21 | - return 0; | |
| 21 | + return 0; | |
| 22 | 22 | } |
| 23 | 23 | } |
| 24 | 24 | return (srcPtr + row*cols)[col]; |
| 25 | 25 | } |
| 26 | 26 | |
| 27 | 27 | |
| 28 | - __device__ __forceinline__ uint8_t cudaaffine_kernel_get_bilinear_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) { | |
| 28 | + __device__ __forceinline__ uint8_t getBilinearPixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { | |
| 29 | 29 | // don't do anything if the index is out of bounds |
| 30 | 30 | if (row < 0 || row > rows || col < 0 || col > cols) { |
| 31 | 31 | if (row > rows || col > cols) { |
| 32 | 32 | return 0; |
| 33 | 33 | } else{ |
| 34 | - return 0; | |
| 34 | + return 0; | |
| 35 | 35 | } |
| 36 | 36 | } |
| 37 | - | |
| 37 | + | |
| 38 | 38 | // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf |
| 39 | 39 | // Bilinear Transformation |
| 40 | 40 | // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy) |
| ... | ... | @@ -48,22 +48,22 @@ namespace br { namespace cuda { |
| 48 | 48 | double d_row = row - row1; |
| 49 | 49 | double d_col = col - col1; |
| 50 | 50 | |
| 51 | - int Q11 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols); | |
| 52 | - int Q21 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols); | |
| 53 | - int Q12 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols); | |
| 54 | - int Q22 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols); | |
| 51 | + int Q11 = getPixelValueDevice(row1, col1, srcPtr, rows, cols); | |
| 52 | + int Q21 = getPixelValueDevice(row2, col1, srcPtr, rows, cols); | |
| 53 | + int Q12 = getPixelValueDevice(row1, col2, srcPtr, rows, cols); | |
| 54 | + int Q22 = getPixelValueDevice(row2, col2, srcPtr, rows, cols); | |
| 55 | 55 | |
| 56 | 56 | double val = Q22*(d_row*d_col) + Q12*((1-d_row)*d_col) + Q21*(d_row*(1-d_col)) + Q11*((1-d_row)*(1-d_col)); |
| 57 | 57 | return ((uint8_t) round(val)); |
| 58 | 58 | } |
| 59 | 59 | |
| 60 | - __device__ __forceinline__ uint8_t cudaaffine_kernel_get_distance_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) { | |
| 60 | + __device__ __forceinline__ uint8_t getDistancePixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { | |
| 61 | 61 | // don't do anything if the index is out of bounds |
| 62 | 62 | if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { |
| 63 | 63 | if (row >= rows || col >= cols) { |
| 64 | 64 | return 0; |
| 65 | 65 | } else{ |
| 66 | - return 0; | |
| 66 | + return 0; | |
| 67 | 67 | } |
| 68 | 68 | } |
| 69 | 69 | |
| ... | ... | @@ -90,10 +90,10 @@ namespace br { namespace cuda { |
| 90 | 90 | double w3 = d3/sum; |
| 91 | 91 | double w4 = d4/sum; |
| 92 | 92 | |
| 93 | - uint8_t v1 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols); | |
| 94 | - uint8_t v2 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols); | |
| 95 | - uint8_t v3 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols); | |
| 96 | - uint8_t v4 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols); | |
| 93 | + uint8_t v1 = getPixelValueDevice(row1, col1, srcPtr, rows, cols); | |
| 94 | + uint8_t v2 = getPixelValueDevice(row2, col1, srcPtr, rows, cols); | |
| 95 | + uint8_t v3 = getPixelValueDevice(row1, col2, srcPtr, rows, cols); | |
| 96 | + uint8_t v4 = getPixelValueDevice(row2, col2, srcPtr, rows, cols); | |
| 97 | 97 | |
| 98 | 98 | return round(w1*v1 + w2*v2 + w3*v3 + w4*v4); |
| 99 | 99 | } |
| ... | ... | @@ -105,16 +105,16 @@ namespace br { namespace cuda { |
| 105 | 105 | * src_row - The computed source pixel row (mapping from this row) |
| 106 | 106 | * src_col - The computed source pixel column (mapping from this col) |
| 107 | 107 | */ |
| 108 | - __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){ | |
| 108 | + __device__ __forceinline__ void getSrcCoordDevice(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){ | |
| 109 | 109 | *src_col_pnt = dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]; |
| 110 | 110 | *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; |
| 111 | 111 | |
| 112 | 112 | //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); |
| 113 | 113 | |
| 114 | 114 | } |
| 115 | - | |
| 116 | 115 | |
| 117 | - __global__ void cudaaffine_kernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ | |
| 116 | + | |
| 117 | + __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ | |
| 118 | 118 | int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| 119 | 119 | int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; |
| 120 | 120 | int dstIndex = dstRowInd*dst_cols + dstColInd; |
| ... | ... | @@ -134,15 +134,15 @@ namespace br { namespace cuda { |
| 134 | 134 | } |
| 135 | 135 | } |
| 136 | 136 | |
| 137 | - cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); | |
| 138 | - //const uint8_t cval = cudaaffine_kernel_get_distance_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 139 | - const uint8_t cval = cudaaffine_kernel_get_bilinear_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 140 | - //const uint8_t cval = cudaaffine_kernel_get_pixel_value(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 137 | + getSrcCoordDevice(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); | |
| 138 | + //const uint8_t cval = getDistancePixelValueDevice(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 139 | + const uint8_t cval = getBilinearPixelValueDevice(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 140 | + //const uint8_t cval = getPixelValueDevice(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 141 | 141 | |
| 142 | 142 | dstPtr[dstIndex] = cval; |
| 143 | 143 | } |
| 144 | 144 | |
| 145 | - void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { | |
| 145 | + void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { | |
| 146 | 146 | cudaError_t err; |
| 147 | 147 | double* gpuInverse; |
| 148 | 148 | |
| ... | ... | @@ -152,7 +152,7 @@ namespace br { namespace cuda { |
| 152 | 152 | |
| 153 | 153 | //************************************************************************ |
| 154 | 154 | // Input affine is a 2x3 Mat whose transpose is used in the computations |
| 155 | - // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ] | |
| 155 | + // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ] | |
| 156 | 156 | // See "Digital Image Warping" by George Wolburg (p. 50) |
| 157 | 157 | //************************************************************************ |
| 158 | 158 | |
| ... | ... | @@ -210,7 +210,7 @@ namespace br { namespace cuda { |
| 210 | 210 | |
| 211 | 211 | CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err); |
| 212 | 212 | |
| 213 | - cudaaffine_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols); | |
| 213 | + affineKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols); | |
| 214 | 214 | CUDA_KERNEL_ERR_CHK(&err); |
| 215 | 215 | |
| 216 | 216 | CUDA_SAFE_FREE(srcPtr, &err); |
| ... | ... | @@ -225,5 +225,4 @@ namespace br { namespace cuda { |
| 225 | 225 | // } |
| 226 | 226 | // printf("\n"); |
| 227 | 227 | } |
| 228 | -} // end cuda | |
| 229 | -} // end br | |
| 228 | +}}} | ... | ... |
openbr/plugins/cuda/cudacvtfloat.cpp
| ... | ... | @@ -7,7 +7,8 @@ using namespace cv; |
| 7 | 7 | |
| 8 | 8 | #include <openbr/plugins/openbr_internal.h> |
| 9 | 9 | |
| 10 | -namespace br { namespace cuda { namespace cudacvtfloat { | |
| 10 | +// definitions from the CUDA source file | |
| 11 | +namespace br { namespace cuda { namespace cvtfloat { | |
| 11 | 12 | void wrapper(void* src, void** dst, int rows, int cols); |
| 12 | 13 | }}} |
| 13 | 14 | |
| ... | ... | @@ -16,7 +17,7 @@ namespace br |
| 16 | 17 | |
| 17 | 18 | /*! |
| 18 | 19 | * \ingroup transforms |
| 19 | - * \brief Converts byte to floating point | |
| 20 | + * \brief Converts 8-bit images currently on GPU into 32-bit floating point equivalent. | |
| 20 | 21 | * \author Colin Heinzmann \cite DepthDeluxe |
| 21 | 22 | */ |
| 22 | 23 | class CUDACvtFloatTransform : public UntrainableTransform |
| ... | ... | @@ -45,7 +46,7 @@ class CUDACvtFloatTransform : public UntrainableTransform |
| 45 | 46 | dstDataPtr[2] = srcDataPtr[2]; |
| 46 | 47 | dstDataPtr[3] = srcDataPtr[3]; *((int*)dstDataPtr[3]) = CV_32FC1; |
| 47 | 48 | |
| 48 | - br::cuda::cudacvtfloat::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | |
| 49 | + cuda::cvtfloat::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | |
| 49 | 50 | dst = dstMat; |
| 50 | 51 | } |
| 51 | 52 | }; | ... | ... |
openbr/plugins/cuda/cudacvtfloat.cu
| ... | ... | @@ -3,7 +3,7 @@ using namespace std; |
| 3 | 3 | |
| 4 | 4 | #include "cudadefines.hpp" |
| 5 | 5 | |
| 6 | -namespace br { namespace cuda { namespace cudacvtfloat { | |
| 6 | +namespace br { namespace cuda { namespace cvtfloat { | |
| 7 | 7 | |
| 8 | 8 | __global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) { |
| 9 | 9 | // get my index |
| ... | ... | @@ -20,12 +20,6 @@ namespace br { namespace cuda { namespace cudacvtfloat { |
| 20 | 20 | } |
| 21 | 21 | |
| 22 | 22 | void wrapper(void* src, void** dst, int rows, int cols) { |
| 23 | - //unsigned char* cudaSrc; | |
| 24 | - //cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char)); | |
| 25 | - //cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice); | |
| 26 | - | |
| 27 | - //float* cudaDst; | |
| 28 | - //cudaMalloc(&cudaDst, rows*cols*sizeof(float)); | |
| 29 | 23 | cudaError_t err; |
| 30 | 24 | CUDA_SAFE_MALLOC(dst, rows*cols*sizeof(float), &err); |
| 31 | 25 | ... | ... |
openbr/plugins/cuda/CUDAL2.cpp renamed to openbr/plugins/cuda/cudal2.cpp
| ... | ... | @@ -19,7 +19,8 @@ using namespace std; |
| 19 | 19 | |
| 20 | 20 | #include <openbr/plugins/openbr_internal.h> |
| 21 | 21 | |
| 22 | -namespace br { namespace cuda { namespace L2{ | |
| 22 | +// definitions from the CUDA source file | |
| 23 | +namespace br { namespace cuda { namespace L2 { | |
| 23 | 24 | void wrapper(float* cudaAPtr, float* cudaBPtr, int length, float* outPtr); |
| 24 | 25 | }}} |
| 25 | 26 | |
| ... | ... | @@ -29,7 +30,7 @@ namespace br |
| 29 | 30 | /*! |
| 30 | 31 | * \ingroup distances |
| 31 | 32 | * \brief L2 distance computed using eigen. |
| 32 | - * \author Josh Klontz \cite jklontz | |
| 33 | + * \author Colin Heinzmann \cite DepthDeluxe | |
| 33 | 34 | */ |
| 34 | 35 | class CUDAL2Distance : public UntrainableDistance |
| 35 | 36 | { |
| ... | ... | @@ -45,7 +46,7 @@ class CUDAL2Distance : public UntrainableDistance |
| 45 | 46 | float* cudaBPtr = (float*)b.ptr<void*>()[0]; |
| 46 | 47 | |
| 47 | 48 | float out; |
| 48 | - br::cuda::L2::wrapper(cudaAPtr, cudaBPtr, rows*cols, &out); | |
| 49 | + cuda::L2::wrapper(cudaAPtr, cudaBPtr, rows*cols, &out); | |
| 49 | 50 | |
| 50 | 51 | return out; |
| 51 | 52 | } |
| ... | ... | @@ -55,4 +56,4 @@ BR_REGISTER(Distance, CUDAL2Distance) |
| 55 | 56 | |
| 56 | 57 | } // namespace br |
| 57 | 58 | |
| 58 | -#include "cuda/CUDAL2.moc" | |
| 59 | +#include "cuda/cudal2.moc" | ... | ... |
openbr/plugins/cuda/CUDAL2.cu renamed to openbr/plugins/cuda/cudal2.cu
| ... | ... | @@ -4,7 +4,7 @@ |
| 4 | 4 | |
| 5 | 5 | namespace br { namespace cuda { namespace L2 { |
| 6 | 6 | |
| 7 | - __global__ void my_subtract_kernel(float* aPtr, float* bPtr, float* workPtr, int length) { | |
| 7 | + __global__ void subtractKernel(float* aPtr, float* bPtr, float* workPtr, int length) { | |
| 8 | 8 | int index = blockIdx.x*blockDim.x+threadIdx.x; |
| 9 | 9 | |
| 10 | 10 | if (index >= length) { |
| ... | ... | @@ -18,7 +18,7 @@ namespace br { namespace cuda { namespace L2 { |
| 18 | 18 | workPtr[index] = workPtr[index] * workPtr[index]; |
| 19 | 19 | } |
| 20 | 20 | |
| 21 | - __global__ void collapse_kernel(float* inPtr, float* outPtr, int length) { | |
| 21 | + __global__ void collapseKernel(float* inPtr, float* outPtr, int length) { | |
| 22 | 22 | // make sure there is only one thread that we are calling |
| 23 | 23 | if (blockIdx.x != 0 || threadIdx.x != 0) { |
| 24 | 24 | return; |
| ... | ... | @@ -45,11 +45,11 @@ namespace br { namespace cuda { namespace L2 { |
| 45 | 45 | // perform the subtraction |
| 46 | 46 | int threadsPerBlock = 64; |
| 47 | 47 | int numBlocks = length / threadsPerBlock + 1; |
| 48 | - my_subtract_kernel<<<threadsPerBlock, numBlocks>>>(cudaAPtr, cudaBPtr, cudaWorkBufferPtr, length); | |
| 48 | + subtractKernel<<<threadsPerBlock, numBlocks>>>(cudaAPtr, cudaBPtr, cudaWorkBufferPtr, length); | |
| 49 | 49 | CUDA_KERNEL_ERR_CHK(&err); |
| 50 | 50 | |
| 51 | 51 | // perform the collapse |
| 52 | - collapse_kernel<<<1,1>>>(cudaWorkBufferPtr, cudaOutPtr, length); | |
| 52 | + collapseKernel<<<1,1>>>(cudaWorkBufferPtr, cudaOutPtr, length); | |
| 53 | 53 | CUDA_KERNEL_ERR_CHK(&err); |
| 54 | 54 | |
| 55 | 55 | // copy the single value back to the destination |
| ... | ... | @@ -63,6 +63,3 @@ namespace br { namespace cuda { namespace L2 { |
| 63 | 63 | CUDA_SAFE_FREE(cudaWorkBufferPtr, &err); |
| 64 | 64 | } |
| 65 | 65 | }}} |
| 66 | - | |
| 67 | -// 128CUDAEigenfaces on 6400 ATT: 54.367s | |
| 68 | -// 128CUDAEigenfacesL2 on 6400 ATT: | ... | ... |
openbr/plugins/cuda/cudalbp.cpp
| ... | ... | @@ -31,44 +31,21 @@ using namespace std; |
| 31 | 31 | |
| 32 | 32 | #include <openbr/plugins/openbr_internal.h> |
| 33 | 33 | |
| 34 | -#include "MatManager.hpp" | |
| 35 | - | |
| 36 | 34 | using namespace cv; |
| 37 | 35 | |
| 38 | -string type2str(int type) { | |
| 39 | - string r; | |
| 40 | - | |
| 41 | - uchar depth = type & CV_MAT_DEPTH_MASK; | |
| 42 | - uchar chans = 1 + (type >> CV_CN_SHIFT); | |
| 43 | - | |
| 44 | - switch ( depth ) { | |
| 45 | - case CV_8U: r = "8U"; break; | |
| 46 | - case CV_8S: r = "8S"; break; | |
| 47 | - case CV_16U: r = "16U"; break; | |
| 48 | - case CV_16S: r = "16S"; break; | |
| 49 | - case CV_32S: r = "32S"; break; | |
| 50 | - case CV_32F: r = "32F"; break; | |
| 51 | - case CV_64F: r = "64F"; break; | |
| 52 | - default: r = "User"; break; | |
| 53 | - } | |
| 54 | - | |
| 55 | - r += "C"; | |
| 56 | - r += (chans+'0'); | |
| 57 | - | |
| 58 | - return r; | |
| 59 | -} | |
| 60 | - | |
| 61 | -namespace br { namespace cuda { | |
| 62 | - void cudalbp_wrapper(void* srcPtr, void** dstPtr, int rows, int cols); | |
| 63 | - void cudalbp_init_wrapper(uint8_t* lut); | |
| 64 | -}} | |
| 36 | +// definitions from the CUDA source file | |
| 37 | +namespace br { namespace cuda { namespace lbp { | |
| 38 | + void wrapper(void* srcPtr, void** dstPtr, int rows, int cols); | |
| 39 | + void initializeWrapper(uint8_t* lut); | |
| 40 | +}}} | |
| 65 | 41 | |
| 66 | 42 | namespace br |
| 67 | 43 | { |
| 68 | 44 | /*! |
| 69 | 45 | * \ingroup transforms |
| 70 | - * \brief Convert the image into a feature vector using Local Binary Patterns in CUDA | |
| 71 | - * \author Colin Heinzmann, Li Li \cite DepthDeluxe, booli | |
| 46 | + * \brief Convert the image into a feature vector using Local Binary Patterns in CUDA. Modified from stock OpenBR plugin. | |
| 47 | + * \author Colin Heinzmann \cite DepthDeluxe | |
| 48 | + * \author Li Li \cite booli | |
| 72 | 49 | */ |
| 73 | 50 | class CUDALBPTransform : public UntrainableTransform |
| 74 | 51 | { |
| ... | ... | @@ -84,8 +61,6 @@ class CUDALBPTransform : public UntrainableTransform |
| 84 | 61 | uchar lut[256]; |
| 85 | 62 | uchar null; |
| 86 | 63 | |
| 87 | - //cuda::MatManager* matManager; | |
| 88 | - | |
| 89 | 64 | public: |
| 90 | 65 | /* Returns the number of 0->1 or 1->0 transitions in i */ |
| 91 | 66 | static int numTransitions(int i) |
| ... | ... | @@ -136,36 +111,14 @@ class CUDALBPTransform : public UntrainableTransform |
| 136 | 111 | if (!set[i]) |
| 137 | 112 | lut[i] = null; // Set to null id |
| 138 | 113 | |
| 139 | - // init the mat manager for managing 10 mats | |
| 140 | - //matManager = new cuda::MatManager(10); | |
| 141 | - | |
| 142 | 114 | // copy lut over to the GPU |
| 143 | - br::cuda::cudalbp_init_wrapper(lut); | |
| 115 | + cuda::lbp::initializeWrapper(lut); | |
| 144 | 116 | |
| 145 | 117 | std::cout << "Initialized CUDALBP" << std::endl; |
| 146 | 118 | } |
| 147 | 119 | |
| 148 | 120 | void project(const Template &src, Template &dst) const |
| 149 | 121 | { |
| 150 | - //Mat& m = (Mat&)src.m(); | |
| 151 | - //cuda::MatManager::matindex a; | |
| 152 | - //cuda::MatManager::matindex b; | |
| 153 | - //a = matManager->reserve(m); | |
| 154 | - //matManager->upload(a, m); | |
| 155 | - | |
| 156 | - // reserve the second mat and check the dimensions | |
| 157 | - //b = matManager->reserve(m); | |
| 158 | - | |
| 159 | - //uint8_t* srcMatPtr = matManager->get_mat_pointer_from_index(a); | |
| 160 | - //uint8_t* dstMatPtr = matManager->get_mat_pointer_from_index(b); | |
| 161 | - //br::cuda::cudalbp_wrapper(srcMatPtr, dstMatPtr, lutGpuPtr, m.cols, m.rows, m.step1()); | |
| 162 | - | |
| 163 | - //matManager->download(b, dst); | |
| 164 | - | |
| 165 | - // release both the mats | |
| 166 | - //matManager->release(a); | |
| 167 | - //matManager->release(b); | |
| 168 | - | |
| 169 | 122 | void* const* srcDataPtr = src.m().ptr<void*>(); |
| 170 | 123 | int rows = *((int*)srcDataPtr[1]); |
| 171 | 124 | int cols = *((int*)srcDataPtr[2]); |
| ... | ... | @@ -177,13 +130,13 @@ class CUDALBPTransform : public UntrainableTransform |
| 177 | 130 | dstDataPtr[2] = srcDataPtr[2]; |
| 178 | 131 | dstDataPtr[3] = srcDataPtr[3]; |
| 179 | 132 | |
| 180 | - br::cuda::cudalbp_wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | |
| 133 | + cuda::lbp::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | |
| 181 | 134 | dst = dstMat; |
| 182 | 135 | } |
| 183 | 136 | }; |
| 184 | 137 | |
| 185 | 138 | BR_REGISTER(Transform, CUDALBPTransform) |
| 186 | 139 | |
| 187 | -} // namespace br | |
| 140 | +} | |
| 188 | 141 | |
| 189 | 142 | #include "cuda/cudalbp.moc" | ... | ... |
openbr/plugins/cuda/cudalbp.cu
| ... | ... | @@ -9,14 +9,18 @@ using namespace std; |
| 9 | 9 | using namespace cv; |
| 10 | 10 | using namespace cv::gpu; |
| 11 | 11 | |
| 12 | -namespace br { namespace cuda { | |
| 12 | +/* | |
| 13 | + * These are the CUDA functions for CUDALBP. See cudapca.cpp for more details | |
| 14 | + */ | |
| 15 | + | |
| 16 | +namespace br { namespace cuda { namespace lbp { | |
| 13 | 17 | uint8_t* lut; |
| 14 | 18 | |
| 15 | - __device__ __forceinline__ uint8_t cudalbp_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { | |
| 19 | + __device__ __forceinline__ uint8_t getPixelValueKernel(int row, int col, uint8_t* srcPtr, int rows, int cols) { | |
| 16 | 20 | return (srcPtr + row*cols)[col]; |
| 17 | 21 | } |
| 18 | 22 | |
| 19 | - __global__ void cudalbp_kernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols, uint8_t* lut) | |
| 23 | + __global__ void lutKernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols, uint8_t* lut) | |
| 20 | 24 | { |
| 21 | 25 | int rowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| 22 | 26 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| ... | ... | @@ -34,22 +38,22 @@ namespace br { namespace cuda { |
| 34 | 38 | } |
| 35 | 39 | } |
| 36 | 40 | |
| 37 | - const uint8_t cval = cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+0*radius, srcPtr, rows, cols);//(srcPtr[(rowInd*srcStep+0*radius)*m.cols+colInd+0*radius]); // center value | |
| 38 | - uint8_t val = lut[(cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 128 : 0) | | |
| 39 | - (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 64 : 0) | | |
| 40 | - (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 32 : 0) | | |
| 41 | - (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 16 : 0) | | |
| 42 | - (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 8 : 0) | | |
| 43 | - (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 4 : 0) | | |
| 44 | - (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 2 : 0) | | |
| 45 | - (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 1 : 0)]; | |
| 41 | + const uint8_t cval = getPixelValueKernel(rowInd+0*radius, colInd+0*radius, srcPtr, rows, cols);//(srcPtr[(rowInd*srcStep+0*radius)*m.cols+colInd+0*radius]); // center value | |
| 42 | + uint8_t val = lut[(getPixelValueKernel(rowInd-1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 128 : 0) | | |
| 43 | + (getPixelValueKernel(rowInd-1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 64 : 0) | | |
| 44 | + (getPixelValueKernel(rowInd-1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 32 : 0) | | |
| 45 | + (getPixelValueKernel(rowInd+0*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 16 : 0) | | |
| 46 | + (getPixelValueKernel(rowInd+1*radius, colInd+1*radius, srcPtr, rows, cols) >= cval ? 8 : 0) | | |
| 47 | + (getPixelValueKernel(rowInd+1*radius, colInd+0*radius, srcPtr, rows, cols) >= cval ? 4 : 0) | | |
| 48 | + (getPixelValueKernel(rowInd+1*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 2 : 0) | | |
| 49 | + (getPixelValueKernel(rowInd+0*radius, colInd-1*radius, srcPtr, rows, cols) >= cval ? 1 : 0)]; | |
| 46 | 50 | |
| 47 | 51 | // store calculated value away in the right place |
| 48 | 52 | dstPtr[index] = val; |
| 49 | 53 | } |
| 50 | 54 | |
| 51 | 55 | //void cudalbp_wrapper(uint8_t* srcPtr, uint8_t* dstPtr, uint8_t* lut, int imageWidth, int imageHeight, size_t step) |
| 52 | - void cudalbp_wrapper(void* srcPtr, void** dstPtr, int rows, int cols) | |
| 56 | + void wrapper(void* srcPtr, void** dstPtr, int rows, int cols) | |
| 53 | 57 | { |
| 54 | 58 | cudaError_t err; |
| 55 | 59 | |
| ... | ... | @@ -59,15 +63,15 @@ namespace br { namespace cuda { |
| 59 | 63 | rows/threadsPerBlock.y + 1); |
| 60 | 64 | |
| 61 | 65 | CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err); |
| 62 | - cudalbp_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), rows, cols, lut); | |
| 66 | + lutKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), rows, cols, lut); | |
| 63 | 67 | CUDA_KERNEL_ERR_CHK(&err); |
| 64 | 68 | |
| 65 | 69 | CUDA_SAFE_FREE(srcPtr, &err); |
| 66 | 70 | } |
| 67 | 71 | |
| 68 | - void cudalbp_init_wrapper(uint8_t* cpuLut) { | |
| 72 | + void initializeWrapper(uint8_t* cpuLut) { | |
| 69 | 73 | cudaError_t err; |
| 70 | 74 | CUDA_SAFE_MALLOC(&lut, 256*sizeof(uint8_t), &err); |
| 71 | 75 | CUDA_SAFE_MEMCPY(lut, cpuLut, 256*sizeof(uint8_t), cudaMemcpyHostToDevice, &err); |
| 72 | 76 | } |
| 73 | -}} | |
| 77 | +}}} | ... | ... |
openbr/plugins/cuda/cudapca.cpp
| ... | ... | @@ -29,8 +29,9 @@ using namespace cv; |
| 29 | 29 | #include <openbr/core/eigenutils.h> |
| 30 | 30 | #include <openbr/core/opencvutils.h> |
| 31 | 31 | |
| 32 | +// definitions from the CUDA source file | |
| 32 | 33 | namespace br { namespace cuda { namespace pca { |
| 33 | - void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); | |
| 34 | + void initializeWrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); | |
| 34 | 35 | void wrapper(void* src, void** dst); |
| 35 | 36 | }}} |
| 36 | 37 | |
| ... | ... | @@ -38,9 +39,7 @@ namespace br |
| 38 | 39 | { |
| 39 | 40 | /*! |
| 40 | 41 | * \ingroup transforms |
| 41 | - * \brief Projects input into learned Principal Component Analysis subspace using CUDA. | |
| 42 | - * \author Brendan Klare \cite bklare | |
| 43 | - * \author Josh Klontz \cite jklontz | |
| 42 | + * \brief Projects input into learned Principal Component Analysis subspace using CUDA. Modified from original PCA plugin. | |
| 44 | 43 | * \author Colin Heinzmann \cite DepthDeluxe |
| 45 | 44 | * |
| 46 | 45 | * \br_property float keep Options are: [keep < 0 - All eigenvalues are retained, keep == 0 - No PCA is performed and the eigenvectors form an identity matrix, 0 < keep < 1 - Keep is the fraction of the variance to retain, keep >= 1 - keep is the number of leading eigenvectors to retain] Default is 0.95. |
| ... | ... | @@ -134,22 +133,9 @@ private: |
| 134 | 133 | dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep; |
| 135 | 134 | dstDataPtr[3] = srcDataPtr[3]; |
| 136 | 135 | |
| 137 | - br::cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]); | |
| 136 | + cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]); | |
| 138 | 137 | |
| 139 | 138 | dst = dstMat; |
| 140 | - | |
| 141 | - //dst = cv::Mat(1, keep, CV_32FC1); | |
| 142 | - | |
| 143 | - // perform the operation on the graphics card | |
| 144 | - //cuda::cudapca_projectwrapper((float*)src.m().ptr<float>(), (float*)dst.m().ptr<float>()); | |
| 145 | - | |
| 146 | - // Map Eigen into OpenCV | |
| 147 | - //Mat cpuDst = cv::Mat(1, keep, CV_32FC1); | |
| 148 | - //Eigen::Map<const Eigen::MatrixXf> inMap(src.m().ptr<float>(), src.m().rows*src.m().cols, 1); | |
| 149 | - //Eigen::Map<Eigen::MatrixXf> outMap(cpuDst.ptr<float>(), keep, 1); | |
| 150 | - | |
| 151 | - // Do projection | |
| 152 | - //outMap = eVecs.transpose() * (inMap - mean); | |
| 153 | 139 | } |
| 154 | 140 | |
| 155 | 141 | void store(QDataStream &stream) const |
| ... | ... | @@ -161,14 +147,6 @@ private: |
| 161 | 147 | { |
| 162 | 148 | stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs; |
| 163 | 149 | |
| 164 | - cout << "Mean Dimensions" << endl; | |
| 165 | - cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl; | |
| 166 | - cout << "eVecs Dimensions" << endl; | |
| 167 | - cout << "\tRows: " << eVecs.rows() << " Cols: " << eVecs.cols() << endl; | |
| 168 | - cout << "eVals Dimensions" << endl; | |
| 169 | - cout << "\tRows: " << eVals.rows() << " Cols: " << eVals.cols() << endl; | |
| 170 | - cout << "Keep: " << keep << endl; | |
| 171 | - | |
| 172 | 150 | // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data |
| 173 | 151 | // serialize the eigenvectors |
| 174 | 152 | float* evBuffer = new float[eVecs.rows() * eVecs.cols()]; |
| ... | ... | @@ -187,7 +165,7 @@ private: |
| 187 | 165 | } |
| 188 | 166 | |
| 189 | 167 | // call the wrapper function |
| 190 | - br::cuda::pca::loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); | |
| 168 | + cuda::pca::initializeWrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); | |
| 191 | 169 | |
| 192 | 170 | delete evBuffer; |
| 193 | 171 | delete meanBuffer; | ... | ... |
openbr/plugins/cuda/cudapca.cu
| ... | ... | @@ -9,6 +9,10 @@ using namespace std; |
| 9 | 9 | using namespace cv; |
| 10 | 10 | using namespace cv::gpu; |
| 11 | 11 | |
| 12 | +/* | |
| 13 | + * These are the CUDA functions for CUDAPCA. See cudapca.cpp for more details | |
| 14 | + */ | |
| 15 | + | |
| 12 | 16 | namespace br { namespace cuda { namespace pca { |
| 13 | 17 | __global__ void multiplyKernel(float* src, float* intermediaryBuffer, float* evPtr, int evRows, int evCols, int stepSize) { |
| 14 | 18 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| ... | ... | @@ -68,7 +72,7 @@ namespace br { namespace cuda { namespace pca { |
| 68 | 72 | int _numSteps; int _stepSize; |
| 69 | 73 | float* intermediaryBuffer; |
| 70 | 74 | |
| 71 | - void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { | |
| 75 | + void initializeWrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { | |
| 72 | 76 | _evRows = evRows; _evCols = evCols; |
| 73 | 77 | _meanElems = meanElems; |
| 74 | 78 | |
| ... | ... | @@ -95,7 +99,6 @@ namespace br { namespace cuda { namespace pca { |
| 95 | 99 | cudaError_t err; |
| 96 | 100 | CUDA_SAFE_MALLOC(dst, _evCols*sizeof(float), &err); |
| 97 | 101 | |
| 98 | - | |
| 99 | 102 | // subtract out the mean of the image (mean is 1xpixels in size) |
| 100 | 103 | int threadsPerBlock = 64; |
| 101 | 104 | int numBlocks = _meanElems / threadsPerBlock + 1; |
| ... | ... | @@ -114,8 +117,5 @@ namespace br { namespace cuda { namespace pca { |
| 114 | 117 | CUDA_KERNEL_ERR_CHK(&err); |
| 115 | 118 | |
| 116 | 119 | CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption... |
| 117 | - | |
| 118 | - // copy the data back to the CPU | |
| 119 | - //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); | |
| 120 | 120 | } |
| 121 | 121 | }}} | ... | ... |
openbr/plugins/cuda/cudargb2grayscale.cpp
| ... | ... | @@ -25,17 +25,18 @@ |
| 25 | 25 | |
| 26 | 26 | using namespace cv; |
| 27 | 27 | |
| 28 | -namespace br { namespace cuda{ | |
| 29 | - void cudargb2grayscale_wrapper(void* srcPtr, void**dstPtr, int rows, int cols); | |
| 30 | -}} | |
| 28 | +// definitions from the CUDA source file | |
| 29 | +namespace br { namespace cuda { namespace rgb2grayscale { | |
| 30 | + void wrapper(void* srcPtr, void**dstPtr, int rows, int cols); | |
| 31 | +}}} | |
| 31 | 32 | |
| 32 | 33 | namespace br |
| 33 | 34 | { |
| 34 | 35 | |
| 35 | 36 | /*! |
| 36 | 37 | * \ingroup transforms |
| 37 | - * \brief Colorspace conversion. | |
| 38 | - * \author Li Li \cite Josh Klontz \cite jklontz | |
| 38 | + * \brief Converts 3-channel images to grayscale | |
| 39 | + * \author Li Li \cite booli | |
| 39 | 40 | */ |
| 40 | 41 | class CUDARGB2GrayScaleTransform : public UntrainableTransform |
| 41 | 42 | { |
| ... | ... | @@ -57,8 +58,8 @@ private: |
| 57 | 58 | dstDataPtr[2] = srcDataPtr[2]; |
| 58 | 59 | dstDataPtr[3] = srcDataPtr[3]; |
| 59 | 60 | *((int*)dstDataPtr[3]) = CV_8UC1; // not sure if the type of the new mat is the same |
| 60 | - | |
| 61 | - br::cuda::cudargb2grayscale_wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | |
| 61 | + | |
| 62 | + cuda::rgb2grayscale::wrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols); | |
| 62 | 63 | dst = dstMat; |
| 63 | 64 | |
| 64 | 65 | /* | ... | ... |
openbr/plugins/cuda/cudargb2grayscale.cu
| ... | ... | @@ -12,9 +12,9 @@ using namespace std; |
| 12 | 12 | using namespace cv; |
| 13 | 13 | using namespace cv::gpu; |
| 14 | 14 | |
| 15 | -namespace br{ namespace cuda { | |
| 15 | +namespace br { namespace cuda { namespace rgb2grayscale { | |
| 16 | 16 | |
| 17 | - __global__ void cudargb2grayscale_kernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols) | |
| 17 | + __global__ void kernel(uint8_t* srcPtr, uint8_t* dstPtr, int rows, int cols) | |
| 18 | 18 | { |
| 19 | 19 | int rowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| 20 | 20 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| ... | ... | @@ -31,7 +31,7 @@ namespace br{ namespace cuda { |
| 31 | 31 | return; |
| 32 | 32 | } |
| 33 | 33 | |
| 34 | - void cudargb2grayscale_wrapper(void* srcPtr, void** dstPtr, int rows, int cols) | |
| 34 | + void wrapper(void* srcPtr, void** dstPtr, int rows, int cols) | |
| 35 | 35 | { |
| 36 | 36 | cudaError_t err; |
| 37 | 37 | dim3 threadsPerBlock(9, 9); |
| ... | ... | @@ -39,9 +39,9 @@ namespace br{ namespace cuda { |
| 39 | 39 | rows/threadsPerBlock.y + 1); |
| 40 | 40 | CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err); |
| 41 | 41 | |
| 42 | - cudargb2grayscale_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*) (*dstPtr), rows, cols); | |
| 42 | + kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*) (*dstPtr), rows, cols); | |
| 43 | 43 | CUDA_KERNEL_ERR_CHK(&err); |
| 44 | 44 | CUDA_SAFE_FREE(srcPtr, &err); |
| 45 | - } | |
| 45 | + } | |
| 46 | 46 | |
| 47 | -}} | |
| 47 | +}}} | ... | ... |
openbr/plugins/cuda/passthrough.cpp deleted
| 1 | -#include <openbr/plugins/openbr_internal.h> | |
| 2 | - | |
| 3 | -#include <opencv2/imgproc/imgproc.hpp> | |
| 4 | -#include <opencv2/gpu/gpu.hpp> | |
| 5 | - | |
| 6 | -using namespace cv; | |
| 7 | -using namespace cv::gpu; | |
| 8 | - | |
| 9 | -#include "passthrough.hpp" | |
| 10 | - | |
| 11 | -#include <iostream> | |
| 12 | - | |
| 13 | - | |
| 14 | -namespace br | |
| 15 | -{ | |
| 16 | - class CUDAPassthroughTransform : public UntrainableTransform | |
| 17 | - { | |
| 18 | - Q_OBJECT | |
| 19 | - | |
| 20 | -private: | |
| 21 | - void project(const Template &src, Template &dst) const | |
| 22 | - { | |
| 23 | - // note: if you convert the image to grayscale, you get 8UC1 | |
| 24 | - | |
| 25 | - // upload the src mat to the GPU | |
| 26 | - GpuMat srcGpuMat, dstGpuMat; | |
| 27 | - srcGpuMat.upload(src.m()); | |
| 28 | - dstGpuMat.upload(src.m()); | |
| 29 | - | |
| 30 | - br::cuda::passthrough_wrapper(srcGpuMat, dstGpuMat); | |
| 31 | - | |
| 32 | - dstGpuMat.download(dst.m()); | |
| 33 | - | |
| 34 | - // TODO(colin): add delete code | |
| 35 | - srcGpuMat.release(); | |
| 36 | - dstGpuMat.release(); | |
| 37 | - | |
| 38 | - printf("srcGpuMat empty: %d\n", (int)srcGpuMat.empty()); | |
| 39 | - printf("dstGpuMat empty: %d\n", (int)srcGpuMat.empty()); | |
| 40 | - } | |
| 41 | - }; | |
| 42 | - | |
| 43 | - BR_REGISTER(Transform, CUDAPassthroughTransform); | |
| 44 | -} | |
| 45 | - | |
| 46 | -#include "cuda/passthrough.moc" |
openbr/plugins/cuda/passthrough.cu deleted
| 1 | -// note: Using 8-bit unsigned 1 channel images | |
| 2 | - | |
| 3 | -#include <opencv2/gpu/gpu.hpp> | |
| 4 | - | |
| 5 | -using namespace cv; | |
| 6 | -using namespace cv::gpu; | |
| 7 | - | |
| 8 | -#include "passthrough.hpp" | |
| 9 | - | |
| 10 | -namespace br { namespace cuda { | |
| 11 | - __global__ void passthrough_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) { | |
| 12 | - int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | |
| 13 | - int colInd = blockIdx.x*blockDim.x+threadIdx.x; | |
| 14 | - | |
| 15 | - // don't do anything if we are outside the allowable positions | |
| 16 | - if (rowInd >= rows || colInd >= cols) | |
| 17 | - return; | |
| 18 | - | |
| 19 | - uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd]; | |
| 20 | - uint8_t* rowDstPtr = dstPtr + rowInd*dstStep; | |
| 21 | - | |
| 22 | - rowDstPtr[colInd] = srcVal; | |
| 23 | - } | |
| 24 | - | |
| 25 | - void passthrough_wrapper(GpuMat& src, GpuMat& dst) { | |
| 26 | - // convert the GpuMats to pointers | |
| 27 | - uint8_t* srcPtr = (uint8_t*)src.data; | |
| 28 | - uint8_t* dstPtr = (uint8_t*)dst.data; | |
| 29 | - | |
| 30 | - int imageWidth = src.cols; | |
| 31 | - int imageHeight = src.rows; | |
| 32 | - | |
| 33 | - // make 8 * 8 = 64 square block | |
| 34 | - dim3 threadsPerBlock(8, 8); | |
| 35 | - dim3 numBlocks(imageWidth / threadsPerBlock.x + 1, | |
| 36 | - imageHeight / threadsPerBlock.y + 1); | |
| 37 | - | |
| 38 | - passthrough_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight); | |
| 39 | - } | |
| 40 | -}} | |
| 41 | - | |
| 42 | - | |
| 43 | -// read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel |
openbr/plugins/cuda/passthrough.hpp deleted