Commit e534ff9c263a2e69048f865230a7b7122e02535e
1 parent
93a9cf84
Added preliminary functional cuda affine
Showing
2 changed files
with
345 additions
and
0 deletions
openbr/plugins/cuda/cudaaffine.cpp
0 → 100644
| 1 | +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | |
| 2 | + * Copyright 2012 The MITRE Corporation * | |
| 3 | + * * | |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); * | |
| 5 | + * you may not use this file except in compliance with the License. * | |
| 6 | + * You may obtain a copy of the License at * | |
| 7 | + * * | |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 * | |
| 9 | + * * | |
| 10 | + * Unless required by applicable law or agreed to in writing, software * | |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, * | |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * | |
| 13 | + * See the License for the specific language governing permissions and * | |
| 14 | + * limitations under the License. * | |
| 15 | + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | |
| 16 | + | |
| 17 | + | |
| 18 | +#include <iostream> | |
| 19 | +using namespace std; | |
| 20 | + | |
| 21 | +#include <sys/types.h> | |
| 22 | +#include <unistd.h> | |
| 23 | + | |
| 24 | +#include <pthread.h> | |
| 25 | + | |
| 26 | +#include <opencv2/opencv.hpp> | |
| 27 | +#include <opencv2/imgproc/imgproc.hpp> | |
| 28 | +#include <opencv2/imgproc/imgproc_c.h> | |
| 29 | +#include <opencv2/highgui/highgui.hpp> | |
| 30 | +#include <opencv2/highgui/highgui_c.h> | |
| 31 | +#include <limits> | |
| 32 | + | |
| 33 | +#include <openbr/plugins/openbr_internal.h> | |
| 34 | +#include <openbr/core/opencvutils.h> | |
| 35 | + | |
| 36 | +#include "MatManager.hpp" | |
| 37 | + | |
| 38 | +using namespace cv; | |
| 39 | + | |
| 40 | +namespace br { namespace cuda { | |
| 41 | + void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); | |
| 42 | +}} | |
| 43 | + | |
| 44 | +namespace br | |
| 45 | +{ | |
| 46 | + | |
| 47 | +/*! | |
| 48 | + * \ingroup transforms | |
| 49 | + * \brief Performs a two or three point registration. | |
| 50 | + * \author Josh Klontz \cite jklontz | |
| 51 | + * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargment, Bilin for fast enlargement. | |
| 52 | + */ | |
| 53 | +class CUDAAffineTransform : public UntrainableTransform | |
| 54 | +{ | |
| 55 | + Q_OBJECT | |
| 56 | + Q_ENUMS(Method) | |
| 57 | + | |
| 58 | +public: | |
| 59 | + /*!< */ | |
| 60 | + enum Method { Near = INTER_NEAREST, | |
| 61 | + Area = INTER_AREA, | |
| 62 | + Bilin = INTER_LINEAR, | |
| 63 | + Cubic = INTER_CUBIC, | |
| 64 | + Lanczo = INTER_LANCZOS4}; | |
| 65 | + | |
| 66 | +private: | |
| 67 | + Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false) | |
| 68 | + Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false) | |
| 69 | + Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false) | |
| 70 | + Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false) | |
| 71 | + Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false) | |
| 72 | + Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false) | |
| 73 | + Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false) | |
| 74 | + Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false) | |
| 75 | + Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false) | |
| 76 | + Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false) | |
| 77 | + Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false) | |
| 78 | + BR_PROPERTY(int, width, 64) | |
| 79 | + BR_PROPERTY(int, height, 64) | |
| 80 | + BR_PROPERTY(float, x1, 0) | |
| 81 | + BR_PROPERTY(float, y1, 0) | |
| 82 | + BR_PROPERTY(float, x2, -1) | |
| 83 | + BR_PROPERTY(float, y2, -1) | |
| 84 | + BR_PROPERTY(float, x3, -1) | |
| 85 | + BR_PROPERTY(float, y3, -1) | |
| 86 | + BR_PROPERTY(Method, method, Bilin) | |
| 87 | + BR_PROPERTY(bool, storeAffine, false) | |
| 88 | + BR_PROPERTY(bool, warpPoints, false) | |
| 89 | + | |
| 90 | + static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b) | |
| 91 | + { | |
| 92 | + float dx = b.x - a.x; | |
| 93 | + float dy = b.y - a.y; | |
| 94 | + return Point2f(a.x - dy, a.y + dx); | |
| 95 | + } | |
| 96 | + | |
| 97 | + void project(const Template &src, Template &dst) const | |
| 98 | + { | |
| 99 | + const bool twoPoints = ((x3 == -1) || (y3 == -1)); | |
| 100 | + | |
| 101 | + Point2f dstPoints[3]; | |
| 102 | + dstPoints[0] = Point2f(x1*width, y1*height); | |
| 103 | + dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height); | |
| 104 | + if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]); | |
| 105 | + else dstPoints[2] = Point2f(x3*width, y3*height); | |
| 106 | + | |
| 107 | + Point2f srcPoints[3]; | |
| 108 | + if (src.file.contains("Affine_0") && | |
| 109 | + src.file.contains("Affine_1") && | |
| 110 | + (src.file.contains("Affine_2") || twoPoints)) { | |
| 111 | + srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0")); | |
| 112 | + srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1")); | |
| 113 | + if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2")); | |
| 114 | + } else { | |
| 115 | + const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points()); | |
| 116 | + | |
| 117 | + if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { | |
| 118 | + resize(src, dst, Size(width, height)); | |
| 119 | + return; | |
| 120 | + } else { | |
| 121 | + srcPoints[0] = landmarks[0]; | |
| 122 | + srcPoints[1] = landmarks[1]; | |
| 123 | + if (!twoPoints) srcPoints[2] = landmarks[2]; | |
| 124 | + } | |
| 125 | + } | |
| 126 | + if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]); | |
| 127 | + | |
| 128 | + // Code section being altered (original) | |
| 129 | + // | |
| 130 | + // Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | |
| 131 | + // warpAffine(src, dst, affineTransform, Size(width, height), method); | |
| 132 | + // | |
| 133 | + // end original | |
| 134 | + | |
| 135 | + Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | |
| 136 | + | |
| 137 | + void* const* srcDataPtr = src.m().ptr<void*>(); | |
| 138 | + int rows = *((int*)srcDataPtr[1]); | |
| 139 | + int cols = *((int*)srcDataPtr[2]); | |
| 140 | + int type = *((int*)srcDataPtr[3]); | |
| 141 | + | |
| 142 | + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | |
| 143 | + void** dstDataPtr = dstMat.ptr<void*>(); | |
| 144 | + | |
| 145 | + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows | |
| 146 | + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols | |
| 147 | + dstDataPtr[3] = srcDataPtr[3]; | |
| 148 | + | |
| 149 | + // Print the transform | |
| 150 | + //for(int x = 0; x < affineTransform.rows; x++){ | |
| 151 | + //for(int y = 0; y < affineTransform.cols; y++){ | |
| 152 | + //printf("%8.3f\t", affineTransform.at<double>(x, y)); | |
| 153 | + //} | |
| 154 | + //printf("\n"); | |
| 155 | + //} | |
| 156 | + | |
| 157 | + br::cuda::cudaaffine_wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width); | |
| 158 | + | |
| 159 | + // end altered code | |
| 160 | + | |
| 161 | + if (warpPoints) { | |
| 162 | + QList<QPointF> points = src.file.points(); | |
| 163 | + QList<QPointF> rotatedPoints; | |
| 164 | + for (int i=0; i<points.size(); i++) { | |
| 165 | + rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+ | |
| 166 | + points.at(i).y()*affineTransform.at<double>(0,1)+ | |
| 167 | + affineTransform.at<double>(0,2), | |
| 168 | + points.at(i).x()*affineTransform.at<double>(1,0)+ | |
| 169 | + points.at(i).y()*affineTransform.at<double>(1,1)+ | |
| 170 | + affineTransform.at<double>(1,2))); | |
| 171 | + } | |
| 172 | + | |
| 173 | + dst.file.setPoints(rotatedPoints); | |
| 174 | + } | |
| 175 | + | |
| 176 | + if (storeAffine) { | |
| 177 | + QList<float> affineParams; | |
| 178 | + for (int i = 0 ; i < 2; i++) | |
| 179 | + for (int j = 0; j < 3; j++) | |
| 180 | + affineParams.append(affineTransform.at<double>(i, j)); | |
| 181 | + dst.file.setList("affineParameters", affineParams); | |
| 182 | + } | |
| 183 | + | |
| 184 | + dst = dstMat; | |
| 185 | + } | |
| 186 | +}; | |
| 187 | + | |
| 188 | +BR_REGISTER(Transform, CUDAAffineTransform) | |
| 189 | + | |
| 190 | +} // namespace br | |
| 191 | + | |
| 192 | +#include "cuda/cudaaffine.moc" | ... | ... |
openbr/plugins/cuda/cudaaffine.cu
0 → 100644
| 1 | +#include <iostream> | |
| 2 | +using namespace std; | |
| 3 | + | |
| 4 | +#include <opencv2/gpu/gpu.hpp> | |
| 5 | +#include <opencv2/opencv.hpp> | |
| 6 | +#include <stdio.h> | |
| 7 | +#include <math.h> | |
| 8 | + | |
| 9 | +#include "cudadefines.hpp" | |
| 10 | + | |
| 11 | +using namespace cv; | |
| 12 | +using namespace cv::gpu; | |
| 13 | + | |
| 14 | +namespace br { namespace cuda { | |
| 15 | + | |
| 16 | + __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { | |
| 17 | + // don't do anything if the index is out of bounds | |
| 18 | + if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { | |
| 19 | + if (row >= rows || col >= cols) { | |
| 20 | + return 0; | |
| 21 | + } else{ | |
| 22 | + return 0; } | |
| 23 | + } | |
| 24 | + return (srcPtr + row*cols)[col]; | |
| 25 | + } | |
| 26 | + | |
| 27 | + /* | |
| 28 | + * trans_inv - A pointer to a one-dimensional representation of the inverse of the transform matrix 3x3 | |
| 29 | + * dst_row - The destination row (mapping to this row) | |
| 30 | + * dst_col - The destination column (mapping to this column) | |
| 31 | + * src_row - The computed source pixel row (mapping from this row) | |
| 32 | + * src_col - The computed source pixel column (mapping from this col) | |
| 33 | + */ | |
| 34 | + __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, int* src_row, int* src_col){ | |
| 35 | + *src_col = round(dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]); | |
| 36 | + *src_row = round(dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]); | |
| 37 | + | |
| 38 | + //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); | |
| 39 | + | |
| 40 | + } | |
| 41 | + | |
| 42 | + | |
| 43 | + __global__ void cudaaffine_kernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ | |
| 44 | + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; | |
| 45 | + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; | |
| 46 | + int dstIndex = dstRowInd*dst_cols + dstColInd; | |
| 47 | + | |
| 48 | + //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); | |
| 49 | + | |
| 50 | + int srcRowInd; | |
| 51 | + int srcColInd; | |
| 52 | + | |
| 53 | + // don't do anything if the index is out of bounds | |
| 54 | + if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) { | |
| 55 | + if (dstRowInd >= dst_rows || dstColInd >= dst_cols) { | |
| 56 | + return; | |
| 57 | + } else{ | |
| 58 | + dstPtr[dstIndex] = 0; | |
| 59 | + return; | |
| 60 | + } | |
| 61 | + } | |
| 62 | + | |
| 63 | + cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd); | |
| 64 | + const uint8_t cval = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 65 | + | |
| 66 | + dstPtr[dstIndex] = cval; | |
| 67 | + } | |
| 68 | + | |
| 69 | + void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { | |
| 70 | + cudaError_t err; | |
| 71 | + double* gpuInverse; | |
| 72 | + | |
| 73 | + dim3 threadsPerBlock(8, 8); | |
| 74 | + dim3 numBlocks(dst_cols/threadsPerBlock.x + 1, | |
| 75 | + dst_rows/threadsPerBlock.y + 1); | |
| 76 | + | |
| 77 | + //************************************************************************ | |
| 78 | + // Input affine is a 2x3 Mat whose transpose is used in the computations | |
| 79 | + // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ] | |
| 80 | + // See "Digital Image Warping" by George Wolburg (p. 50) | |
| 81 | + //************************************************************************ | |
| 82 | + | |
| 83 | + // get new transform elements | |
| 84 | + double a11 = affineTransform.at<double>(0, 0); | |
| 85 | + double a12 = affineTransform.at<double>(1, 0); | |
| 86 | + double a21 = affineTransform.at<double>(0, 1); | |
| 87 | + double a22 = affineTransform.at<double>(1, 1); | |
| 88 | + double a31 = affineTransform.at<double>(0, 2); | |
| 89 | + double a32 = affineTransform.at<double>(1, 2); | |
| 90 | + // double a23 = 0; | |
| 91 | + // double a13 = 0; | |
| 92 | + // double a33 = 1; | |
| 93 | + | |
| 94 | + // compute transform inverse | |
| 95 | + double det = 1 / (a11*a22 - a21*a12); | |
| 96 | + | |
| 97 | + double affineInverse[9]; | |
| 98 | + affineInverse[0] = a22 * det; | |
| 99 | + affineInverse[1] = -a12 * det; | |
| 100 | + affineInverse[2] = 0; | |
| 101 | + affineInverse[3] = -a21 * det; | |
| 102 | + affineInverse[4] = a11 * det; | |
| 103 | + affineInverse[5] = 0; | |
| 104 | + affineInverse[6] = (a21*a32 - a31*a22) * det; | |
| 105 | + affineInverse[7] = (a31*a12 - a11*a32) * det; | |
| 106 | + affineInverse[8] = (a11*a22 - a21*a12) * det; | |
| 107 | + | |
| 108 | + // Move from affineTransform to gpuAffine (currently fake) | |
| 109 | + // double fakeAffine[6]; | |
| 110 | + // fakeAffine[0] = affineTransform.at<double>(0, 0); | |
| 111 | + // fakeAffine[1] = affineTransform.at<double>(0, 1); | |
| 112 | + // fakeAffine[2] = affineTransform.at<double>(0, 2); | |
| 113 | + // fakeAffine[3] = affineTransform.at<double>(1, 0); | |
| 114 | + // fakeAffine[4] = affineTransform.at<double>(1, 1); | |
| 115 | + // fakeAffine[5] = affineTransform.at<double>(1, 2); | |
| 116 | + | |
| 117 | + // printf("\n"); | |
| 118 | + // printf("%f\t%f\t%f\n", a11, a12, 0.0); | |
| 119 | + // printf("%f\t%f\t%f\n", a21, a22, 0.0); | |
| 120 | + // printf("%f\t%f\t%f\n", a31, a32, 1.0); | |
| 121 | + // printf("\n"); | |
| 122 | + | |
| 123 | + // printf("Affine Inverse:\n"); | |
| 124 | + // for(int i = 0; i < 3; i++){ | |
| 125 | + // for(int j = 0; j < 3; j++){ | |
| 126 | + // printf("%f\t", affineInverse[3*i + j]); | |
| 127 | + // } | |
| 128 | + // printf("\n"); | |
| 129 | + // } | |
| 130 | + | |
| 131 | + | |
| 132 | + CUDA_SAFE_MALLOC(dstPtr, dst_rows*dst_cols*sizeof(uint8_t), &err); | |
| 133 | + CUDA_SAFE_MALLOC(&gpuInverse, 3*3*sizeof(double), &err); | |
| 134 | + | |
| 135 | + CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err); | |
| 136 | + | |
| 137 | + cudaaffine_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols); | |
| 138 | + CUDA_KERNEL_ERR_CHK(&err); | |
| 139 | + | |
| 140 | + CUDA_SAFE_FREE(srcPtr, &err); | |
| 141 | + CUDA_SAFE_FREE(gpuInverse, &err); | |
| 142 | + | |
| 143 | + // printf("\n\n"); | |
| 144 | + // for(int i = 0; i < cols; i++){ | |
| 145 | + // for(int j = 0; j < src_rows; j++){ | |
| 146 | + // printf("%4d\t", ((uint8_t*) dstPtr)[j*cols + i]); | |
| 147 | + // } | |
| 148 | + // printf("\n"); | |
| 149 | + // } | |
| 150 | + // printf("\n"); | |
| 151 | + } | |
| 152 | +} // end cuda | |
| 153 | +} // end br | ... | ... |