Commit e534ff9c263a2e69048f865230a7b7122e02535e
1 parent
93a9cf84
Added preliminary functional cuda affine
Showing
2 changed files
with
345 additions
and
0 deletions
openbr/plugins/cuda/cudaaffine.cpp
0 → 100644
| 1 | +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | ||
| 2 | + * Copyright 2012 The MITRE Corporation * | ||
| 3 | + * * | ||
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); * | ||
| 5 | + * you may not use this file except in compliance with the License. * | ||
| 6 | + * You may obtain a copy of the License at * | ||
| 7 | + * * | ||
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 * | ||
| 9 | + * * | ||
| 10 | + * Unless required by applicable law or agreed to in writing, software * | ||
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, * | ||
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * | ||
| 13 | + * See the License for the specific language governing permissions and * | ||
| 14 | + * limitations under the License. * | ||
| 15 | + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | ||
| 16 | + | ||
| 17 | + | ||
| 18 | +#include <iostream> | ||
| 19 | +using namespace std; | ||
| 20 | + | ||
| 21 | +#include <sys/types.h> | ||
| 22 | +#include <unistd.h> | ||
| 23 | + | ||
| 24 | +#include <pthread.h> | ||
| 25 | + | ||
| 26 | +#include <opencv2/opencv.hpp> | ||
| 27 | +#include <opencv2/imgproc/imgproc.hpp> | ||
| 28 | +#include <opencv2/imgproc/imgproc_c.h> | ||
| 29 | +#include <opencv2/highgui/highgui.hpp> | ||
| 30 | +#include <opencv2/highgui/highgui_c.h> | ||
| 31 | +#include <limits> | ||
| 32 | + | ||
| 33 | +#include <openbr/plugins/openbr_internal.h> | ||
| 34 | +#include <openbr/core/opencvutils.h> | ||
| 35 | + | ||
| 36 | +#include "MatManager.hpp" | ||
| 37 | + | ||
| 38 | +using namespace cv; | ||
| 39 | + | ||
| 40 | +namespace br { namespace cuda { | ||
| 41 | + void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); | ||
| 42 | +}} | ||
| 43 | + | ||
| 44 | +namespace br | ||
| 45 | +{ | ||
| 46 | + | ||
| 47 | +/*! | ||
| 48 | + * \ingroup transforms | ||
| 49 | + * \brief Performs a two or three point registration. | ||
| 50 | + * \author Josh Klontz \cite jklontz | ||
| 51 | + * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargment, Bilin for fast enlargement. | ||
| 52 | + */ | ||
| 53 | +class CUDAAffineTransform : public UntrainableTransform | ||
| 54 | +{ | ||
| 55 | + Q_OBJECT | ||
| 56 | + Q_ENUMS(Method) | ||
| 57 | + | ||
| 58 | +public: | ||
| 59 | + /*!< */ | ||
| 60 | + enum Method { Near = INTER_NEAREST, | ||
| 61 | + Area = INTER_AREA, | ||
| 62 | + Bilin = INTER_LINEAR, | ||
| 63 | + Cubic = INTER_CUBIC, | ||
| 64 | + Lanczo = INTER_LANCZOS4}; | ||
| 65 | + | ||
| 66 | +private: | ||
| 67 | + Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false) | ||
| 68 | + Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false) | ||
| 69 | + Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false) | ||
| 70 | + Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false) | ||
| 71 | + Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false) | ||
| 72 | + Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false) | ||
| 73 | + Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false) | ||
| 74 | + Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false) | ||
| 75 | + Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false) | ||
| 76 | + Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false) | ||
| 77 | + Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false) | ||
| 78 | + BR_PROPERTY(int, width, 64) | ||
| 79 | + BR_PROPERTY(int, height, 64) | ||
| 80 | + BR_PROPERTY(float, x1, 0) | ||
| 81 | + BR_PROPERTY(float, y1, 0) | ||
| 82 | + BR_PROPERTY(float, x2, -1) | ||
| 83 | + BR_PROPERTY(float, y2, -1) | ||
| 84 | + BR_PROPERTY(float, x3, -1) | ||
| 85 | + BR_PROPERTY(float, y3, -1) | ||
| 86 | + BR_PROPERTY(Method, method, Bilin) | ||
| 87 | + BR_PROPERTY(bool, storeAffine, false) | ||
| 88 | + BR_PROPERTY(bool, warpPoints, false) | ||
| 89 | + | ||
| 90 | + static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b) | ||
| 91 | + { | ||
| 92 | + float dx = b.x - a.x; | ||
| 93 | + float dy = b.y - a.y; | ||
| 94 | + return Point2f(a.x - dy, a.y + dx); | ||
| 95 | + } | ||
| 96 | + | ||
| 97 | + void project(const Template &src, Template &dst) const | ||
| 98 | + { | ||
| 99 | + const bool twoPoints = ((x3 == -1) || (y3 == -1)); | ||
| 100 | + | ||
| 101 | + Point2f dstPoints[3]; | ||
| 102 | + dstPoints[0] = Point2f(x1*width, y1*height); | ||
| 103 | + dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height); | ||
| 104 | + if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]); | ||
| 105 | + else dstPoints[2] = Point2f(x3*width, y3*height); | ||
| 106 | + | ||
| 107 | + Point2f srcPoints[3]; | ||
| 108 | + if (src.file.contains("Affine_0") && | ||
| 109 | + src.file.contains("Affine_1") && | ||
| 110 | + (src.file.contains("Affine_2") || twoPoints)) { | ||
| 111 | + srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0")); | ||
| 112 | + srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1")); | ||
| 113 | + if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2")); | ||
| 114 | + } else { | ||
| 115 | + const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points()); | ||
| 116 | + | ||
| 117 | + if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { | ||
| 118 | + resize(src, dst, Size(width, height)); | ||
| 119 | + return; | ||
| 120 | + } else { | ||
| 121 | + srcPoints[0] = landmarks[0]; | ||
| 122 | + srcPoints[1] = landmarks[1]; | ||
| 123 | + if (!twoPoints) srcPoints[2] = landmarks[2]; | ||
| 124 | + } | ||
| 125 | + } | ||
| 126 | + if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]); | ||
| 127 | + | ||
| 128 | + // Code section being altered (original) | ||
| 129 | + // | ||
| 130 | + // Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | ||
| 131 | + // warpAffine(src, dst, affineTransform, Size(width, height), method); | ||
| 132 | + // | ||
| 133 | + // end original | ||
| 134 | + | ||
| 135 | + Mat affineTransform = getAffineTransform(srcPoints, dstPoints); | ||
| 136 | + | ||
| 137 | + void* const* srcDataPtr = src.m().ptr<void*>(); | ||
| 138 | + int rows = *((int*)srcDataPtr[1]); | ||
| 139 | + int cols = *((int*)srcDataPtr[2]); | ||
| 140 | + int type = *((int*)srcDataPtr[3]); | ||
| 141 | + | ||
| 142 | + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | ||
| 143 | + void** dstDataPtr = dstMat.ptr<void*>(); | ||
| 144 | + | ||
| 145 | + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows | ||
| 146 | + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols | ||
| 147 | + dstDataPtr[3] = srcDataPtr[3]; | ||
| 148 | + | ||
| 149 | + // Print the transform | ||
| 150 | + //for(int x = 0; x < affineTransform.rows; x++){ | ||
| 151 | + //for(int y = 0; y < affineTransform.cols; y++){ | ||
| 152 | + //printf("%8.3f\t", affineTransform.at<double>(x, y)); | ||
| 153 | + //} | ||
| 154 | + //printf("\n"); | ||
| 155 | + //} | ||
| 156 | + | ||
| 157 | + br::cuda::cudaaffine_wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width); | ||
| 158 | + | ||
| 159 | + // end altered code | ||
| 160 | + | ||
| 161 | + if (warpPoints) { | ||
| 162 | + QList<QPointF> points = src.file.points(); | ||
| 163 | + QList<QPointF> rotatedPoints; | ||
| 164 | + for (int i=0; i<points.size(); i++) { | ||
| 165 | + rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+ | ||
| 166 | + points.at(i).y()*affineTransform.at<double>(0,1)+ | ||
| 167 | + affineTransform.at<double>(0,2), | ||
| 168 | + points.at(i).x()*affineTransform.at<double>(1,0)+ | ||
| 169 | + points.at(i).y()*affineTransform.at<double>(1,1)+ | ||
| 170 | + affineTransform.at<double>(1,2))); | ||
| 171 | + } | ||
| 172 | + | ||
| 173 | + dst.file.setPoints(rotatedPoints); | ||
| 174 | + } | ||
| 175 | + | ||
| 176 | + if (storeAffine) { | ||
| 177 | + QList<float> affineParams; | ||
| 178 | + for (int i = 0 ; i < 2; i++) | ||
| 179 | + for (int j = 0; j < 3; j++) | ||
| 180 | + affineParams.append(affineTransform.at<double>(i, j)); | ||
| 181 | + dst.file.setList("affineParameters", affineParams); | ||
| 182 | + } | ||
| 183 | + | ||
| 184 | + dst = dstMat; | ||
| 185 | + } | ||
| 186 | +}; | ||
| 187 | + | ||
| 188 | +BR_REGISTER(Transform, CUDAAffineTransform) | ||
| 189 | + | ||
| 190 | +} // namespace br | ||
| 191 | + | ||
| 192 | +#include "cuda/cudaaffine.moc" |
openbr/plugins/cuda/cudaaffine.cu
0 → 100644
| 1 | +#include <iostream> | ||
| 2 | +using namespace std; | ||
| 3 | + | ||
| 4 | +#include <opencv2/gpu/gpu.hpp> | ||
| 5 | +#include <opencv2/opencv.hpp> | ||
| 6 | +#include <stdio.h> | ||
| 7 | +#include <math.h> | ||
| 8 | + | ||
| 9 | +#include "cudadefines.hpp" | ||
| 10 | + | ||
| 11 | +using namespace cv; | ||
| 12 | +using namespace cv::gpu; | ||
| 13 | + | ||
| 14 | +namespace br { namespace cuda { | ||
| 15 | + | ||
| 16 | + __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { | ||
| 17 | + // don't do anything if the index is out of bounds | ||
| 18 | + if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { | ||
| 19 | + if (row >= rows || col >= cols) { | ||
| 20 | + return 0; | ||
| 21 | + } else{ | ||
| 22 | + return 0; } | ||
| 23 | + } | ||
| 24 | + return (srcPtr + row*cols)[col]; | ||
| 25 | + } | ||
| 26 | + | ||
| 27 | + /* | ||
| 28 | + * trans_inv - A pointer to a one-dimensional representation of the inverse of the transform matrix 3x3 | ||
| 29 | + * dst_row - The destination row (mapping to this row) | ||
| 30 | + * dst_col - The destination column (mapping to this column) | ||
| 31 | + * src_row - The computed source pixel row (mapping from this row) | ||
| 32 | + * src_col - The computed source pixel column (mapping from this col) | ||
| 33 | + */ | ||
| 34 | + __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, int* src_row, int* src_col){ | ||
| 35 | + *src_col = round(dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]); | ||
| 36 | + *src_row = round(dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]); | ||
| 37 | + | ||
| 38 | + //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); | ||
| 39 | + | ||
| 40 | + } | ||
| 41 | + | ||
| 42 | + | ||
| 43 | + __global__ void cudaaffine_kernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ | ||
| 44 | + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; | ||
| 45 | + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; | ||
| 46 | + int dstIndex = dstRowInd*dst_cols + dstColInd; | ||
| 47 | + | ||
| 48 | + //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); | ||
| 49 | + | ||
| 50 | + int srcRowInd; | ||
| 51 | + int srcColInd; | ||
| 52 | + | ||
| 53 | + // don't do anything if the index is out of bounds | ||
| 54 | + if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) { | ||
| 55 | + if (dstRowInd >= dst_rows || dstColInd >= dst_cols) { | ||
| 56 | + return; | ||
| 57 | + } else{ | ||
| 58 | + dstPtr[dstIndex] = 0; | ||
| 59 | + return; | ||
| 60 | + } | ||
| 61 | + } | ||
| 62 | + | ||
| 63 | + cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd); | ||
| 64 | + const uint8_t cval = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols); // Get initial pixel value | ||
| 65 | + | ||
| 66 | + dstPtr[dstIndex] = cval; | ||
| 67 | + } | ||
| 68 | + | ||
| 69 | + void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { | ||
| 70 | + cudaError_t err; | ||
| 71 | + double* gpuInverse; | ||
| 72 | + | ||
| 73 | + dim3 threadsPerBlock(8, 8); | ||
| 74 | + dim3 numBlocks(dst_cols/threadsPerBlock.x + 1, | ||
| 75 | + dst_rows/threadsPerBlock.y + 1); | ||
| 76 | + | ||
| 77 | + //************************************************************************ | ||
| 78 | + // Input affine is a 2x3 Mat whose transpose is used in the computations | ||
| 79 | + // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ] | ||
| 80 | + // See "Digital Image Warping" by George Wolburg (p. 50) | ||
| 81 | + //************************************************************************ | ||
| 82 | + | ||
| 83 | + // get new transform elements | ||
| 84 | + double a11 = affineTransform.at<double>(0, 0); | ||
| 85 | + double a12 = affineTransform.at<double>(1, 0); | ||
| 86 | + double a21 = affineTransform.at<double>(0, 1); | ||
| 87 | + double a22 = affineTransform.at<double>(1, 1); | ||
| 88 | + double a31 = affineTransform.at<double>(0, 2); | ||
| 89 | + double a32 = affineTransform.at<double>(1, 2); | ||
| 90 | + // double a23 = 0; | ||
| 91 | + // double a13 = 0; | ||
| 92 | + // double a33 = 1; | ||
| 93 | + | ||
| 94 | + // compute transform inverse | ||
| 95 | + double det = 1 / (a11*a22 - a21*a12); | ||
| 96 | + | ||
| 97 | + double affineInverse[9]; | ||
| 98 | + affineInverse[0] = a22 * det; | ||
| 99 | + affineInverse[1] = -a12 * det; | ||
| 100 | + affineInverse[2] = 0; | ||
| 101 | + affineInverse[3] = -a21 * det; | ||
| 102 | + affineInverse[4] = a11 * det; | ||
| 103 | + affineInverse[5] = 0; | ||
| 104 | + affineInverse[6] = (a21*a32 - a31*a22) * det; | ||
| 105 | + affineInverse[7] = (a31*a12 - a11*a32) * det; | ||
| 106 | + affineInverse[8] = (a11*a22 - a21*a12) * det; | ||
| 107 | + | ||
| 108 | + // Move from affineTransform to gpuAffine (currently fake) | ||
| 109 | + // double fakeAffine[6]; | ||
| 110 | + // fakeAffine[0] = affineTransform.at<double>(0, 0); | ||
| 111 | + // fakeAffine[1] = affineTransform.at<double>(0, 1); | ||
| 112 | + // fakeAffine[2] = affineTransform.at<double>(0, 2); | ||
| 113 | + // fakeAffine[3] = affineTransform.at<double>(1, 0); | ||
| 114 | + // fakeAffine[4] = affineTransform.at<double>(1, 1); | ||
| 115 | + // fakeAffine[5] = affineTransform.at<double>(1, 2); | ||
| 116 | + | ||
| 117 | + // printf("\n"); | ||
| 118 | + // printf("%f\t%f\t%f\n", a11, a12, 0.0); | ||
| 119 | + // printf("%f\t%f\t%f\n", a21, a22, 0.0); | ||
| 120 | + // printf("%f\t%f\t%f\n", a31, a32, 1.0); | ||
| 121 | + // printf("\n"); | ||
| 122 | + | ||
| 123 | + // printf("Affine Inverse:\n"); | ||
| 124 | + // for(int i = 0; i < 3; i++){ | ||
| 125 | + // for(int j = 0; j < 3; j++){ | ||
| 126 | + // printf("%f\t", affineInverse[3*i + j]); | ||
| 127 | + // } | ||
| 128 | + // printf("\n"); | ||
| 129 | + // } | ||
| 130 | + | ||
| 131 | + | ||
| 132 | + CUDA_SAFE_MALLOC(dstPtr, dst_rows*dst_cols*sizeof(uint8_t), &err); | ||
| 133 | + CUDA_SAFE_MALLOC(&gpuInverse, 3*3*sizeof(double), &err); | ||
| 134 | + | ||
| 135 | + CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err); | ||
| 136 | + | ||
| 137 | + cudaaffine_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols); | ||
| 138 | + CUDA_KERNEL_ERR_CHK(&err); | ||
| 139 | + | ||
| 140 | + CUDA_SAFE_FREE(srcPtr, &err); | ||
| 141 | + CUDA_SAFE_FREE(gpuInverse, &err); | ||
| 142 | + | ||
| 143 | + // printf("\n\n"); | ||
| 144 | + // for(int i = 0; i < cols; i++){ | ||
| 145 | + // for(int j = 0; j < src_rows; j++){ | ||
| 146 | + // printf("%4d\t", ((uint8_t*) dstPtr)[j*cols + i]); | ||
| 147 | + // } | ||
| 148 | + // printf("\n"); | ||
| 149 | + // } | ||
| 150 | + // printf("\n"); | ||
| 151 | + } | ||
| 152 | +} // end cuda | ||
| 153 | +} // end br |