From e534ff9c263a2e69048f865230a7b7122e02535e Mon Sep 17 00:00:00 2001 From: Greg Schrock Date: Mon, 29 Feb 2016 16:01:16 -0500 Subject: [PATCH] Added preliminary functional cuda affine --- openbr/plugins/cuda/cudaaffine.cpp | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ openbr/plugins/cuda/cudaaffine.cu | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 345 insertions(+), 0 deletions(-) create mode 100644 openbr/plugins/cuda/cudaaffine.cpp create mode 100644 openbr/plugins/cuda/cudaaffine.cu diff --git a/openbr/plugins/cuda/cudaaffine.cpp b/openbr/plugins/cuda/cudaaffine.cpp new file mode 100644 index 0000000..46dd71f --- /dev/null +++ b/openbr/plugins/cuda/cudaaffine.cpp @@ -0,0 +1,192 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright 2012 The MITRE Corporation * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + + +#include +using namespace std; + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "MatManager.hpp" + +using namespace cv; + +namespace br { namespace cuda { + void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); +}} + +namespace br +{ + +/*! + * \ingroup transforms + * \brief Performs a two or three point registration. + * \author Josh Klontz \cite jklontz + * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargment, Bilin for fast enlargement. + */ +class CUDAAffineTransform : public UntrainableTransform +{ + Q_OBJECT + Q_ENUMS(Method) + +public: + /*!< */ + enum Method { Near = INTER_NEAREST, + Area = INTER_AREA, + Bilin = INTER_LINEAR, + Cubic = INTER_CUBIC, + Lanczo = INTER_LANCZOS4}; + +private: + Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false) + Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false) + Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false) + Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false) + Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false) + Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false) + Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false) + Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false) + Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false) + Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false) + Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false) + BR_PROPERTY(int, width, 64) + BR_PROPERTY(int, height, 64) + BR_PROPERTY(float, x1, 0) + BR_PROPERTY(float, y1, 0) + BR_PROPERTY(float, x2, -1) + BR_PROPERTY(float, y2, -1) + BR_PROPERTY(float, x3, -1) + BR_PROPERTY(float, y3, -1) + BR_PROPERTY(Method, method, Bilin) + BR_PROPERTY(bool, storeAffine, false) + BR_PROPERTY(bool, warpPoints, false) + + static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b) + { + float dx = b.x - a.x; + float dy = b.y - a.y; + return Point2f(a.x - dy, a.y + dx); + } + + void project(const Template &src, Template &dst) const + { + const bool twoPoints = ((x3 == -1) || (y3 == -1)); + + Point2f dstPoints[3]; + dstPoints[0] = Point2f(x1*width, y1*height); + dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height); + if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]); + else dstPoints[2] = Point2f(x3*width, y3*height); + + Point2f srcPoints[3]; + if (src.file.contains("Affine_0") && + src.file.contains("Affine_1") && + (src.file.contains("Affine_2") || twoPoints)) { + srcPoints[0] = OpenCVUtils::toPoint(src.file.get("Affine_0")); + srcPoints[1] = OpenCVUtils::toPoint(src.file.get("Affine_1")); + if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get("Affine_2")); + } else { + const QList landmarks = OpenCVUtils::toPoints(src.file.points()); + + if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { + resize(src, dst, Size(width, height)); + return; + } else { + srcPoints[0] = landmarks[0]; + srcPoints[1] = landmarks[1]; + if (!twoPoints) srcPoints[2] = landmarks[2]; + } + } + if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]); + + // Code section being altered (original) + // + // Mat affineTransform = getAffineTransform(srcPoints, dstPoints); + // warpAffine(src, dst, affineTransform, Size(width, height), method); + // + // end original + + Mat affineTransform = getAffineTransform(srcPoints, dstPoints); + + void* const* srcDataPtr = src.m().ptr(); + int rows = *((int*)srcDataPtr[1]); + int cols = *((int*)srcDataPtr[2]); + int type = *((int*)srcDataPtr[3]); + + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); + void** dstDataPtr = dstMat.ptr(); + + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols + dstDataPtr[3] = srcDataPtr[3]; + + // Print the transform + //for(int x = 0; x < affineTransform.rows; x++){ + //for(int y = 0; y < affineTransform.cols; y++){ + //printf("%8.3f\t", affineTransform.at(x, y)); + //} + //printf("\n"); + //} + + br::cuda::cudaaffine_wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width); + + // end altered code + + if (warpPoints) { + QList points = src.file.points(); + QList rotatedPoints; + for (int i=0; i(0,0)+ + points.at(i).y()*affineTransform.at(0,1)+ + affineTransform.at(0,2), + points.at(i).x()*affineTransform.at(1,0)+ + points.at(i).y()*affineTransform.at(1,1)+ + affineTransform.at(1,2))); + } + + dst.file.setPoints(rotatedPoints); + } + + if (storeAffine) { + QList affineParams; + for (int i = 0 ; i < 2; i++) + for (int j = 0; j < 3; j++) + affineParams.append(affineTransform.at(i, j)); + dst.file.setList("affineParameters", affineParams); + } + + dst = dstMat; + } +}; + +BR_REGISTER(Transform, CUDAAffineTransform) + +} // namespace br + +#include "cuda/cudaaffine.moc" diff --git a/openbr/plugins/cuda/cudaaffine.cu b/openbr/plugins/cuda/cudaaffine.cu new file mode 100644 index 0000000..11f81d2 --- /dev/null +++ b/openbr/plugins/cuda/cudaaffine.cu @@ -0,0 +1,153 @@ +#include +using namespace std; + +#include +#include +#include +#include + +#include "cudadefines.hpp" + +using namespace cv; +using namespace cv::gpu; + +namespace br { namespace cuda { + + __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { + // don't do anything if the index is out of bounds + if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { + if (row >= rows || col >= cols) { + return 0; + } else{ + return 0; } + } + return (srcPtr + row*cols)[col]; + } + + /* + * trans_inv - A pointer to a one-dimensional representation of the inverse of the transform matrix 3x3 + * dst_row - The destination row (mapping to this row) + * dst_col - The destination column (mapping to this column) + * src_row - The computed source pixel row (mapping from this row) + * src_col - The computed source pixel column (mapping from this col) + */ + __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, int* src_row, int* src_col){ + *src_col = round(dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]); + *src_row = round(dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]); + + //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); + + } + + + __global__ void cudaaffine_kernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; + int dstIndex = dstRowInd*dst_cols + dstColInd; + + //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); + + int srcRowInd; + int srcColInd; + + // don't do anything if the index is out of bounds + if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) { + if (dstRowInd >= dst_rows || dstColInd >= dst_cols) { + return; + } else{ + dstPtr[dstIndex] = 0; + return; + } + } + + cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd); + const uint8_t cval = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols); // Get initial pixel value + + dstPtr[dstIndex] = cval; + } + + void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { + cudaError_t err; + double* gpuInverse; + + dim3 threadsPerBlock(8, 8); + dim3 numBlocks(dst_cols/threadsPerBlock.x + 1, + dst_rows/threadsPerBlock.y + 1); + + //************************************************************************ + // Input affine is a 2x3 Mat whose transpose is used in the computations + // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ] + // See "Digital Image Warping" by George Wolburg (p. 50) + //************************************************************************ + + // get new transform elements + double a11 = affineTransform.at(0, 0); + double a12 = affineTransform.at(1, 0); + double a21 = affineTransform.at(0, 1); + double a22 = affineTransform.at(1, 1); + double a31 = affineTransform.at(0, 2); + double a32 = affineTransform.at(1, 2); + // double a23 = 0; + // double a13 = 0; + // double a33 = 1; + + // compute transform inverse + double det = 1 / (a11*a22 - a21*a12); + + double affineInverse[9]; + affineInverse[0] = a22 * det; + affineInverse[1] = -a12 * det; + affineInverse[2] = 0; + affineInverse[3] = -a21 * det; + affineInverse[4] = a11 * det; + affineInverse[5] = 0; + affineInverse[6] = (a21*a32 - a31*a22) * det; + affineInverse[7] = (a31*a12 - a11*a32) * det; + affineInverse[8] = (a11*a22 - a21*a12) * det; + + // Move from affineTransform to gpuAffine (currently fake) + // double fakeAffine[6]; + // fakeAffine[0] = affineTransform.at(0, 0); + // fakeAffine[1] = affineTransform.at(0, 1); + // fakeAffine[2] = affineTransform.at(0, 2); + // fakeAffine[3] = affineTransform.at(1, 0); + // fakeAffine[4] = affineTransform.at(1, 1); + // fakeAffine[5] = affineTransform.at(1, 2); + + // printf("\n"); + // printf("%f\t%f\t%f\n", a11, a12, 0.0); + // printf("%f\t%f\t%f\n", a21, a22, 0.0); + // printf("%f\t%f\t%f\n", a31, a32, 1.0); + // printf("\n"); + + // printf("Affine Inverse:\n"); + // for(int i = 0; i < 3; i++){ + // for(int j = 0; j < 3; j++){ + // printf("%f\t", affineInverse[3*i + j]); + // } + // printf("\n"); + // } + + + CUDA_SAFE_MALLOC(dstPtr, dst_rows*dst_cols*sizeof(uint8_t), &err); + CUDA_SAFE_MALLOC(&gpuInverse, 3*3*sizeof(double), &err); + + CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err); + + cudaaffine_kernel<<>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols); + CUDA_KERNEL_ERR_CHK(&err); + + CUDA_SAFE_FREE(srcPtr, &err); + CUDA_SAFE_FREE(gpuInverse, &err); + + // printf("\n\n"); + // for(int i = 0; i < cols; i++){ + // for(int j = 0; j < src_rows; j++){ + // printf("%4d\t", ((uint8_t*) dstPtr)[j*cols + i]); + // } + // printf("\n"); + // } + // printf("\n"); + } +} // end cuda +} // end br -- libgit2 0.21.4