Commit e534ff9c263a2e69048f865230a7b7122e02535e

Authored by Greg Schrock
1 parent 93a9cf84

Added preliminary functional cuda affine

openbr/plugins/cuda/cudaaffine.cpp 0 → 100644
  1 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  2 + * Copyright 2012 The MITRE Corporation *
  3 + * *
  4 + * Licensed under the Apache License, Version 2.0 (the "License"); *
  5 + * you may not use this file except in compliance with the License. *
  6 + * You may obtain a copy of the License at *
  7 + * *
  8 + * http://www.apache.org/licenses/LICENSE-2.0 *
  9 + * *
  10 + * Unless required by applicable law or agreed to in writing, software *
  11 + * distributed under the License is distributed on an "AS IS" BASIS, *
  12 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
  13 + * See the License for the specific language governing permissions and *
  14 + * limitations under the License. *
  15 + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
  16 +
  17 +
  18 +#include <iostream>
  19 +using namespace std;
  20 +
  21 +#include <sys/types.h>
  22 +#include <unistd.h>
  23 +
  24 +#include <pthread.h>
  25 +
  26 +#include <opencv2/opencv.hpp>
  27 +#include <opencv2/imgproc/imgproc.hpp>
  28 +#include <opencv2/imgproc/imgproc_c.h>
  29 +#include <opencv2/highgui/highgui.hpp>
  30 +#include <opencv2/highgui/highgui_c.h>
  31 +#include <limits>
  32 +
  33 +#include <openbr/plugins/openbr_internal.h>
  34 +#include <openbr/core/opencvutils.h>
  35 +
  36 +#include "MatManager.hpp"
  37 +
  38 +using namespace cv;
  39 +
  40 +namespace br { namespace cuda {
  41 + void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols);
  42 +}}
  43 +
  44 +namespace br
  45 +{
  46 +
  47 +/*!
  48 + * \ingroup transforms
  49 + * \brief Performs a two or three point registration.
  50 + * \author Josh Klontz \cite jklontz
  51 + * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargment, Bilin for fast enlargement.
  52 + */
  53 +class CUDAAffineTransform : public UntrainableTransform
  54 +{
  55 + Q_OBJECT
  56 + Q_ENUMS(Method)
  57 +
  58 +public:
  59 + /*!< */
  60 + enum Method { Near = INTER_NEAREST,
  61 + Area = INTER_AREA,
  62 + Bilin = INTER_LINEAR,
  63 + Cubic = INTER_CUBIC,
  64 + Lanczo = INTER_LANCZOS4};
  65 +
  66 +private:
  67 + Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false)
  68 + Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false)
  69 + Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false)
  70 + Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false)
  71 + Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false)
  72 + Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false)
  73 + Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false)
  74 + Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false)
  75 + Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false)
  76 + Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false)
  77 + Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false)
  78 + BR_PROPERTY(int, width, 64)
  79 + BR_PROPERTY(int, height, 64)
  80 + BR_PROPERTY(float, x1, 0)
  81 + BR_PROPERTY(float, y1, 0)
  82 + BR_PROPERTY(float, x2, -1)
  83 + BR_PROPERTY(float, y2, -1)
  84 + BR_PROPERTY(float, x3, -1)
  85 + BR_PROPERTY(float, y3, -1)
  86 + BR_PROPERTY(Method, method, Bilin)
  87 + BR_PROPERTY(bool, storeAffine, false)
  88 + BR_PROPERTY(bool, warpPoints, false)
  89 +
  90 + static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b)
  91 + {
  92 + float dx = b.x - a.x;
  93 + float dy = b.y - a.y;
  94 + return Point2f(a.x - dy, a.y + dx);
  95 + }
  96 +
  97 + void project(const Template &src, Template &dst) const
  98 + {
  99 + const bool twoPoints = ((x3 == -1) || (y3 == -1));
  100 +
  101 + Point2f dstPoints[3];
  102 + dstPoints[0] = Point2f(x1*width, y1*height);
  103 + dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)*width, (y2 == -1 ? y1 : y2)*height);
  104 + if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]);
  105 + else dstPoints[2] = Point2f(x3*width, y3*height);
  106 +
  107 + Point2f srcPoints[3];
  108 + if (src.file.contains("Affine_0") &&
  109 + src.file.contains("Affine_1") &&
  110 + (src.file.contains("Affine_2") || twoPoints)) {
  111 + srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0"));
  112 + srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1"));
  113 + if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2"));
  114 + } else {
  115 + const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points());
  116 +
  117 + if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) {
  118 + resize(src, dst, Size(width, height));
  119 + return;
  120 + } else {
  121 + srcPoints[0] = landmarks[0];
  122 + srcPoints[1] = landmarks[1];
  123 + if (!twoPoints) srcPoints[2] = landmarks[2];
  124 + }
  125 + }
  126 + if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]);
  127 +
  128 + // Code section being altered (original)
  129 + //
  130 + // Mat affineTransform = getAffineTransform(srcPoints, dstPoints);
  131 + // warpAffine(src, dst, affineTransform, Size(width, height), method);
  132 + //
  133 + // end original
  134 +
  135 + Mat affineTransform = getAffineTransform(srcPoints, dstPoints);
  136 +
  137 + void* const* srcDataPtr = src.m().ptr<void*>();
  138 + int rows = *((int*)srcDataPtr[1]);
  139 + int cols = *((int*)srcDataPtr[2]);
  140 + int type = *((int*)srcDataPtr[3]);
  141 +
  142 + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
  143 + void** dstDataPtr = dstMat.ptr<void*>();
  144 +
  145 + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows
  146 + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols
  147 + dstDataPtr[3] = srcDataPtr[3];
  148 +
  149 + // Print the transform
  150 + //for(int x = 0; x < affineTransform.rows; x++){
  151 + //for(int y = 0; y < affineTransform.cols; y++){
  152 + //printf("%8.3f\t", affineTransform.at<double>(x, y));
  153 + //}
  154 + //printf("\n");
  155 + //}
  156 +
  157 + br::cuda::cudaaffine_wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width);
  158 +
  159 + // end altered code
  160 +
  161 + if (warpPoints) {
  162 + QList<QPointF> points = src.file.points();
  163 + QList<QPointF> rotatedPoints;
  164 + for (int i=0; i<points.size(); i++) {
  165 + rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+
  166 + points.at(i).y()*affineTransform.at<double>(0,1)+
  167 + affineTransform.at<double>(0,2),
  168 + points.at(i).x()*affineTransform.at<double>(1,0)+
  169 + points.at(i).y()*affineTransform.at<double>(1,1)+
  170 + affineTransform.at<double>(1,2)));
  171 + }
  172 +
  173 + dst.file.setPoints(rotatedPoints);
  174 + }
  175 +
  176 + if (storeAffine) {
  177 + QList<float> affineParams;
  178 + for (int i = 0 ; i < 2; i++)
  179 + for (int j = 0; j < 3; j++)
  180 + affineParams.append(affineTransform.at<double>(i, j));
  181 + dst.file.setList("affineParameters", affineParams);
  182 + }
  183 +
  184 + dst = dstMat;
  185 + }
  186 +};
  187 +
  188 +BR_REGISTER(Transform, CUDAAffineTransform)
  189 +
  190 +} // namespace br
  191 +
  192 +#include "cuda/cudaaffine.moc"
... ...
openbr/plugins/cuda/cudaaffine.cu 0 → 100644
  1 +#include <iostream>
  2 +using namespace std;
  3 +
  4 +#include <opencv2/gpu/gpu.hpp>
  5 +#include <opencv2/opencv.hpp>
  6 +#include <stdio.h>
  7 +#include <math.h>
  8 +
  9 +#include "cudadefines.hpp"
  10 +
  11 +using namespace cv;
  12 +using namespace cv::gpu;
  13 +
  14 +namespace br { namespace cuda {
  15 +
  16 + __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) {
  17 + // don't do anything if the index is out of bounds
  18 + if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) {
  19 + if (row >= rows || col >= cols) {
  20 + return 0;
  21 + } else{
  22 + return 0; }
  23 + }
  24 + return (srcPtr + row*cols)[col];
  25 + }
  26 +
  27 + /*
  28 + * trans_inv - A pointer to a one-dimensional representation of the inverse of the transform matrix 3x3
  29 + * dst_row - The destination row (mapping to this row)
  30 + * dst_col - The destination column (mapping to this column)
  31 + * src_row - The computed source pixel row (mapping from this row)
  32 + * src_col - The computed source pixel column (mapping from this col)
  33 + */
  34 + __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, int* src_row, int* src_col){
  35 + *src_col = round(dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]);
  36 + *src_row = round(dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]);
  37 +
  38 + //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]);
  39 +
  40 + }
  41 +
  42 +
  43 + __global__ void cudaaffine_kernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){
  44 + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;
  45 + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x;
  46 + int dstIndex = dstRowInd*dst_cols + dstColInd;
  47 +
  48 + //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]);
  49 +
  50 + int srcRowInd;
  51 + int srcColInd;
  52 +
  53 + // don't do anything if the index is out of bounds
  54 + if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) {
  55 + if (dstRowInd >= dst_rows || dstColInd >= dst_cols) {
  56 + return;
  57 + } else{
  58 + dstPtr[dstIndex] = 0;
  59 + return;
  60 + }
  61 + }
  62 +
  63 + cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd);
  64 + const uint8_t cval = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols); // Get initial pixel value
  65 +
  66 + dstPtr[dstIndex] = cval;
  67 + }
  68 +
  69 + void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) {
  70 + cudaError_t err;
  71 + double* gpuInverse;
  72 +
  73 + dim3 threadsPerBlock(8, 8);
  74 + dim3 numBlocks(dst_cols/threadsPerBlock.x + 1,
  75 + dst_rows/threadsPerBlock.y + 1);
  76 +
  77 + //************************************************************************
  78 + // Input affine is a 2x3 Mat whose transpose is used in the computations
  79 + // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ]
  80 + // See "Digital Image Warping" by George Wolburg (p. 50)
  81 + //************************************************************************
  82 +
  83 + // get new transform elements
  84 + double a11 = affineTransform.at<double>(0, 0);
  85 + double a12 = affineTransform.at<double>(1, 0);
  86 + double a21 = affineTransform.at<double>(0, 1);
  87 + double a22 = affineTransform.at<double>(1, 1);
  88 + double a31 = affineTransform.at<double>(0, 2);
  89 + double a32 = affineTransform.at<double>(1, 2);
  90 + // double a23 = 0;
  91 + // double a13 = 0;
  92 + // double a33 = 1;
  93 +
  94 + // compute transform inverse
  95 + double det = 1 / (a11*a22 - a21*a12);
  96 +
  97 + double affineInverse[9];
  98 + affineInverse[0] = a22 * det;
  99 + affineInverse[1] = -a12 * det;
  100 + affineInverse[2] = 0;
  101 + affineInverse[3] = -a21 * det;
  102 + affineInverse[4] = a11 * det;
  103 + affineInverse[5] = 0;
  104 + affineInverse[6] = (a21*a32 - a31*a22) * det;
  105 + affineInverse[7] = (a31*a12 - a11*a32) * det;
  106 + affineInverse[8] = (a11*a22 - a21*a12) * det;
  107 +
  108 + // Move from affineTransform to gpuAffine (currently fake)
  109 + // double fakeAffine[6];
  110 + // fakeAffine[0] = affineTransform.at<double>(0, 0);
  111 + // fakeAffine[1] = affineTransform.at<double>(0, 1);
  112 + // fakeAffine[2] = affineTransform.at<double>(0, 2);
  113 + // fakeAffine[3] = affineTransform.at<double>(1, 0);
  114 + // fakeAffine[4] = affineTransform.at<double>(1, 1);
  115 + // fakeAffine[5] = affineTransform.at<double>(1, 2);
  116 +
  117 + // printf("\n");
  118 + // printf("%f\t%f\t%f\n", a11, a12, 0.0);
  119 + // printf("%f\t%f\t%f\n", a21, a22, 0.0);
  120 + // printf("%f\t%f\t%f\n", a31, a32, 1.0);
  121 + // printf("\n");
  122 +
  123 + // printf("Affine Inverse:\n");
  124 + // for(int i = 0; i < 3; i++){
  125 + // for(int j = 0; j < 3; j++){
  126 + // printf("%f\t", affineInverse[3*i + j]);
  127 + // }
  128 + // printf("\n");
  129 + // }
  130 +
  131 +
  132 + CUDA_SAFE_MALLOC(dstPtr, dst_rows*dst_cols*sizeof(uint8_t), &err);
  133 + CUDA_SAFE_MALLOC(&gpuInverse, 3*3*sizeof(double), &err);
  134 +
  135 + CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err);
  136 +
  137 + cudaaffine_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols);
  138 + CUDA_KERNEL_ERR_CHK(&err);
  139 +
  140 + CUDA_SAFE_FREE(srcPtr, &err);
  141 + CUDA_SAFE_FREE(gpuInverse, &err);
  142 +
  143 + // printf("\n\n");
  144 + // for(int i = 0; i < cols; i++){
  145 + // for(int j = 0; j < src_rows; j++){
  146 + // printf("%4d\t", ((uint8_t*) dstPtr)[j*cols + i]);
  147 + // }
  148 + // printf("\n");
  149 + // }
  150 + // printf("\n");
  151 + }
  152 +} // end cuda
  153 +} // end br
... ...