Peter M. Groen / openbr

Browse Code »

Commit e534ff9c263a2e69048f865230a7b7122e02535e

Authored by Greg Schrock 2016-02-29 16:01:16 -0500

1 parent 93a9cf84

Added preliminary functional cuda affine

Inline Side-by-side

Showing 2 changed files with 345 additions and 0 deletions

openbr/plugins/cuda/cudaaffine.cpp 0 → 100644

View file @e534ff9

	1	+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
	2	+ * Copyright 2012 The MITRE Corporation *
	3	+ * *
	4	+ * Licensed under the Apache License, Version 2.0 (the "License"); *
	5	+ * you may not use this file except in compliance with the License. *
	6	+ * You may obtain a copy of the License at *
	7	+ * *
	8	+ * http://www.apache.org/licenses/LICENSE-2.0 *
	9	+ * *
	10	+ * Unless required by applicable law or agreed to in writing, software *
	11	+ * distributed under the License is distributed on an "AS IS" BASIS, *
	12	+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
	13	+ * See the License for the specific language governing permissions and *
	14	+ * limitations under the License. *
	15	+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
	16	+
	17	+
	18	+#include <iostream>
	19	+using namespace std;
	20	+
	21	+#include <sys/types.h>
	22	+#include <unistd.h>
	23	+
	24	+#include <pthread.h>
	25	+
	26	+#include <opencv2/opencv.hpp>
	27	+#include <opencv2/imgproc/imgproc.hpp>
	28	+#include <opencv2/imgproc/imgproc_c.h>
	29	+#include <opencv2/highgui/highgui.hpp>
	30	+#include <opencv2/highgui/highgui_c.h>
	31	+#include <limits>
	32	+
	33	+#include <openbr/plugins/openbr_internal.h>
	34	+#include <openbr/core/opencvutils.h>
	35	+
	36	+#include "MatManager.hpp"
	37	+
	38	+using namespace cv;
	39	+
	40	+namespace br { namespace cuda {
	41	+ void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols);
	42	+}}
	43	+
	44	+namespace br
	45	+{
	46	+
	47	+/*!
	48	+ * \ingroup transforms
	49	+ * \brief Performs a two or three point registration.
	50	+ * \author Josh Klontz \cite jklontz
	51	+ * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargment, Bilin for fast enlargement.
	52	+ */
	53	+class CUDAAffineTransform : public UntrainableTransform
	54	+{
	55	+ Q_OBJECT
	56	+ Q_ENUMS(Method)
	57	+
	58	+public:
	59	+ /!< /
	60	+ enum Method { Near = INTER_NEAREST,
	61	+ Area = INTER_AREA,
	62	+ Bilin = INTER_LINEAR,
	63	+ Cubic = INTER_CUBIC,
	64	+ Lanczo = INTER_LANCZOS4};
	65	+
	66	+private:
	67	+ Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false)
	68	+ Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false)
	69	+ Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false)
	70	+ Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false)
	71	+ Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false)
	72	+ Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false)
	73	+ Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false)
	74	+ Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false)
	75	+ Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false)
	76	+ Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false)
	77	+ Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false)
	78	+ BR_PROPERTY(int, width, 64)
	79	+ BR_PROPERTY(int, height, 64)
	80	+ BR_PROPERTY(float, x1, 0)
	81	+ BR_PROPERTY(float, y1, 0)
	82	+ BR_PROPERTY(float, x2, -1)
	83	+ BR_PROPERTY(float, y2, -1)
	84	+ BR_PROPERTY(float, x3, -1)
	85	+ BR_PROPERTY(float, y3, -1)
	86	+ BR_PROPERTY(Method, method, Bilin)
	87	+ BR_PROPERTY(bool, storeAffine, false)
	88	+ BR_PROPERTY(bool, warpPoints, false)
	89	+
	90	+ static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b)
	91	+ {
	92	+ float dx = b.x - a.x;
	93	+ float dy = b.y - a.y;
	94	+ return Point2f(a.x - dy, a.y + dx);
	95	+ }
	96	+
	97	+ void project(const Template &src, Template &dst) const
	98	+ {
	99	+ const bool twoPoints = ((x3 == -1) \|\| (y3 == -1));
	100	+
	101	+ Point2f dstPoints[3];
	102	+ dstPoints[0] = Point2f(x1width, y1height);
	103	+ dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)width, (y2 == -1 ? y1 : y2)height);
	104	+ if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]);
	105	+ else dstPoints[2] = Point2f(x3width, y3height);
	106	+
	107	+ Point2f srcPoints[3];
	108	+ if (src.file.contains("Affine_0") &&
	109	+ src.file.contains("Affine_1") &&
	110	+ (src.file.contains("Affine_2") \|\| twoPoints)) {
	111	+ srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0"));
	112	+ srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1"));
	113	+ if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2"));
	114	+ } else {
	115	+ const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points());
	116	+
	117	+ if ((landmarks.size() < 2) \|\| (!twoPoints && (landmarks.size() < 3))) {
	118	+ resize(src, dst, Size(width, height));
	119	+ return;
	120	+ } else {
	121	+ srcPoints[0] = landmarks[0];
	122	+ srcPoints[1] = landmarks[1];
	123	+ if (!twoPoints) srcPoints[2] = landmarks[2];
	124	+ }
	125	+ }
	126	+ if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]);
	127	+
	128	+ // Code section being altered (original)
	129	+ //
	130	+ // Mat affineTransform = getAffineTransform(srcPoints, dstPoints);
	131	+ // warpAffine(src, dst, affineTransform, Size(width, height), method);
	132	+ //
	133	+ // end original
	134	+
	135	+ Mat affineTransform = getAffineTransform(srcPoints, dstPoints);
	136	+
	137	+ void* const* srcDataPtr = src.m().ptr<void*>();
	138	+ int rows = ((int)srcDataPtr[1]);
	139	+ int cols = ((int)srcDataPtr[2]);
	140	+ int type = ((int)srcDataPtr[3]);
	141	+
	142	+ Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
	143	+ void** dstDataPtr = dstMat.ptr<void*>();
	144	+
	145	+ dstDataPtr[1] = srcDataPtr[1]; ((int)dstDataPtr[1]) = height; // rows
	146	+ dstDataPtr[2] = srcDataPtr[2]; ((int)dstDataPtr[2]) = width; // cols
	147	+ dstDataPtr[3] = srcDataPtr[3];
	148	+
	149	+ // Print the transform
	150	+ //for(int x = 0; x < affineTransform.rows; x++){
	151	+ //for(int y = 0; y < affineTransform.cols; y++){
	152	+ //printf("%8.3f\t", affineTransform.at<double>(x, y));
	153	+ //}
	154	+ //printf("\n");
	155	+ //}
	156	+
	157	+ br::cuda::cudaaffine_wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width);
	158	+
	159	+ // end altered code
	160	+
	161	+ if (warpPoints) {
	162	+ QList<QPointF> points = src.file.points();
	163	+ QList<QPointF> rotatedPoints;
	164	+ for (int i=0; i<points.size(); i++) {
	165	+ rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+
	166	+ points.at(i).y()*affineTransform.at<double>(0,1)+
	167	+ affineTransform.at<double>(0,2),
	168	+ points.at(i).x()*affineTransform.at<double>(1,0)+
	169	+ points.at(i).y()*affineTransform.at<double>(1,1)+
	170	+ affineTransform.at<double>(1,2)));
	171	+ }
	172	+
	173	+ dst.file.setPoints(rotatedPoints);
	174	+ }
	175	+
	176	+ if (storeAffine) {
	177	+ QList<float> affineParams;
	178	+ for (int i = 0 ; i < 2; i++)
	179	+ for (int j = 0; j < 3; j++)
	180	+ affineParams.append(affineTransform.at<double>(i, j));
	181	+ dst.file.setList("affineParameters", affineParams);
	182	+ }
	183	+
	184	+ dst = dstMat;
	185	+ }
	186	+};
	187	+
	188	+BR_REGISTER(Transform, CUDAAffineTransform)
	189	+
	190	+} // namespace br
	191	+
	192	+#include "cuda/cudaaffine.moc"
...	...

openbr/plugins/cuda/cudaaffine.cu 0 → 100644

View file @e534ff9

	1	+#include <iostream>
	2	+using namespace std;
	3	+
	4	+#include <opencv2/gpu/gpu.hpp>
	5	+#include <opencv2/opencv.hpp>
	6	+#include <stdio.h>
	7	+#include <math.h>
	8	+
	9	+#include "cudadefines.hpp"
	10	+
	11	+using namespace cv;
	12	+using namespace cv::gpu;
	13	+
	14	+namespace br { namespace cuda {
	15	+
	16	+ __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) {
	17	+ // don't do anything if the index is out of bounds
	18	+ if (row < 1 \|\| row >= rows-1 \|\| col < 1 \|\| col >= cols-1) {
	19	+ if (row >= rows \|\| col >= cols) {
	20	+ return 0;
	21	+ } else{
	22	+ return 0; }
	23	+ }
	24	+ return (srcPtr + row*cols)[col];
	25	+ }
	26	+
	27	+ /*
	28	+ * trans_inv - A pointer to a one-dimensional representation of the inverse of the transform matrix 3x3
	29	+ * dst_row - The destination row (mapping to this row)
	30	+ * dst_col - The destination column (mapping to this column)
	31	+ * src_row - The computed source pixel row (mapping from this row)
	32	+ * src_col - The computed source pixel column (mapping from this col)
	33	+ */
	34	+ __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double trans_inv, int dst_row, int dst_col, int src_row, int* src_col){
	35	+ src_col = round(dst_col trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]);
	36	+ src_row = round(dst_col trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]);
	37	+
	38	+ //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", src_col, src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]);
	39	+
	40	+ }
	41	+
	42	+
	43	+ __global__ void cudaaffine_kernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){
	44	+ int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;
	45	+ int dstColInd = blockIdx.x*blockDim.x+threadIdx.x;
	46	+ int dstIndex = dstRowInd*dst_cols + dstColInd;
	47	+
	48	+ //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]);
	49	+
	50	+ int srcRowInd;
	51	+ int srcColInd;
	52	+
	53	+ // don't do anything if the index is out of bounds
	54	+ if (dstRowInd < 1 \|\| dstRowInd >= dst_rows-1 \|\| dstColInd < 1 \|\| dstColInd >= dst_cols-1) {
	55	+ if (dstRowInd >= dst_rows \|\| dstColInd >= dst_cols) {
	56	+ return;
	57	+ } else{
	58	+ dstPtr[dstIndex] = 0;
	59	+ return;
	60	+ }
	61	+ }
	62	+
	63	+ cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd);
	64	+ const uint8_t cval = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols); // Get initial pixel value
	65	+
	66	+ dstPtr[dstIndex] = cval;
	67	+ }
	68	+
	69	+ void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) {
	70	+ cudaError_t err;
	71	+ double* gpuInverse;
	72	+
	73	+ dim3 threadsPerBlock(8, 8);
	74	+ dim3 numBlocks(dst_cols/threadsPerBlock.x + 1,
	75	+ dst_rows/threadsPerBlock.y + 1);
	76	+
	77	+ //************************************************************************
	78	+ // Input affine is a 2x3 Mat whose transpose is used in the computations
	79	+ // [x, y, 1] = [u, v, 1] [ a^T \| [0 0 1]^T ]
	80	+ // See "Digital Image Warping" by George Wolburg (p. 50)
	81	+ //************************************************************************
	82	+
	83	+ // get new transform elements
	84	+ double a11 = affineTransform.at<double>(0, 0);
	85	+ double a12 = affineTransform.at<double>(1, 0);
	86	+ double a21 = affineTransform.at<double>(0, 1);
	87	+ double a22 = affineTransform.at<double>(1, 1);
	88	+ double a31 = affineTransform.at<double>(0, 2);
	89	+ double a32 = affineTransform.at<double>(1, 2);
	90	+ // double a23 = 0;
	91	+ // double a13 = 0;
	92	+ // double a33 = 1;
	93	+
	94	+ // compute transform inverse
	95	+ double det = 1 / (a11a22 - a21a12);
	96	+
	97	+ double affineInverse[9];
	98	+ affineInverse[0] = a22 * det;
	99	+ affineInverse[1] = -a12 * det;
	100	+ affineInverse[2] = 0;
	101	+ affineInverse[3] = -a21 * det;
	102	+ affineInverse[4] = a11 * det;
	103	+ affineInverse[5] = 0;
	104	+ affineInverse[6] = (a21a32 - a31a22) * det;
	105	+ affineInverse[7] = (a31a12 - a11a32) * det;
	106	+ affineInverse[8] = (a11a22 - a21a12) * det;
	107	+
	108	+ // Move from affineTransform to gpuAffine (currently fake)
	109	+ // double fakeAffine[6];
	110	+ // fakeAffine[0] = affineTransform.at<double>(0, 0);
	111	+ // fakeAffine[1] = affineTransform.at<double>(0, 1);
	112	+ // fakeAffine[2] = affineTransform.at<double>(0, 2);
	113	+ // fakeAffine[3] = affineTransform.at<double>(1, 0);
	114	+ // fakeAffine[4] = affineTransform.at<double>(1, 1);
	115	+ // fakeAffine[5] = affineTransform.at<double>(1, 2);
	116	+
	117	+ // printf("\n");
	118	+ // printf("%f\t%f\t%f\n", a11, a12, 0.0);
	119	+ // printf("%f\t%f\t%f\n", a21, a22, 0.0);
	120	+ // printf("%f\t%f\t%f\n", a31, a32, 1.0);
	121	+ // printf("\n");
	122	+
	123	+ // printf("Affine Inverse:\n");
	124	+ // for(int i = 0; i < 3; i++){
	125	+ // for(int j = 0; j < 3; j++){
	126	+ // printf("%f\t", affineInverse[3*i + j]);
	127	+ // }
	128	+ // printf("\n");
	129	+ // }
	130	+
	131	+
	132	+ CUDA_SAFE_MALLOC(dstPtr, dst_rowsdst_colssizeof(uint8_t), &err);
	133	+ CUDA_SAFE_MALLOC(&gpuInverse, 33sizeof(double), &err);
	134	+
	135	+ CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err);
	136	+
	137	+ cudaaffine_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t)srcPtr, (uint8_t)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols);
	138	+ CUDA_KERNEL_ERR_CHK(&err);
	139	+
	140	+ CUDA_SAFE_FREE(srcPtr, &err);
	141	+ CUDA_SAFE_FREE(gpuInverse, &err);
	142	+
	143	+ // printf("\n\n");
	144	+ // for(int i = 0; i < cols; i++){
	145	+ // for(int j = 0; j < src_rows; j++){
	146	+ // printf("%4d\t", ((uint8_t) dstPtr)[jcols + i]);
	147	+ // }
	148	+ // printf("\n");
	149	+ // }
	150	+ // printf("\n");
	151	+ }
	152	+} // end cuda
	153	+} // end br
...	...