Peter M. Groen / openbr

Browse Code »

Commit e534ff9c263a2e69048f865230a7b7122e02535e

Authored by Greg Schrock 2016-02-29 16:01:16 -0500

1 parent 93a9cf84

Added preliminary functional cuda affine

Inline Side-by-side

Showing 2 changed files with 345 additions and 0 deletions

openbr/plugins/cuda/cudaaffine.cpp 0 → 100644

View file @e534ff9

		1	+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
		2	+ * Copyright 2012 The MITRE Corporation *
		3	+ * *
		4	+ * Licensed under the Apache License, Version 2.0 (the "License"); *
		5	+ * you may not use this file except in compliance with the License. *
		6	+ * You may obtain a copy of the License at *
		7	+ * *
		8	+ * http://www.apache.org/licenses/LICENSE-2.0 *
		9	+ * *
		10	+ * Unless required by applicable law or agreed to in writing, software *
		11	+ * distributed under the License is distributed on an "AS IS" BASIS, *
		12	+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
		13	+ * See the License for the specific language governing permissions and *
		14	+ * limitations under the License. *
		15	+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
		16	+
		17	+
		18	+#include <iostream>
		19	+using namespace std;
		20	+
		21	+#include <sys/types.h>
		22	+#include <unistd.h>
		23	+
		24	+#include <pthread.h>
		25	+
		26	+#include <opencv2/opencv.hpp>
		27	+#include <opencv2/imgproc/imgproc.hpp>
		28	+#include <opencv2/imgproc/imgproc_c.h>
		29	+#include <opencv2/highgui/highgui.hpp>
		30	+#include <opencv2/highgui/highgui_c.h>
		31	+#include <limits>
		32	+
		33	+#include <openbr/plugins/openbr_internal.h>
		34	+#include <openbr/core/opencvutils.h>
		35	+
		36	+#include "MatManager.hpp"
		37	+
		38	+using namespace cv;
		39	+
		40	+namespace br { namespace cuda {
		41	+ void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); // Forward declaration; the definition lives in cudaaffine.cu. Called from CUDAAffineTransform::project().
		42	+}}
		43	+
		44	+namespace br
		45	+{
		46	+
		47	+/*!
		48	+ * \ingroup transforms
		49	+ * \brief Performs a two or three point registration.
		50	+ * \author Josh Klontz \cite jklontz
		51	+ * \note Method: Area should be used for shrinking an image, Cubic for slow but accurate enlargment, Bilin for fast enlargement.
		52	+ */
		53	+class CUDAAffineTransform : public UntrainableTransform
		54	+{
		55	+ Q_OBJECT
		56	+ Q_ENUMS(Method)
		57	+
		58	+public:
		59	+ /!< /
		60	+ enum Method { Near = INTER_NEAREST,
		61	+ Area = INTER_AREA,
		62	+ Bilin = INTER_LINEAR,
		63	+ Cubic = INTER_CUBIC,
		64	+ Lanczo = INTER_LANCZOS4};
		65	+
		66	+private:
		67	+ Q_PROPERTY(int width READ get_width WRITE set_width RESET reset_width STORED false)
		68	+ Q_PROPERTY(int height READ get_height WRITE set_height RESET reset_height STORED false)
		69	+ Q_PROPERTY(float x1 READ get_x1 WRITE set_x1 RESET reset_x1 STORED false)
		70	+ Q_PROPERTY(float y1 READ get_y1 WRITE set_y1 RESET reset_y1 STORED false)
		71	+ Q_PROPERTY(float x2 READ get_x2 WRITE set_x2 RESET reset_x2 STORED false)
		72	+ Q_PROPERTY(float y2 READ get_y2 WRITE set_y2 RESET reset_y2 STORED false)
		73	+ Q_PROPERTY(float x3 READ get_x3 WRITE set_x3 RESET reset_x3 STORED false)
		74	+ Q_PROPERTY(float y3 READ get_y3 WRITE set_y3 RESET reset_y3 STORED false)
		75	+ Q_PROPERTY(Method method READ get_method WRITE set_method RESET reset_method STORED false)
		76	+ Q_PROPERTY(bool storeAffine READ get_storeAffine WRITE set_storeAffine RESET reset_storeAffine STORED false)
		77	+ Q_PROPERTY(bool warpPoints READ get_warpPoints WRITE set_warpPoints RESET reset_warpPoints STORED false)
		78	+ BR_PROPERTY(int, width, 64)
		79	+ BR_PROPERTY(int, height, 64)
		80	+ BR_PROPERTY(float, x1, 0)
		81	+ BR_PROPERTY(float, y1, 0)
		82	+ BR_PROPERTY(float, x2, -1)
		83	+ BR_PROPERTY(float, y2, -1)
		84	+ BR_PROPERTY(float, x3, -1)
		85	+ BR_PROPERTY(float, y3, -1)
		86	+ BR_PROPERTY(Method, method, Bilin)
		87	+ BR_PROPERTY(bool, storeAffine, false)
		88	+ BR_PROPERTY(bool, warpPoints, false)
		89	+
		90	+ static Point2f getThirdAffinePoint(const Point2f &a, const Point2f &b)
		91	+ {
		92	+ float dx = b.x - a.x;
		93	+ float dy = b.y - a.y;
		94	+ return Point2f(a.x - dy, a.y + dx);
		95	+ }
		96	+
		97	+ void project(const Template &src, Template &dst) const
		98	+ {
		99	+ const bool twoPoints = ((x3 == -1) \|\| (y3 == -1));
		100	+
		101	+ Point2f dstPoints[3];
		102	+ dstPoints[0] = Point2f(x1width, y1height);
		103	+ dstPoints[1] = Point2f((x2 == -1 ? 1 - x1 : x2)width, (y2 == -1 ? y1 : y2)height);
		104	+ if (twoPoints) dstPoints[2] = getThirdAffinePoint(dstPoints[0], dstPoints[1]);
		105	+ else dstPoints[2] = Point2f(x3width, y3height);
		106	+
		107	+ Point2f srcPoints[3];
		108	+ if (src.file.contains("Affine_0") &&
		109	+ src.file.contains("Affine_1") &&
		110	+ (src.file.contains("Affine_2") \|\| twoPoints)) {
		111	+ srcPoints[0] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_0"));
		112	+ srcPoints[1] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_1"));
		113	+ if (!twoPoints) srcPoints[2] = OpenCVUtils::toPoint(src.file.get<QPointF>("Affine_2"));
		114	+ } else {
		115	+ const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points());
		116	+
		117	+ if ((landmarks.size() < 2) \|\| (!twoPoints && (landmarks.size() < 3))) {
		118	+ resize(src, dst, Size(width, height));
		119	+ return;
		120	+ } else {
		121	+ srcPoints[0] = landmarks[0];
		122	+ srcPoints[1] = landmarks[1];
		123	+ if (!twoPoints) srcPoints[2] = landmarks[2];
		124	+ }
		125	+ }
		126	+ if (twoPoints) srcPoints[2] = getThirdAffinePoint(srcPoints[0], srcPoints[1]);
		127	+
		128	+ // Code section being altered (original)
		129	+ //
		130	+ // Mat affineTransform = getAffineTransform(srcPoints, dstPoints);
		131	+ // warpAffine(src, dst, affineTransform, Size(width, height), method);
		132	+ //
		133	+ // end original
		134	+
		135	+ Mat affineTransform = getAffineTransform(srcPoints, dstPoints);
		136	+
		137	+ void* const* srcDataPtr = src.m().ptr<void*>();
		138	+ int rows = ((int)srcDataPtr[1]);
		139	+ int cols = ((int)srcDataPtr[2]);
		140	+ int type = ((int)srcDataPtr[3]);
		141	+
		142	+ Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
		143	+ void** dstDataPtr = dstMat.ptr<void*>();
		144	+
		145	+ dstDataPtr[1] = srcDataPtr[1]; ((int)dstDataPtr[1]) = height; // rows
		146	+ dstDataPtr[2] = srcDataPtr[2]; ((int)dstDataPtr[2]) = width; // cols
		147	+ dstDataPtr[3] = srcDataPtr[3];
		148	+
		149	+ // Print the transform
		150	+ //for(int x = 0; x < affineTransform.rows; x++){
		151	+ //for(int y = 0; y < affineTransform.cols; y++){
		152	+ //printf("%8.3f\t", affineTransform.at<double>(x, y));
		153	+ //}
		154	+ //printf("\n");
		155	+ //}
		156	+
		157	+ br::cuda::cudaaffine_wrapper(srcDataPtr[0], &dstDataPtr[0], affineTransform, rows, cols, height, width);
		158	+
		159	+ // end altered code
		160	+
		161	+ if (warpPoints) {
		162	+ QList<QPointF> points = src.file.points();
		163	+ QList<QPointF> rotatedPoints;
		164	+ for (int i=0; i<points.size(); i++) {
		165	+ rotatedPoints.append(QPointF(points.at(i).x()*affineTransform.at<double>(0,0)+
		166	+ points.at(i).y()*affineTransform.at<double>(0,1)+
		167	+ affineTransform.at<double>(0,2),
		168	+ points.at(i).x()*affineTransform.at<double>(1,0)+
		169	+ points.at(i).y()*affineTransform.at<double>(1,1)+
		170	+ affineTransform.at<double>(1,2)));
		171	+ }
		172	+
		173	+ dst.file.setPoints(rotatedPoints);
		174	+ }
		175	+
		176	+ if (storeAffine) {
		177	+ QList<float> affineParams;
		178	+ for (int i = 0 ; i < 2; i++)
		179	+ for (int j = 0; j < 3; j++)
		180	+ affineParams.append(affineTransform.at<double>(i, j));
		181	+ dst.file.setList("affineParameters", affineParams);
		182	+ }
		183	+
		184	+ dst = dstMat;
		185	+ }
		186	+};
		187	+
		188	+BR_REGISTER(Transform, CUDAAffineTransform)
		189	+
		190	+} // namespace br
		191	+
		192	+#include "cuda/cudaaffine.moc"

openbr/plugins/cuda/cudaaffine.cu 0 → 100644

View file @e534ff9

		1	+#include <iostream>
		2	+using namespace std;
		3	+
		4	+#include <opencv2/gpu/gpu.hpp>
		5	+#include <opencv2/opencv.hpp>
		6	+#include <stdio.h>
		7	+#include <math.h>
		8	+
		9	+#include "cudadefines.hpp"
		10	+
		11	+using namespace cv;
		12	+using namespace cv::gpu;
		13	+
		14	+namespace br { namespace cuda {
		15	+
		/*
		 * Reads one byte from a tightly packed, row-major, single-channel image.
		 *
		 * row, col   - coordinate to sample
		 * srcPtr     - image buffer, indexed as srcPtr[row*cols + col]
		 * rows, cols - image dimensions
		 *
		 * Returns 0 for any coordinate outside the image AND for coordinates on
		 * the outermost one-pixel frame (row 0, row rows-1, col 0, col cols-1);
		 * only the interior is ever read.
		 * NOTE(review): excluding the frame is kept from the original code --
		 * confirm whether edge pixels are really meant to read as 0.
		 *
		 * Marked __host__ as well so the lookup can be checked on the CPU.
		 */
		__host__ __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, const uint8_t* srcPtr, int rows, int cols) {
		    if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) {
		        return 0;
		    }
		    return srcPtr[row*cols + col];
		}
		26	+
		/*
		 * Maps a destination pixel back to the source pixel it is copied from.
		 *
		 * trans_inv - A pointer to a one-dimensional (9 element) representation of
		 *             the inverse of the 3x3 transform matrix. Only elements
		 *             0, 3, 6 (column) and 1, 4, 7 (row) are read.
		 * dst_row   - The destination row (mapping to this row)
		 * dst_col   - The destination column (mapping to this column)
		 * src_row   - Out: the computed source pixel row (mapping from this row)
		 * src_col   - Out: the computed source pixel column (mapping from this col)
		 *
		 * Both results are rounded to the nearest integer and may lie outside the
		 * source image; the caller is responsible for range checking.
		 *
		 * Marked __host__ as well so the mapping can be checked on the CPU.
		 */
		__host__ __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(const double* trans_inv, int dst_row, int dst_col, int* src_row, int* src_col){
		    *src_col = (int)round(dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]);
		    *src_row = (int)round(dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]);
		}
		41	+
		42	+
		/*
		 * Inverse-maps every destination pixel to a source pixel and copies its value.
		 *
		 * Launch layout: 2D grid of 2D blocks; x indexes destination columns and
		 * y indexes destination rows, one thread per destination pixel. The grid
		 * may overshoot the image, surplus threads exit immediately.
		 *
		 * srcPtr             - source image, single byte per pixel, row-major
		 * dstPtr             - destination image, dst_rows*dst_cols bytes, row-major
		 * trans_inv          - 9 element inverse transform, see cudaaffine_kernel_get_src_coord
		 * src_rows, src_cols - source dimensions
		 * dst_rows, dst_cols - destination dimensions
		 *
		 * The outermost one-pixel frame of the destination is always written as 0.
		 */
		__global__ void cudaaffine_kernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){
		    const int dstRowInd = blockIdx.y*blockDim.y + threadIdx.y;
		    const int dstColInd = blockIdx.x*blockDim.x + threadIdx.x;

		    // Threads beyond the image (grid overshoot) have nothing to write.
		    if (dstRowInd >= dst_rows || dstColInd >= dst_cols) {
		        return;
		    }

		    const int dstIndex = dstRowInd*dst_cols + dstColInd;

		    // The border frame of the destination is cleared rather than sampled.
		    if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) {
		        dstPtr[dstIndex] = 0;
		        return;
		    }

		    int srcRowInd;
		    int srcColInd;
		    cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd);

		    // The lookup returns 0 when the mapped coordinate falls outside the source.
		    dstPtr[dstIndex] = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols);
		}
		68	+
		/*
		 * Host-side entry point: inverts the affine transform, uploads the
		 * inverse and launches cudaaffine_kernel over the destination image.
		 *
		 * srcPtr             - source image buffer passed straight to the kernel;
		 *                      it is released with CUDA_SAFE_FREE before returning,
		 *                      so the caller must not use it afterwards
		 * dstPtr             - out: receives the buffer allocated here for the
		 *                      destination image (dst_rows*dst_cols bytes)
		 * affineTransform    - 2x3 matrix of doubles (read with at<double>)
		 * src_rows, src_cols - source dimensions
		 * dst_rows, dst_cols - destination dimensions
		 *
		 * NOTE(review): a singular transform (zero determinant) is not guarded
		 * against; the inverse then contains inf/NaN -- confirm callers cannot
		 * pass collinear control points.
		 */
		void cudaaffine_wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) {
		    cudaError_t err;
		    double* gpuInverse;

		    // One thread per destination pixel. The "+ 1" may launch a surplus
		    // block per axis; the kernel discards threads outside the image.
		    dim3 threadsPerBlock(8, 8);
		    dim3 numBlocks(dst_cols/threadsPerBlock.x + 1,
		                   dst_rows/threadsPerBlock.y + 1);

		    //************************************************************************
		    // Input affine is a 2x3 Mat whose transpose is used in the computations
		    // [x, y, 1] = [u, v, 1] [ a^T | [0 0 1]^T ]
		    // See "Digital Image Warping" by George Wolberg (p. 50)
		    //************************************************************************

		    // Elements of the transposed transform; the third column is (0, 0, 1).
		    const double a11 = affineTransform.at<double>(0, 0);
		    const double a12 = affineTransform.at<double>(1, 0);
		    const double a21 = affineTransform.at<double>(0, 1);
		    const double a22 = affineTransform.at<double>(1, 1);
		    const double a31 = affineTransform.at<double>(0, 2);
		    const double a32 = affineTransform.at<double>(1, 2);

		    // Reciprocal of the determinant (the third column contributes a factor of 1).
		    const double invDet = 1 / (a11*a22 - a21*a12);

		    // Adjugate scaled by 1/det, stored row-major.
		    double affineInverse[9];
		    affineInverse[0] =  a22 * invDet;
		    affineInverse[1] = -a12 * invDet;
		    affineInverse[2] = 0;
		    affineInverse[3] = -a21 * invDet;
		    affineInverse[4] =  a11 * invDet;
		    affineInverse[5] = 0;
		    affineInverse[6] = (a21*a32 - a31*a22) * invDet;
		    affineInverse[7] = (a31*a12 - a11*a32) * invDet;
		    affineInverse[8] = (a11*a22 - a21*a12) * invDet;

		    // Widen before multiplying so large images cannot overflow int arithmetic.
		    CUDA_SAFE_MALLOC(dstPtr, (size_t)dst_rows*dst_cols*sizeof(uint8_t), &err);
		    CUDA_SAFE_MALLOC(&gpuInverse, 3*3*sizeof(double), &err);

		    CUDA_SAFE_MEMCPY(gpuInverse, affineInverse, 9*sizeof(double), cudaMemcpyHostToDevice, &err);

		    cudaaffine_kernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)(*dstPtr), gpuInverse, src_rows, src_cols, dst_rows, dst_cols);
		    CUDA_KERNEL_ERR_CHK(&err);

		    // The source image is consumed by this step; only *dstPtr survives.
		    CUDA_SAFE_FREE(srcPtr, &err);
		    CUDA_SAFE_FREE(gpuInverse, &err);
		}
		152	+} // end cuda
		153	+} // end br