Peter M. Groen / openbr

Browse Code »

Commit cc916f3d6d5f1504b15703086395eedd4c853aa3

Authored by DepthDeluxe 2016-01-27 13:09:49 -0500

1 parent 32efc928

added preliminary CUDALBP algorithm

Inline Side-by-side

Showing 3 changed files with 202 additions and 0 deletions

openbr/plugins/cuda/cudalbp.cpp 0 → 100644

View file @cc916f3

		1	+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
		2	+ * Copyright 2012 The MITRE Corporation *
		3	+ * *
		4	+ * Licensed under the Apache License, Version 2.0 (the "License"); *
		5	+ * you may not use this file except in compliance with the License. *
		6	+ * You may obtain a copy of the License at *
		7	+ * *
		8	+ * http://www.apache.org/licenses/LICENSE-2.0 *
		9	+ * *
		10	+ * Unless required by applicable law or agreed to in writing, software *
		11	+ * distributed under the License is distributed on an "AS IS" BASIS, *
		12	+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
		13	+ * See the License for the specific language governing permissions and *
		14	+ * limitations under the License. *
		15	+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
		16	+
		17	+#include <opencv2/imgproc/imgproc.hpp>
		18	+#include <opencv2/imgproc/imgproc_c.h>
		19	+#include <opencv2/highgui/highgui.hpp>
		20	+#include <opencv2/highgui/highgui_c.h>
		21	+#include <limits>
		22	+
		23	+#include <openbr/plugins/openbr_internal.h>
		24	+
		25	+#include "cudalbp.hpp"
		26	+
		27	+using namespace cv;
		28	+
		29	+namespace br
		30	+{
		31	+
		32	+/*!
		33	+ * \ingroup transforms
		34	+ * \brief Convert the image into a feature vector using Local Binary Patterns
		35	+ * \br_paper Ahonen, T.; Hadid, A.; Pietikainen, M.;
		36	+ * "Face Description with Local Binary Patterns: Application to Face Recognition"
		37	+ * Pattern Analysis and Machine Intelligence, IEEE Transactions, vol.28, no.12, pp.2037-2041, Dec. 2006
		38	+ * \author Josh Klontz \cite jklontz
		39	+ */
		40	+class CUDALBPTransform : public UntrainableTransform
		41	+{
		42	+ Q_OBJECT
		43	+ Q_PROPERTY(int radius READ get_radius WRITE set_radius RESET reset_radius STORED false)
		44	+ Q_PROPERTY(int maxTransitions READ get_maxTransitions WRITE set_maxTransitions RESET reset_maxTransitions STORED false)
		45	+ Q_PROPERTY(bool rotationInvariant READ get_rotationInvariant WRITE set_rotationInvariant RESET reset_rotationInvariant STORED false)
		46	+ BR_PROPERTY(int, radius, 1)
		47	+ BR_PROPERTY(int, maxTransitions, 8)
		48	+ BR_PROPERTY(bool, rotationInvariant, false)
		49	+
		50	+ uchar lut[256];
		51	+ uint8_t* lutGpuPtr;
		52	+ uchar null;
		53	+
		54	+ /* Returns the number of 0->1 or 1->0 transitions in i */
		55	+ static int numTransitions(int i)
		56	+ {
		57	+ int transitions = 0;
		58	+ int curParity = i%2;
		59	+ for (int j=1; j<=8; j++) {
		60	+ int parity = (i>>(j%8)) % 2;
		61	+ if (parity != curParity) transitions++;
		62	+ curParity = parity;
		63	+ }
		64	+ return transitions;
		65	+ }
		66	+
		67	+ static int rotationInvariantEquivalent(int i)
		68	+ {
		69	+ int min = std::numeric_limits<int>::max();
		70	+ for (int j=0; j<8; j++) {
		71	+ bool parity = i % 2;
		72	+ i = i >> 1;
		73	+ if (parity) i+=128;
		74	+ min = std::min(min, i);
		75	+ }
		76	+ return min;
		77	+ }
		78	+
		79	+ void init()
		80	+ {
		81	+ bool set[256];
		82	+ uchar uid = 0;
		83	+ for (int i=0; i<256; i++) {
		84	+ if (numTransitions(i) <= maxTransitions) {
		85	+ int id;
		86	+ if (rotationInvariant) {
		87	+ int rie = rotationInvariantEquivalent(i);
		88	+ if (i == rie) id = uid++;
		89	+ else id = lut[rie];
		90	+ } else id = uid++;
		91	+ lut[i] = id;
		92	+ set[i] = true;
		93	+ } else {
		94	+ set[i] = false;
		95	+ }
		96	+ }
		97	+
		98	+ null = uid;
		99	+ for (int i=0; i<256; i++)
		100	+ if (!set[i])
		101	+ lut[i] = null; // Set to null id
		102	+
		103	+ // copy lut over to the GPU
		104	+ br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr);
		105	+ }
		106	+
		107	+ void project(const Template &src, Template &dst) const
		108	+ {
		109	+ // assume we are using an 8-bit 1 channel image
		110	+ GpuMat srcGpuMat, dstGpuMat;
		111	+
		112	+ // copy the data to the GPU
		113	+ srcGpuMat.upload(src.m());
		114	+ dstGpuMat.upload(src.m());
		115	+
		116	+ // call the kernel function
		117	+ br::cuda::cudalbp_wrapper(srcGpuMat, dstGpuMat, lutGpuPtr);
		118	+
		119	+ // download the result
		120	+ dstGpuMat.download(dst.m());
		121	+ }
		122	+};
		123	+
		124	+BR_REGISTER(Transform, CUDALBPTransform)
		125	+
		126	+} // namespace br
		127	+
		128	+#include "cuda/cudalbp.moc"

openbr/plugins/cuda/cudalbp.cu 0 → 100644

View file @cc916f3

		1	+#include <opencv2/gpu/gpu.hpp>
		2	+#include <stdio.h>
		3	+
		4	+using namespace cv;
		5	+using namespace cv::gpu;
		6	+
		7	+#include "cudalbp.hpp"
		8	+
		9	+namespace br { namespace cuda {
  /*!
   * \brief Bounds-checked read of one 8-bit pixel from an image stored row by row.
   * \param row     Row of the requested pixel; may be negative or >= rows.
   * \param col     Column of the requested pixel; may be negative or >= cols.
   * \param srcPtr  Address of the first byte of the image.
   * \param srcStep Distance in bytes between the starts of consecutive rows.
   * \param rows    Number of rows in the image.
   * \param cols    Number of columns in the image.
   * \return The pixel value, or 0 when (row, col) lies outside the image.
   *
   * Also compiled for the host so the border handling can be checked on the CPU.
   */
  __host__ __device__ __forceinline__ uint8_t cudalbp_kernel_get_pixel_value(int row, int col, const uint8_t* srcPtr, size_t srcStep, int rows, int cols) {
    // Callers ask for (row-1, col-1) on the image border, so the lower bound
    // must be rejected too; a negative index would otherwise wrap around when
    // converted to size_t and read far outside the image.
    if (row < 0 || row >= rows || col < 0 || col >= cols)
      return 0;
    return srcPtr[(size_t)row * srcStep + (size_t)col];
  }
		13	+
  /*!
   * \brief Computes one LBP code per pixel and writes its lookup-table entry to the destination image.
   *
   * Launch layout: 2D grid of 2D blocks, one thread per pixel; x indexes columns and y indexes rows.
   * Threads whose (row, col) falls outside rows x cols return without touching memory, so the grid
   * may be larger than the image.
   *
   * \param srcPtr  First byte of the 8-bit source image.
   * \param dstPtr  First byte of the 8-bit destination image.
   * \param srcStep Bytes between consecutive source rows.
   * \param dstStep Bytes between consecutive destination rows.
   * \param rows    Image height in pixels.
   * \param cols    Image width in pixels.
   * \param lut     256-entry table indexed by the raw 8-bit neighbourhood pattern.
   *
   * Neighbour bit weights, clockwise from the top-left: 128, 64, 32 (top row), 16 (right),
   * 8, 4, 2 (bottom row, right to left), 1 (left). A bit is set when the neighbour is >= the centre.
   * For border pixels the neighbour coordinates fall outside the image (including -1); handling
   * those is left to cudalbp_kernel_get_pixel_value.
   */
  __global__ void cudalbp_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int rows, int cols, uint8_t* lut)
  {
    const int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
    const int colInd = blockIdx.x*blockDim.x+threadIdx.x;
    const int radius = 1;

    // don't do anything if the index is out of bounds
    if (rowInd >= rows || colInd >= cols)
      return;

    const uint8_t cval = cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+0*radius, srcPtr, srcStep, rows, cols); // center value
    const uint8_t val = lut[(cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd-1*radius, srcPtr, srcStep, rows, cols) >= cval ? 128 : 0) |
                            (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+0*radius, srcPtr, srcStep, rows, cols) >= cval ?  64 : 0) |
                            (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+1*radius, srcPtr, srcStep, rows, cols) >= cval ?  32 : 0) |
                            (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+1*radius, srcPtr, srcStep, rows, cols) >= cval ?  16 : 0) |
                            (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+1*radius, srcPtr, srcStep, rows, cols) >= cval ?   8 : 0) |
                            (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+0*radius, srcPtr, srcStep, rows, cols) >= cval ?   4 : 0) |
                            (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd-1*radius, srcPtr, srcStep, rows, cols) >= cval ?   2 : 0) |
                            (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd-1*radius, srcPtr, srcStep, rows, cols) >= cval ?   1 : 0)];

    // store calculated value away in the right place
    uint8_t* dstRowPtr = dstPtr + (size_t)rowInd*dstStep;
    dstRowPtr[colInd] = val;
  }
		38	+
  /*!
   * \brief Launches cudalbp_kernel over the whole source image.
   * \param src Source image on the device; its data pointer is treated as 8-bit pixels.
   * \param dst Destination image on the device; must be at least as large as src.
   * \param lut Lookup table pointer forwarded to the kernel unchanged.
   *
   * The launch uses 8x8 thread blocks and enough blocks to cover src.cols x src.rows.
   * Nothing is launched when the source is empty or the destination is too small.
   * A launch failure is reported on stderr. The function does not wait for the kernel to finish.
   */
  void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut)
  {
    // convert the GpuMats to pointers
    uint8_t* srcPtr = (uint8_t*)src.data;
    uint8_t* dstPtr = (uint8_t*)dst.data;

    const int imageWidth = src.cols;
    const int imageHeight = src.rows;

    // make 8 * 8 = 64 square block
    const dim3 threadsPerBlock(8, 8);

    // ceiling division: just enough blocks to cover the image, with no spare
    // row/column of idle blocks when the size is a multiple of the block size
    const dim3 numBlocks((imageWidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                         (imageHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

    printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols);
    printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols);
    printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y);

    // an empty image would give a zero-sized grid, which is an invalid launch
    if (imageWidth <= 0 || imageHeight <= 0)
      return;

    // the kernel writes one value per source pixel, so a smaller destination
    // would be written out of bounds
    if (dst.rows < imageHeight || dst.cols < imageWidth) {
      fprintf(stderr, "cudalbp_wrapper: destination (%d x %d) is smaller than source (%d x %d)\n",
              dst.rows, dst.cols, imageHeight, imageWidth);
      return;
    }

    cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut);

    // kernel launches return nothing; configuration errors only show up here
    const cudaError_t launchErr = cudaGetLastError();
    if (launchErr != cudaSuccess)
      fprintf(stderr, "cudalbp_wrapper: kernel launch failed: %s\n", cudaGetErrorString(launchErr));
  }
		60	+
  /*!
   * \brief Allocates a 256-byte device buffer and copies the host lookup table into it.
   * \param lut          Host array of 256 entries to copy.
   * \param lutGpuPtrPtr Receives the device pointer. It is set to NULL when allocation
   *                     or the copy fails; the failure is reported on stderr.
   *
   * The device buffer is not freed here; the caller is expected to release it with cudaFree.
   */
  void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr) {
    const size_t lutBytes = 256 * sizeof(uint8_t);

    if (lut == NULL || lutGpuPtrPtr == NULL)
      return;

    *lutGpuPtrPtr = NULL;
    cudaError_t err = cudaMalloc((void**)lutGpuPtrPtr, lutBytes);
    if (err != cudaSuccess) {
      fprintf(stderr, "cudalbp_init_wrapper: cudaMalloc failed: %s\n", cudaGetErrorString(err));
      *lutGpuPtrPtr = NULL;
      return;
    }

    // the copy destination is the device buffer (*lutGpuPtrPtr), not the
    // host variable that stores its address
    err = cudaMemcpy(*lutGpuPtrPtr, lut, lutBytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
      fprintf(stderr, "cudalbp_init_wrapper: cudaMemcpy failed: %s\n", cudaGetErrorString(err));
      cudaFree(*lutGpuPtrPtr);
      *lutGpuPtrPtr = NULL;
    }
  }
		65	+}}

openbr/plugins/cuda/cudalbp.hpp 0 → 100644

View file @cc916f3

		1	+#include <opencv2/gpu/gpu.hpp>
		2	+
		3	+using namespace cv;
		4	+using namespace cv::gpu;
		5	+
		6	+namespace br { namespace cuda {
		7	+ void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr);
		8	+ void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut);
		9	+}}