diff --git a/openbr/plugins/cuda/cudalbp.cpp b/openbr/plugins/cuda/cudalbp.cpp new file mode 100644 index 0000000..2d6d05f --- /dev/null +++ b/openbr/plugins/cuda/cudalbp.cpp @@ -0,0 +1,128 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright 2012 The MITRE Corporation * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include +#include +#include +#include +#include + +#include + +#include "cudalbp.hpp" + +using namespace cv; + +namespace br +{ + +/*! + * \ingroup transforms + * \brief Convert the image into a feature vector using Local Binary Patterns + * \br_paper Ahonen, T.; Hadid, A.; Pietikainen, M.; + * "Face Description with Local Binary Patterns: Application to Face Recognition" + * Pattern Analysis and Machine Intelligence, IEEE Transactions, vol.28, no.12, pp.2037-2041, Dec. 2006 + * \author Josh Klontz \cite jklontz + */ +class CUDALBPTransform : public UntrainableTransform +{ + Q_OBJECT + Q_PROPERTY(int radius READ get_radius WRITE set_radius RESET reset_radius STORED false) + Q_PROPERTY(int maxTransitions READ get_maxTransitions WRITE set_maxTransitions RESET reset_maxTransitions STORED false) + Q_PROPERTY(bool rotationInvariant READ get_rotationInvariant WRITE set_rotationInvariant RESET reset_rotationInvariant STORED false) + BR_PROPERTY(int, radius, 1) + BR_PROPERTY(int, maxTransitions, 8) + BR_PROPERTY(bool, rotationInvariant, false) + + uchar lut[256]; + uint8_t* lutGpuPtr; + uchar null; + + /* Returns the number of 0->1 or 1->0 transitions in i */ + static int numTransitions(int i) + { + int transitions = 0; + int curParity = i%2; + for (int j=1; j<=8; j++) { + int parity = (i>>(j%8)) % 2; + if (parity != curParity) transitions++; + curParity = parity; + } + return transitions; + } + + static int rotationInvariantEquivalent(int i) + { + int min = std::numeric_limits::max(); + for (int j=0; j<8; j++) { + bool parity = i % 2; + i = i >> 1; + if (parity) i+=128; + min = std::min(min, i); + } + return min; + } + + void init() + { + bool set[256]; + uchar uid = 0; + for (int i=0; i<256; i++) { + if (numTransitions(i) <= maxTransitions) { + int id; + if (rotationInvariant) { + int rie = rotationInvariantEquivalent(i); + if (i == rie) id = uid++; + else id = lut[rie]; + } else id = uid++; + lut[i] = id; + set[i] = true; + } else { + set[i] = false; + } + } + + null = uid; + for (int i=0; i<256; i++) + if (!set[i]) + lut[i] = null; // Set to null id + + // copy lut over to the GPU + br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr); + } + + void project(const Template &src, Template &dst) const + { + // assume we are using an 8-bit 1 channel image + GpuMat srcGpuMat, dstGpuMat; + + // copy the data to the GPU + srcGpuMat.upload(src.m()); + dstGpuMat.upload(src.m()); + + // call the kernel function + br::cuda::cudalbp_wrapper(srcGpuMat, dstGpuMat, lutGpuPtr); + + // download the result + dstGpuMat.download(dst.m()); + } +}; + +BR_REGISTER(Transform, CUDALBPTransform) + +} // namespace br + +#include "cuda/cudalbp.moc" diff --git a/openbr/plugins/cuda/cudalbp.cu b/openbr/plugins/cuda/cudalbp.cu new file mode 100644 index 0000000..81007b6 --- /dev/null +++ b/openbr/plugins/cuda/cudalbp.cu @@ -0,0 +1,65 @@ +#include +#include + +using namespace cv; +using namespace cv::gpu; + +#include "cudalbp.hpp" + +namespace br { namespace cuda { + __device__ __forceinline__ uint8_t cudalbp_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, size_t srcStep, int rows, int cols) { + return (row >= rows || col >= cols) ? 0 : (srcPtr + row*srcStep)[col]; + } + + __global__ void cudalbp_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int rows, int cols, uint8_t* lut) + { + int rowInd = blockIdx.y*blockDim.y+threadIdx.y; + int colInd = blockIdx.x*blockDim.x+threadIdx.x; + int radius = 1; + + // don't do anything if the index is out of bounds + if (rowInd >= rows || colInd >= cols) + return; + + const uint8_t cval = cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+0*radius, srcPtr, srcStep, rows, cols);//(srcPtr[(rowInd*srcStep+0*radius)*m.cols+colInd+0*radius]); // center value + uint8_t val = lut[(cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd-1*radius, srcPtr, srcStep, rows, cols) >= cval ? 128 : 0) | + (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+0*radius, srcPtr, srcStep, rows, cols) >= cval ? 64 : 0) | + (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+1*radius, srcPtr, srcStep, rows, cols) >= cval ? 32 : 0) | + (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+1*radius, srcPtr, srcStep, rows, cols) >= cval ? 16 : 0) | + (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+1*radius, srcPtr, srcStep, rows, cols) >= cval ? 8 : 0) | + (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+0*radius, srcPtr, srcStep, rows, cols) >= cval ? 4 : 0) | + (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd-1*radius, srcPtr, srcStep, rows, cols) >= cval ? 2 : 0) | + (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd-1*radius, srcPtr, srcStep, rows, cols) >= cval ? 1 : 0)]; + + // store calculated value away in the right place + uint8_t* dstRowPtr = dstPtr + rowInd*dstStep; + dstRowPtr[colInd] = val; + } + + void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut) + { + // convert the GpuMats to pointers + uint8_t* srcPtr = (uint8_t*)src.data; + uint8_t* dstPtr = (uint8_t*)dst.data; + + int imageWidth = src.cols; + int imageHeight = src.rows; + + // make 8 * 8 = 64 square block + dim3 threadsPerBlock(8, 8); + + dim3 numBlocks(imageWidth/threadsPerBlock.x + 1, + imageHeight/threadsPerBlock.y + 1); + + printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols); + printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols); + printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); + + cudalbp_kernel<<>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut); + } + + void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr) { + cudaMalloc(lutGpuPtrPtr, 256*sizeof(uint8_t)); + cudaMemcpy(*lutGpuPtrPtr, lut, 256*sizeof(uint8_t), cudaMemcpyHostToDevice); + } +}} diff --git a/openbr/plugins/cuda/cudalbp.hpp b/openbr/plugins/cuda/cudalbp.hpp new file mode 100644 index 0000000..f6569be --- /dev/null +++ b/openbr/plugins/cuda/cudalbp.hpp @@ -0,0 +1,9 @@ +#include + +using namespace cv; +using namespace cv::gpu; + +namespace br { namespace cuda { + void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr); + void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut); +}}