Commit cc916f3d6d5f1504b15703086395eedd4c853aa3

Authored by DepthDeluxe
1 parent 32efc928

added preliminary CUDALBP algorithm

openbr/plugins/cuda/cudalbp.cpp 0 → 100644
  1 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  2 + * Copyright 2012 The MITRE Corporation *
  3 + * *
  4 + * Licensed under the Apache License, Version 2.0 (the "License"); *
  5 + * you may not use this file except in compliance with the License. *
  6 + * You may obtain a copy of the License at *
  7 + * *
  8 + * http://www.apache.org/licenses/LICENSE-2.0 *
  9 + * *
  10 + * Unless required by applicable law or agreed to in writing, software *
  11 + * distributed under the License is distributed on an "AS IS" BASIS, *
  12 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
  13 + * See the License for the specific language governing permissions and *
  14 + * limitations under the License. *
  15 + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
  16 +
  17 +#include <opencv2/imgproc/imgproc.hpp>
  18 +#include <opencv2/imgproc/imgproc_c.h>
  19 +#include <opencv2/highgui/highgui.hpp>
  20 +#include <opencv2/highgui/highgui_c.h>
  21 +#include <limits>
  22 +
  23 +#include <openbr/plugins/openbr_internal.h>
  24 +
  25 +#include "cudalbp.hpp"
  26 +
  27 +using namespace cv;
  28 +
  29 +namespace br
  30 +{
  31 +
  32 +/*!
  33 + * \ingroup transforms
  34 + * \brief Convert the image into a feature vector using Local Binary Patterns
  35 + * \br_paper Ahonen, T.; Hadid, A.; Pietikainen, M.;
  36 + * "Face Description with Local Binary Patterns: Application to Face Recognition"
  37 + * Pattern Analysis and Machine Intelligence, IEEE Transactions, vol.28, no.12, pp.2037-2041, Dec. 2006
  38 + * \author Josh Klontz \cite jklontz
  39 + */
  40 +class CUDALBPTransform : public UntrainableTransform
  41 +{
  42 + Q_OBJECT
  43 + Q_PROPERTY(int radius READ get_radius WRITE set_radius RESET reset_radius STORED false)
  44 + Q_PROPERTY(int maxTransitions READ get_maxTransitions WRITE set_maxTransitions RESET reset_maxTransitions STORED false)
  45 + Q_PROPERTY(bool rotationInvariant READ get_rotationInvariant WRITE set_rotationInvariant RESET reset_rotationInvariant STORED false)
  46 + BR_PROPERTY(int, radius, 1)
  47 + BR_PROPERTY(int, maxTransitions, 8)
  48 + BR_PROPERTY(bool, rotationInvariant, false)
  49 +
  50 + uchar lut[256];
  51 + uint8_t* lutGpuPtr;
  52 + uchar null;
  53 +
  54 + /* Returns the number of 0->1 or 1->0 transitions in i */
  55 + static int numTransitions(int i)
  56 + {
  57 + int transitions = 0;
  58 + int curParity = i%2;
  59 + for (int j=1; j<=8; j++) {
  60 + int parity = (i>>(j%8)) % 2;
  61 + if (parity != curParity) transitions++;
  62 + curParity = parity;
  63 + }
  64 + return transitions;
  65 + }
  66 +
  67 + static int rotationInvariantEquivalent(int i)
  68 + {
  69 + int min = std::numeric_limits<int>::max();
  70 + for (int j=0; j<8; j++) {
  71 + bool parity = i % 2;
  72 + i = i >> 1;
  73 + if (parity) i+=128;
  74 + min = std::min(min, i);
  75 + }
  76 + return min;
  77 + }
  78 +
  79 + void init()
  80 + {
  81 + bool set[256];
  82 + uchar uid = 0;
  83 + for (int i=0; i<256; i++) {
  84 + if (numTransitions(i) <= maxTransitions) {
  85 + int id;
  86 + if (rotationInvariant) {
  87 + int rie = rotationInvariantEquivalent(i);
  88 + if (i == rie) id = uid++;
  89 + else id = lut[rie];
  90 + } else id = uid++;
  91 + lut[i] = id;
  92 + set[i] = true;
  93 + } else {
  94 + set[i] = false;
  95 + }
  96 + }
  97 +
  98 + null = uid;
  99 + for (int i=0; i<256; i++)
  100 + if (!set[i])
  101 + lut[i] = null; // Set to null id
  102 +
  103 + // copy lut over to the GPU
  104 + br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr);
  105 + }
  106 +
  107 + void project(const Template &src, Template &dst) const
  108 + {
  109 + // assume we are using an 8-bit 1 channel image
  110 + GpuMat srcGpuMat, dstGpuMat;
  111 +
  112 + // copy the data to the GPU
  113 + srcGpuMat.upload(src.m());
  114 + dstGpuMat.upload(src.m());
  115 +
  116 + // call the kernel function
  117 + br::cuda::cudalbp_wrapper(srcGpuMat, dstGpuMat, lutGpuPtr);
  118 +
  119 + // download the result
  120 + dstGpuMat.download(dst.m());
  121 + }
  122 +};
  123 +
  124 +BR_REGISTER(Transform, CUDALBPTransform)
  125 +
  126 +} // namespace br
  127 +
  128 +#include "cuda/cudalbp.moc"
openbr/plugins/cuda/cudalbp.cu 0 → 100644
  1 +#include <opencv2/gpu/gpu.hpp>
  2 +#include <stdio.h>
  3 +
  4 +using namespace cv;
  5 +using namespace cv::gpu;
  6 +
  7 +#include "cudalbp.hpp"
  8 +
  9 +namespace br { namespace cuda {
  10 + __device__ __forceinline__ uint8_t cudalbp_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, size_t srcStep, int rows, int cols) {
  11 + return (row >= rows || col >= cols) ? 0 : (srcPtr + row*srcStep)[col];
  12 + }
  13 +
  14 + __global__ void cudalbp_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int rows, int cols, uint8_t* lut)
  15 + {
  16 + int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
  17 + int colInd = blockIdx.x*blockDim.x+threadIdx.x;
  18 + int radius = 1;
  19 +
  20 + // don't do anything if the index is out of bounds
  21 + if (rowInd >= rows || colInd >= cols)
  22 + return;
  23 +
  24 + const uint8_t cval = cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+0*radius, srcPtr, srcStep, rows, cols);//(srcPtr[(rowInd*srcStep+0*radius)*m.cols+colInd+0*radius]); // center value
  25 + uint8_t val = lut[(cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd-1*radius, srcPtr, srcStep, rows, cols) >= cval ? 128 : 0) |
  26 + (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+0*radius, srcPtr, srcStep, rows, cols) >= cval ? 64 : 0) |
  27 + (cudalbp_kernel_get_pixel_value(rowInd-1*radius, colInd+1*radius, srcPtr, srcStep, rows, cols) >= cval ? 32 : 0) |
  28 + (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd+1*radius, srcPtr, srcStep, rows, cols) >= cval ? 16 : 0) |
  29 + (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+1*radius, srcPtr, srcStep, rows, cols) >= cval ? 8 : 0) |
  30 + (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd+0*radius, srcPtr, srcStep, rows, cols) >= cval ? 4 : 0) |
  31 + (cudalbp_kernel_get_pixel_value(rowInd+1*radius, colInd-1*radius, srcPtr, srcStep, rows, cols) >= cval ? 2 : 0) |
  32 + (cudalbp_kernel_get_pixel_value(rowInd+0*radius, colInd-1*radius, srcPtr, srcStep, rows, cols) >= cval ? 1 : 0)];
  33 +
  34 + // store calculated value away in the right place
  35 + uint8_t* dstRowPtr = dstPtr + rowInd*dstStep;
  36 + dstRowPtr[colInd] = val;
  37 + }
  38 +
  39 + void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut)
  40 + {
  41 + // convert the GpuMats to pointers
  42 + uint8_t* srcPtr = (uint8_t*)src.data;
  43 + uint8_t* dstPtr = (uint8_t*)dst.data;
  44 +
  45 + int imageWidth = src.cols;
  46 + int imageHeight = src.rows;
  47 +
  48 + // make 8 * 8 = 64 square block
  49 + dim3 threadsPerBlock(8, 8);
  50 +
  51 + dim3 numBlocks(imageWidth/threadsPerBlock.x + 1,
  52 + imageHeight/threadsPerBlock.y + 1);
  53 +
  54 + printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols);
  55 + printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols);
  56 + printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y);
  57 +
  58 + cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut);
  59 + }
  60 +
  61 + void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr) {
  62 + cudaMalloc(lutGpuPtrPtr, 256*sizeof(uint8_t));
  63 + cudaMemcpy(*lutGpuPtrPtr, lut, 256*sizeof(uint8_t), cudaMemcpyHostToDevice);
  64 + }
  65 +}}
openbr/plugins/cuda/cudalbp.hpp 0 → 100644
  1 +#include <opencv2/gpu/gpu.hpp>
  2 +
  3 +using namespace cv;
  4 +using namespace cv::gpu;
  5 +
  6 +namespace br { namespace cuda {
  7 + void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr);
  8 + void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut);
  9 +}}