From 34ee86e8f185e65ce3478f50db7ad5eebadb206f Mon Sep 17 00:00:00 2001
From: DepthDeluxe
Date: Mon, 25 Jan 2016 15:51:44 -0500
Subject: [PATCH] created basic passthrough plugin (doesn't quite work yet)

---
 openbr/plugins/cuda/passthrough.cpp | 40 ++++++++++++++++++++++++++++++++++++++--
 openbr/plugins/cuda/passthrough.cu  | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 5 deletions(-)

diff --git a/openbr/plugins/cuda/passthrough.cpp b/openbr/plugins/cuda/passthrough.cpp
index 13b9f19..f72a4a8 100644
--- a/openbr/plugins/cuda/passthrough.cpp
+++ b/openbr/plugins/cuda/passthrough.cpp
@@ -6,7 +6,41 @@
 using namespace cv;
 using namespace cv::gpu;
 
-extern void br_cuda_device_wrapper();
+#include <sstream>
+#include <string>
+
+extern void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst);
+
+// Debug helper: renders an OpenCV matrix type code as readable text
+// such as "8UC3" (depth name, then "C", then the channel count).
+string type2str(int type) {
+  string r;
+
+  // the depth is masked out of the low bits; the channel count minus one
+  // sits above CV_CN_SHIFT
+  const int depth = type & CV_MAT_DEPTH_MASK;
+  const int chans = 1 + (type >> CV_CN_SHIFT);
+
+  switch ( depth ) {
+    case CV_8U:  r = "8U"; break;
+    case CV_8S:  r = "8S"; break;
+    case CV_16U: r = "16U"; break;
+    case CV_16S: r = "16S"; break;
+    case CV_32S: r = "32S"; break;
+    case CV_32F: r = "32F"; break;
+    case CV_64F: r = "64F"; break;
+    default:     r = "User"; break;
+  }
+
+  // format the channel count as a number; appending chans+'0' as a
+  // single character is only correct for counts up to 9
+  std::ostringstream channelText;
+  channelText << chans;
+
+  r += "C";
+  r += channelText.str();
+
+  return r;
+}
 
 namespace br
 {
@@ -17,12 +51,14 @@ namespace br
 private:
     void project(const Template &src, Template &dst) const
     {
+        // note: if you convert the image to grayscale, you get 8UC1
+
         // upload the src mat to the GPU
         GpuMat srcGpuMat, dstGpuMat;
         srcGpuMat.upload(src.m());
         dstGpuMat.upload(src.m());
 
-        br_cuda_device_wrapper();
+        br_cuda_device_wrapper(srcGpuMat, dstGpuMat);
 
         dstGpuMat.download(dst.m());
 
diff --git a/openbr/plugins/cuda/passthrough.cu b/openbr/plugins/cuda/passthrough.cu
index 983a24e..1e593db 100644
--- a/openbr/plugins/cuda/passthrough.cu
+++ b/openbr/plugins/cuda/passthrough.cu
@@ -1,7 +1,63 @@
-__global__ void br_cuda_device_kernel() {
+// note: rows are copied as raw bytes, so any GpuMat element type passes through
+#include <opencv2/gpu/gpu.hpp>
+
+using namespace cv;
+using namespace cv::gpu;
+
+// Copies a pitched 2D image byte-for-byte from srcPtr to dstPtr.
+// Expects a 2D launch with one thread per byte; threads that land outside
+// the cols x rows area do nothing, so the grid may overhang the image.
+//   srcStep/dstStep: distance in bytes between the starts of consecutive rows
+//   cols: number of bytes to copy per row; rows: number of rows
+__global__ void br_cuda_device_kernel(const uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) {
+  const int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
+  const int colInd = blockIdx.x*blockDim.x+threadIdx.x;
+
+  // the grid is rounded up to whole blocks, so skip out-of-image threads
+  if (rowInd >= rows || colInd >= cols) {
+    return;
+  }
+
+  const uint8_t* rowSrcPtr = srcPtr + rowInd*srcStep;
+  uint8_t* rowDstPtr = dstPtr + rowInd*dstStep;
+
+  rowDstPtr[colInd] = rowSrcPtr[colInd];
 
 }
 
-void br_cuda_device_wrapper() {
-  br_cuda_device_kernel<<<1,1>>>();
+// Launches the passthrough kernel to copy the contents of src into dst.
+// dst must already be allocated with the same size and type as src.
+// A violated precondition or a rejected launch is reported through
+// OpenCV's CV_Assert / CV_Error.
+void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst) {
+  CV_Assert(dst.size() == src.size() && dst.type() == src.type());
+
+  // nothing to copy, and a zero-sized grid would be an invalid launch
+  if (src.empty()) {
+    return;
+  }
+
+  // convert the GpuMats to pointers
+  const uint8_t* srcPtr = (const uint8_t*)src.data;
+  uint8_t* dstPtr = (uint8_t*)dst.data;
+
+  // work in bytes so multi-byte and multi-channel elements are copied whole
+  const int rowBytes = src.cols * (int)src.elemSize();
+  const int imageHeight = src.rows;
+
+  // 32 * 8 = 256 threads; 32 wide so a warp covers consecutive bytes of a row
+  const dim3 threadsPerBlock(32, 8);
+  // round up so the right and bottom edges are covered when the image
+  // dimensions are not multiples of the block dimensions
+  const dim3 numBlocks((rowBytes + threadsPerBlock.x - 1) / threadsPerBlock.x,
+                       (imageHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);
+
+  br_cuda_device_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, rowBytes, imageHeight);
+
+  // a kernel launch does not return a status; ask the runtime for it
+  const cudaError_t launchStatus = cudaGetLastError();
+  if (launchStatus != cudaSuccess) {
+    CV_Error(CV_GpuApiCallError, cudaGetErrorString(launchStatus));
+  }
 }
+
+// read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel
-- 
libgit2 0.21.4