Commit 34ee86e8f185e65ce3478f50db7ad5eebadb206f

Authored by DepthDeluxe
1 parent 4148de2f

created basic passthrough plugin (doesn't quite work yet)

openbr/plugins/cuda/passthrough.cpp
... ... @@ -6,7 +6,32 @@
6 6 using namespace cv;
7 7 using namespace cv::gpu;
8 8  
9   -extern void br_cuda_device_wrapper();
  9 +#include <iostream>
  10 +
  11 +extern void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst);
  12 +
  13 +string type2str(int type) {
  14 + string r;
  15 +
  16 + uchar depth = type & CV_MAT_DEPTH_MASK;
  17 + uchar chans = 1 + (type >> CV_CN_SHIFT);
  18 +
  19 + switch ( depth ) {
  20 + case CV_8U: r = "8U"; break;
  21 + case CV_8S: r = "8S"; break;
  22 + case CV_16U: r = "16U"; break;
  23 + case CV_16S: r = "16S"; break;
  24 + case CV_32S: r = "32S"; break;
  25 + case CV_32F: r = "32F"; break;
  26 + case CV_64F: r = "64F"; break;
  27 + default: r = "User"; break;
  28 + }
  29 +
  30 + r += "C";
  31 + r += (chans+'0');
  32 +
  33 + return r;
  34 +}
10 35  
11 36 namespace br
12 37 {
... ... @@ -17,12 +42,14 @@ namespace br
17 42 private:
18 43 void project(const Template &src, Template &dst) const
19 44 {
  45 + // note: if you convert the image to grayscale, you get 8UC1
  46 +
20 47 // upload the src mat to the GPU
21 48 GpuMat srcGpuMat, dstGpuMat;
22 49 srcGpuMat.upload(src.m());
23 50 dstGpuMat.upload(src.m());
24 51  
25   - br_cuda_device_wrapper();
  52 + br_cuda_device_wrapper(srcGpuMat, dstGpuMat);
26 53  
27 54 dstGpuMat.download(dst.m());
28 55  
... ...
openbr/plugins/cuda/passthrough.cu
1   -__global__ void br_cuda_device_kernel() {
  1 +// note: Using 8-bit unsigned 1 channel images
2 2  
  3 +#include <opencv2/gpu/gpu.hpp>
  4 +
  5 +using namespace cv;
  6 +using namespace cv::gpu;
  7 +
  8 +__global__ void br_cuda_device_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) {
  9 + int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
  10 + int colInd = blockIdx.x*blockDim.x+threadIdx.x;
  11 +
  12 + uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd];
  13 + uint8_t* rowDstPtr = dstPtr + rowInd*dstStep;
  14 +
  15 + rowDstPtr[colInd] = srcVal;
3 16 }
4 17  
5   -void br_cuda_device_wrapper() {
6   - br_cuda_device_kernel<<<1,1>>>();
  18 +void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst) {
  19 + // convert the GpuMats to pointers
  20 + uint8_t* srcPtr = (uint8_t*)src.data;
  21 + uint8_t* dstPtr = (uint8_t*)dst.data;
  22 +
  23 + int imageWidth = src.cols;
  24 + int imageHeight = src.rows;
  25 +
  26 + // make 8 * 8 = 64 square block
  27 + dim3 threadsPerBlock(8, 8);
  28 + dim3 numBlocks(imageWidth / threadsPerBlock.x,
  29 + imageHeight / threadsPerBlock.y);
  30 +
  31 + br_cuda_device_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight);
7 32 }
  33 +
  34 +// read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel
... ...