Commit edd8d3e4ecccb56ab658bc98eb9d94873aa4cf17

Authored by DepthDeluxe
1 parent 34ee86e8

added proposed file structure for CUDA plugins

openbr/plugins/cuda/passthrough.cpp
... ... @@ -6,9 +6,9 @@
6 6 using namespace cv;
7 7 using namespace cv::gpu;
8 8  
9   -#include <iostream>
  9 +#include "passthrough.hpp"
10 10  
11   -extern void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst);
  11 +#include <iostream>
12 12  
13 13 string type2str(int type) {
14 14 string r;
... ... @@ -49,7 +49,7 @@ private:
49 49 srcGpuMat.upload(src.m());
50 50 dstGpuMat.upload(src.m());
51 51  
52   - br_cuda_device_wrapper(srcGpuMat, dstGpuMat);
  52 + br::cuda::passthrough_wrapper(srcGpuMat, dstGpuMat);
53 53  
54 54 dstGpuMat.download(dst.m());
55 55  
... ...
openbr/plugins/cuda/passthrough.cu
... ... @@ -5,30 +5,35 @@
5 5 using namespace cv;
6 6 using namespace cv::gpu;
7 7  
// Diff hunk for passthrough.cu: the removed (-) rows are the former global-scope
// br_cuda_device_kernel / br_cuda_device_wrapper; the added (+) rows carry the same
// statements, renamed to passthrough_kernel / passthrough_wrapper and placed inside
// namespace br::cuda.
8   -__global__ void br_cuda_device_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) {
9   - int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
10   - int colInd = blockIdx.x*blockDim.x+threadIdx.x;
  8 +#include "passthrough.hpp"
11 9  
12   - uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd];
13   - uint8_t* rowDstPtr = dstPtr + rowInd*dstStep;
  10 +namespace br { namespace cuda {
// passthrough_kernel: each thread derives (rowInd, colInd) from its 2-D block and
// thread indices and copies one uint8_t from the source row to the same column of
// the destination row. Rows are located by adding rowInd*srcStep / rowInd*dstStep
// to the uint8_t base pointers, i.e. the steps are used as byte offsets.
// NOTE(review): cols and rows are accepted but never read, so nothing guards
// rowInd / colInd against the image bounds inside the kernel.
  11 + __global__ void passthrough_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) {
  12 + int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
  13 + int colInd = blockIdx.x*blockDim.x+threadIdx.x;
14 14  
15   - rowDstPtr[colInd] = srcVal;
16   -}
  15 + uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd];
  16 + uint8_t* rowDstPtr = dstPtr + rowInd*dstStep;
17 17  
18   -void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst) {
19   - // convert the GpuMats to pointers
20   - uint8_t* srcPtr = (uint8_t*)src.data;
21   - uint8_t* dstPtr = (uint8_t*)dst.data;
  18 + rowDstPtr[colInd] = srcVal;
  19 + }
22 20  
23   - int imageWidth = src.cols;
24   - int imageHeight = src.rows;
// passthrough_wrapper: host-side launcher. Reads the data pointer and step of both
// GpuMats plus src.cols / src.rows, then launches passthrough_kernel with 8x8
// thread blocks.
// NOTE(review): data is cast to uint8_t* and indexed with src.cols, i.e. one byte
// per column -- presumably single-channel 8-bit images are expected; confirm with
// the caller. dst's own cols/rows are not consulted.
  21 + void passthrough_wrapper(GpuMat& src, GpuMat& dst) {
  22 + // convert the GpuMats to pointers
  23 + uint8_t* srcPtr = (uint8_t*)src.data;
  24 + uint8_t* dstPtr = (uint8_t*)dst.data;
25 25  
26   - // make 8 * 8 = 64 square block
27   - dim3 threadsPerBlock(8, 8);
28   - dim3 numBlocks(imageWidth / threadsPerBlock.x,
29   - imageHeight / threadsPerBlock.y);
  26 + int imageWidth = src.cols;
  27 + int imageHeight = src.rows;
  28 +
  29 + // make 8 * 8 = 64 square block
  30 + dim3 threadsPerBlock(8, 8);
// NOTE(review): the divisions below truncate, so when cols or rows is not a
// multiple of 8 the trailing columns/rows get no thread and are not written by the
// kernel; a dimension smaller than 8 yields a grid extent of 0.
  31 + dim3 numBlocks(imageWidth / threadsPerBlock.x,
  32 + imageHeight / threadsPerBlock.y);
  33 +
// NOTE(review): the launch result is not checked; no cudaGetLastError() or
// synchronization follows in this function.
  34 + passthrough_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight);
  35 + }
  36 +}}
30 37  
31   - br_cuda_device_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight);
32   -}
33 38  
34 39 // read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel
... ...
openbr/plugins/cuda/passthrough.hpp 0 → 100644
  1 +namespace br { namespace cuda {
// Declaration of the host-callable launcher that passthrough.cu defines in
// br::cuda and passthrough.cpp calls as br::cuda::passthrough_wrapper.
// NOTE(review): as shown, this header has no include guard and includes nothing
// itself; GpuMat is used unqualified, so it must already be visible at the point
// of inclusion (both passthrough.cpp and passthrough.cu include this header after
// `using namespace cv::gpu;`).
  2 + void passthrough_wrapper(GpuMat& src, GpuMat& dst);
  3 +}}
... ...