Commit edd8d3e4ecccb56ab658bc98eb9d94873aa4cf17
1 parent
34ee86e8
added proposed file structure for CUDA plugins
Showing
3 changed files
with
30 additions
and
22 deletions
openbr/plugins/cuda/passthrough.cpp
| @@ -6,9 +6,9 @@ | @@ -6,9 +6,9 @@ | ||
| 6 | using namespace cv; | 6 | using namespace cv; |
| 7 | using namespace cv::gpu; | 7 | using namespace cv::gpu; |
| 8 | 8 | ||
| 9 | -#include <iostream> | 9 | +#include "passthrough.hpp" |
| 10 | 10 | ||
| 11 | -extern void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst); | 11 | +#include <iostream> |
| 12 | 12 | ||
| 13 | string type2str(int type) { | 13 | string type2str(int type) { |
| 14 | string r; | 14 | string r; |
| @@ -49,7 +49,7 @@ private: | @@ -49,7 +49,7 @@ private: | ||
| 49 | srcGpuMat.upload(src.m()); | 49 | srcGpuMat.upload(src.m()); |
| 50 | dstGpuMat.upload(src.m()); | 50 | dstGpuMat.upload(src.m()); |
| 51 | 51 | ||
| 52 | - br_cuda_device_wrapper(srcGpuMat, dstGpuMat); | 52 | + br::cuda::passthrough_wrapper(srcGpuMat, dstGpuMat); |
| 53 | 53 | ||
| 54 | dstGpuMat.download(dst.m()); | 54 | dstGpuMat.download(dst.m()); |
| 55 | 55 |
openbr/plugins/cuda/passthrough.cu
| @@ -5,30 +5,35 @@ | @@ -5,30 +5,35 @@ | ||
| 5 | using namespace cv; | 5 | using namespace cv; |
| 6 | using namespace cv::gpu; | 6 | using namespace cv::gpu; |
| 7 | 7 | ||
| 8 | -__global__ void br_cuda_device_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) { | ||
| 9 | - int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | ||
| 10 | - int colInd = blockIdx.x*blockDim.x+threadIdx.x; | 8 | +#include "passthrough.hpp" |
| 11 | 9 | ||
| 12 | - uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd]; | ||
| 13 | - uint8_t* rowDstPtr = dstPtr + rowInd*dstStep; | 10 | +namespace br { namespace cuda { |
| 11 | + __global__ void passthrough_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) { | ||
| 12 | + int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | ||
| 13 | + int colInd = blockIdx.x*blockDim.x+threadIdx.x; | ||
| 14 | 14 | ||
| 15 | - rowDstPtr[colInd] = srcVal; | ||
| 16 | -} | 15 | + uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd]; |
| 16 | + uint8_t* rowDstPtr = dstPtr + rowInd*dstStep; | ||
| 17 | 17 | ||
| 18 | -void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst) { | ||
| 19 | - // convert the GpuMats to pointers | ||
| 20 | - uint8_t* srcPtr = (uint8_t*)src.data; | ||
| 21 | - uint8_t* dstPtr = (uint8_t*)dst.data; | 18 | + rowDstPtr[colInd] = srcVal; |
| 19 | + } | ||
| 22 | 20 | ||
| 23 | - int imageWidth = src.cols; | ||
| 24 | - int imageHeight = src.rows; | 21 | + void passthrough_wrapper(GpuMat& src, GpuMat& dst) { |
| 22 | + // convert the GpuMats to pointers | ||
| 23 | + uint8_t* srcPtr = (uint8_t*)src.data; | ||
| 24 | + uint8_t* dstPtr = (uint8_t*)dst.data; | ||
| 25 | 25 | ||
| 26 | - // make 8 * 8 = 64 square block | ||
| 27 | - dim3 threadsPerBlock(8, 8); | ||
| 28 | - dim3 numBlocks(imageWidth / threadsPerBlock.x, | ||
| 29 | - imageHeight / threadsPerBlock.y); | 26 | + int imageWidth = src.cols; |
| 27 | + int imageHeight = src.rows; | ||
| 28 | + | ||
| 29 | + // make 8 * 8 = 64 square block | ||
| 30 | + dim3 threadsPerBlock(8, 8); | ||
| 31 | + dim3 numBlocks(imageWidth / threadsPerBlock.x, | ||
| 32 | + imageHeight / threadsPerBlock.y); | ||
| 33 | + | ||
| 34 | + passthrough_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight); | ||
| 35 | + } | ||
| 36 | +}} | ||
| 30 | 37 | ||
| 31 | - br_cuda_device_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight); | ||
| 32 | -} | ||
| 33 | 38 | ||
| 34 | // read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel | 39 | // read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel |
openbr/plugins/cuda/passthrough.hpp
0 → 100644