Commit edd8d3e4ecccb56ab658bc98eb9d94873aa4cf17 (1 parent: 34ee86e8)
added proposed file structure for CUDA plugins
Showing 3 changed files with 30 additions and 22 deletions
openbr/plugins/cuda/passthrough.cpp
| ... | ... | @@ -6,9 +6,9 @@ |
| 6 | 6 | using namespace cv; |
| 7 | 7 | using namespace cv::gpu; |
| 8 | 8 | |
| 9 | -#include <iostream> | |
| 9 | +#include "passthrough.hpp" | |
| 10 | 10 | |
| 11 | -extern void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst); | |
| 11 | +#include <iostream> | |
| 12 | 12 | |
| 13 | 13 | string type2str(int type) { |
| 14 | 14 | string r; |
| ... | ... | @@ -49,7 +49,7 @@ private: |
| 49 | 49 | srcGpuMat.upload(src.m()); |
| 50 | 50 | dstGpuMat.upload(src.m()); |
| 51 | 51 | |
| 52 | - br_cuda_device_wrapper(srcGpuMat, dstGpuMat); | |
| 52 | + br::cuda::passthrough_wrapper(srcGpuMat, dstGpuMat); | |
| 53 | 53 | |
| 54 | 54 | dstGpuMat.download(dst.m()); |
| 55 | 55 | ... | ... |
openbr/plugins/cuda/passthrough.cu
| ... | ... | @@ -5,30 +5,35 @@ |
| 5 | 5 | using namespace cv; |
| 6 | 6 | using namespace cv::gpu; |
| 7 | 7 | |
| 8 | -__global__ void br_cuda_device_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) { | |
| 9 | - int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | |
| 10 | - int colInd = blockIdx.x*blockDim.x+threadIdx.x; | |
| 8 | +#include "passthrough.hpp" | |
| 11 | 9 | |
| 12 | - uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd]; | |
| 13 | - uint8_t* rowDstPtr = dstPtr + rowInd*dstStep; | |
| 10 | +namespace br { namespace cuda { | |
| 11 | + __global__ void passthrough_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) { | |
| 12 | + int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | |
| 13 | + int colInd = blockIdx.x*blockDim.x+threadIdx.x; | |
| 14 | 14 | |
| 15 | - rowDstPtr[colInd] = srcVal; | |
| 16 | -} | |
| 15 | + uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd]; | |
| 16 | + uint8_t* rowDstPtr = dstPtr + rowInd*dstStep; | |
| 17 | 17 | |
| 18 | -void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst) { | |
| 19 | - // convert the GpuMats to pointers | |
| 20 | - uint8_t* srcPtr = (uint8_t*)src.data; | |
| 21 | - uint8_t* dstPtr = (uint8_t*)dst.data; | |
| 18 | + rowDstPtr[colInd] = srcVal; | |
| 19 | + } | |
| 22 | 20 | |
| 23 | - int imageWidth = src.cols; | |
| 24 | - int imageHeight = src.rows; | |
| 21 | + void passthrough_wrapper(GpuMat& src, GpuMat& dst) { | |
| 22 | + // convert the GpuMats to pointers | |
| 23 | + uint8_t* srcPtr = (uint8_t*)src.data; | |
| 24 | + uint8_t* dstPtr = (uint8_t*)dst.data; | |
| 25 | 25 | |
| 26 | - // make 8 * 8 = 64 square block | |
| 27 | - dim3 threadsPerBlock(8, 8); | |
| 28 | - dim3 numBlocks(imageWidth / threadsPerBlock.x, | |
| 29 | - imageHeight / threadsPerBlock.y); | |
| 26 | + int imageWidth = src.cols; | |
| 27 | + int imageHeight = src.rows; | |
| 28 | + | |
| 29 | + // make 8 * 8 = 64 square block | |
| 30 | + dim3 threadsPerBlock(8, 8); | |
| 31 | + dim3 numBlocks(imageWidth / threadsPerBlock.x, | |
| 32 | + imageHeight / threadsPerBlock.y); | |
| 33 | + | |
| 34 | + passthrough_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight); | |
| 35 | + } | |
| 36 | +}} | |
| 30 | 37 | |
| 31 | - br_cuda_device_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight); | |
| 32 | -} | |
| 33 | 38 | |
| 34 | 39 | // read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel | ... | ... |
openbr/plugins/cuda/passthrough.hpp
new file (file mode 0 → 100644)