Commit 34ee86e8f185e65ce3478f50db7ad5eebadb206f
1 parent
4148de2f
created basic passthrough plugin (doesn't quite work yet)
Showing
2 changed files
with
59 additions
and
5 deletions
openbr/plugins/cuda/passthrough.cpp
| ... | ... | @@ -6,7 +6,32 @@ |
| 6 | 6 | using namespace cv; |
| 7 | 7 | using namespace cv::gpu; |
| 8 | 8 | |
| 9 | -extern void br_cuda_device_wrapper(); | |
| 9 | +#include <iostream> | |
| 10 | + | |
| 11 | +extern void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst); | |
| 12 | + | |
| 13 | +string type2str(int type) { | |
| 14 | + string r; | |
| 15 | + | |
| 16 | + uchar depth = type & CV_MAT_DEPTH_MASK; | |
| 17 | + uchar chans = 1 + (type >> CV_CN_SHIFT); | |
| 18 | + | |
| 19 | + switch ( depth ) { | |
| 20 | + case CV_8U: r = "8U"; break; | |
| 21 | + case CV_8S: r = "8S"; break; | |
| 22 | + case CV_16U: r = "16U"; break; | |
| 23 | + case CV_16S: r = "16S"; break; | |
| 24 | + case CV_32S: r = "32S"; break; | |
| 25 | + case CV_32F: r = "32F"; break; | |
| 26 | + case CV_64F: r = "64F"; break; | |
| 27 | + default: r = "User"; break; | |
| 28 | + } | |
| 29 | + | |
| 30 | + r += "C"; | |
| 31 | + r += (chans+'0'); | |
| 32 | + | |
| 33 | + return r; | |
| 34 | +} | |
| 10 | 35 | |
| 11 | 36 | namespace br |
| 12 | 37 | { |
| ... | ... | @@ -17,12 +42,14 @@ namespace br |
| 17 | 42 | private: |
| 18 | 43 | void project(const Template &src, Template &dst) const |
| 19 | 44 | { |
| 45 | + // note: if you convert the image to grayscale, you get 8UC1 | |
| 46 | + | |
| 20 | 47 | // upload the src mat to the GPU |
| 21 | 48 | GpuMat srcGpuMat, dstGpuMat; |
| 22 | 49 | srcGpuMat.upload(src.m()); |
| 23 | 50 | dstGpuMat.upload(src.m()); |
| 24 | 51 | |
| 25 | - br_cuda_device_wrapper(); | |
| 52 | + br_cuda_device_wrapper(srcGpuMat, dstGpuMat); | |
| 26 | 53 | |
| 27 | 54 | dstGpuMat.download(dst.m()); |
| 28 | 55 | ... | ... |
openbr/plugins/cuda/passthrough.cu
| 1 | -__global__ void br_cuda_device_kernel() { | |
| 1 | +// note: Using 8-bit unsigned 1 channel images | |
| 2 | 2 | |
| 3 | +#include <opencv2/gpu/gpu.hpp> | |
| 4 | + | |
| 5 | +using namespace cv; | |
| 6 | +using namespace cv::gpu; | |
| 7 | + | |
/**
 * Byte-wise 2D copy kernel: dst(row, col) = src(row, col).
 *
 * Launch layout: 2D grid of 2D blocks; the thread with global index (x, y)
 * handles byte column x of row y. The grid is allowed to overhang the image:
 * threads whose index falls outside [0, cols) x [0, rows) do nothing.
 * No shared memory is used.
 *
 * @param srcPtr   Base address of the source rows (read only).
 * @param dstPtr   Base address of the destination rows.
 * @param srcStep  Distance in bytes between consecutive source rows.
 * @param dstStep  Distance in bytes between consecutive destination rows.
 * @param cols     Number of bytes to copy per row.
 * @param rows     Number of rows to copy.
 */
__global__ void br_cuda_device_kernel(const uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) {
    const int rowInd = blockIdx.y * blockDim.y + threadIdx.y;
    const int colInd = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid is rounded up to whole blocks, so edge blocks contain threads
    // that lie outside the image; they must not touch memory.
    if (rowInd >= rows || colInd >= cols) {
        return;
    }

    const uint8_t* rowSrcPtr = srcPtr + rowInd * srcStep;
    uint8_t* rowDstPtr = dstPtr + rowInd * dstStep;

    rowDstPtr[colInd] = rowSrcPtr[colInd];
}

/**
 * Copy the contents of src into dst on the GPU (passthrough).
 *
 * dst is (re)allocated to the size and type of src when it does not already
 * match. Rows are copied byte by byte, so any element type works, not only
 * 8-bit single-channel images. An empty src is a no-op.
 *
 * The launch is issued without an explicit stream argument and this function
 * does not synchronize; only launch-configuration errors are reported here
 * (via CV_Assert). NOTE(review): callers presumably rely on a later blocking
 * call such as GpuMat::download to wait for the kernel -- confirm.
 *
 * @param src  Source image in GPU memory.
 * @param dst  Destination image in GPU memory.
 */
void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst) {
    const int imageHeight = src.rows;
    // Width of one row in bytes: the kernel copies single bytes, so
    // multi-byte / multi-channel elements need cols * elemSize() columns.
    const int rowBytes = src.cols * static_cast<int>(src.elemSize());

    // Nothing to copy; a zero-sized grid would be an invalid launch anyway.
    if (imageHeight <= 0 || rowBytes <= 0) {
        return;
    }

    // Make sure the destination can hold the copy before taking its pointer.
    dst.create(src.rows, src.cols, src.type());

    // convert the GpuMats to pointers
    const uint8_t* srcPtr = (const uint8_t*)src.data;
    uint8_t* dstPtr = (uint8_t*)dst.data;

    // make 8 * 8 = 64 square block; round the grid up so the right and
    // bottom edges are covered when the size is not a multiple of 8
    const dim3 threadsPerBlock(8, 8);
    const dim3 numBlocks((rowBytes + threadsPerBlock.x - 1) / threadsPerBlock.x,
                         (imageHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

    br_cuda_device_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, rowBytes, imageHeight);

    // Kernel launches do not return a status; fetch it explicitly so a bad
    // launch configuration does not go unnoticed.
    const cudaError_t launchStatus = cudaGetLastError();
    CV_Assert(launchStatus == cudaSuccess);
}
| 33 | + | |
| 34 | +// read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel | ... | ... |