created basic passthrough plugin (doesn't quite work yet)

DepthDeluxe
1 parent 4148de2f
Showing 2 changed files with 59 additions and 5 deletions
openbr/plugins/cuda/passthrough.cpp
openbr/plugins/cuda/passthrough.cu
@@ -6,7 +6,32 @@
 using namespace cv;
 using namespace cv::gpu;
  
-extern void br_cuda_device_wrapper();
+#include <iostream>
+
+extern void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst);
+
+// Debug helper: turns an OpenCV matrix type code into a readable name made of
+// the depth, the letter 'C' and the channel count, e.g. "8UC1" or "32FC3".
+// Depths that are not one of the seven listed below are reported as "User".
+string type2str(int type) {
+  const int depth = type & CV_MAT_DEPTH_MASK;
+  // kept as an int: squeezing the count into a uchar would truncate large
+  // channel counts before they are printed
+  const int chans = 1 + (type >> CV_CN_SHIFT);
+
+  string r;
+
+  switch ( depth ) {
+    case CV_8U:  r = "8U"; break;
+    case CV_8S:  r = "8S"; break;
+    case CV_16U: r = "16U"; break;
+    case CV_16S: r = "16S"; break;
+    case CV_32S: r = "32S"; break;
+    case CV_32F: r = "32F"; break;
+    case CV_64F: r = "64F"; break;
+    default:     r = "User"; break;
+  }
+
+  r += "C";
+  // format the count as a decimal number; adding it to '0' only yields a
+  // digit for counts below 10
+  r += cv::format("%d", chans);
+
+  return r;
+}
  
 namespace br
 {
@@ -17,12 +42,14 @@ namespace br
 private:
     void project(const Template &src, Template &dst) const
     {
+      // note: if you convert the image to grayscale, you get 8UC1
+
       // upload the src mat to the GPU
       GpuMat srcGpuMat, dstGpuMat;
       srcGpuMat.upload(src.m());
       dstGpuMat.upload(src.m());
  
-      br_cuda_device_wrapper();
+      br_cuda_device_wrapper(srcGpuMat, dstGpuMat);
  
       dstGpuMat.download(dst.m());
  
-__global__ void br_cuda_device_kernel() {
+// note: Using 8-bit unsigned 1 channel images
  
+#include <opencv2/gpu/gpu.hpp>
+
+using namespace cv;
+using namespace cv::gpu;
+
+/*
+ * Pass-through kernel: copies a 2-D buffer from srcPtr to dstPtr, one byte
+ * per thread.
+ *
+ * Launch layout: 2-D grid of 2-D blocks; x indexes the byte within a row and
+ * y indexes the row. The grid may be larger than the image, threads that fall
+ * outside of it return without touching memory. No shared memory is used.
+ *
+ *   srcPtr / dstPtr   - first byte of the source / destination buffer
+ *   srcStep / dstStep - distance in bytes between the starts of two rows
+ *   cols              - number of bytes to copy in each row (equal to the
+ *                       pixel column count for 8-bit single channel images)
+ *   rows              - number of rows to copy
+ */
+__global__ void br_cuda_device_kernel(const uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) {
+  const int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
+  const int colInd = blockIdx.x*blockDim.x+threadIdx.x;
+
+  // the grid is rounded up to whole blocks, so threads past the right or
+  // bottom edge must not read or write anything
+  if (rowInd >= rows || colInd >= cols) {
+    return;
+  }
+
+  // rows are addressed through the step because a row may be padded
+  const uint8_t* rowSrcPtr = srcPtr + rowInd*srcStep;
+  uint8_t* rowDstPtr = dstPtr + rowInd*dstStep;
+
+  rowDstPtr[colInd] = rowSrcPtr[colInd];
 }
  
-void br_cuda_device_wrapper() {
-  br_cuda_device_kernel<<<1,1>>>();
+/*
+ * Host entry point: copies the contents of src into dst on the GPU.
+ *
+ * dst is (re)allocated to the size and type of src when it does not already
+ * match. An empty src leaves dst untouched. The copy is done byte-wise per
+ * row, so it does not depend on the element type of the image.
+ *
+ * The kernel is launched on the default stream and is not synchronized here.
+ * A failed launch is reported through CV_Error with CV_GpuApiCallError.
+ */
+void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst) {
+  // nothing to copy, and a zero-sized grid is not a valid launch configuration
+  if (src.empty()) {
+    return;
+  }
+
+  // make sure every row written by the kernel exists in dst
+  dst.create(src.rows, src.cols, src.type());
+
+  // convert the GpuMats to pointers
+  const uint8_t* srcPtr = (const uint8_t*)src.data;
+  uint8_t* dstPtr = (uint8_t*)dst.data;
+
+  // width is measured in bytes so that multi-channel or wider element types
+  // are copied completely, not only their first src.cols bytes
+  const int rowBytes = src.cols * (int)src.elemSize();
+  const int imageHeight = src.rows;
+
+  // 32 * 8 = 256 threads per block; threads adjacent in x touch adjacent bytes
+  const dim3 threadsPerBlock(32, 8);
+  // round up so that the partial blocks on the right and bottom edges are
+  // launched as well (the kernel bounds-checks the overhang)
+  const dim3 numBlocks((rowBytes + threadsPerBlock.x - 1) / threadsPerBlock.x,
+                       (imageHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);
+
+  br_cuda_device_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, rowBytes, imageHeight);
+
+  // a kernel launch returns nothing, a bad configuration only shows up here
+  const cudaError_t launchStatus = cudaGetLastError();
+  if (launchStatus != cudaSuccess) {
+    CV_Error(CV_GpuApiCallError, cudaGetErrorString(launchStatus));
+  }
 }
+
+// read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel