From 34ee86e8f185e65ce3478f50db7ad5eebadb206f Mon Sep 17 00:00:00 2001
From: DepthDeluxe <cheinzmann3@gmail.com>
Date: Mon, 25 Jan 2016 15:51:44 -0500
Subject: [PATCH] created basic passthrough plugin (doesn't quite work yet)

---
 openbr/plugins/cuda/passthrough.cpp | 31 +++++++++++++++++++++++++++++--
 openbr/plugins/cuda/passthrough.cu  | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 59 insertions(+), 5 deletions(-)
diff --git a/openbr/plugins/cuda/passthrough.cpp b/openbr/plugins/cuda/passthrough.cpp
index 13b9f19..f72a4a8 100644
--- a/openbr/plugins/cuda/passthrough.cpp
+++ b/openbr/plugins/cuda/passthrough.cpp
@@ -6,7 +6,43 @@
 using namespace cv;
 using namespace cv::gpu;
 
-extern void br_cuda_device_wrapper();
+#include <iostream>
+
+extern void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst);
+
+// Builds a human-readable name for an OpenCV matrix type code, e.g. "8UC3".
+// The depth bits select the prefix ("8U", "32F", ...; "User" for any other
+// depth) and the channel count is appended in decimal after "C".
+string type2str(int type) {
+  string r;
+
+  const int depth = type & CV_MAT_DEPTH_MASK;
+  // kept as int: a uchar would truncate channel counts of 256 and above
+  int chans = 1 + (type >> CV_CN_SHIFT);
+
+  switch ( depth ) {
+    case CV_8U:  r = "8U"; break;
+    case CV_8S:  r = "8S"; break;
+    case CV_16U: r = "16U"; break;
+    case CV_16S: r = "16S"; break;
+    case CV_32S: r = "32S"; break;
+    case CV_32F: r = "32F"; break;
+    case CV_64F: r = "64F"; break;
+    default:     r = "User"; break;
+  }
+
+  r += "C";
+
+  // emit every decimal digit; adding '0' to the whole count is only right for 1..9
+  string digits;
+  do {
+    digits.insert(digits.begin(), static_cast<char>('0' + chans % 10));
+    chans /= 10;
+  } while (chans > 0);
+  r += digits;
+
+  return r;
+}
 
 namespace br
 {
@@ -17,12 +42,14 @@ namespace br
 private:
     void project(const Template &src, Template &dst) const
     {
+      // note: if you convert the image to grayscale, you get 8UC1
+
       // upload the src mat to the GPU
       GpuMat srcGpuMat, dstGpuMat;
       srcGpuMat.upload(src.m());
       dstGpuMat.upload(src.m());
 
-      br_cuda_device_wrapper();
+      br_cuda_device_wrapper(srcGpuMat, dstGpuMat);
 
       dstGpuMat.download(dst.m());
 
diff --git a/openbr/plugins/cuda/passthrough.cu b/openbr/plugins/cuda/passthrough.cu
index 983a24e..1e593db 100644
--- a/openbr/plugins/cuda/passthrough.cu
+++ b/openbr/plugins/cuda/passthrough.cu
@@ -1,7 +1,68 @@
-__global__ void br_cuda_device_kernel() {
+// note: rows are copied byte by byte, so any depth / channel count works
 
+#include <opencv2/gpu/gpu.hpp>
+
+using namespace cv;
+using namespace cv::gpu;
+
+// Copies a pitched 2D byte buffer from srcPtr to dstPtr, one byte per thread.
+//
+// Launch layout: 2D grid of 2D blocks; thread (x, y) handles byte x of row y.
+// The grid may overhang the image: threads outside cols x rows do nothing.
+//
+//   srcPtr, dstPtr   - device pointers to the first row of each buffer
+//   srcStep, dstStep - distance in bytes between consecutive rows
+//   cols             - number of bytes to copy in each row
+//   rows             - number of rows to copy
+__global__ void br_cuda_device_kernel(uint8_t* srcPtr, uint8_t* dstPtr, size_t srcStep, size_t dstStep, int cols, int rows) {
+  int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
+  int colInd = blockIdx.x*blockDim.x+threadIdx.x;
+
+  // the grid is rounded up to whole blocks, so edge threads can fall outside
+  if (rowInd >= rows || colInd >= cols) {
+    return;
+  }
+
+  const uint8_t* rowSrcPtr = srcPtr + rowInd*srcStep;
+  uint8_t* rowDstPtr = dstPtr + rowInd*dstStep;
+
+  rowDstPtr[colInd] = rowSrcPtr[colInd];
 }
 
-void br_cuda_device_wrapper() {
-  br_cuda_device_kernel<<<1,1>>>();
+// Copies src into dst on the GPU by launching br_cuda_device_kernel.
+// dst is allocated (or reallocated) to match src's size and type if it does
+// not already; an empty src returns immediately and leaves dst untouched.
+// Raises an OpenCV error (via CV_Error) if the kernel launch is rejected.
+void br_cuda_device_wrapper(GpuMat& src, GpuMat& dst) {
+  // a zero-sized grid is an invalid launch configuration
+  if (src.empty()) {
+    return;
+  }
+
+  // does nothing when dst already matches src's size and type
+  dst.create(src.rows, src.cols, src.type());
+
+  // convert the GpuMats to pointers
+  uint8_t* srcPtr = (uint8_t*)src.data;
+  uint8_t* dstPtr = (uint8_t*)dst.data;
+
+  // width in bytes, so multi-byte and multi-channel pixels are copied whole
+  int rowBytes = (int)(src.cols * src.elemSize());
+  int imageHeight = src.rows;
+
+  // make 8 * 8 = 64 square block
+  dim3 threadsPerBlock(8, 8);
+  // round up so the right and bottom edges are covered when not a multiple of 8
+  dim3 numBlocks((rowBytes + threadsPerBlock.x - 1) / threadsPerBlock.x,
+                 (imageHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);
+
+  br_cuda_device_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, rowBytes, imageHeight);
+
+  // launches fail silently; surface a bad configuration as an OpenCV error
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    CV_Error(CV_GpuApiCallError, cudaGetErrorString(err));
+  }
 }
+
+// read http://stackoverflow.com/questions/31927297/array-of-ptrstepszgpumat-to-a-c-cuda-kernel
--
libgit2 0.21.4