From aaabb3cbe5738cd9f9418c4b6b2ddb78c5937559 Mon Sep 17 00:00:00 2001
From: DepthDeluxe <cheinzmann3@gmail.com>
Date: Fri, 12 Feb 2016 13:54:50 -0500
Subject: [PATCH] fixed copyto and copyfrom functions to be thread safe

---
 openbr/plugins/cuda/copyfrom.cpp | 27 +++++++++++++--------------
 openbr/plugins/cuda/copyfrom.cu  |  7 +++++++
 openbr/plugins/cuda/copyto.cpp   | 57 +++++++++++++++++++++++++++++++++------------------------
 openbr/plugins/cuda/copyto.cu    |  8 ++++++++
 4 files changed, 61 insertions(+), 38 deletions(-)
 create mode 100644 openbr/plugins/cuda/copyfrom.cu
 create mode 100644 openbr/plugins/cuda/copyto.cu
diff --git a/openbr/plugins/cuda/copyfrom.cpp b/openbr/plugins/cuda/copyfrom.cpp
index e624aae..38dc913 100644
--- a/openbr/plugins/cuda/copyfrom.cpp
+++ b/openbr/plugins/cuda/copyfrom.cpp
@@ -1,15 +1,18 @@
 #include <iostream>
 
 #include <opencv2/opencv.hpp>
-#include <opencv2/gpu/gpu.hpp>
 
 #include <openbr/plugins/openbr_internal.h>
 
 using namespace std;
 
 using namespace cv;
-using namespace cv::gpu;
 
+// extern CUDA declaration
+namespace br { namespace cuda { namespace cudacopyfrom {
+  //template <typename T> void wrapper(void* src, T* out, int rows, int cols) {
+  void wrapper(void* src, unsigned char* out, const int rows, const int cols);
+}}}
 
 namespace br
 {
@@ -20,20 +23,28 @@ namespace br
 private:
     void project(const Template &src, Template &dst) const
     {
-      // reassemble the integer and then build pointer to it
-      uint64_t gpuMatInt = (((uint64_t)src.m().at<int>(1,0)) << (uint64_t)32) + ((uint64_t)src.m().at<int>(0,0));
-      GpuMat* gpuMat = (GpuMat*)gpuMatInt;
+      // the input is a 4x1 Mat of pointer-sized slots as written by copyto.cpp:
+      //   [0] device buffer, [1] int* rows, [2] int* cols, [3] int* type
+      void* const* dataPtr = src.m().ptr<void*>();
+      void* cudaMemPtr = dataPtr[0];
+      int* rowsPtr = (int*)dataPtr[1];
+      int* colsPtr = (int*)dataPtr[2];
+      int* typePtr = (int*)dataPtr[3];
+      const int rows = *rowsPtr;
+      const int cols = *colsPtr;
+      const int type = *typePtr;
+
+      // the three ints come from `new int` on the copy-to side and are not
+      // released anywhere else, so free them here once their values are read
+      delete rowsPtr;
+      delete colsPtr;
+      delete typePtr;
 
-      printf("gpuMatInt: %li\n", gpuMatInt);
-      printf("m.at(0,0): %i\nm.at(1,0): %i\n", src.m().at<int>(0,0), src.m().at<int>(1,0));
+      // allocate the host image that will receive the pixels
+      dst = Mat(rows, cols, type);
 
-      // download the data back into the destination
-      Size size = gpuMat->size();
-      Mat out = Mat(size.height, size.width, gpuMat->depth());
-
-      gpuMat->download(out);
-
-      dst = out;
+      // download into dst; the wrapper also frees the device buffer
+      br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dst.m().ptr<unsigned char>(), rows, cols);
     }
   };
 
diff --git a/openbr/plugins/cuda/copyfrom.cu b/openbr/plugins/cuda/copyfrom.cu
new file mode 100644
index 0000000..f6f988c
--- /dev/null
+++ b/openbr/plugins/cuda/copyfrom.cu
@@ -0,0 +1,35 @@
+#include <cstddef>
+#include <cstdio>
+
+#include <cuda_runtime.h>
+
+namespace br { namespace cuda { namespace cudacopyfrom {
+  /**
+   * Downloads a rows x cols image of one-byte elements from device memory
+   * into a host buffer, then frees the device allocation.
+   *
+   * src  - device pointer to the image; it is always released with cudaFree,
+   *        so it must not be used after this call
+   * out  - host buffer with room for at least rows*cols bytes
+   * rows - image height in elements
+   * cols - image width in elements
+   *
+   * CUDA failures are reported on stderr; nothing is returned to the caller.
+   */
+  //template <typename T> void wrapper(void* src, T* out, int rows, int cols) {
+  void wrapper(void* src, unsigned char* out, const int rows, const int cols) {
+    // skip the transfer for empty or invalid dimensions; size_t arithmetic
+    // keeps rows*cols from overflowing int on large images
+    if (rows > 0 && cols > 0) {
+      const size_t bytes = (size_t)rows * (size_t)cols * sizeof(unsigned char);
+      const cudaError_t copyErr = cudaMemcpy(out, src, bytes, cudaMemcpyDeviceToHost);
+      if (copyErr != cudaSuccess)
+        fprintf(stderr, "ERR: cudacopyfrom cudaMemcpy failed: %s\n", cudaGetErrorString(copyErr));
+    }
+
+    // release the device buffer even when the copy failed so it cannot leak
+    const cudaError_t freeErr = cudaFree(src);
+    if (freeErr != cudaSuccess)
+      fprintf(stderr, "ERR: cudacopyfrom cudaFree failed: %s\n", cudaGetErrorString(freeErr));
+  }
+}}}
diff --git a/openbr/plugins/cuda/copyto.cpp b/openbr/plugins/cuda/copyto.cpp
index 36f683f..3e8cd48 100644
--- a/openbr/plugins/cuda/copyto.cpp
+++ b/openbr/plugins/cuda/copyto.cpp
@@ -1,14 +1,20 @@
 #include <iostream>
 
 #include <opencv2/opencv.hpp>
-#include <opencv2/gpu/gpu.hpp>
 
 #include <openbr/plugins/openbr_internal.h>
 
 using namespace std;
 
 using namespace cv;
-using namespace cv::gpu;
+
+extern string type2str(int type);
+
+namespace br { namespace cuda { namespace cudacopyto {
+  //template<typename T>
+  //void wrapper(const T* in, void** out, const int rows, const int cols);
+  void wrapper(const unsigned char* in, void** out, const int rows, const int cols);
+}}}
 
 namespace br
 {
@@ -19,32 +25,44 @@ namespace br
 private:
     void project(const Template &src, Template &dst) const
     {
-      // get the mat to send to the GPU
-      GpuMat* gpuMat = new GpuMat;
-
-      try
-      {
-        // copy the contents to the GPU
-        gpuMat->upload(src.m());
+      // the GPU upload copies rows*cols bytes from one flat buffer, so make
+      // sure the pixels are contiguous (an ROI/sub-matrix view is not)
+      Mat srcMat = src.m();
+      if (!srcMat.isContinuous())
+        srcMat = srcMat.clone();
+      const int rows = srcMat.rows;
+      const int cols = srcMat.cols;
+
+      // stays NULL unless the wrapper hands back a device buffer
+      void* cudaMemPtr = NULL;
+      switch(srcMat.type()) {
+      // TODO: add CV_32FC1 once cudacopyto::wrapper is templated on the element type
+      case CV_8UC1:
+        br::cuda::cudacopyto::wrapper(srcMat.ptr<unsigned char>(), &cudaMemPtr, rows, cols);
+        break;
+      default:
+        cout << "ERR: Invalid image type! " << type2str(srcMat.type()) << endl;
+        return;
       }
-      catch(const cv::Exception& ex)
-      {
-        cout << "Error: " << ex.what() << endl;
-      }
-
-      // now create a new Mat that contains the 64-bit pointer
-      Mat m = Mat(2, 1, CV_32S);
 
-      // pointer magic
-      uint64_t gpuMatInt = (uint64_t)gpuMat;
-      m.at<int>(0,0) = (int32_t)(gpuMatInt &  0x00000000FFFFFFFF);
-      m.at<int>(1,0) = (int32_t)((gpuMatInt & 0xFFFFFFFF00000000) >> (uint64_t)32);
+      // a non-empty image without a device buffer means the upload failed
+      if (cudaMemPtr == NULL && !srcMat.empty()) {
+        cout << "ERR: Unable to copy image to the GPU!" << endl;
+        return;
+      }
+
+      // output is a 4x1 Mat whose elements are used as void* slots, not image data
+      Mat dstMat = Mat(4, 1, DataType<void*>::type);
+      void** dstMatData = dstMat.ptr<void*>();
 
-      printf("gpuMatInt: %li\n", gpuMatInt);
-      printf("m.at(0,0): %i\nm.at(1,0): %i\n", m.at<int>(0,0), m.at<int>(1,0));
+      // slot 0: device pointer; slots 1-3: heap-allocated ints holding rows,
+      // cols and the OpenCV type, which the consumer reads through the pointers
+      dstMatData[0] = cudaMemPtr;
+      dstMatData[1] = new int(rows);
+      dstMatData[2] = new int(cols);
+      dstMatData[3] = new int(srcMat.type());
 
-      // save away in the destination mat
-      dst += m;
+      dst = dstMat;
     }
   };
 
diff --git a/openbr/plugins/cuda/copyto.cu b/openbr/plugins/cuda/copyto.cu
new file mode 100644
index 0000000..c4885a4
--- /dev/null
+++ b/openbr/plugins/cuda/copyto.cu
@@ -0,0 +1,45 @@
+#include <cstddef>
+#include <cstdio>
+
+#include <cuda_runtime.h>
+
+namespace br { namespace cuda { namespace cudacopyto {
+  /**
+   * Allocates device memory for a rows x cols image of one-byte elements and
+   * uploads the host data into it.
+   *
+   * in   - host buffer holding at least rows*cols contiguous bytes
+   * out  - receives the device pointer; set to NULL when the image is empty
+   *        or when allocation/upload fails, so callers never see a stale or
+   *        uninitialized pointer
+   * rows - image height in elements
+   * cols - image width in elements
+   *
+   * CUDA failures are reported on stderr; nothing is returned to the caller.
+   */
+  //template<typename T>
+  //void wrapper(const T* in, void** out, const int rows, const int cols) {
+  void wrapper(const unsigned char* in, void** out, const int rows, const int cols) {
+    *out = NULL;
+    if (rows <= 0 || cols <= 0)
+      return;
+
+    // size_t arithmetic keeps rows*cols from overflowing int on large images
+    const size_t bytes = (size_t)rows * (size_t)cols * sizeof(unsigned char);
+
+    cudaError_t err = cudaMalloc(out, bytes);
+    if (err != cudaSuccess) {
+      fprintf(stderr, "ERR: cudacopyto cudaMalloc failed: %s\n", cudaGetErrorString(err));
+      *out = NULL;
+      return;
+    }
+
+    err = cudaMemcpy(*out, in, bytes, cudaMemcpyHostToDevice);
+    if (err != cudaSuccess) {
+      fprintf(stderr, "ERR: cudacopyto cudaMemcpy failed: %s\n", cudaGetErrorString(err));
+      // do not hand back a buffer whose contents are undefined
+      cudaFree(*out);
+      *out = NULL;
+    }
+  }
+}}}
--
libgit2 0.21.4