fixed copyto and copyfrom functions to be thread safe

DepthDeluxe
1 parent bdfd554c
Showing 4 changed files with 61 additions and 38 deletions
openbr/plugins/cuda/copyfrom.cpp
openbr/plugins/cuda/copyfrom.cu
openbr/plugins/cuda/copyto.cpp
openbr/plugins/cuda/copyto.cu
 #include <iostream>
  
 #include <opencv2/opencv.hpp>
-#include <opencv2/gpu/gpu.hpp>
  
 #include <openbr/plugins/openbr_internal.h>
  
 using namespace std;
  
 using namespace cv;
-using namespace cv::gpu;
  
+// Forward declaration of the CUDA-side copy-back helper; its definition is compiled separately (presumably copyfrom.cu - confirm against the build).
+namespace br { namespace cuda { namespace cudacopyfrom {
+  // Only the unsigned char form is enabled; templated form kept for reference: template <typename T> void wrapper(void* src, T* out, int rows, int cols)
+  void wrapper(void* src, unsigned char* out, const int rows, const int cols);
+}}}
  
 namespace br
 {
@@ -20,20 +23,16 @@ namespace br
 private:
     void project(const Template &src, Template &dst) const
     {
-      // reassemble the integer and then build pointer to it
-      uint64_t gpuMatInt = (((uint64_t)src.m().at<int>(1,0)) << (uint64_t)32) + ((uint64_t)src.m().at<int>(0,0));
-      GpuMat* gpuMat = (GpuMat*)gpuMatInt;
+      // pull the data back out of the Mat
+      void* const* dataPtr = src.m().ptr<void*>();
+      void* cudaMemPtr = dataPtr[0];
+      int rows = *((int*)dataPtr[1]);
+      int cols = *((int*)dataPtr[2]);
+      int type = *((int*)dataPtr[3]);
  
-      printf("gpuMatInt: %li\n", gpuMatInt);
-      printf("m.at(0,0): %i\nm.at(1,0): %i\n", src.m().at<int>(0,0), src.m().at<int>(1,0));
+      dst = Mat(rows, cols, type);
  
-      // download the data back into the destination
-      Size size = gpuMat->size();
-      Mat out = Mat(size.height, size.width, gpuMat->depth());
-
-      gpuMat->download(out);
-
-      dst = out;
+      br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dst.m().ptr<unsigned char>(), rows, cols);
     }
   };
  
+namespace br { namespace cuda { namespace cudacopyfrom {
+  /**
+   * Copies a rows x cols image of one byte per element from device memory
+   * into a host buffer, then releases the device allocation.
+   *
+   * @param src  device pointer to the image; it is always passed to cudaFree
+   *             before returning, so the caller must not use it afterwards
+   * @param out  host buffer with room for at least rows*cols bytes
+   * @param rows image height in elements
+   * @param cols image width in elements
+   *
+   * NOTE(review): the void signature gives no way to report a failed copy;
+   * on failure the contents of `out` are left as they were.
+   */
+  //template <typename T> void wrapper(void* src, T* out, int rows, int cols) {
+  void wrapper(void* src, unsigned char* out, const int rows, const int cols) {
+    // Skip the transfer when there is nothing valid to copy (empty or negative
+    // dimensions, missing buffers); the device allocation is still released.
+    if (src != 0 && out != 0 && rows > 0 && cols > 0) {
+      // Widen before multiplying so large images cannot overflow int arithmetic.
+      const size_t numBytes = (size_t)rows * (size_t)cols * sizeof(unsigned char);
+      cudaMemcpy(out, src, numBytes, cudaMemcpyDeviceToHost);
+    }
+    cudaFree(src);
+  }
+}}}
 #include <iostream>
  
 #include <opencv2/opencv.hpp>
-#include <opencv2/gpu/gpu.hpp>
  
 #include <openbr/plugins/openbr_internal.h>
  
 using namespace std;
  
 using namespace cv;
-using namespace cv::gpu;
+
+extern string type2str(int type);
+
+namespace br { namespace cuda { namespace cudacopyto {
+  // Forward declaration of the CUDA-side upload helper; only the unsigned char form is enabled.
+  // Templated form kept for reference: template<typename T> void wrapper(const T* in, void** out, const int rows, const int cols);
+  void wrapper(const unsigned char* in, void** out, const int rows, const int cols);
+}}}
  
 namespace br
 {
@@ -19,32 +25,35 @@ namespace br
 private:
     void project(const Template &src, Template &dst) const
     {
-      // get the mat to send to the GPU
-      GpuMat* gpuMat = new GpuMat;
-
-      try
-      {
-        // copy the contents to the GPU
-        gpuMat->upload(src.m());
+      // Uploads an 8-bit single-channel image to GPU memory and stores a small
+      // descriptor Mat (device pointer plus rows, cols, type) in dst.
+      //
+      // The CUDA wrapper copies rows*cols bytes linearly starting at the first
+      // row pointer, so a Mat with row padding (e.g. a ROI view) has to be
+      // packed into a continuous buffer first; clone() produces one.
+      const Mat srcMat = src.m().isContinuous() ? src.m() : src.m().clone();
+      const int rows = srcMat.rows;
+      const int cols = srcMat.cols;
+
+      // Start from a defined value rather than an indeterminate pointer.
+      void* cudaMemPtr = NULL;
+      switch(srcMat.type()) {
+      //case CV_32FC1:
+      //  br::cuda::cudacopyfrom::wrapper<float>(srcMat.ptr<float>(), &cudaMemPtr, rows, cols);
+      //  break;
+      case CV_8UC1:
+        //br::cuda::cudacopyfrom::wrapper<unsigned char>(srcMat.ptr<unsigned char>(), &cudaMemPtr, rows, cols);
+        br::cuda::cudacopyto::wrapper(srcMat.ptr<unsigned char>(), &cudaMemPtr, rows, cols);
+        break;
+      default:
+        // Unsupported element type: report it and leave dst untouched.
+        cout << "ERR: Invalid image type! " << type2str(srcMat.type()) << endl;
+        return;
       }
-      catch(const cv::Exception& ex)
-      {
-        cout << "Error: " << ex.what() << endl;
-      }
-
-      // now create a new Mat that contains the 64-bit pointer
-      Mat m = Mat(2, 1, CV_32S);
  
-      // pointer magic
-      uint64_t gpuMatInt = (uint64_t)gpuMat;
-      m.at<int>(0,0) = (int32_t)(gpuMatInt &  0x00000000FFFFFFFF);
-      m.at<int>(1,0) = (int32_t)((gpuMatInt & 0xFFFFFFFF00000000) >> (uint64_t)32);
+      // output is a 4x1 Mat that is treated as four void* slots
+      Mat dstMat = Mat(4, 1, DataType<void*>::type);
+      void** dstMatData = dstMat.ptr<void*>();
  
-      printf("gpuMatInt: %li\n", gpuMatInt);
-      printf("m.at(0,0): %i\nm.at(1,0): %i\n", m.at<int>(0,0), m.at<int>(1,0));
+      // slot 0: device pointer; slots 1-3: pointers to heap ints holding rows,
+      // cols and the OpenCV type. Nothing here releases the device buffer or
+      // the three ints; the consumer of this descriptor has to do that.
+      dstMatData[0] = cudaMemPtr;
+      dstMatData[1] = new int; *((int*)dstMatData[1]) = rows;
+      dstMatData[2] = new int; *((int*)dstMatData[2]) = cols;
+      dstMatData[3] = new int; *((int*)dstMatData[3]) = srcMat.type();
  
-      // save away in the destination mat
-      dst += m;
+      dst = dstMat;
     }
   };
  
+namespace br { namespace cuda { namespace cudacopyto {
+  /**
+   * Allocates a device buffer and uploads a rows x cols image of one byte
+   * per element into it.
+   *
+   * @param in   host pointer to rows*cols contiguous bytes
+   * @param out  receives the device pointer; set to null when the input is
+   *             empty/invalid or when allocation or upload fails
+   * @param rows image height in elements
+   * @param cols image width in elements
+   *
+   * On success the caller owns *out and must release it with cudaFree.
+   */
+  //template<typename T>
+  //void wrapper(const T* in, void** out, const int rows, const int cols) {
+  void wrapper(const unsigned char* in, void** out, const int rows, const int cols) {
+    // Never hand an indeterminate pointer back to the caller.
+    *out = 0;
+    if (in == 0 || rows <= 0 || cols <= 0) {
+      return;
+    }
+
+    // Widen before multiplying so large images cannot overflow int arithmetic.
+    const size_t numBytes = (size_t)rows * (size_t)cols * sizeof(unsigned char);
+
+    if (cudaMalloc(out, numBytes) != cudaSuccess) {
+      *out = 0;
+      return;
+    }
+
+    // A failed upload would leave the buffer with undefined contents, so
+    // release it and report failure through a null pointer instead.
+    if (cudaMemcpy(*out, in, numBytes, cudaMemcpyHostToDevice) != cudaSuccess) {
+      cudaFree(*out);
+      *out = 0;
+    }
+  }
+}}}