added GpuMatManager on top of CUDALBP

DepthDeluxe
1 parent f7cd4d52
Showing 3 changed files with 166 additions and 54 deletions
openbr/plugins/cuda/GpuMatManager.cpp
openbr/plugins/cuda/GpuMatManager.hpp
openbr/plugins/cuda/cudalbp.cpp
+#include <pthread.h>
+#include <semaphore.h>
+
+#include <opencv2/opencv.hpp>
+
+#include "GpuMatManager.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+namespace br { namespace cuda {
+  /**
+   * Builds a pool of `num` default-constructed GpuMats, all marked as free,
+   * and creates the two mutexes and the semaphore used by the manager.
+   *
+   * @param num number of GpuMats held by the pool
+   */
+  GpuMatManager::GpuMatManager(int num) {
+    _numMats = num;
+
+    // per-slot tables: the mat itself and its "taken" flag
+    _mats = static_cast<GpuMat**>(malloc(num * sizeof(GpuMat*)));
+    _matTaken = static_cast<bool**>(malloc(num * sizeof(bool*)));
+    for (int slot = 0; slot < num; ++slot) {
+      _mats[slot] = new GpuMat();
+      _matTaken[slot] = new bool(false);
+    }
+
+    // lock protecting the taken table
+    _matTakenLock = new pthread_mutex_t;
+    pthread_mutex_init(_matTakenLock, NULL);
+
+    // lock serializing OpenCV GPU operations
+    _openCvOperationLock = new pthread_mutex_t;
+    pthread_mutex_init(_openCvOperationLock, NULL);
+
+    // semaphore starts at the pool size; pshared = 0
+    _matSemaphore = new sem_t;
+    sem_init(_matSemaphore, 0, _numMats);
+  }
+
+  /**
+   * Claims the first free GpuMat of the pool.
+   *
+   * @return the claimed mat, or NULL when every mat is currently taken
+   *
+   * NOTE(review): the semaphore is not waited on here, so this call never
+   * blocks and callers must be prepared for a NULL result. release() still
+   * posts the semaphore, so its count does not track the free mats.
+   */
+  GpuMat* GpuMatManager::reserve() {
+    GpuMat* claimed = NULL;
+
+    pthread_mutex_lock(_matTakenLock);
+    for (int slot = 0; slot < _numMats; ++slot) {
+      bool& taken = *_matTaken[slot];
+      if (taken) {
+        continue;
+      }
+
+      // first free slot wins
+      taken = true;
+      claimed = _mats[slot];
+      break;
+    }
+    pthread_mutex_unlock(_matTakenLock);
+
+    return claimed;
+  }
+
+  /**
+   * Copies a host image into a reserved GpuMat, reallocating the GpuMat
+   * first when its size differs from the image.
+   *
+   * The reallocation and the transfer run in one critical section of the
+   * OpenCV operation lock.
+   *
+   * @param reservedMat destination mat, obtained from reserve()
+   * @param mat         host image to copy to the GPU
+   */
+  void GpuMatManager::upload(GpuMat* reservedMat, Mat& mat) {
+    pthread_mutex_lock(_openCvOperationLock);
+
+    // check the image dimensions
+    if (reservedMat->size() != mat.size()) {
+      reservedMat->release();
+      reservedMat->create(mat.size(), mat.type());
+    }
+
+    // upload the image; one transfer is sufficient
+    reservedMat->upload(mat);
+
+    pthread_mutex_unlock(_openCvOperationLock);
+  }
+
+  /**
+   * Makes dstMat an allocation with the same size and element type as srcMat.
+   *
+   * dstMat is released and re-created only when its size or type differs
+   * from srcMat; otherwise it is left untouched. The previous contents of a
+   * reallocated dstMat are discarded.
+   *
+   * @param srcMat mat whose size and type are used as the reference
+   * @param dstMat mat that is reallocated when it does not match
+   */
+  void GpuMatManager::matchDimensions(GpuMat* srcMat, GpuMat* dstMat) {
+    // a pooled mat can have the right size but a stale element type
+    const bool sameSize = (srcMat->size() == dstMat->size());
+    const bool sameType = (srcMat->type() == dstMat->type());
+
+    if (!sameSize || !sameType) {
+      pthread_mutex_lock(_openCvOperationLock);
+      dstMat->release();
+      dstMat->create(srcMat->size(), srcMat->type());
+      pthread_mutex_unlock(_openCvOperationLock);
+    }
+  }
+
+  /**
+   * Copies the contents of a reserved GpuMat into a host Mat while holding
+   * the OpenCV operation lock.
+   *
+   * @param reservedMat source mat on the GPU
+   * @param dstMat      host mat that receives the data
+   */
+  void GpuMatManager::download(GpuMat* reservedMat, Mat& dstMat) {
+    pthread_mutex_lock(_openCvOperationLock);
+    {
+      GpuMat& deviceImage = *reservedMat;
+      deviceImage.download(dstMat);
+    }
+    pthread_mutex_unlock(_openCvOperationLock);
+  }
+
+  /**
+   * Returns a reserved GpuMat to the pool.
+   *
+   * The semaphore is posted only when the mat belongs to this pool and was
+   * actually marked as taken, so releasing an unknown pointer or releasing
+   * the same mat twice does not inflate the semaphore count.
+   *
+   * @param reservedMat mat previously returned by reserve()
+   */
+  void GpuMatManager::release(GpuMat* reservedMat) {
+    bool freedSlot = false;
+
+    pthread_mutex_lock(_matTakenLock);
+    for (int i=0; i < _numMats; i++) {
+      if (reservedMat == _mats[i]) {
+        // remember whether this call really frees the slot
+        freedSlot = *_matTaken[i];
+        *_matTaken[i] = false;
+        break;  // each slot holds its own GpuMat, so there is nothing more to find
+      }
+    }
+    pthread_mutex_unlock(_matTakenLock);
+
+    // nothing was freed: unknown pointer or already-free mat
+    if (!freedSlot) {
+      return;
+    }
+
+    sem_post(_matSemaphore);
+  }
+
+  /**
+   * Tears down everything the constructor created: the pooled GpuMats and
+   * their taken flags, the two tables holding them, both mutexes and the
+   * semaphore.
+   *
+   * No locking is done here: no other thread may be using the manager (or
+   * any mat obtained from it) while it is being destroyed.
+   */
+  GpuMatManager::~GpuMatManager() {
+    // assume a single thread is destroying the manager
+
+    // slots were created with new, the tables themselves with malloc
+    for (int i=0; i < _numMats; i++) {
+      delete _mats[i];
+      delete _matTaken[i];
+    }
+    free(_mats);
+    free(_matTaken);
+    _mats = NULL;
+    _matTaken = NULL;
+
+    // destroy the locks
+    pthread_mutex_destroy(_matTakenLock);
+    delete _matTakenLock;
+    _matTakenLock = NULL;
+    pthread_mutex_destroy(_openCvOperationLock);
+    delete _openCvOperationLock;
+    _openCvOperationLock = NULL;
+
+    // destroy the semaphore
+    sem_destroy(_matSemaphore);
+    delete _matSemaphore;
+    _matSemaphore = NULL;
+  }
+
+}}
+#include <pthread.h>
+#include <semaphore.h>
+
+#include <opencv2/opencv.hpp>
+#include <opencv2/gpu/gpu.hpp>
+
+using namespace cv;
+using namespace cv::gpu;
+
+namespace br { namespace cuda {
+  /**
+   * Fixed-size pool of GpuMats that are handed out with reserve() and given
+   * back with release(). Bookkeeping of which mats are taken is guarded by
+   * one mutex; upload/download/reallocation calls go through a second one.
+   */
+  class GpuMatManager {
+  public:
+    /** Creates a pool holding `num` GpuMats. */
+    GpuMatManager(int num);
+    ~GpuMatManager();
+
+    /** Returns a free mat from the pool, or NULL when all are taken. */
+    GpuMat* reserve();
+    /** Marks a mat obtained from reserve() as free again. */
+    void release(GpuMat* mat);
+
+    /** Copies the host image `mat` into `reservedMat`. */
+    void upload(GpuMat* reservedMat, Mat& mat);
+    /** Reallocates `dstMat` to the size of `srcMat` when they differ. */
+    void matchDimensions(GpuMat* srcMat, GpuMat* dstMat);
+    /** Copies `reservedMat` back into the host image `dstMat`. */
+    void download(GpuMat* reservedMat, Mat& dstMat);
+
+  private:
+    int _numMats;           // number of slots in the pool
+    GpuMat** _mats;         // the pooled mats, one per slot
+    bool** _matTaken;       // per-slot flag: is the mat currently reserved
+
+    pthread_mutex_t* _matTakenLock;            // guards the _matTaken table
+    pthread_mutex_t* _openCvOperationLock;     // guards OpenCV upload/download/realloc operations
+    sem_t* _matSemaphore;                      // initialized to the pool size
+  };
+}}
@@ -33,6 +33,7 @@
 #include <openbr/plugins/openbr_internal.h>
  
 #include "cudalbp.hpp"
+#include "GpuMatManager.hpp"
  
 using namespace cv;
  
@@ -88,8 +89,8 @@ class CUDALBPTransform : public UntrainableTransform
     uint8_t* lutGpuPtr;
     uchar null;
  
-    //std::mutex uploadMutex;
-    pthread_mutex_t* uploadMutex;
+
+    cuda::GpuMatManager* matManager;
  
   public:
     /* Returns the number of 0->1 or 1->0 transitions in i */
@@ -141,66 +142,35 @@ class CUDALBPTransform : public UntrainableTransform
             if (!set[i])
                 lut[i] = null; // Set to null id
  
+        // init the mat manager for managing 10 mats
+        matManager = new cuda::GpuMatManager(10);
  
         // copy lut over to the GPU
         br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr);
  
-        // initialize the mutex
-        std::cout << "STARING EVERYTHING" << std::endl<< std::flush;
-        if (uploadMutex == NULL) {
-          uploadMutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
-          pthread_mutex_init(uploadMutex, NULL);
-        }
+        std::cout << "Initialized CUDALBP" << std::endl;
     }
  
     void project(const Template &src, Template &dst) const
     {
-        int myCtr = ctr++;
-        GpuMat a, b;
-        const Mat& m = src.m();
-
-        std::cout << "PID: " << getpid() << std::endl << std::flush;
-
-        //std::cout << "START: " << myCtr << std::endl << std::flush;
-
-
-        //std::cout << "Image type: " << type2str(m.type()) << std::endl << std::flush;
-        pthread_mutex_lock(uploadMutex);
-        a.create(m.size(), m.type());
-        b.create(m.size(), m.type());
-        pthread_mutex_unlock(uploadMutex);
-
-        pthread_mutex_lock(uploadMutex);
-        a.upload(m);
-        b.upload(m);
-        pthread_mutex_unlock(uploadMutex);
-
-        // resize the mats
-        //if (m.size() != srcGpuMat->size()) {
-        //  printf("resizing...\n");
-        //  srcGpuMat->release();                    dstGpuMat->release();
-        //  srcGpuMat->create(m.size(), CV_8UC1);    dstGpuMat->create(m.size(), CV_8UC1);
-        //}
-
-        // copy the data to the GPU
-        //srcGpuMat->upload(m);
-
-        // call the kernel function
-        //br::cuda::cudalbp_wrapper(*srcGpuMat, *dstGpuMat, lutGpuPtr);
-        pthread_mutex_lock(uploadMutex);
-        br::cuda::cudalbp_wrapper(a, b, lutGpuPtr);
-        pthread_mutex_unlock(uploadMutex);
-
-        // download the result to the destination
-        //dstGpuMat->download(dst.m());
-        pthread_mutex_lock(uploadMutex);
-        b.download(dst.m());
-        pthread_mutex_unlock(uploadMutex);
-
-        pthread_mutex_lock(uploadMutex);
-        a.release();
-        b.release();
-        pthread_mutex_unlock(uploadMutex);
+        Mat& m = (Mat&)src.m();
+
+        GpuMat* a;
+        GpuMat* b;
+        a = matManager->reserve();
+        matManager->upload(a, m);
+
+        // reserve the second mat and check the dimensiosn
+        b = matManager->reserve();
+        matManager->matchDimensions(b, a);
+
+        br::cuda::cudalbp_wrapper(*a, *b, lutGpuPtr);
+
+        matManager->download(b, dst);
+
+        // release both the mats
+        matManager->release(a);
+        matManager->release(b);
     }
 };