Commit 3505e0971d918b26933c59accd7e7b296c5f3a21

Authored by DepthDeluxe
1 parent f7cd4d52

added GpuMatManager on top of CUDALBP

openbr/plugins/cuda/GpuMatManager.cpp 0 → 100644
  1 +#include <pthread.h>
  2 +#include <semaphore.h>
  3 +
  4 +#include <opencv2/opencv.hpp>
  5 +
  6 +#include "GpuMatManager.hpp"
  7 +
  8 +using namespace cv;
  9 +using namespace cv::gpu;
  10 +
  11 +namespace br { namespace cuda {
  12 + GpuMatManager::GpuMatManager(int num) {
  13 + _numMats = num;
  14 +
  15 + // initialize the GpuMats
  16 + _mats = (GpuMat**)malloc(num * sizeof(GpuMat*));
  17 + _matTaken = (bool**)malloc(num * sizeof(bool*));
  18 + for (int i=0; i < num; i++) {
  19 + _mats[i] = new GpuMat();
  20 + _matTaken[i] = new bool;
  21 + (*_matTaken[i]) = false;
  22 + }
  23 +
  24 + // initialize the locks
  25 + _matTakenLock = new pthread_mutex_t;
  26 + pthread_mutex_init(_matTakenLock, NULL);
  27 + _openCvOperationLock = new pthread_mutex_t;
  28 + pthread_mutex_init(_openCvOperationLock, NULL);
  29 +
  30 + // initialize the semaphore
  31 + _matSemaphore = new sem_t;
  32 + sem_init(_matSemaphore, 0, _numMats);
  33 + }
  34 +
  35 + GpuMat* GpuMatManager::reserve() {
  36 + GpuMat* reservedMat = NULL;
  37 +
  38 + // get the reserved GpuMat
  39 + //sem_wait(_matSemaphore);
  40 + pthread_mutex_lock(_matTakenLock);
  41 + for (int i=0; i < _numMats; i++) {
  42 + if ( !(*_matTaken[i]) ) {
  43 + reservedMat = _mats[i];
  44 + *_matTaken[i] = true;
  45 + break;
  46 + }
  47 + }
  48 + pthread_mutex_unlock(_matTakenLock);
  49 +
  50 + return reservedMat;
  51 + }
  52 +
  53 + void GpuMatManager::upload(GpuMat* reservedMat, Mat& mat) {
  54 + // check the image Dimensions
  55 + if (reservedMat->size() != mat.size()) {
  56 + pthread_mutex_lock(_openCvOperationLock);
  57 + reservedMat->release();
  58 + reservedMat->create(mat.size(), mat.type());
  59 + pthread_mutex_unlock(_openCvOperationLock);
  60 + }
  61 +
  62 + // upload the image
  63 + pthread_mutex_lock(_openCvOperationLock);
  64 + reservedMat->upload(mat);
  65 + pthread_mutex_unlock(_openCvOperationLock);
  66 + pthread_mutex_lock(_openCvOperationLock);
  67 + reservedMat->upload(mat);
  68 + pthread_mutex_unlock(_openCvOperationLock);
  69 + }
  70 +
  71 + void GpuMatManager::matchDimensions(GpuMat* srcMat, GpuMat* dstMat) {
  72 + if (srcMat->size() != dstMat->size()) {
  73 + pthread_mutex_lock(_openCvOperationLock);
  74 + dstMat->release();
  75 + dstMat->create(srcMat->size(), srcMat->type());
  76 + pthread_mutex_unlock(_openCvOperationLock);
  77 + }
  78 + }
  79 +
  80 + void GpuMatManager::download(GpuMat* reservedMat, Mat& dstMat) {
  81 + pthread_mutex_lock(_openCvOperationLock);
  82 + reservedMat->download(dstMat);
  83 + pthread_mutex_unlock(_openCvOperationLock);
  84 + }
  85 +
  86 + void GpuMatManager::release(GpuMat* reservedMat) {
  87 + pthread_mutex_lock(_matTakenLock);
  88 + bool foundMatch = false;
  89 + for (int i=0; i < _numMats; i++) {
  90 + if (reservedMat == _mats[i]) {
  91 + *_matTaken[i] = false;
  92 + foundMatch = true;
  93 + }
  94 + }
  95 + pthread_mutex_unlock(_matTakenLock);
  96 +
  97 + // return unconditionally if we didn't find a match
  98 + if (!foundMatch) {
  99 + return;
  100 + }
  101 +
  102 + sem_post(_matSemaphore);
  103 + }
  104 +
  105 + GpuMatManager::~GpuMatManager() {
  106 + // assume a single thread is destroying the manager
  107 + // TODO(colin): add the destroy code
  108 + }
  109 +
  110 +}}
openbr/plugins/cuda/GpuMatManager.hpp 0 → 100644
  1 +#include <pthread.h>
  2 +#include <semaphore.h>
  3 +
  4 +#include <opencv2/opencv.hpp>
  5 +#include <opencv2/gpu/gpu.hpp>
  6 +
  7 +using namespace cv;
  8 +using namespace cv::gpu;
  9 +
  10 +namespace br { namespace cuda {
  11 + class GpuMatManager {
  12 + private:
  13 + int _numMats;
  14 + GpuMat** _mats; // holds all the mats
  15 + bool** _matTaken; // holds whether or not they are taken
  16 +
  17 + pthread_mutex_t* _matTakenLock; // lock for matTaken table
  18 + pthread_mutex_t* _openCvOperationLock; // lock for OpenCV upload/download/realloc operations
  19 + sem_t* _matSemaphore;
  20 +
  21 + public:
  22 + GpuMatManager(int num);
  23 +
  24 + GpuMat* reserve();
  25 + void upload(GpuMat* reservedMat, Mat& mat);
  26 + void matchDimensions(GpuMat* srcMat, GpuMat* dstMat);
  27 + void download(GpuMat* reservedMat, Mat& dstMat);
  28 + void release(GpuMat* mat);
  29 +
  30 + ~GpuMatManager();
  31 + };
  32 +}}
openbr/plugins/cuda/cudalbp.cpp
@@ -33,6 +33,7 @@ @@ -33,6 +33,7 @@
33 #include <openbr/plugins/openbr_internal.h> 33 #include <openbr/plugins/openbr_internal.h>
34 34
35 #include "cudalbp.hpp" 35 #include "cudalbp.hpp"
  36 +#include "GpuMatManager.hpp"
36 37
37 using namespace cv; 38 using namespace cv;
38 39
@@ -88,8 +89,8 @@ class CUDALBPTransform : public UntrainableTransform @@ -88,8 +89,8 @@ class CUDALBPTransform : public UntrainableTransform
88 uint8_t* lutGpuPtr; 89 uint8_t* lutGpuPtr;
89 uchar null; 90 uchar null;
90 91
91 - //std::mutex uploadMutex;  
92 - pthread_mutex_t* uploadMutex; 92 +
  93 + cuda::GpuMatManager* matManager;
93 94
94 public: 95 public:
95 /* Returns the number of 0->1 or 1->0 transitions in i */ 96 /* Returns the number of 0->1 or 1->0 transitions in i */
@@ -141,66 +142,35 @@ class CUDALBPTransform : public UntrainableTransform @@ -141,66 +142,35 @@ class CUDALBPTransform : public UntrainableTransform
141 if (!set[i]) 142 if (!set[i])
142 lut[i] = null; // Set to null id 143 lut[i] = null; // Set to null id
143 144
  145 + // init the mat manager for managing 10 mats
  146 + matManager = new cuda::GpuMatManager(10);
144 147
145 // copy lut over to the GPU 148 // copy lut over to the GPU
146 br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr); 149 br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr);
147 150
148 - // initialize the mutex  
149 - std::cout << "STARING EVERYTHING" << std::endl<< std::flush;  
150 - if (uploadMutex == NULL) {  
151 - uploadMutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));  
152 - pthread_mutex_init(uploadMutex, NULL);  
153 - } 151 + std::cout << "Initialized CUDALBP" << std::endl;
154 } 152 }
155 153
156 void project(const Template &src, Template &dst) const 154 void project(const Template &src, Template &dst) const
157 { 155 {
158 - int myCtr = ctr++;  
159 - GpuMat a, b;  
160 - const Mat& m = src.m();  
161 -  
162 - std::cout << "PID: " << getpid() << std::endl << std::flush;  
163 -  
164 - //std::cout << "START: " << myCtr << std::endl << std::flush;  
165 -  
166 -  
167 - //std::cout << "Image type: " << type2str(m.type()) << std::endl << std::flush;  
168 - pthread_mutex_lock(uploadMutex);  
169 - a.create(m.size(), m.type());  
170 - b.create(m.size(), m.type());  
171 - pthread_mutex_unlock(uploadMutex);  
172 -  
173 - pthread_mutex_lock(uploadMutex);  
174 - a.upload(m);  
175 - b.upload(m);  
176 - pthread_mutex_unlock(uploadMutex);  
177 -  
178 - // resize the mats  
179 - //if (m.size() != srcGpuMat->size()) {  
180 - // printf("resizing...\n");  
181 - // srcGpuMat->release(); dstGpuMat->release();  
182 - // srcGpuMat->create(m.size(), CV_8UC1); dstGpuMat->create(m.size(), CV_8UC1);  
183 - //}  
184 -  
185 - // copy the data to the GPU  
186 - //srcGpuMat->upload(m);  
187 -  
188 - // call the kernel function  
189 - //br::cuda::cudalbp_wrapper(*srcGpuMat, *dstGpuMat, lutGpuPtr);  
190 - pthread_mutex_lock(uploadMutex);  
191 - br::cuda::cudalbp_wrapper(a, b, lutGpuPtr);  
192 - pthread_mutex_unlock(uploadMutex);  
193 -  
194 - // download the result to the destination  
195 - //dstGpuMat->download(dst.m());  
196 - pthread_mutex_lock(uploadMutex);  
197 - b.download(dst.m());  
198 - pthread_mutex_unlock(uploadMutex);  
199 -  
200 - pthread_mutex_lock(uploadMutex);  
201 - a.release();  
202 - b.release();  
203 - pthread_mutex_unlock(uploadMutex); 156 + Mat& m = (Mat&)src.m();
  157 +
  158 + GpuMat* a;
  159 + GpuMat* b;
  160 + a = matManager->reserve();
  161 + matManager->upload(a, m);
  162 +
  163 + // reserve the second mat and check the dimensiosn
  164 + b = matManager->reserve();
  165 + matManager->matchDimensions(b, a);
  166 +
  167 + br::cuda::cudalbp_wrapper(*a, *b, lutGpuPtr);
  168 +
  169 + matManager->download(b, dst);
  170 +
  171 + // release both the mats
  172 + matManager->release(a);
  173 + matManager->release(b);
204 } 174 }
205 }; 175 };
206 176