Commit 0de4de862041549101524f475b33858e5ec8e448

Authored by boolli
1 parent c298e0f6

Change GpuMatManager to MatManager, but somehow it still doesn't work

openbr/plugins/cuda/GpuMatManager.cpp deleted
1 -#include <pthread.h>  
2 -#include <semaphore.h>  
3 -  
4 -#include <opencv2/opencv.hpp>  
5 -  
6 -#include "GpuMatManager.hpp"  
7 -  
8 -using namespace cv;  
9 -using namespace cv::gpu;  
10 -  
11 -namespace br { namespace cuda {  
12 - GpuMatManager::GpuMatManager(int num) {  
13 - _numMats = num;  
14 -  
15 - // initialize the GpuMats  
16 - _mats = (GpuMat**)malloc(num * sizeof(GpuMat*));  
17 - _matTaken = (bool**)malloc(num * sizeof(bool*));  
18 - for (int i=0; i < num; i++) {  
19 - _mats[i] = new GpuMat();  
20 - _matTaken[i] = new bool;  
21 - (*_matTaken[i]) = false;  
22 - }  
23 -  
24 - // initialize the locks  
25 - _matTakenLock = new pthread_mutex_t;  
26 - pthread_mutex_init(_matTakenLock, NULL);  
27 - _openCvOperationLock = new pthread_mutex_t;  
28 - pthread_mutex_init(_openCvOperationLock, NULL);  
29 -  
30 - // initialize the semaphore  
31 - _matSemaphore = new sem_t;  
32 - sem_init(_matSemaphore, 0, _numMats);  
33 - }  
34 -  
35 - GpuMat* GpuMatManager::reserve() {  
36 - GpuMat* reservedMat = NULL;  
37 -  
38 - // get the reserved GpuMat  
39 - //sem_wait(_matSemaphore);  
40 - pthread_mutex_lock(_matTakenLock);  
41 - for (int i=0; i < _numMats; i++) {  
42 - if ( !(*_matTaken[i]) ) {  
43 - reservedMat = _mats[i];  
44 - *_matTaken[i] = true;  
45 - break;  
46 - }  
47 - }  
48 - pthread_mutex_unlock(_matTakenLock);  
49 -  
50 - return reservedMat;  
51 - }  
52 -  
53 - void GpuMatManager::upload(GpuMat* reservedMat, Mat& mat) {  
54 - // check the image Dimensions  
55 - if (reservedMat->size() != mat.size()) {  
56 - pthread_mutex_lock(_openCvOperationLock);  
57 - reservedMat->release();  
58 - reservedMat->create(mat.size(), mat.type());  
59 - pthread_mutex_unlock(_openCvOperationLock);  
60 - }  
61 -  
62 - // upload the image  
63 - pthread_mutex_lock(_openCvOperationLock);  
64 - reservedMat->upload(mat);  
65 - pthread_mutex_unlock(_openCvOperationLock);  
66 - pthread_mutex_lock(_openCvOperationLock);  
67 - reservedMat->upload(mat);  
68 - pthread_mutex_unlock(_openCvOperationLock);  
69 - }  
70 -  
71 - void GpuMatManager::matchDimensions(GpuMat* srcMat, GpuMat* dstMat) {  
72 - if (srcMat->size() != dstMat->size()) {  
73 - pthread_mutex_lock(_openCvOperationLock);  
74 - dstMat->release();  
75 - dstMat->create(srcMat->size(), srcMat->type());  
76 - pthread_mutex_unlock(_openCvOperationLock);  
77 - }  
78 - }  
79 -  
80 - void GpuMatManager::download(GpuMat* reservedMat, Mat& dstMat) {  
81 - pthread_mutex_lock(_openCvOperationLock);  
82 - reservedMat->download(dstMat);  
83 - pthread_mutex_unlock(_openCvOperationLock);  
84 - }  
85 -  
86 - void GpuMatManager::release(GpuMat* reservedMat) {  
87 - pthread_mutex_lock(_matTakenLock);  
88 - bool foundMatch = false;  
89 - for (int i=0; i < _numMats; i++) {  
90 - if (reservedMat == _mats[i]) {  
91 - *_matTaken[i] = false;  
92 - foundMatch = true;  
93 - }  
94 - }  
95 - pthread_mutex_unlock(_matTakenLock);  
96 -  
97 - // return unconditionally if we didn't find a match  
98 - if (!foundMatch) {  
99 - return;  
100 - }  
101 -  
102 - sem_post(_matSemaphore);  
103 - }  
104 -  
105 - GpuMatManager::~GpuMatManager() {  
106 - // assume a single thread is destroying the manager  
107 - // TODO(colin): add the destroy code  
108 - }  
109 -  
110 -}}  
openbr/plugins/cuda/MatManager.cu 0 โ†’ 100644
  1 +#include <pthread.h>
  2 +#include <semaphore.h>
  3 +
  4 +#include <opencv2/opencv.hpp>
  5 +
  6 +#include "MatManager.hpp"
  7 +
  8 +using namespace cv;
  9 +using namespace cv::gpu;
  10 +
  11 +namespace br { namespace cuda {
  12 + MatManager::MatManager(int num) {
  13 + _numMats = num;
  14 +
  15 + // initialize the an array of Mats
  16 + _mats = (uint8_t**)malloc(num * sizeof(uint8_t*));
  17 + _matTaken = (bool**)malloc(num * sizeof(bool*));
  18 + _matsDimension = (int**) malloc(num * sizeof(int));
  19 +
  20 + for (int i=0; i < num; i++) {
  21 + cudaMalloc(&_mats[i], 1 * sizeof(uint8_t));
  22 + //_mats[i] = new GpuMat();
  23 +
  24 + _matTaken[i] = new bool;
  25 + (*_matTaken[i]) = false;
  26 +
  27 + // initialize all mat dimensions to be 1
  28 + _matsDimension[i] = new int;
  29 + (*_matsDimension[i]) = 1;
  30 + }
  31 +
  32 + // initialize the locks
  33 + _matTakenLock = new pthread_mutex_t;
  34 + pthread_mutex_init(_matTakenLock, NULL);
  35 + _matsDimensionLock = new pthread_mutex_t;
  36 + pthread_mutex_init(_matsDimensionLock, NULL);
  37 +
  38 + // initialize the semaphore
  39 + _matSemaphore = new sem_t;
  40 + sem_init(_matSemaphore, 0, _numMats);
  41 + }
  42 +
  43 + uint8_t* MatManager::reserve(Mat *mat) {
  44 + int reservedMatIndex = 0;
  45 + std::cout << "Reserving" << std::endl << std::flush;
  46 +
  47 + sem_wait(_matSemaphore);
  48 + pthread_mutex_lock(_matTakenLock);
  49 + int i;
  50 + for (i=0; i < _numMats; i++) {
  51 + if ( !(*_matTaken[i]) ) {
  52 + *_matTaken[i] = true;
  53 + reservedMatIndex = i;
  54 + std::cout << "Taking " << i << std::endl << std::flush;
  55 + break;
  56 + }
  57 + }
  58 + if (i == _numMats) {
  59 + std::cout << "Cannot reserve a mat. Not enough GpuMat resourses\n" << std::endl << std::flush;
  60 + }
  61 +
  62 + //printMats();
  63 + //printSemValue();
  64 + pthread_mutex_unlock(_matTakenLock);
  65 +
  66 + // reallocate if size does not match
  67 + pthread_mutex_lock(_matsDimensionLock);
  68 + if (*_matsDimension[reservedMatIndex] != mat->rows * mat->cols) {
  69 + //printSizeChangingMat(reservedMat);
  70 + //reservedMat->release();
  71 + //reservedMat->create(mat->size(), mat->type());
  72 + std::cout << "Size mismatch" << std::endl << std::flush;
  73 + // re malloc
  74 + cudaFree(_mats[reservedMatIndex]); // free the previous memory first
  75 + cudaMalloc(&_mats[reservedMatIndex], mat->rows * mat->cols * sizeof(uint8_t));
  76 + // change the dimension of that matrix
  77 + *_matsDimension[reservedMatIndex] = mat->rows * mat->cols;
  78 +
  79 + }
  80 + pthread_mutex_unlock(_matsDimensionLock);
  81 + return _mats[reservedMatIndex];
  82 + }
  83 +
  84 + void MatManager::upload(uint8_t* reservedMat, Mat& mat) {
  85 + // upload the image
  86 + /*
  87 + pthread_mutex_lock(_matsDimensionLock);
  88 + reservedMat->upload(mat);
  89 + pthread_mutex_unlock(_matsDimensionLock);
  90 + */
  91 +
  92 + // copy the content of the Mat to GPU
  93 + cudaMemcpy(reservedMat, mat.ptr<uint8_t>(), mat.rows * mat.cols, cudaMemcpyHostToDevice);
  94 + }
  95 +
  96 + void MatManager::download(uint8_t* reservedMat, Mat& dstMat) {
  97 + /*
  98 + pthread_mutex_lock(_matsDimensionLock);
  99 + reservedMat->download(dstMat);
  100 + pthread_mutex_unlock(_matsDimensionLock);
  101 + */
  102 +
  103 + // copy the mat data back
  104 + int dimension = dstMat.rows * dstMat.cols;
  105 + cudaMemcpy(dstMat.ptr<uint8_t>(), reservedMat, dimension, cudaMemcpyDeviceToHost);
  106 + }
  107 +
  108 + void MatManager::release(uint8_t* reservedMat) {
  109 + pthread_mutex_lock(_matTakenLock);
  110 + bool foundMatch = false;
  111 + for (int i=0; i < _numMats; i++) {
  112 + if (reservedMat == _mats[i]) {
  113 + *_matTaken[i] = false;
  114 + foundMatch = true;
  115 + }
  116 + }
  117 + pthread_mutex_unlock(_matTakenLock);
  118 +
  119 + // return unconditionally if we didn't find a match
  120 + if (!foundMatch) {
  121 + std::cout << "Reservedmat is not in the _mats array" << std::endl << std::flush;
  122 + return;
  123 + }
  124 + /*
  125 + printReleasingMat(reservedMat);
  126 + pthread_mutex_lock(_matsDimensionLock);
  127 + Size size = reservedMat->size();
  128 + int type = reservedMat->type();
  129 + reservedMat->release();
  130 + reservedMat->create(size, type);
  131 +
  132 +
  133 +
  134 + pthread_mutex_unlock(_matsDimensionLock);
  135 + */
  136 +
  137 + sem_post(_matSemaphore);
  138 + }
  139 +
  140 + MatManager::~MatManager() {
  141 + // assume a single thread is destroying the manager
  142 + // TODO(colin): add the destroy code
  143 + std::cout << "Start to destroy.." << std::endl << std::flush;
  144 + }
  145 +
  146 + /*
  147 + void MatManager::printMats() {
  148 + for (int i = 0; i < _numMats; i++) {
  149 + if ((*_matTaken[i]) == true) {
  150 + std::cout << i << ": Taken, " << _mats[i]->size() << std::endl << std::flush;
  151 + } else {
  152 + std::cout << i << ": Not taken, " << _mats[i]->size() << std::endl << std::flush;
  153 + }
  154 + }
  155 + std::cout << std::endl << std::flush;
  156 + }
  157 +
  158 + void MatManager::printSemValue() {
  159 + int semValue;
  160 + sem_getvalue(_matSemaphore, &semValue);
  161 + std::cout << "Sem value: " << semValue << std::endl << std::flush;
  162 + }
  163 +
  164 + void MatManager::printSizeChangingMat(GpuMat* gpuMat) {
  165 + for (int i=0; i < _numMats; i++) {
  166 + if (gpuMat == _mats[i]) {
  167 + std::cout << "changing is size of" << i << " at " << gpuMat << std::endl << std::flush;
  168 + return;
  169 + }
  170 + }
  171 + std::cout << "can't change size of mat at address: " << gpuMat << std::endl << std::flush;
  172 + }
  173 +
  174 + void MatManager::printReleasingMat(GpuMat* gpuMat) {
  175 + for (int i=0; i < _numMats; i++) {
  176 + if (gpuMat == _mats[i]) {
  177 + std::cout << "releasing mat" << i << " at " << gpuMat << std::endl << std::flush;
  178 + return;
  179 + }
  180 + }
  181 + std::cout << "can't release mat at address: " << gpuMat << std::endl << std::flush;
  182 + }
  183 +*/
  184 +
  185 +}}
openbr/plugins/cuda/GpuMatManager.hpp renamed to openbr/plugins/cuda/MatManager.hpp
@@ -8,25 +8,29 @@ using namespace cv; @@ -8,25 +8,29 @@ using namespace cv;
8 using namespace cv::gpu; 8 using namespace cv::gpu;
9 9
10 namespace br { namespace cuda { 10 namespace br { namespace cuda {
11 - class GpuMatManager { 11 + class MatManager {
12 private: 12 private:
13 int _numMats; 13 int _numMats;
14 - GpuMat** _mats; // holds all the mats 14 + uint8_t** _mats; // holds all the mats
15 bool** _matTaken; // holds whether or not they are taken 15 bool** _matTaken; // holds whether or not they are taken
  16 + int** _matsDimension; // holds the dimension of the Mats
16 17
17 pthread_mutex_t* _matTakenLock; // lock for matTaken table 18 pthread_mutex_t* _matTakenLock; // lock for matTaken table
18 - pthread_mutex_t* _openCvOperationLock; // lock for OpenCV upload/download/realloc operations 19 + pthread_mutex_t* _matsDimensionLock; // lock for OpenCV upload/download/realloc operations
19 sem_t* _matSemaphore; 20 sem_t* _matSemaphore;
20 21
21 public: 22 public:
22 - GpuMatManager(int num); 23 + MatManager(int num);
23 24
24 - GpuMat* reserve();  
25 - void upload(GpuMat* reservedMat, Mat& mat);  
26 - void matchDimensions(GpuMat* srcMat, GpuMat* dstMat);  
27 - void download(GpuMat* reservedMat, Mat& dstMat);  
28 - void release(GpuMat* mat); 25 + uint8_t* reserve(Mat *mat);
  26 + void upload(uint8_t* reservedMat, Mat& mat);
  27 + void download(uint8_t* reservedMat, Mat& dstMat);
  28 + void release(uint8_t* mat);
29 29
30 - ~GpuMatManager(); 30 + ~MatManager();
  31 + //void printMats();
  32 + //void printSemValue();
  33 + //void printSizeChangingMat(uint8_t* gpuMat);
  34 + //void printReleasingMat(uint8_t* gpuMat);
31 }; 35 };
32 }} 36 }}
openbr/plugins/cuda/cudalbp.cpp
@@ -33,7 +33,7 @@ @@ -33,7 +33,7 @@
33 #include <openbr/plugins/openbr_internal.h> 33 #include <openbr/plugins/openbr_internal.h>
34 34
35 #include "cudalbp.hpp" 35 #include "cudalbp.hpp"
36 -#include "GpuMatManager.hpp" 36 +#include "MatManager.hpp"
37 37
38 using namespace cv; 38 using namespace cv;
39 39
@@ -90,7 +90,7 @@ class CUDALBPTransform : public UntrainableTransform @@ -90,7 +90,7 @@ class CUDALBPTransform : public UntrainableTransform
90 uchar null; 90 uchar null;
91 91
92 92
93 - cuda::GpuMatManager* matManager; 93 + cuda::MatManager* matManager;
94 94
95 public: 95 public:
96 /* Returns the number of 0->1 or 1->0 transitions in i */ 96 /* Returns the number of 0->1 or 1->0 transitions in i */
@@ -143,7 +143,7 @@ class CUDALBPTransform : public UntrainableTransform @@ -143,7 +143,7 @@ class CUDALBPTransform : public UntrainableTransform
143 lut[i] = null; // Set to null id 143 lut[i] = null; // Set to null id
144 144
145 // init the mat manager for managing 10 mats 145 // init the mat manager for managing 10 mats
146 - matManager = new cuda::GpuMatManager(10); 146 + matManager = new cuda::MatManager(10);
147 147
148 // copy lut over to the GPU 148 // copy lut over to the GPU
149 br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr); 149 br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr);
@@ -154,23 +154,29 @@ class CUDALBPTransform : public UntrainableTransform @@ -154,23 +154,29 @@ class CUDALBPTransform : public UntrainableTransform
154 void project(const Template &src, Template &dst) const 154 void project(const Template &src, Template &dst) const
155 { 155 {
156 Mat& m = (Mat&)src.m(); 156 Mat& m = (Mat&)src.m();
157 -  
158 - GpuMat* a;  
159 - GpuMat* b;  
160 - a = matManager->reserve(); 157 + uint8_t* a;
  158 + uint8_t* b;
  159 + a = matManager->reserve(&m);
  160 +// std::cout << "m: " << m.size() << ", " << m.type() << std::endl << std::flush;
  161 +// std::cout << "a: " << a->size() << ", " << a->type() << std::endl << std::flush;
161 matManager->upload(a, m); 162 matManager->upload(a, m);
162 163
163 // reserve the second mat and check the dimensiosn 164 // reserve the second mat and check the dimensiosn
164 - b = matManager->reserve();  
165 - matManager->matchDimensions(b, a);  
166 -  
167 - br::cuda::cudalbp_wrapper(*a, *b, lutGpuPtr);  
168 - 165 + b = matManager->reserve(&m);
  166 + //matManager->matchDimensions(b, a);
  167 +
  168 + //std::cout << "Coming to here" << std::endl << std::flush;
  169 + br::cuda::cudalbp_wrapper(a, b, lutGpuPtr, m.cols, m.rows, m.step1());
  170 + //std::cout << "Coming out of here" << std::endl << std::flush;
  171 +
  172 + //std::cout << "Start to download" << std::endl << std::flush;
169 matManager->download(b, dst); 173 matManager->download(b, dst);
  174 + //std::cout << "finish download" << std::endl << std::flush;
170 175
171 // release both the mats 176 // release both the mats
172 matManager->release(a); 177 matManager->release(a);
173 matManager->release(b); 178 matManager->release(b);
  179 + std::cout << "finish release" << std::endl << std::flush;
174 } 180 }
175 }; 181 };
176 182
openbr/plugins/cuda/cudalbp.cu
@@ -36,15 +36,8 @@ namespace br { namespace cuda { @@ -36,15 +36,8 @@ namespace br { namespace cuda {
36 dstRowPtr[colInd] = val; 36 dstRowPtr[colInd] = val;
37 } 37 }
38 38
39 - void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut) 39 + void cudalbp_wrapper(uint8_t* srcPtr, uint8_t* dstPtr, uint8_t* lut, int imageWidth, int imageHeight, size_t step)
40 { 40 {
41 - // convert the GpuMats to pointers  
42 - uint8_t* srcPtr = (uint8_t*)src.data;  
43 - uint8_t* dstPtr = (uint8_t*)dst.data;  
44 -  
45 - int imageWidth = src.cols;  
46 - int imageHeight = src.rows;  
47 -  
48 // make 8 * 8 = 64 square block 41 // make 8 * 8 = 64 square block
49 dim3 threadsPerBlock(8, 8); 42 dim3 threadsPerBlock(8, 8);
50 43
@@ -55,7 +48,7 @@ namespace br { namespace cuda { @@ -55,7 +48,7 @@ namespace br { namespace cuda {
55 //printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols); 48 //printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols);
56 //printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); 49 //printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y);
57 50
58 - cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut); 51 + cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, step, step, imageHeight, imageWidth, lut);
59 } 52 }
60 53
61 void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr) { 54 void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr) {
openbr/plugins/cuda/cudalbp.hpp
@@ -5,5 +5,5 @@ using namespace cv::gpu; @@ -5,5 +5,5 @@ using namespace cv::gpu;
5 5
6 namespace br { namespace cuda { 6 namespace br { namespace cuda {
7 void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr); 7 void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr);
8 - void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut); 8 + void cudalbp_wrapper(uint8_t* src, uint8_t* dst, uint8_t* lut, int imageWidth, int imageHeight, size_t step);
9 }} 9 }}