diff --git a/openbr/plugins/cuda/GpuMatManager.cpp b/openbr/plugins/cuda/GpuMatManager.cpp deleted file mode 100644 index 187a549..0000000 --- a/openbr/plugins/cuda/GpuMatManager.cpp +++ /dev/null @@ -1,110 +0,0 @@ -#include -#include - -#include - -#include "GpuMatManager.hpp" - -using namespace cv; -using namespace cv::gpu; - -namespace br { namespace cuda { - GpuMatManager::GpuMatManager(int num) { - _numMats = num; - - // initialize the GpuMats - _mats = (GpuMat**)malloc(num * sizeof(GpuMat*)); - _matTaken = (bool**)malloc(num * sizeof(bool*)); - for (int i=0; i < num; i++) { - _mats[i] = new GpuMat(); - _matTaken[i] = new bool; - (*_matTaken[i]) = false; - } - - // initialize the locks - _matTakenLock = new pthread_mutex_t; - pthread_mutex_init(_matTakenLock, NULL); - _openCvOperationLock = new pthread_mutex_t; - pthread_mutex_init(_openCvOperationLock, NULL); - - // initialize the semaphore - _matSemaphore = new sem_t; - sem_init(_matSemaphore, 0, _numMats); - } - - GpuMat* GpuMatManager::reserve() { - GpuMat* reservedMat = NULL; - - // get the reserved GpuMat - //sem_wait(_matSemaphore); - pthread_mutex_lock(_matTakenLock); - for (int i=0; i < _numMats; i++) { - if ( !(*_matTaken[i]) ) { - reservedMat = _mats[i]; - *_matTaken[i] = true; - break; - } - } - pthread_mutex_unlock(_matTakenLock); - - return reservedMat; - } - - void GpuMatManager::upload(GpuMat* reservedMat, Mat& mat) { - // check the image Dimensions - if (reservedMat->size() != mat.size()) { - pthread_mutex_lock(_openCvOperationLock); - reservedMat->release(); - reservedMat->create(mat.size(), mat.type()); - pthread_mutex_unlock(_openCvOperationLock); - } - - // upload the image - pthread_mutex_lock(_openCvOperationLock); - reservedMat->upload(mat); - pthread_mutex_unlock(_openCvOperationLock); - pthread_mutex_lock(_openCvOperationLock); - reservedMat->upload(mat); - pthread_mutex_unlock(_openCvOperationLock); - } - - void GpuMatManager::matchDimensions(GpuMat* srcMat, GpuMat* dstMat) { - if (srcMat->size() != dstMat->size()) { - pthread_mutex_lock(_openCvOperationLock); - dstMat->release(); - dstMat->create(srcMat->size(), srcMat->type()); - pthread_mutex_unlock(_openCvOperationLock); - } - } - - void GpuMatManager::download(GpuMat* reservedMat, Mat& dstMat) { - pthread_mutex_lock(_openCvOperationLock); - reservedMat->download(dstMat); - pthread_mutex_unlock(_openCvOperationLock); - } - - void GpuMatManager::release(GpuMat* reservedMat) { - pthread_mutex_lock(_matTakenLock); - bool foundMatch = false; - for (int i=0; i < _numMats; i++) { - if (reservedMat == _mats[i]) { - *_matTaken[i] = false; - foundMatch = true; - } - } - pthread_mutex_unlock(_matTakenLock); - - // return unconditionally if we didn't find a match - if (!foundMatch) { - return; - } - - sem_post(_matSemaphore); - } - - GpuMatManager::~GpuMatManager() { - // assume a single thread is destroying the manager - // TODO(colin): add the destroy code - } - -}} diff --git a/openbr/plugins/cuda/MatManager.cu b/openbr/plugins/cuda/MatManager.cu new file mode 100644 index 0000000..daca2c4 --- /dev/null +++ b/openbr/plugins/cuda/MatManager.cu @@ -0,0 +1,185 @@ +#include +#include + +#include + +#include "MatManager.hpp" + +using namespace cv; +using namespace cv::gpu; + +namespace br { namespace cuda { + MatManager::MatManager(int num) { + _numMats = num; + + // initialize the an array of Mats + _mats = (uint8_t**)malloc(num * sizeof(uint8_t*)); + _matTaken = (bool**)malloc(num * sizeof(bool*)); + _matsDimension = (int**) malloc(num * sizeof(int)); + + for (int i=0; i < num; i++) { + cudaMalloc(&_mats[i], 1 * sizeof(uint8_t)); + //_mats[i] = new GpuMat(); + + _matTaken[i] = new bool; + (*_matTaken[i]) = false; + + // initialize all mat dimensions to be 1 + _matsDimension[i] = new int; + (*_matsDimension[i]) = 1; + } + + // initialize the locks + _matTakenLock = new pthread_mutex_t; + pthread_mutex_init(_matTakenLock, NULL); + _matsDimensionLock = new pthread_mutex_t; + pthread_mutex_init(_matsDimensionLock, NULL); + + // initialize the semaphore + _matSemaphore = new sem_t; + sem_init(_matSemaphore, 0, _numMats); + } + + uint8_t* MatManager::reserve(Mat *mat) { + int reservedMatIndex = 0; + std::cout << "Reserving" << std::endl << std::flush; + + sem_wait(_matSemaphore); + pthread_mutex_lock(_matTakenLock); + int i; + for (i=0; i < _numMats; i++) { + if ( !(*_matTaken[i]) ) { + *_matTaken[i] = true; + reservedMatIndex = i; + std::cout << "Taking " << i << std::endl << std::flush; + break; + } + } + if (i == _numMats) { + std::cout << "Cannot reserve a mat. Not enough GpuMat resourses\n" << std::endl << std::flush; + } + + //printMats(); + //printSemValue(); + pthread_mutex_unlock(_matTakenLock); + + // reallocate if size does not match + pthread_mutex_lock(_matsDimensionLock); + if (*_matsDimension[reservedMatIndex] != mat->rows * mat->cols) { + //printSizeChangingMat(reservedMat); + //reservedMat->release(); + //reservedMat->create(mat->size(), mat->type()); + std::cout << "Size mismatch" << std::endl << std::flush; + // re malloc + cudaFree(_mats[reservedMatIndex]); // free the previous memory first + cudaMalloc(&_mats[reservedMatIndex], mat->rows * mat->cols * sizeof(uint8_t)); + // change the dimension of that matrix + *_matsDimension[reservedMatIndex] = mat->rows * mat->cols; + + } + pthread_mutex_unlock(_matsDimensionLock); + return _mats[reservedMatIndex]; + } + + void MatManager::upload(uint8_t* reservedMat, Mat& mat) { + // upload the image + /* + pthread_mutex_lock(_matsDimensionLock); + reservedMat->upload(mat); + pthread_mutex_unlock(_matsDimensionLock); + */ + + // copy the content of the Mat to GPU + cudaMemcpy(reservedMat, mat.ptr(), mat.rows * mat.cols, cudaMemcpyHostToDevice); + } + + void MatManager::download(uint8_t* reservedMat, Mat& dstMat) { + /* + pthread_mutex_lock(_matsDimensionLock); + reservedMat->download(dstMat); + pthread_mutex_unlock(_matsDimensionLock); + */ + + // copy the mat data back + int dimension = dstMat.rows * dstMat.cols; + cudaMemcpy(dstMat.ptr(), reservedMat, dimension, cudaMemcpyDeviceToHost); + } + + void MatManager::release(uint8_t* reservedMat) { + pthread_mutex_lock(_matTakenLock); + bool foundMatch = false; + for (int i=0; i < _numMats; i++) { + if (reservedMat == _mats[i]) { + *_matTaken[i] = false; + foundMatch = true; + } + } + pthread_mutex_unlock(_matTakenLock); + + // return unconditionally if we didn't find a match + if (!foundMatch) { + std::cout << "Reservedmat is not in the _mats array" << std::endl << std::flush; + return; + } + /* + printReleasingMat(reservedMat); + pthread_mutex_lock(_matsDimensionLock); + Size size = reservedMat->size(); + int type = reservedMat->type(); + reservedMat->release(); + reservedMat->create(size, type); + + + + pthread_mutex_unlock(_matsDimensionLock); + */ + + sem_post(_matSemaphore); + } + + MatManager::~MatManager() { + // assume a single thread is destroying the manager + // TODO(colin): add the destroy code + std::cout << "Start to destroy.." << std::endl << std::flush; + } + + /* + void MatManager::printMats() { + for (int i = 0; i < _numMats; i++) { + if ((*_matTaken[i]) == true) { + std::cout << i << ": Taken, " << _mats[i]->size() << std::endl << std::flush; + } else { + std::cout << i << ": Not taken, " << _mats[i]->size() << std::endl << std::flush; + } + } + std::cout << std::endl << std::flush; + } + + void MatManager::printSemValue() { + int semValue; + sem_getvalue(_matSemaphore, &semValue); + std::cout << "Sem value: " << semValue << std::endl << std::flush; + } + + void MatManager::printSizeChangingMat(GpuMat* gpuMat) { + for (int i=0; i < _numMats; i++) { + if (gpuMat == _mats[i]) { + std::cout << "changing is size of" << i << " at " << gpuMat << std::endl << std::flush; + return; + } + } + std::cout << "can't change size of mat at address: " << gpuMat << std::endl << std::flush; + } + + void MatManager::printReleasingMat(GpuMat* gpuMat) { + for (int i=0; i < _numMats; i++) { + if (gpuMat == _mats[i]) { + std::cout << "releasing mat" << i << " at " << gpuMat << std::endl << std::flush; + return; + } + } + std::cout << "can't release mat at address: " << gpuMat << std::endl << std::flush; + } +*/ + +}} diff --git a/openbr/plugins/cuda/GpuMatManager.hpp b/openbr/plugins/cuda/MatManager.hpp index 86fb169..9302200 100644 --- a/openbr/plugins/cuda/GpuMatManager.hpp +++ b/openbr/plugins/cuda/MatManager.hpp @@ -8,25 +8,29 @@ using namespace cv; using namespace cv::gpu; namespace br { namespace cuda { - class GpuMatManager { + class MatManager { private: int _numMats; - GpuMat** _mats; // holds all the mats + uint8_t** _mats; // holds all the mats bool** _matTaken; // holds whether or not they are taken + int** _matsDimension; // holds the dimension of the Mats pthread_mutex_t* _matTakenLock; // lock for matTaken table - pthread_mutex_t* _openCvOperationLock; // lock for OpenCV upload/download/realloc operations + pthread_mutex_t* _matsDimensionLock; // lock for OpenCV upload/download/realloc operations sem_t* _matSemaphore; public: - GpuMatManager(int num); + MatManager(int num); - GpuMat* reserve(); - void upload(GpuMat* reservedMat, Mat& mat); - void matchDimensions(GpuMat* srcMat, GpuMat* dstMat); - void download(GpuMat* reservedMat, Mat& dstMat); - void release(GpuMat* mat); + uint8_t* reserve(Mat *mat); + void upload(uint8_t* reservedMat, Mat& mat); + void download(uint8_t* reservedMat, Mat& dstMat); + void release(uint8_t* mat); - ~GpuMatManager(); + ~MatManager(); + //void printMats(); + //void printSemValue(); + //void printSizeChangingMat(uint8_t* gpuMat); + //void printReleasingMat(uint8_t* gpuMat); }; }} diff --git a/openbr/plugins/cuda/cudalbp.cpp b/openbr/plugins/cuda/cudalbp.cpp index dca1b79..f5dc029 100644 --- a/openbr/plugins/cuda/cudalbp.cpp +++ b/openbr/plugins/cuda/cudalbp.cpp @@ -33,7 +33,7 @@ #include #include "cudalbp.hpp" -#include "GpuMatManager.hpp" +#include "MatManager.hpp" using namespace cv; @@ -90,7 +90,7 @@ class CUDALBPTransform : public UntrainableTransform uchar null; - cuda::GpuMatManager* matManager; + cuda::MatManager* matManager; public: /* Returns the number of 0->1 or 1->0 transitions in i */ @@ -143,7 +143,7 @@ class CUDALBPTransform : public UntrainableTransform lut[i] = null; // Set to null id // init the mat manager for managing 10 mats - matManager = new cuda::GpuMatManager(10); + matManager = new cuda::MatManager(10); // copy lut over to the GPU br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr); @@ -154,23 +154,29 @@ class CUDALBPTransform : public UntrainableTransform void project(const Template &src, Template &dst) const { Mat& m = (Mat&)src.m(); - - GpuMat* a; - GpuMat* b; - a = matManager->reserve(); + uint8_t* a; + uint8_t* b; + a = matManager->reserve(&m); +// std::cout << "m: " << m.size() << ", " << m.type() << std::endl << std::flush; +// std::cout << "a: " << a->size() << ", " << a->type() << std::endl << std::flush; matManager->upload(a, m); // reserve the second mat and check the dimensiosn - b = matManager->reserve(); - matManager->matchDimensions(b, a); - - br::cuda::cudalbp_wrapper(*a, *b, lutGpuPtr); - + b = matManager->reserve(&m); + //matManager->matchDimensions(b, a); + + //std::cout << "Coming to here" << std::endl << std::flush; + br::cuda::cudalbp_wrapper(a, b, lutGpuPtr, m.cols, m.rows, m.step1()); + //std::cout << "Coming out of here" << std::endl << std::flush; + + //std::cout << "Start to download" << std::endl << std::flush; matManager->download(b, dst); + //std::cout << "finish download" << std::endl << std::flush; // release both the mats matManager->release(a); matManager->release(b); + std::cout << "finish release" << std::endl << std::flush; } }; diff --git a/openbr/plugins/cuda/cudalbp.cu b/openbr/plugins/cuda/cudalbp.cu index 84644f0..23aec27 100644 --- a/openbr/plugins/cuda/cudalbp.cu +++ b/openbr/plugins/cuda/cudalbp.cu @@ -36,15 +36,8 @@ namespace br { namespace cuda { dstRowPtr[colInd] = val; } - void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut) + void cudalbp_wrapper(uint8_t* srcPtr, uint8_t* dstPtr, uint8_t* lut, int imageWidth, int imageHeight, size_t step) { - // convert the GpuMats to pointers - uint8_t* srcPtr = (uint8_t*)src.data; - uint8_t* dstPtr = (uint8_t*)dst.data; - - int imageWidth = src.cols; - int imageHeight = src.rows; - // make 8 * 8 = 64 square block dim3 threadsPerBlock(8, 8); @@ -55,7 +48,7 @@ namespace br { namespace cuda { //printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols); //printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); - cudalbp_kernel<<>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut); + cudalbp_kernel<<>>(srcPtr, dstPtr, step, step, imageHeight, imageWidth, lut); } void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr) { diff --git a/openbr/plugins/cuda/cudalbp.hpp b/openbr/plugins/cuda/cudalbp.hpp index f6569be..7fa270c 100644 --- a/openbr/plugins/cuda/cudalbp.hpp +++ b/openbr/plugins/cuda/cudalbp.hpp @@ -5,5 +5,5 @@ using namespace cv::gpu; namespace br { namespace cuda { void cudalbp_init_wrapper(uint8_t* lut, uint8_t** lutGpuPtrPtr); - void cudalbp_wrapper(GpuMat& src, GpuMat& dst, uint8_t* lut); + void cudalbp_wrapper(uint8_t* src, uint8_t* dst, uint8_t* lut, int imageWidth, int imageHeight, size_t step); }}