From f7cd4d52db6b3feeba40439c677d3b45340fa929 Mon Sep 17 00:00:00 2001 From: DepthDeluxe Date: Wed, 27 Jan 2016 17:05:21 -0500 Subject: [PATCH] fixed the threading problem by adding locks --- openbr/plugins/cuda/cudalbp.cpp | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------ openbr/plugins/cuda/cudalbp.cu | 6 +++--- openbr/plugins/cuda/passthrough.cpp | 27 +++++---------------------- 3 files changed, 103 insertions(+), 37 deletions(-) diff --git a/openbr/plugins/cuda/cudalbp.cpp b/openbr/plugins/cuda/cudalbp.cpp index 2d6d05f..1097fde 100644 --- a/openbr/plugins/cuda/cudalbp.cpp +++ b/openbr/plugins/cuda/cudalbp.cpp @@ -14,6 +14,16 @@ * limitations under the License. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#include +//#include +//#include + +#include +#include + +#include + +#include #include #include #include @@ -26,6 +36,32 @@ using namespace cv; +string type2str(int type) { + string r; + + uchar depth = type & CV_MAT_DEPTH_MASK; + uchar chans = 1 + (type >> CV_CN_SHIFT); + + switch ( depth ) { + case CV_8U: r = "8U"; break; + case CV_8S: r = "8S"; break; + case CV_16U: r = "16U"; break; + case CV_16S: r = "16S"; break; + case CV_32S: r = "32S"; break; + case CV_32F: r = "32F"; break; + case CV_64F: r = "64F"; break; + default: r = "User"; break; + } + + r += "C"; + r += (chans+'0'); + + return r; +} + +int ctr = 0; +pthread_mutex_t* uploadMutex = NULL; + namespace br { @@ -47,10 +83,15 @@ class CUDALBPTransform : public UntrainableTransform BR_PROPERTY(int, maxTransitions, 8) BR_PROPERTY(bool, rotationInvariant, false) + private: uchar lut[256]; uint8_t* lutGpuPtr; uchar null; + //std::mutex uploadMutex; + pthread_mutex_t* uploadMutex; + + public: /* Returns the number of 0->1 or 1->0 transitions in i */ static int numTransitions(int i) { @@ -100,24 +141,66 @@ class CUDALBPTransform : public UntrainableTransform if (!set[i]) lut[i] = null; // Set to null id + // copy lut over to the GPU br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr); + + // initialize the mutex + std::cout << "STARING EVERYTHING" << std::endl<< std::flush; + if (uploadMutex == NULL) { + uploadMutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t)); + pthread_mutex_init(uploadMutex, NULL); + } } void project(const Template &src, Template &dst) const { - // assume we are using an 8-bit 1 channel image - GpuMat srcGpuMat, dstGpuMat; - - // copy the data to the GPU - srcGpuMat.upload(src.m()); - dstGpuMat.upload(src.m()); - - // call the kernel function - br::cuda::cudalbp_wrapper(srcGpuMat, dstGpuMat, lutGpuPtr); - - // download the result - dstGpuMat.download(dst.m()); + int myCtr = ctr++; + GpuMat a, b; + const Mat& m = src.m(); + + std::cout << "PID: " << getpid() << std::endl << std::flush; + + //std::cout << "START: " << myCtr << std::endl << std::flush; + + + //std::cout << "Image type: " << type2str(m.type()) << std::endl << std::flush; + pthread_mutex_lock(uploadMutex); + a.create(m.size(), m.type()); + b.create(m.size(), m.type()); + pthread_mutex_unlock(uploadMutex); + + pthread_mutex_lock(uploadMutex); + a.upload(m); + b.upload(m); + pthread_mutex_unlock(uploadMutex); + + // resize the mats + //if (m.size() != srcGpuMat->size()) { + // printf("resizing...\n"); + // srcGpuMat->release(); dstGpuMat->release(); + // srcGpuMat->create(m.size(), CV_8UC1); dstGpuMat->create(m.size(), CV_8UC1); + //} + + // copy the data to the GPU + //srcGpuMat->upload(m); + + // call the kernel function + //br::cuda::cudalbp_wrapper(*srcGpuMat, *dstGpuMat, lutGpuPtr); + pthread_mutex_lock(uploadMutex); + br::cuda::cudalbp_wrapper(a, b, lutGpuPtr); + pthread_mutex_unlock(uploadMutex); + + // download the result to the destination + //dstGpuMat->download(dst.m()); + pthread_mutex_lock(uploadMutex); + b.download(dst.m()); + pthread_mutex_unlock(uploadMutex); + + pthread_mutex_lock(uploadMutex); + a.release(); + b.release(); + pthread_mutex_unlock(uploadMutex); } }; diff --git a/openbr/plugins/cuda/cudalbp.cu b/openbr/plugins/cuda/cudalbp.cu index 81007b6..84644f0 100644 --- a/openbr/plugins/cuda/cudalbp.cu +++ b/openbr/plugins/cuda/cudalbp.cu @@ -51,9 +51,9 @@ namespace br { namespace cuda { dim3 numBlocks(imageWidth/threadsPerBlock.x + 1, imageHeight/threadsPerBlock.y + 1); - printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols); - printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols); - printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); + //printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols); + //printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols); + //printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); cudalbp_kernel<<>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut); } diff --git a/openbr/plugins/cuda/passthrough.cpp b/openbr/plugins/cuda/passthrough.cpp index 1b0e1f7..f7018ea 100644 --- a/openbr/plugins/cuda/passthrough.cpp +++ b/openbr/plugins/cuda/passthrough.cpp @@ -10,28 +10,6 @@ using namespace cv::gpu; #include -string type2str(int type) { - string r; - - uchar depth = type & CV_MAT_DEPTH_MASK; - uchar chans = 1 + (type >> CV_CN_SHIFT); - - switch ( depth ) { - case CV_8U: r = "8U"; break; - case CV_8S: r = "8S"; break; - case CV_16U: r = "16U"; break; - case CV_16S: r = "16S"; break; - case CV_32S: r = "32S"; break; - case CV_32F: r = "32F"; break; - case CV_64F: r = "64F"; break; - default: r = "User"; break; - } - - r += "C"; - r += (chans+'0'); - - return r; -} namespace br { @@ -54,6 +32,11 @@ private: dstGpuMat.download(dst.m()); // TODO(colin): add delete code + srcGpuMat.release(); + dstGpuMat.release(); + + printf("srcGpuMat empty: %d\n", (int)srcGpuMat.empty()); + printf("dstGpuMat empty: %d\n", (int)srcGpuMat.empty()); } }; -- libgit2 0.21.4