Commit f7cd4d52db6b3feeba40439c677d3b45340fa929

Authored by DepthDeluxe
1 parent cc916f3d

fixed the threading problem by adding locks

openbr/plugins/cuda/cudalbp.cpp
... ... @@ -14,6 +14,16 @@
14 14 * limitations under the License. *
15 15 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
16 16  
  17 +#include <iostream>
  18 +//#include <thread>
  19 +//#include <mutex>
  20 +
  21 +#include <sys/types.h>
  22 +#include <unistd.h>
  23 +
  24 +#include <pthread.h>
  25 +
  26 +#include <opencv2/opencv.hpp>
17 27 #include <opencv2/imgproc/imgproc.hpp>
18 28 #include <opencv2/imgproc/imgproc_c.h>
19 29 #include <opencv2/highgui/highgui.hpp>
... ... @@ -26,6 +36,32 @@
26 36  
27 37 using namespace cv;
28 38  
  39 +string type2str(int type) {
  40 + string r;
  41 +
  42 + uchar depth = type & CV_MAT_DEPTH_MASK;
  43 + uchar chans = 1 + (type >> CV_CN_SHIFT);
  44 +
  45 + switch ( depth ) {
  46 + case CV_8U: r = "8U"; break;
  47 + case CV_8S: r = "8S"; break;
  48 + case CV_16U: r = "16U"; break;
  49 + case CV_16S: r = "16S"; break;
  50 + case CV_32S: r = "32S"; break;
  51 + case CV_32F: r = "32F"; break;
  52 + case CV_64F: r = "64F"; break;
  53 + default: r = "User"; break;
  54 + }
  55 +
  56 + r += "C";
  57 + r += (chans+'0');
  58 +
  59 + return r;
  60 +}
  61 +
  62 +int ctr = 0;
  63 +pthread_mutex_t* uploadMutex = NULL;
  64 +
29 65 namespace br
30 66 {
31 67  
... ... @@ -47,10 +83,15 @@ class CUDALBPTransform : public UntrainableTransform
47 83 BR_PROPERTY(int, maxTransitions, 8)
48 84 BR_PROPERTY(bool, rotationInvariant, false)
49 85  
  86 + private:
50 87 uchar lut[256];
51 88 uint8_t* lutGpuPtr;
52 89 uchar null;
53 90  
  91 + //std::mutex uploadMutex;
  92 + pthread_mutex_t* uploadMutex;
  93 +
  94 + public:
54 95 /* Returns the number of 0->1 or 1->0 transitions in i */
55 96 static int numTransitions(int i)
56 97 {
... ... @@ -100,24 +141,66 @@ class CUDALBPTransform : public UntrainableTransform
100 141 if (!set[i])
101 142 lut[i] = null; // Set to null id
102 143  
  144 +
103 145 // copy lut over to the GPU
104 146 br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr);
  147 +
  148 + // initialize the mutex
  149 + std::cout << "STARING EVERYTHING" << std::endl<< std::flush;
  150 + if (uploadMutex == NULL) {
  151 + uploadMutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
  152 + pthread_mutex_init(uploadMutex, NULL);
  153 + }
105 154 }
106 155  
107 156 void project(const Template &src, Template &dst) const
108 157 {
109   - // assume we are using an 8-bit 1 channel image
110   - GpuMat srcGpuMat, dstGpuMat;
111   -
112   - // copy the data to the GPU
113   - srcGpuMat.upload(src.m());
114   - dstGpuMat.upload(src.m());
115   -
116   - // call the kernel function
117   - br::cuda::cudalbp_wrapper(srcGpuMat, dstGpuMat, lutGpuPtr);
118   -
119   - // download the result
120   - dstGpuMat.download(dst.m());
  158 + int myCtr = ctr++;
  159 + GpuMat a, b;
  160 + const Mat& m = src.m();
  161 +
  162 + std::cout << "PID: " << getpid() << std::endl << std::flush;
  163 +
  164 + //std::cout << "START: " << myCtr << std::endl << std::flush;
  165 +
  166 +
  167 + //std::cout << "Image type: " << type2str(m.type()) << std::endl << std::flush;
  168 + pthread_mutex_lock(uploadMutex);
  169 + a.create(m.size(), m.type());
  170 + b.create(m.size(), m.type());
  171 + pthread_mutex_unlock(uploadMutex);
  172 +
  173 + pthread_mutex_lock(uploadMutex);
  174 + a.upload(m);
  175 + b.upload(m);
  176 + pthread_mutex_unlock(uploadMutex);
  177 +
  178 + // resize the mats
  179 + //if (m.size() != srcGpuMat->size()) {
  180 + // printf("resizing...\n");
  181 + // srcGpuMat->release(); dstGpuMat->release();
  182 + // srcGpuMat->create(m.size(), CV_8UC1); dstGpuMat->create(m.size(), CV_8UC1);
  183 + //}
  184 +
  185 + // copy the data to the GPU
  186 + //srcGpuMat->upload(m);
  187 +
  188 + // call the kernel function
  189 + //br::cuda::cudalbp_wrapper(*srcGpuMat, *dstGpuMat, lutGpuPtr);
  190 + pthread_mutex_lock(uploadMutex);
  191 + br::cuda::cudalbp_wrapper(a, b, lutGpuPtr);
  192 + pthread_mutex_unlock(uploadMutex);
  193 +
  194 + // download the result to the destination
  195 + //dstGpuMat->download(dst.m());
  196 + pthread_mutex_lock(uploadMutex);
  197 + b.download(dst.m());
  198 + pthread_mutex_unlock(uploadMutex);
  199 +
  200 + pthread_mutex_lock(uploadMutex);
  201 + a.release();
  202 + b.release();
  203 + pthread_mutex_unlock(uploadMutex);
121 204 }
122 205 };
123 206  
... ...
openbr/plugins/cuda/cudalbp.cu
... ... @@ -51,9 +51,9 @@ namespace br { namespace cuda {
51 51 dim3 numBlocks(imageWidth/threadsPerBlock.x + 1,
52 52 imageHeight/threadsPerBlock.y + 1);
53 53  
54   - printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols);
55   - printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols);
56   - printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y);
  54 + //printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols);
  55 + //printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols);
  56 + //printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y);
57 57  
58 58 cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut);
59 59 }
... ...
openbr/plugins/cuda/passthrough.cpp
... ... @@ -10,28 +10,6 @@ using namespace cv::gpu;
10 10  
11 11 #include <iostream>
12 12  
13   -string type2str(int type) {
14   - string r;
15   -
16   - uchar depth = type & CV_MAT_DEPTH_MASK;
17   - uchar chans = 1 + (type >> CV_CN_SHIFT);
18   -
19   - switch ( depth ) {
20   - case CV_8U: r = "8U"; break;
21   - case CV_8S: r = "8S"; break;
22   - case CV_16U: r = "16U"; break;
23   - case CV_16S: r = "16S"; break;
24   - case CV_32S: r = "32S"; break;
25   - case CV_32F: r = "32F"; break;
26   - case CV_64F: r = "64F"; break;
27   - default: r = "User"; break;
28   - }
29   -
30   - r += "C";
31   - r += (chans+'0');
32   -
33   - return r;
34   -}
35 13  
36 14 namespace br
37 15 {
... ... @@ -54,6 +32,11 @@ private:
54 32 dstGpuMat.download(dst.m());
55 33  
56 34 // TODO(colin): add delete code
  35 + srcGpuMat.release();
  36 + dstGpuMat.release();
  37 +
  38 + printf("srcGpuMat empty: %d\n", (int)srcGpuMat.empty());
  39 + printf("dstGpuMat empty: %d\n", (int)srcGpuMat.empty());
57 40 }
58 41 };
59 42  
... ...