Commit f7cd4d52db6b3feeba40439c677d3b45340fa929

Authored by DepthDeluxe
1 parent cc916f3d

fixed the threading problem by adding locks

openbr/plugins/cuda/cudalbp.cpp
@@ -14,6 +14,16 @@ @@ -14,6 +14,16 @@
14 * limitations under the License. * 14 * limitations under the License. *
15 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 15 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
16 16
  17 +#include <iostream>
  18 +//#include <thread>
  19 +//#include <mutex>
  20 +
  21 +#include <sys/types.h>
  22 +#include <unistd.h>
  23 +
  24 +#include <pthread.h>
  25 +
  26 +#include <opencv2/opencv.hpp>
17 #include <opencv2/imgproc/imgproc.hpp> 27 #include <opencv2/imgproc/imgproc.hpp>
18 #include <opencv2/imgproc/imgproc_c.h> 28 #include <opencv2/imgproc/imgproc_c.h>
19 #include <opencv2/highgui/highgui.hpp> 29 #include <opencv2/highgui/highgui.hpp>
@@ -26,6 +36,32 @@ @@ -26,6 +36,32 @@
26 36
27 using namespace cv; 37 using namespace cv;
28 38
  39 +string type2str(int type) {
  40 + string r;
  41 +
  42 + uchar depth = type & CV_MAT_DEPTH_MASK;
  43 + uchar chans = 1 + (type >> CV_CN_SHIFT);
  44 +
  45 + switch ( depth ) {
  46 + case CV_8U: r = "8U"; break;
  47 + case CV_8S: r = "8S"; break;
  48 + case CV_16U: r = "16U"; break;
  49 + case CV_16S: r = "16S"; break;
  50 + case CV_32S: r = "32S"; break;
  51 + case CV_32F: r = "32F"; break;
  52 + case CV_64F: r = "64F"; break;
  53 + default: r = "User"; break;
  54 + }
  55 +
  56 + r += "C";
  57 + r += (chans+'0');
  58 +
  59 + return r;
  60 +}
  61 +
  62 +int ctr = 0;
  63 +pthread_mutex_t* uploadMutex = NULL;
  64 +
29 namespace br 65 namespace br
30 { 66 {
31 67
@@ -47,10 +83,15 @@ class CUDALBPTransform : public UntrainableTransform @@ -47,10 +83,15 @@ class CUDALBPTransform : public UntrainableTransform
47 BR_PROPERTY(int, maxTransitions, 8) 83 BR_PROPERTY(int, maxTransitions, 8)
48 BR_PROPERTY(bool, rotationInvariant, false) 84 BR_PROPERTY(bool, rotationInvariant, false)
49 85
  86 + private:
50 uchar lut[256]; 87 uchar lut[256];
51 uint8_t* lutGpuPtr; 88 uint8_t* lutGpuPtr;
52 uchar null; 89 uchar null;
53 90
  91 + //std::mutex uploadMutex;
  92 + pthread_mutex_t* uploadMutex;
  93 +
  94 + public:
54 /* Returns the number of 0->1 or 1->0 transitions in i */ 95 /* Returns the number of 0->1 or 1->0 transitions in i */
55 static int numTransitions(int i) 96 static int numTransitions(int i)
56 { 97 {
@@ -100,24 +141,66 @@ class CUDALBPTransform : public UntrainableTransform @@ -100,24 +141,66 @@ class CUDALBPTransform : public UntrainableTransform
100 if (!set[i]) 141 if (!set[i])
101 lut[i] = null; // Set to null id 142 lut[i] = null; // Set to null id
102 143
  144 +
103 // copy lut over to the GPU 145 // copy lut over to the GPU
104 br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr); 146 br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr);
  147 +
  148 + // initialize the mutex
  149 + std::cout << "STARING EVERYTHING" << std::endl<< std::flush;
  150 + if (uploadMutex == NULL) {
  151 + uploadMutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
  152 + pthread_mutex_init(uploadMutex, NULL);
  153 + }
105 } 154 }
106 155
107 void project(const Template &src, Template &dst) const 156 void project(const Template &src, Template &dst) const
108 { 157 {
109 - // assume we are using an 8-bit 1 channel image  
110 - GpuMat srcGpuMat, dstGpuMat;  
111 -  
112 - // copy the data to the GPU  
113 - srcGpuMat.upload(src.m());  
114 - dstGpuMat.upload(src.m());  
115 -  
116 - // call the kernel function  
117 - br::cuda::cudalbp_wrapper(srcGpuMat, dstGpuMat, lutGpuPtr);  
118 -  
119 - // download the result  
120 - dstGpuMat.download(dst.m()); 158 + int myCtr = ctr++;
  159 + GpuMat a, b;
  160 + const Mat& m = src.m();
  161 +
  162 + std::cout << "PID: " << getpid() << std::endl << std::flush;
  163 +
  164 + //std::cout << "START: " << myCtr << std::endl << std::flush;
  165 +
  166 +
  167 + //std::cout << "Image type: " << type2str(m.type()) << std::endl << std::flush;
  168 + pthread_mutex_lock(uploadMutex);
  169 + a.create(m.size(), m.type());
  170 + b.create(m.size(), m.type());
  171 + pthread_mutex_unlock(uploadMutex);
  172 +
  173 + pthread_mutex_lock(uploadMutex);
  174 + a.upload(m);
  175 + b.upload(m);
  176 + pthread_mutex_unlock(uploadMutex);
  177 +
  178 + // resize the mats
  179 + //if (m.size() != srcGpuMat->size()) {
  180 + // printf("resizing...\n");
  181 + // srcGpuMat->release(); dstGpuMat->release();
  182 + // srcGpuMat->create(m.size(), CV_8UC1); dstGpuMat->create(m.size(), CV_8UC1);
  183 + //}
  184 +
  185 + // copy the data to the GPU
  186 + //srcGpuMat->upload(m);
  187 +
  188 + // call the kernel function
  189 + //br::cuda::cudalbp_wrapper(*srcGpuMat, *dstGpuMat, lutGpuPtr);
  190 + pthread_mutex_lock(uploadMutex);
  191 + br::cuda::cudalbp_wrapper(a, b, lutGpuPtr);
  192 + pthread_mutex_unlock(uploadMutex);
  193 +
  194 + // download the result to the destination
  195 + //dstGpuMat->download(dst.m());
  196 + pthread_mutex_lock(uploadMutex);
  197 + b.download(dst.m());
  198 + pthread_mutex_unlock(uploadMutex);
  199 +
  200 + pthread_mutex_lock(uploadMutex);
  201 + a.release();
  202 + b.release();
  203 + pthread_mutex_unlock(uploadMutex);
121 } 204 }
122 }; 205 };
123 206
openbr/plugins/cuda/cudalbp.cu
@@ -51,9 +51,9 @@ namespace br { namespace cuda { @@ -51,9 +51,9 @@ namespace br { namespace cuda {
51 dim3 numBlocks(imageWidth/threadsPerBlock.x + 1, 51 dim3 numBlocks(imageWidth/threadsPerBlock.x + 1,
52 imageHeight/threadsPerBlock.y + 1); 52 imageHeight/threadsPerBlock.y + 1);
53 53
54 - printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols);  
55 - printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols);  
56 - printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); 54 + //printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols);
  55 + //printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols);
  56 + //printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y);
57 57
58 cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut); 58 cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut);
59 } 59 }
openbr/plugins/cuda/passthrough.cpp
@@ -10,28 +10,6 @@ using namespace cv::gpu; @@ -10,28 +10,6 @@ using namespace cv::gpu;
10 10
11 #include <iostream> 11 #include <iostream>
12 12
13 -string type2str(int type) {  
14 - string r;  
15 -  
16 - uchar depth = type & CV_MAT_DEPTH_MASK;  
17 - uchar chans = 1 + (type >> CV_CN_SHIFT);  
18 -  
19 - switch ( depth ) {  
20 - case CV_8U: r = "8U"; break;  
21 - case CV_8S: r = "8S"; break;  
22 - case CV_16U: r = "16U"; break;  
23 - case CV_16S: r = "16S"; break;  
24 - case CV_32S: r = "32S"; break;  
25 - case CV_32F: r = "32F"; break;  
26 - case CV_64F: r = "64F"; break;  
27 - default: r = "User"; break;  
28 - }  
29 -  
30 - r += "C";  
31 - r += (chans+'0');  
32 -  
33 - return r;  
34 -}  
35 13
36 namespace br 14 namespace br
37 { 15 {
@@ -54,6 +32,11 @@ private: @@ -54,6 +32,11 @@ private:
54 dstGpuMat.download(dst.m()); 32 dstGpuMat.download(dst.m());
55 33
56 // TODO(colin): add delete code 34 // TODO(colin): add delete code
  35 + srcGpuMat.release();
  36 + dstGpuMat.release();
  37 +
  38 + printf("srcGpuMat empty: %d\n", (int)srcGpuMat.empty());
  39 + printf("dstGpuMat empty: %d\n", (int)srcGpuMat.empty());
57 } 40 }
58 }; 41 };
59 42