Commit f7cd4d52db6b3feeba40439c677d3b45340fa929
1 parent
cc916f3d
fixed the threading problem by adding locks
Showing
3 changed files
with
103 additions
and
37 deletions
openbr/plugins/cuda/cudalbp.cpp
| @@ -14,6 +14,16 @@ | @@ -14,6 +14,16 @@ | ||
| 14 | * limitations under the License. * | 14 | * limitations under the License. * |
| 15 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ | 15 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
| 16 | 16 | ||
| 17 | +#include <iostream> | ||
| 18 | +//#include <thread> | ||
| 19 | +//#include <mutex> | ||
| 20 | + | ||
| 21 | +#include <sys/types.h> | ||
| 22 | +#include <unistd.h> | ||
| 23 | + | ||
| 24 | +#include <pthread.h> | ||
| 25 | + | ||
| 26 | +#include <opencv2/opencv.hpp> | ||
| 17 | #include <opencv2/imgproc/imgproc.hpp> | 27 | #include <opencv2/imgproc/imgproc.hpp> |
| 18 | #include <opencv2/imgproc/imgproc_c.h> | 28 | #include <opencv2/imgproc/imgproc_c.h> |
| 19 | #include <opencv2/highgui/highgui.hpp> | 29 | #include <opencv2/highgui/highgui.hpp> |
| @@ -26,6 +36,32 @@ | @@ -26,6 +36,32 @@ | ||
| 26 | 36 | ||
| 27 | using namespace cv; | 37 | using namespace cv; |
| 28 | 38 | ||
| 39 | +string type2str(int type) { | ||
| 40 | + string r; | ||
| 41 | + | ||
| 42 | + uchar depth = type & CV_MAT_DEPTH_MASK; | ||
| 43 | + uchar chans = 1 + (type >> CV_CN_SHIFT); | ||
| 44 | + | ||
| 45 | + switch ( depth ) { | ||
| 46 | + case CV_8U: r = "8U"; break; | ||
| 47 | + case CV_8S: r = "8S"; break; | ||
| 48 | + case CV_16U: r = "16U"; break; | ||
| 49 | + case CV_16S: r = "16S"; break; | ||
| 50 | + case CV_32S: r = "32S"; break; | ||
| 51 | + case CV_32F: r = "32F"; break; | ||
| 52 | + case CV_64F: r = "64F"; break; | ||
| 53 | + default: r = "User"; break; | ||
| 54 | + } | ||
| 55 | + | ||
| 56 | + r += "C"; | ||
| 57 | + r += (chans+'0'); | ||
| 58 | + | ||
| 59 | + return r; | ||
| 60 | +} | ||
| 61 | + | ||
| 62 | +int ctr = 0; | ||
| 63 | +pthread_mutex_t* uploadMutex = NULL; | ||
| 64 | + | ||
| 29 | namespace br | 65 | namespace br |
| 30 | { | 66 | { |
| 31 | 67 | ||
| @@ -47,10 +83,15 @@ class CUDALBPTransform : public UntrainableTransform | @@ -47,10 +83,15 @@ class CUDALBPTransform : public UntrainableTransform | ||
| 47 | BR_PROPERTY(int, maxTransitions, 8) | 83 | BR_PROPERTY(int, maxTransitions, 8) |
| 48 | BR_PROPERTY(bool, rotationInvariant, false) | 84 | BR_PROPERTY(bool, rotationInvariant, false) |
| 49 | 85 | ||
| 86 | + private: | ||
| 50 | uchar lut[256]; | 87 | uchar lut[256]; |
| 51 | uint8_t* lutGpuPtr; | 88 | uint8_t* lutGpuPtr; |
| 52 | uchar null; | 89 | uchar null; |
| 53 | 90 | ||
| 91 | + //std::mutex uploadMutex; | ||
| 92 | + pthread_mutex_t* uploadMutex; | ||
| 93 | + | ||
| 94 | + public: | ||
| 54 | /* Returns the number of 0->1 or 1->0 transitions in i */ | 95 | /* Returns the number of 0->1 or 1->0 transitions in i */ |
| 55 | static int numTransitions(int i) | 96 | static int numTransitions(int i) |
| 56 | { | 97 | { |
| @@ -100,24 +141,66 @@ class CUDALBPTransform : public UntrainableTransform | @@ -100,24 +141,66 @@ class CUDALBPTransform : public UntrainableTransform | ||
| 100 | if (!set[i]) | 141 | if (!set[i]) |
| 101 | lut[i] = null; // Set to null id | 142 | lut[i] = null; // Set to null id |
| 102 | 143 | ||
| 144 | + | ||
| 103 | // copy lut over to the GPU | 145 | // copy lut over to the GPU |
| 104 | br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr); | 146 | br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr); |
| 147 | + | ||
| 148 | + // initialize the mutex | ||
| 149 | + std::cout << "STARING EVERYTHING" << std::endl<< std::flush; | ||
| 150 | + if (uploadMutex == NULL) { | ||
| 151 | + uploadMutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t)); | ||
| 152 | + pthread_mutex_init(uploadMutex, NULL); | ||
| 153 | + } | ||
| 105 | } | 154 | } |
| 106 | 155 | ||
| 107 | void project(const Template &src, Template &dst) const | 156 | void project(const Template &src, Template &dst) const |
| 108 | { | 157 | { |
| 109 | - // assume we are using an 8-bit 1 channel image | ||
| 110 | - GpuMat srcGpuMat, dstGpuMat; | ||
| 111 | - | ||
| 112 | - // copy the data to the GPU | ||
| 113 | - srcGpuMat.upload(src.m()); | ||
| 114 | - dstGpuMat.upload(src.m()); | ||
| 115 | - | ||
| 116 | - // call the kernel function | ||
| 117 | - br::cuda::cudalbp_wrapper(srcGpuMat, dstGpuMat, lutGpuPtr); | ||
| 118 | - | ||
| 119 | - // download the result | ||
| 120 | - dstGpuMat.download(dst.m()); | 158 | + int myCtr = ctr++; |
| 159 | + GpuMat a, b; | ||
| 160 | + const Mat& m = src.m(); | ||
| 161 | + | ||
| 162 | + std::cout << "PID: " << getpid() << std::endl << std::flush; | ||
| 163 | + | ||
| 164 | + //std::cout << "START: " << myCtr << std::endl << std::flush; | ||
| 165 | + | ||
| 166 | + | ||
| 167 | + //std::cout << "Image type: " << type2str(m.type()) << std::endl << std::flush; | ||
| 168 | + pthread_mutex_lock(uploadMutex); | ||
| 169 | + a.create(m.size(), m.type()); | ||
| 170 | + b.create(m.size(), m.type()); | ||
| 171 | + pthread_mutex_unlock(uploadMutex); | ||
| 172 | + | ||
| 173 | + pthread_mutex_lock(uploadMutex); | ||
| 174 | + a.upload(m); | ||
| 175 | + b.upload(m); | ||
| 176 | + pthread_mutex_unlock(uploadMutex); | ||
| 177 | + | ||
| 178 | + // resize the mats | ||
| 179 | + //if (m.size() != srcGpuMat->size()) { | ||
| 180 | + // printf("resizing...\n"); | ||
| 181 | + // srcGpuMat->release(); dstGpuMat->release(); | ||
| 182 | + // srcGpuMat->create(m.size(), CV_8UC1); dstGpuMat->create(m.size(), CV_8UC1); | ||
| 183 | + //} | ||
| 184 | + | ||
| 185 | + // copy the data to the GPU | ||
| 186 | + //srcGpuMat->upload(m); | ||
| 187 | + | ||
| 188 | + // call the kernel function | ||
| 189 | + //br::cuda::cudalbp_wrapper(*srcGpuMat, *dstGpuMat, lutGpuPtr); | ||
| 190 | + pthread_mutex_lock(uploadMutex); | ||
| 191 | + br::cuda::cudalbp_wrapper(a, b, lutGpuPtr); | ||
| 192 | + pthread_mutex_unlock(uploadMutex); | ||
| 193 | + | ||
| 194 | + // download the result to the destination | ||
| 195 | + //dstGpuMat->download(dst.m()); | ||
| 196 | + pthread_mutex_lock(uploadMutex); | ||
| 197 | + b.download(dst.m()); | ||
| 198 | + pthread_mutex_unlock(uploadMutex); | ||
| 199 | + | ||
| 200 | + pthread_mutex_lock(uploadMutex); | ||
| 201 | + a.release(); | ||
| 202 | + b.release(); | ||
| 203 | + pthread_mutex_unlock(uploadMutex); | ||
| 121 | } | 204 | } |
| 122 | }; | 205 | }; |
| 123 | 206 |
openbr/plugins/cuda/cudalbp.cu
| @@ -51,9 +51,9 @@ namespace br { namespace cuda { | @@ -51,9 +51,9 @@ namespace br { namespace cuda { | ||
| 51 | dim3 numBlocks(imageWidth/threadsPerBlock.x + 1, | 51 | dim3 numBlocks(imageWidth/threadsPerBlock.x + 1, |
| 52 | imageHeight/threadsPerBlock.y + 1); | 52 | imageHeight/threadsPerBlock.y + 1); |
| 53 | 53 | ||
| 54 | - printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols); | ||
| 55 | - printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols); | ||
| 56 | - printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); | 54 | + //printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols); |
| 55 | + //printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols); | ||
| 56 | + //printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); | ||
| 57 | 57 | ||
| 58 | cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut); | 58 | cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut); |
| 59 | } | 59 | } |
openbr/plugins/cuda/passthrough.cpp
| @@ -10,28 +10,6 @@ using namespace cv::gpu; | @@ -10,28 +10,6 @@ using namespace cv::gpu; | ||
| 10 | 10 | ||
| 11 | #include <iostream> | 11 | #include <iostream> |
| 12 | 12 | ||
| 13 | -string type2str(int type) { | ||
| 14 | - string r; | ||
| 15 | - | ||
| 16 | - uchar depth = type & CV_MAT_DEPTH_MASK; | ||
| 17 | - uchar chans = 1 + (type >> CV_CN_SHIFT); | ||
| 18 | - | ||
| 19 | - switch ( depth ) { | ||
| 20 | - case CV_8U: r = "8U"; break; | ||
| 21 | - case CV_8S: r = "8S"; break; | ||
| 22 | - case CV_16U: r = "16U"; break; | ||
| 23 | - case CV_16S: r = "16S"; break; | ||
| 24 | - case CV_32S: r = "32S"; break; | ||
| 25 | - case CV_32F: r = "32F"; break; | ||
| 26 | - case CV_64F: r = "64F"; break; | ||
| 27 | - default: r = "User"; break; | ||
| 28 | - } | ||
| 29 | - | ||
| 30 | - r += "C"; | ||
| 31 | - r += (chans+'0'); | ||
| 32 | - | ||
| 33 | - return r; | ||
| 34 | -} | ||
| 35 | 13 | ||
| 36 | namespace br | 14 | namespace br |
| 37 | { | 15 | { |
| @@ -54,6 +32,11 @@ private: | @@ -54,6 +32,11 @@ private: | ||
| 54 | dstGpuMat.download(dst.m()); | 32 | dstGpuMat.download(dst.m()); |
| 55 | 33 | ||
| 56 | // TODO(colin): add delete code | 34 | // TODO(colin): add delete code |
| 35 | + srcGpuMat.release(); | ||
| 36 | + dstGpuMat.release(); | ||
| 37 | + | ||
| 38 | + printf("srcGpuMat empty: %d\n", (int)srcGpuMat.empty()); | ||
| 39 | + printf("dstGpuMat empty: %d\n", (int)srcGpuMat.empty()); | ||
| 57 | } | 40 | } |
| 58 | }; | 41 | }; |
| 59 | 42 |