Commit f7cd4d52db6b3feeba40439c677d3b45340fa929
1 parent: cc916f3d
fixed the threading problem by adding locks
Showing 3 changed files with 103 additions and 37 deletions
openbr/plugins/cuda/cudalbp.cpp
| ... | ... | @@ -14,6 +14,16 @@ |
| 14 | 14 | * limitations under the License. * |
| 15 | 15 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ |
| 16 | 16 | |
| 17 | +#include <iostream> | |
| 18 | +//#include <thread> | |
| 19 | +//#include <mutex> | |
| 20 | + | |
| 21 | +#include <sys/types.h> | |
| 22 | +#include <unistd.h> | |
| 23 | + | |
| 24 | +#include <pthread.h> | |
| 25 | + | |
| 26 | +#include <opencv2/opencv.hpp> | |
| 17 | 27 | #include <opencv2/imgproc/imgproc.hpp> |
| 18 | 28 | #include <opencv2/imgproc/imgproc_c.h> |
| 19 | 29 | #include <opencv2/highgui/highgui.hpp> |
| ... | ... | @@ -26,6 +36,32 @@ |
| 26 | 36 | |
| 27 | 37 | using namespace cv; |
| 28 | 38 | |
| 39 | +string type2str(int type) { | |
| 40 | + string r; | |
| 41 | + | |
| 42 | + uchar depth = type & CV_MAT_DEPTH_MASK; | |
| 43 | + uchar chans = 1 + (type >> CV_CN_SHIFT); | |
| 44 | + | |
| 45 | + switch ( depth ) { | |
| 46 | + case CV_8U: r = "8U"; break; | |
| 47 | + case CV_8S: r = "8S"; break; | |
| 48 | + case CV_16U: r = "16U"; break; | |
| 49 | + case CV_16S: r = "16S"; break; | |
| 50 | + case CV_32S: r = "32S"; break; | |
| 51 | + case CV_32F: r = "32F"; break; | |
| 52 | + case CV_64F: r = "64F"; break; | |
| 53 | + default: r = "User"; break; | |
| 54 | + } | |
| 55 | + | |
| 56 | + r += "C"; | |
| 57 | + r += (chans+'0'); | |
| 58 | + | |
| 59 | + return r; | |
| 60 | +} | |
| 61 | + | |
| 62 | +int ctr = 0; | |
| 63 | +pthread_mutex_t* uploadMutex = NULL; | |
| 64 | + | |
| 29 | 65 | namespace br |
| 30 | 66 | { |
| 31 | 67 | |
| ... | ... | @@ -47,10 +83,15 @@ class CUDALBPTransform : public UntrainableTransform |
| 47 | 83 | BR_PROPERTY(int, maxTransitions, 8) |
| 48 | 84 | BR_PROPERTY(bool, rotationInvariant, false) |
| 49 | 85 | |
| 86 | + private: | |
| 50 | 87 | uchar lut[256]; |
| 51 | 88 | uint8_t* lutGpuPtr; |
| 52 | 89 | uchar null; |
| 53 | 90 | |
| 91 | + //std::mutex uploadMutex; | |
| 92 | + pthread_mutex_t* uploadMutex; | |
| 93 | + | |
| 94 | + public: | |
| 54 | 95 | /* Returns the number of 0->1 or 1->0 transitions in i */ |
| 55 | 96 | static int numTransitions(int i) |
| 56 | 97 | { |
| ... | ... | @@ -100,24 +141,66 @@ class CUDALBPTransform : public UntrainableTransform |
| 100 | 141 | if (!set[i]) |
| 101 | 142 | lut[i] = null; // Set to null id |
| 102 | 143 | |
| 144 | + | |
| 103 | 145 | // copy lut over to the GPU |
| 104 | 146 | br::cuda::cudalbp_init_wrapper(lut, &lutGpuPtr); |
| 147 | + | |
| 148 | + // initialize the mutex | |
| 149 | + std::cout << "STARING EVERYTHING" << std::endl<< std::flush; | |
| 150 | + if (uploadMutex == NULL) { | |
| 151 | + uploadMutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t)); | |
| 152 | + pthread_mutex_init(uploadMutex, NULL); | |
| 153 | + } | |
| 105 | 154 | } |
| 106 | 155 | |
| 107 | 156 | void project(const Template &src, Template &dst) const |
| 108 | 157 | { |
| 109 | - // assume we are using an 8-bit 1 channel image | |
| 110 | - GpuMat srcGpuMat, dstGpuMat; | |
| 111 | - | |
| 112 | - // copy the data to the GPU | |
| 113 | - srcGpuMat.upload(src.m()); | |
| 114 | - dstGpuMat.upload(src.m()); | |
| 115 | - | |
| 116 | - // call the kernel function | |
| 117 | - br::cuda::cudalbp_wrapper(srcGpuMat, dstGpuMat, lutGpuPtr); | |
| 118 | - | |
| 119 | - // download the result | |
| 120 | - dstGpuMat.download(dst.m()); | |
| 158 | + int myCtr = ctr++; | |
| 159 | + GpuMat a, b; | |
| 160 | + const Mat& m = src.m(); | |
| 161 | + | |
| 162 | + std::cout << "PID: " << getpid() << std::endl << std::flush; | |
| 163 | + | |
| 164 | + //std::cout << "START: " << myCtr << std::endl << std::flush; | |
| 165 | + | |
| 166 | + | |
| 167 | + //std::cout << "Image type: " << type2str(m.type()) << std::endl << std::flush; | |
| 168 | + pthread_mutex_lock(uploadMutex); | |
| 169 | + a.create(m.size(), m.type()); | |
| 170 | + b.create(m.size(), m.type()); | |
| 171 | + pthread_mutex_unlock(uploadMutex); | |
| 172 | + | |
| 173 | + pthread_mutex_lock(uploadMutex); | |
| 174 | + a.upload(m); | |
| 175 | + b.upload(m); | |
| 176 | + pthread_mutex_unlock(uploadMutex); | |
| 177 | + | |
| 178 | + // resize the mats | |
| 179 | + //if (m.size() != srcGpuMat->size()) { | |
| 180 | + // printf("resizing...\n"); | |
| 181 | + // srcGpuMat->release(); dstGpuMat->release(); | |
| 182 | + // srcGpuMat->create(m.size(), CV_8UC1); dstGpuMat->create(m.size(), CV_8UC1); | |
| 183 | + //} | |
| 184 | + | |
| 185 | + // copy the data to the GPU | |
| 186 | + //srcGpuMat->upload(m); | |
| 187 | + | |
| 188 | + // call the kernel function | |
| 189 | + //br::cuda::cudalbp_wrapper(*srcGpuMat, *dstGpuMat, lutGpuPtr); | |
| 190 | + pthread_mutex_lock(uploadMutex); | |
| 191 | + br::cuda::cudalbp_wrapper(a, b, lutGpuPtr); | |
| 192 | + pthread_mutex_unlock(uploadMutex); | |
| 193 | + | |
| 194 | + // download the result to the destination | |
| 195 | + //dstGpuMat->download(dst.m()); | |
| 196 | + pthread_mutex_lock(uploadMutex); | |
| 197 | + b.download(dst.m()); | |
| 198 | + pthread_mutex_unlock(uploadMutex); | |
| 199 | + | |
| 200 | + pthread_mutex_lock(uploadMutex); | |
| 201 | + a.release(); | |
| 202 | + b.release(); | |
| 203 | + pthread_mutex_unlock(uploadMutex); | |
| 121 | 204 | } |
| 122 | 205 | }; |
| 123 | 206 | ... | ... |
openbr/plugins/cuda/cudalbp.cu
| ... | ... | @@ -51,9 +51,9 @@ namespace br { namespace cuda { |
| 51 | 51 | dim3 numBlocks(imageWidth/threadsPerBlock.x + 1, |
| 52 | 52 | imageHeight/threadsPerBlock.y + 1); |
| 53 | 53 | |
| 54 | - printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols); | |
| 55 | - printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols); | |
| 56 | - printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); | |
| 54 | + //printf("Src Image Dimesions:\n\trows: %d\tcols: %d\n", src.rows, src.cols); | |
| 55 | + //printf("Dst Image Dimesions:\n\trows: %d\tcols: %d\n", dst.rows, dst.cols); | |
| 56 | + //printf("Running CUDALBP\nBlock Dimensions:\n\tx: %d\ty: %d\n", numBlocks.x, numBlocks.y); | |
| 57 | 57 | |
| 58 | 58 | cudalbp_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageHeight, imageWidth, lut); |
| 59 | 59 | } | ... | ... |
openbr/plugins/cuda/passthrough.cpp
| ... | ... | @@ -10,28 +10,6 @@ using namespace cv::gpu; |
| 10 | 10 | |
| 11 | 11 | #include <iostream> |
| 12 | 12 | |
| 13 | -string type2str(int type) { | |
| 14 | - string r; | |
| 15 | - | |
| 16 | - uchar depth = type & CV_MAT_DEPTH_MASK; | |
| 17 | - uchar chans = 1 + (type >> CV_CN_SHIFT); | |
| 18 | - | |
| 19 | - switch ( depth ) { | |
| 20 | - case CV_8U: r = "8U"; break; | |
| 21 | - case CV_8S: r = "8S"; break; | |
| 22 | - case CV_16U: r = "16U"; break; | |
| 23 | - case CV_16S: r = "16S"; break; | |
| 24 | - case CV_32S: r = "32S"; break; | |
| 25 | - case CV_32F: r = "32F"; break; | |
| 26 | - case CV_64F: r = "64F"; break; | |
| 27 | - default: r = "User"; break; | |
| 28 | - } | |
| 29 | - | |
| 30 | - r += "C"; | |
| 31 | - r += (chans+'0'); | |
| 32 | - | |
| 33 | - return r; | |
| 34 | -} | |
| 35 | 13 | |
| 36 | 14 | namespace br |
| 37 | 15 | { |
| ... | ... | @@ -54,6 +32,11 @@ private: |
| 54 | 32 | dstGpuMat.download(dst.m()); |
| 55 | 33 | |
| 56 | 34 | // TODO(colin): add delete code |
| 35 | + srcGpuMat.release(); | |
| 36 | + dstGpuMat.release(); | |
| 37 | + | |
| 38 | + printf("srcGpuMat empty: %d\n", (int)srcGpuMat.empty()); | |
| 39 | + printf("dstGpuMat empty: %d\n", (int)srcGpuMat.empty()); | |
| 57 | 40 | } |
| 58 | 41 | }; |
| 59 | 42 | ... | ... |