Commit aaabb3cbe5738cd9f9418c4b6b2ddb78c5937559
1 parent
bdfd554c
fixed copyto and copyfrom functions to be thread safe
Showing
4 changed files
with
61 additions
and
38 deletions
openbr/plugins/cuda/copyfrom.cpp
| 1 | 1 | #include <iostream> |
| 2 | 2 | |
| 3 | 3 | #include <opencv2/opencv.hpp> |
| 4 | -#include <opencv2/gpu/gpu.hpp> | |
| 5 | 4 | |
| 6 | 5 | #include <openbr/plugins/openbr_internal.h> |
| 7 | 6 | |
| 8 | 7 | using namespace std; |
| 9 | 8 | |
| 10 | 9 | using namespace cv; |
| 11 | -using namespace cv::gpu; | |
| 12 | 10 | |
| 11 | +// extern CUDA declaration | |
| 12 | +namespace br { namespace cuda { namespace cudacopyfrom { | |
| 13 | + //template <typename T> void wrapper(void* src, T* out, int rows, int cols) { | |
| 14 | + void wrapper(void* src, unsigned char* out, const int rows, const int cols); | |
| 15 | +}}} | |
| 13 | 16 | |
| 14 | 17 | namespace br |
| 15 | 18 | { |
| ... | ... | @@ -20,20 +23,16 @@ namespace br |
| 20 | 23 | private: |
| 21 | 24 | void project(const Template &src, Template &dst) const |
| 22 | 25 | { |
| 23 | - // reassemble the integer and then build pointer to it | |
| 24 | - uint64_t gpuMatInt = (((uint64_t)src.m().at<int>(1,0)) << (uint64_t)32) + ((uint64_t)src.m().at<int>(0,0)); | |
| 25 | - GpuMat* gpuMat = (GpuMat*)gpuMatInt; | |
| 26 | + // pull the data back out of the Mat | |
| 27 | + void* const* dataPtr = src.m().ptr<void*>(); | |
| 28 | + void* cudaMemPtr = dataPtr[0]; | |
| 29 | + int rows = *((int*)dataPtr[1]); | |
| 30 | + int cols = *((int*)dataPtr[2]); | |
| 31 | + int type = *((int*)dataPtr[3]); | |
| 26 | 32 | |
| 27 | - printf("gpuMatInt: %li\n", gpuMatInt); | |
| 28 | - printf("m.at(0,0): %i\nm.at(1,0): %i\n", src.m().at<int>(0,0), src.m().at<int>(1,0)); | |
| 33 | + dst = Mat(rows, cols, type); | |
| 29 | 34 | |
| 30 | - // download the data back into the destination | |
| 31 | - Size size = gpuMat->size(); | |
| 32 | - Mat out = Mat(size.height, size.width, gpuMat->depth()); | |
| 33 | - | |
| 34 | - gpuMat->download(out); | |
| 35 | - | |
| 36 | - dst = out; | |
| 35 | + br::cuda::cudacopyfrom::wrapper(cudaMemPtr, dst.m().ptr<unsigned char>(), rows, cols); | |
| 37 | 36 | } |
| 38 | 37 | }; |
| 39 | 38 | ... | ... |
openbr/plugins/cuda/copyfrom.cu
0 → 100644
#include <cstddef>
#include <cstdio>

namespace br { namespace cuda { namespace cudacopyfrom {
  //template <typename T> void wrapper(void* src, T* out, int rows, int cols) {

  /**
   * Downloads a rows x cols image of one-byte elements from the device
   * allocation `src` into the host buffer `out`, then releases `src`.
   *
   * @param src   device pointer to copy from; it is always handed to
   *              cudaFree before returning, so the caller must not reuse it.
   * @param out   host buffer that must have room for rows*cols bytes.
   * @param rows  image height in elements (0 means nothing is copied).
   * @param cols  image width in elements (0 means nothing is copied).
   *
   * The function returns void, so CUDA failures and invalid arguments are
   * reported on stderr rather than being silently dropped.
   */
  void wrapper(void* src, unsigned char* out, const int rows, const int cols) {
    if (rows < 0 || cols < 0) {
      std::fprintf(stderr, "ERR: cudacopyfrom: invalid dimensions %d x %d\n", rows, cols);
    } else if (rows > 0 && cols > 0) {
      if (src == NULL || out == NULL) {
        std::fprintf(stderr, "ERR: cudacopyfrom: null source or destination pointer\n");
      } else {
        // Widen before multiplying: rows*cols evaluated in int can overflow
        // for large images before it is converted to a byte count.
        const std::size_t byteCount = static_cast<std::size_t>(rows)
                                    * static_cast<std::size_t>(cols)
                                    * sizeof(unsigned char);

        const cudaError_t copyStatus = cudaMemcpy(out, src, byteCount, cudaMemcpyDeviceToHost);
        if (copyStatus != cudaSuccess) {
          std::fprintf(stderr, "ERR: cudacopyfrom: cudaMemcpy failed: %s\n",
                       cudaGetErrorString(copyStatus));
        }
      }
    }

    // Release the device buffer even when the copy was skipped or failed so
    // it is not leaked; cudaFree on a null pointer performs no operation.
    const cudaError_t freeStatus = cudaFree(src);
    if (freeStatus != cudaSuccess) {
      std::fprintf(stderr, "ERR: cudacopyfrom: cudaFree failed: %s\n",
                   cudaGetErrorString(freeStatus));
    }
  }
}}}
openbr/plugins/cuda/copyto.cpp
| 1 | 1 | #include <iostream> |
| 2 | 2 | |
| 3 | 3 | #include <opencv2/opencv.hpp> |
| 4 | -#include <opencv2/gpu/gpu.hpp> | |
| 5 | 4 | |
| 6 | 5 | #include <openbr/plugins/openbr_internal.h> |
| 7 | 6 | |
| 8 | 7 | using namespace std; |
| 9 | 8 | |
| 10 | 9 | using namespace cv; |
| 11 | -using namespace cv::gpu; | |
| 10 | + | |
| 11 | +extern string type2str(int type); | |
| 12 | + | |
| 13 | +namespace br { namespace cuda { namespace cudacopyto { | |
| 14 | + //template<typename T> | |
| 15 | + //void wrapper(const T* in, void** out, const int rows, const int cols); | |
| 16 | + void wrapper(const unsigned char* in, void** out, const int rows, const int cols); | |
| 17 | +}}} | |
| 12 | 18 | |
| 13 | 19 | namespace br |
| 14 | 20 | { |
| ... | ... | @@ -19,32 +25,35 @@ namespace br |
| 19 | 25 | private: |
| 20 | 26 | void project(const Template &src, Template &dst) const |
| 21 | 27 | { |
| 22 | - // get the mat to send to the GPU | |
| 23 | - GpuMat* gpuMat = new GpuMat; | |
| 24 | - | |
| 25 | - try | |
| 26 | - { | |
| 27 | - // copy the contents to the GPU | |
| 28 | - gpuMat->upload(src.m()); | |
| 28 | + const Mat& srcMat = src.m(); | |
| 29 | + const int rows = srcMat.rows; | |
| 30 | + const int cols = srcMat.cols; | |
| 31 | + | |
| 32 | + void* cudaMemPtr; | |
| 33 | + switch(srcMat.type()) { | |
| 34 | + //case CV_32FC1: | |
| 35 | + // br::cuda::cudacopyfrom::wrapper<float>(srcMat.ptr<float>(), &cudaMemPtr, rows, cols); | |
| 36 | + // break; | |
| 37 | + case CV_8UC1: | |
| 38 | + //br::cuda::cudacopyfrom::wrapper<unsigned char>(srcMat.ptr<unsigned char>(), &cudaMemPtr, rows, cols); | |
| 39 | + br::cuda::cudacopyto::wrapper(srcMat.ptr<unsigned char>(), &cudaMemPtr, rows, cols); | |
| 40 | + break; | |
| 41 | + default: | |
| 42 | + cout << "ERR: Invalid image type! " << type2str(srcMat.type()) << endl; | |
| 43 | + return; | |
| 29 | 44 | } |
| 30 | - catch(const cv::Exception& ex) | |
| 31 | - { | |
| 32 | - cout << "Error: " << ex.what() << endl; | |
| 33 | - } | |
| 34 | - | |
| 35 | - // now create a new Mat that contains the 64-bit pointer | |
| 36 | - Mat m = Mat(2, 1, CV_32S); | |
| 37 | 45 | |
| 38 | - // pointer magic | |
| 39 | - uint64_t gpuMatInt = (uint64_t)gpuMat; | |
| 40 | - m.at<int>(0,0) = (int32_t)(gpuMatInt & 0x00000000FFFFFFFF); | |
| 41 | - m.at<int>(1,0) = (int32_t)((gpuMatInt & 0xFFFFFFFF00000000) >> (uint64_t)32); | |
| 46 | + // output will be a single pointer to graphics card memory | |
| 47 | + Mat dstMat = Mat(4, 1, DataType<void*>::type); | |
| 48 | + void** dstMatData = dstMat.ptr<void*>(); | |
| 42 | 49 | |
| 43 | - printf("gpuMatInt: %li\n", gpuMatInt); | |
| 44 | - printf("m.at(0,0): %i\nm.at(1,0): %i\n", m.at<int>(0,0), m.at<int>(1,0)); | |
| 50 | + // save cuda ptr, rows, cols, then type | |
| 51 | + dstMatData[0] = cudaMemPtr; | |
| 52 | + dstMatData[1] = new int; *((int*)dstMatData[1]) = rows; | |
| 53 | + dstMatData[2] = new int; *((int*)dstMatData[2]) = cols; | |
| 54 | + dstMatData[3] = new int; *((int*)dstMatData[3]) = srcMat.type(); | |
| 45 | 55 | |
| 46 | - // save away in the destination mat | |
| 47 | - dst += m; | |
| 56 | + dst = dstMat; | |
| 48 | 57 | } |
| 49 | 58 | }; |
| 50 | 59 | ... | ... |
openbr/plugins/cuda/copyto.cu
0 → 100644
#include <cstddef>
#include <cstdio>

namespace br { namespace cuda { namespace cudacopyto {
  //template<typename T>
  //void wrapper(const T* in, void** out, const int rows, const int cols) {

  /**
   * Allocates a device buffer of rows*cols bytes and uploads the host image
   * `in` (one byte per element) into it.
   *
   * @param in    host buffer holding rows*cols bytes.
   * @param out   receives the device pointer on success. It is set to NULL
   *              whenever nothing was uploaded (empty image, invalid
   *              arguments, or a CUDA failure), so the caller never sees an
   *              indeterminate or dangling pointer.
   * @param rows  image height in elements.
   * @param cols  image width in elements.
   *
   * The function returns void, so CUDA failures and invalid arguments are
   * reported on stderr rather than being silently dropped.
   */
  void wrapper(const unsigned char* in, void** out, const int rows, const int cols) {
    if (out == NULL) {
      std::fprintf(stderr, "ERR: cudacopyto: null output pointer\n");
      return;
    }

    // Give the caller a well-defined value on every early-exit path.
    *out = NULL;

    if (rows < 0 || cols < 0) {
      std::fprintf(stderr, "ERR: cudacopyto: invalid dimensions %d x %d\n", rows, cols);
      return;
    }
    if (rows == 0 || cols == 0) {
      return;  // empty image: nothing to allocate or copy
    }
    if (in == NULL) {
      std::fprintf(stderr, "ERR: cudacopyto: null source pointer\n");
      return;
    }

    // Widen before multiplying: rows*cols evaluated in int can overflow
    // for large images before it is converted to a byte count.
    const std::size_t byteCount = static_cast<std::size_t>(rows)
                                * static_cast<std::size_t>(cols)
                                * sizeof(unsigned char);

    const cudaError_t allocStatus = cudaMalloc(out, byteCount);
    if (allocStatus != cudaSuccess) {
      std::fprintf(stderr, "ERR: cudacopyto: cudaMalloc failed: %s\n",
                   cudaGetErrorString(allocStatus));
      *out = NULL;
      return;
    }

    const cudaError_t copyStatus = cudaMemcpy(*out, in, byteCount, cudaMemcpyHostToDevice);
    if (copyStatus != cudaSuccess) {
      std::fprintf(stderr, "ERR: cudacopyto: cudaMemcpy failed: %s\n",
                   cudaGetErrorString(copyStatus));
      // Do not leak the buffer or hand back memory with undefined contents.
      cudaFree(*out);
      *out = NULL;
    }
  }
}}}