Commit c298e0f66bca94c7c33ebd190b1c90444fb0f833
1 parent
e6a8cd8f
added float conversion plugin
Showing
3 changed files
with
88 additions
and
0 deletions
openbr/plugins/cuda/cudacvtfloat.cpp
0 → 100644
| 1 | +#include <iostream> | |
| 2 | +#include <unistd.h> | |
| 3 | +using namespace std; | |
| 4 | + | |
| 5 | +#include <opencv2/opencv.hpp> | |
| 6 | +using namespace cv; | |
| 7 | + | |
| 8 | +#include <openbr/plugins/openbr_internal.h> | |
| 9 | + | |
| 10 | +#include "cudacvtfloat.hpp" | |
| 11 | + | |
| 12 | +namespace br | |
| 13 | +{ | |
| 14 | + | |
| 15 | +/*! | |
| 16 | + * \ingroup transforms | |
| 17 | + * \brief Converts byte to floating point | |
| 18 | + * \author Colin Heinzmann \cite DepthDeluxe | |
| 19 | + */ | |
| 20 | +class CUDACvtFloatTransform : public UntrainableTransform | |
| 21 | +{ | |
| 22 | + Q_OBJECT | |
| 23 | + | |
| 24 | + public: | |
| 25 | + void project(const Template &src, Template &dst) const | |
| 26 | + { | |
| 27 | + // assume the image type is 256-monochrome | |
| 28 | + // TODO(colin): real exception handling | |
| 29 | + if (src.m().type() != CV_8UC1) { | |
| 30 | + cout << "ERR: Invalid memory format" << endl; | |
| 31 | + return; | |
| 32 | + } | |
| 33 | + | |
| 34 | + | |
| 35 | + int rows = src.m().rows; | |
| 36 | + int cols = src.m().cols; | |
| 37 | + | |
| 38 | + dst = Mat(rows, cols, CV_32FC1); | |
| 39 | + | |
| 40 | + br::cuda::cudacvtfloat::wrapper((const unsigned char*)src.m().ptr<unsigned char>(), dst.m().ptr<float>(), rows, cols); | |
| 41 | + } | |
| 42 | +}; | |
| 43 | + | |
| 44 | +BR_REGISTER(Transform, CUDACvtFloatTransform) | |
| 45 | + | |
| 46 | +} // namespace br | |
| 47 | + | |
| 48 | +#include "cuda/cudacvtfloat.moc" | ... | ... |
openbr/plugins/cuda/cudacvtfloat.cu
0 → 100644
| 1 | +namespace br { namespace cuda { namespace cudacvtfloat { | |
| 2 | + | |
/*
 * Per-element byte-to-float conversion.
 *
 * Each thread derives a (row, column) coordinate from its 2D block and
 * thread indices (y -> row, x -> column). A thread whose coordinate lies
 * inside the rows x cols area converts exactly one element; every other
 * thread does nothing. Both buffers are addressed as row * cols + column.
 */
__global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) {
    const int x = blockIdx.x*blockDim.x + threadIdx.x;  // column handled by this thread
    const int y = blockIdx.y*blockDim.y + threadIdx.y;  // row handled by this thread

    // only threads that map onto a real element do any work
    if (y < rows && x < cols) {
        const int offset = y*cols + x;
        dst[offset] = static_cast<float>(src[offset]);
    }
}
| 16 | + | |
/*
 * Host-side entry point: converts a packed rows x cols buffer of bytes
 * into floats on the GPU.
 *
 *   src  - host pointer to rows*cols consecutive unsigned chars
 *   dst  - host pointer with room for rows*cols floats
 *   rows - number of rows in the image
 *   cols - number of columns in the image
 *
 * The input is copied to a temporary device buffer, the kernel is launched
 * over a 2D grid of 8x8-thread blocks that covers the whole image, and the
 * result is copied back into dst. Both temporary device buffers are
 * released before returning.
 *
 * The void signature offers no channel for reporting failures, so when a
 * CUDA call fails the remaining steps are skipped and dst may not have
 * been filled.
 */
void wrapper(const unsigned char* src, float* dst, int rows, int cols) {
    // Nothing to convert for an empty image, and a grid with a zero
    // dimension is not a valid launch configuration.
    if (rows <= 0 || cols <= 0) {
        return;
    }

    // Widen before multiplying so the byte counts are not computed in int.
    const size_t count = static_cast<size_t>(rows) * static_cast<size_t>(cols);
    const size_t srcBytes = count * sizeof(unsigned char);
    const size_t dstBytes = count * sizeof(float);

    // Start from null so the cudaFree calls below are safe on every path.
    unsigned char* cudaSrc = 0;
    float* cudaDst = 0;

    cudaError_t status = cudaMalloc(&cudaSrc, srcBytes);
    if (status == cudaSuccess) {
        status = cudaMalloc(&cudaDst, dstBytes);
    }
    if (status == cudaSuccess) {
        status = cudaMemcpy(cudaSrc, src, srcBytes, cudaMemcpyHostToDevice);
    }

    if (status == cudaSuccess) {
        dim3 threadsPerBlock(8, 8);
        // Ceiling division: just enough blocks to cover every column/row.
        dim3 blocks(
            (cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
            (rows + threadsPerBlock.y - 1) / threadsPerBlock.y
        );

        // Execution configuration order is <<<grid, block>>>.
        kernel<<<blocks, threadsPerBlock>>>(cudaSrc, cudaDst, rows, cols);

        // A launch does not return a status; query it explicitly.
        status = cudaGetLastError();
    }

    if (status == cudaSuccess) {
        // copy the data back to the destination (blocking copy)
        cudaMemcpy(dst, cudaDst, dstBytes, cudaMemcpyDeviceToHost);
    }

    // Release the temporaries on success and failure alike.
    cudaFree(cudaDst);
    cudaFree(cudaSrc);
}
| 36 | + | |
| 37 | +}}} | ... | ... |
openbr/plugins/cuda/cudacvtfloat.hpp
0 → 100644