Commit c298e0f66bca94c7c33ebd190b1c90444fb0f833
1 parent
e6a8cd8f
added float conversion plugin
Showing
3 changed files
with
88 additions
and
0 deletions
openbr/plugins/cuda/cudacvtfloat.cpp
0 → 100644
| 1 | +#include <iostream> | ||
| 2 | +#include <unistd.h> | ||
| 3 | +using namespace std; | ||
| 4 | + | ||
| 5 | +#include <opencv2/opencv.hpp> | ||
| 6 | +using namespace cv; | ||
| 7 | + | ||
| 8 | +#include <openbr/plugins/openbr_internal.h> | ||
| 9 | + | ||
| 10 | +#include "cudacvtfloat.hpp" | ||
| 11 | + | ||
| 12 | +namespace br | ||
| 13 | +{ | ||
| 14 | + | ||
| 15 | +/*! | ||
| 16 | + * \ingroup transforms | ||
| 17 | + * \brief Converts byte to floating point | ||
| 18 | + * \author Colin Heinzmann \cite DepthDeluxe | ||
| 19 | + */ | ||
| 20 | +class CUDACvtFloatTransform : public UntrainableTransform | ||
| 21 | +{ | ||
| 22 | + Q_OBJECT | ||
| 23 | + | ||
| 24 | + public: | ||
| 25 | + void project(const Template &src, Template &dst) const | ||
| 26 | + { | ||
| 27 | + // assume the image type is 256-monochrome | ||
| 28 | + // TODO(colin): real exception handling | ||
| 29 | + if (src.m().type() != CV_8UC1) { | ||
| 30 | + cout << "ERR: Invalid memory format" << endl; | ||
| 31 | + return; | ||
| 32 | + } | ||
| 33 | + | ||
| 34 | + | ||
| 35 | + int rows = src.m().rows; | ||
| 36 | + int cols = src.m().cols; | ||
| 37 | + | ||
| 38 | + dst = Mat(rows, cols, CV_32FC1); | ||
| 39 | + | ||
| 40 | + br::cuda::cudacvtfloat::wrapper((const unsigned char*)src.m().ptr<unsigned char>(), dst.m().ptr<float>(), rows, cols); | ||
| 41 | + } | ||
| 42 | +}; | ||
| 43 | + | ||
| 44 | +BR_REGISTER(Transform, CUDACvtFloatTransform) | ||
| 45 | + | ||
| 46 | +} // namespace br | ||
| 47 | + | ||
| 48 | +#include "cuda/cudacvtfloat.moc" |
openbr/plugins/cuda/cudacvtfloat.cu
0 → 100644
#include <cstdio>   // fprintf, used by wrapper() to report CUDA failures

namespace br { namespace cuda { namespace cudacvtfloat {

    /*!
     * \brief Widens every 8-bit element of src into a float element of dst.
     *
     * Launch layout: 2D grid of 2D blocks, x covering columns and y covering
     * rows. Threads that fall outside rows x cols return without touching
     * memory, so the grid may overshoot the image.
     *
     * Both buffers are indexed as densely packed row-major arrays: element
     * (r, c) lives at r*cols + c. No shared memory is used.
     *
     * \param src  Device pointer to rows*cols bytes.
     * \param dst  Device pointer to rows*cols floats.
     * \param rows Number of rows.
     * \param cols Number of columns.
     */
    __global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) {
        // get my index
        int rowInd = blockIdx.y*blockDim.y + threadIdx.y;
        int colInd = blockIdx.x*blockDim.x + threadIdx.x;

        // bounds check
        if (rowInd >= rows || colInd >= cols) {
            return;
        }

        // Widen before multiplying so very large images cannot overflow int.
        size_t index = (size_t)rowInd*(size_t)cols + (size_t)colInd;
        dst[index] = (float)src[index];
    }

    /*!
     * \brief Host entry point: converts a packed 8-bit image to float on the GPU.
     *
     * Copies src to the device, runs the conversion kernel, and copies the
     * result back into dst. All device memory allocated here is released
     * before returning, on success and on failure alike. If any CUDA call
     * fails, a message is written to stderr and dst's contents are unspecified.
     * Null pointers or non-positive dimensions make the call a no-op.
     *
     * \param src  Host pointer to rows*cols densely packed bytes.
     * \param dst  Host pointer to room for rows*cols floats.
     * \param rows Number of rows.
     * \param cols Number of columns.
     */
    void wrapper(const unsigned char* src, float* dst, int rows, int cols) {
        // Nothing to convert; also avoids launching a grid with a zero dimension.
        if (src == NULL || dst == NULL || rows <= 0 || cols <= 0) {
            return;
        }

        // Do the size arithmetic in size_t so rows*cols cannot overflow int.
        const size_t count    = (size_t)rows * (size_t)cols;
        const size_t srcBytes = count * sizeof(unsigned char);
        const size_t dstBytes = count * sizeof(float);

        unsigned char* cudaSrc = NULL;
        float* cudaDst = NULL;

        // Each step runs only if everything before it succeeded; the first
        // failure is kept in err and reported after cleanup.
        cudaError_t err = cudaMalloc(&cudaSrc, srcBytes);
        if (err == cudaSuccess) {
            err = cudaMalloc(&cudaDst, dstBytes);
        }
        if (err == cudaSuccess) {
            err = cudaMemcpy(cudaSrc, src, srcBytes, cudaMemcpyHostToDevice);
        }

        if (err == cudaSuccess) {
            dim3 threadsPerBlock(8, 8);
            // Ceiling division: just enough blocks to cover every row/column.
            dim3 blocks(
                (cols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                (rows + threadsPerBlock.y - 1) / threadsPerBlock.y
            );

            // Execution configuration order is <<<grid, block>>>.
            kernel<<<blocks, threadsPerBlock>>>(cudaSrc, cudaDst, rows, cols);

            // A launch does not return a status; invalid configurations are
            // only visible through cudaGetLastError().
            err = cudaGetLastError();
        }

        if (err == cudaSuccess) {
            // copy the data back to the destination; this blocking copy also
            // waits for the kernel and surfaces any error it raised
            err = cudaMemcpy(dst, cudaDst, dstBytes, cudaMemcpyDeviceToHost);
        }

        // cudaFree accepts a null pointer, so this is safe on every path.
        cudaFree(cudaDst);
        cudaFree(cudaSrc);

        if (err != cudaSuccess) {
            fprintf(stderr, "ERR: cudacvtfloat failed: %s\n", cudaGetErrorString(err));
        }
    }

}}}
openbr/plugins/cuda/cudacvtfloat.hpp
0 → 100644