Commit c298e0f66bca94c7c33ebd190b1c90444fb0f833

Authored by DepthDeluxe
1 parent e6a8cd8f

added float conversion plugin

openbr/plugins/cuda/cudacvtfloat.cpp 0 → 100644
  1 +#include <iostream>
  2 +#include <unistd.h>
  3 +using namespace std;
  4 +
  5 +#include <opencv2/opencv.hpp>
  6 +using namespace cv;
  7 +
  8 +#include <openbr/plugins/openbr_internal.h>
  9 +
  10 +#include "cudacvtfloat.hpp"
  11 +
  12 +namespace br
  13 +{
  14 +
  15 +/*!
  16 + * \ingroup transforms
  17 + * \brief Converts byte to floating point
  18 + * \author Colin Heinzmann \cite DepthDeluxe
  19 + */
  20 +class CUDACvtFloatTransform : public UntrainableTransform
  21 +{
  22 + Q_OBJECT
  23 +
  24 + public:
  25 + void project(const Template &src, Template &dst) const
  26 + {
  27 + // assume the image type is 256-monochrome
  28 + // TODO(colin): real exception handling
  29 + if (src.m().type() != CV_8UC1) {
  30 + cout << "ERR: Invalid memory format" << endl;
  31 + return;
  32 + }
  33 +
  34 +
  35 + int rows = src.m().rows;
  36 + int cols = src.m().cols;
  37 +
  38 + dst = Mat(rows, cols, CV_32FC1);
  39 +
  40 + br::cuda::cudacvtfloat::wrapper((const unsigned char*)src.m().ptr<unsigned char>(), dst.m().ptr<float>(), rows, cols);
  41 + }
  42 +};
  43 +
  44 +BR_REGISTER(Transform, CUDACvtFloatTransform)
  45 +
  46 +} // namespace br
  47 +
  48 +#include "cuda/cudacvtfloat.moc"
openbr/plugins/cuda/cudacvtfloat.cu 0 → 100644
  1 +namespace br { namespace cuda { namespace cudacvtfloat {
  2 +
  3 + __global__ void kernel(const unsigned char* src, float* dst, int rows, int cols) {
  4 + // get my index
  5 + int rowInd = blockIdx.y*blockDim.y + threadIdx.y;
  6 + int colInd = blockIdx.x*blockDim.x + threadIdx.x;
  7 +
  8 + // bounds check
  9 + if (rowInd >= rows || colInd >= cols) {
  10 + return;
  11 + }
  12 +
  13 + int index = rowInd*cols + colInd;
  14 + dst[index] = (float)src[index];
  15 + }
  16 +
  17 + void wrapper(const unsigned char* src, float* dst, int rows, int cols) {
  18 + unsigned char* cudaSrc;
  19 + cudaMalloc(&cudaSrc, rows*cols*sizeof(unsigned char));
  20 + cudaMemcpy(cudaSrc, src, rows*cols*sizeof(unsigned char), cudaMemcpyHostToDevice);
  21 +
  22 + float* cudaDst;
  23 + cudaMalloc(&cudaDst, rows*cols*sizeof(float));
  24 +
  25 + dim3 threadsPerBlock(8, 8);
  26 + dim3 blocks(
  27 + cols / threadsPerBlock.x + 1,
  28 + rows / threadsPerBlock.y + 1
  29 + );
  30 +
  31 + kernel<<<threadsPerBlock, blocks>>>(cudaSrc, cudaDst, rows, cols);
  32 +
  33 + // copy the data back to the destination
  34 + cudaMemcpy(dst, cudaDst, rows*cols*sizeof(float), cudaMemcpyDeviceToHost);
  35 + }
  36 +
  37 +}}}
openbr/plugins/cuda/cudacvtfloat.hpp 0 → 100644
  1 +namespace br { namespace cuda { namespace cudacvtfloat {
  2 + void wrapper(const unsigned char* src, float* dst, int rows, int cols);
  3 +}}}