Commit d254fe2a76a807732520377334d745907fadd06b

Authored by DepthDeluxe
1 parent 555b8f38

Fixed affine transform bug: when too few landmarks are available, resize the image on the GPU via cuda::affine::resizeWrapper instead of calling host cv::resize on the CUDA-backed Mat

openbr/plugins/cuda/cudaaffine.cpp
... ... @@ -38,6 +38,7 @@ using namespace cv;
38 38  
39 39 // definitions from the CUDA source file
40 40 namespace br { namespace cuda { namespace affine {
  41 + void resizeWrapper(void* srcPtr, void** dstPtr, int src_rows, int src_cols, int dst_rows, int dst_cols);
41 42 void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols);
42 43 }}}
43 44  
... ... @@ -115,7 +116,20 @@ namespace br
115 116 const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points());
116 117  
117 118 if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) {
118   - resize(src, dst, Size(width, height));
  119 + void* const* srcDataPtr = src.m().ptr<void*>();
  120 + int rows = *((int*)srcDataPtr[1]);
  121 + int cols = *((int*)srcDataPtr[2]);
  122 + int type = *((int*)srcDataPtr[3]);
  123 +
  124 + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
  125 + void** dstDataPtr = dstMat.ptr<void*>();
  126 +
  127 + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows
  128 + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols
  129 + dstDataPtr[3] = srcDataPtr[3];
  130 +
  131 + cuda::affine::resizeWrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols, height, width);
  132 + dst = dstMat;
119 133 return;
120 134 } else {
121 135 srcPoints[0] = landmarks[0];
... ...
openbr/plugins/cuda/cudaaffine.cu
... ... @@ -127,6 +127,28 @@ namespace br { namespace cuda { namespace affine {
127 127 *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7];
128 128 }
129 129  
  130 + __global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) {
  131 + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;
  132 + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x;
  133 + int dstIndex = dstRowInd*dstCols + dstColInd;
  134 +
  135 + // don't do anything if the index is out of bounds
  136 + if (dstRowInd < 1 || dstRowInd >= dstRows-1 || dstColInd < 1 || dstColInd >= dstCols-1) {
  137 + if (dstRowInd >= dstRows || dstColInd >= dstCols) {
  138 + return;
  139 + } else{
  140 + dstPtr[dstIndex] = 0;
  141 + return;
  142 + }
  143 + }
  144 +
  145 + double rowScaleFactor = (double)dstRows / (double)srcRows;
  146 + double colScaleFactor = (double)dstCols / (double)srcCols;
  147 +
  148 + uint8_t out = getBilinearPixelValueDevice(dstRowInd/rowScaleFactor, dstColInd/colScaleFactor, srcPtr, srcRows, srcCols);
  149 +
  150 + dstPtr[dstIndex] = out;
  151 + }
130 152  
131 153 __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){
132 154 int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;
... ... @@ -152,6 +174,24 @@ namespace br { namespace cuda { namespace affine {
152 174 dstPtr[dstIndex] = cval;
153 175 }
154 176  
  177 + void resizeWrapper(void* srcPtr, void** dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) {
  178 + // perform bilinear filtering
  179 +
  180 + // allocate space for destination
  181 + cudaError_t err;
  182 + CUDA_SAFE_MALLOC(dstPtr, dstRows*dstCols*sizeof(uint8_t), &err);
  183 +
  184 + // call the bilinear kernel function
  185 + dim3 threadsPerBlock(8, 8);
  186 + dim3 numBlocks(dstCols/threadsPerBlock.x + 1,
  187 + dstRows/threadsPerBlock.y + 1);
  188 +
  189 + bilinearKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)*dstPtr, srcRows, srcCols, dstRows, dstCols);
  190 + CUDA_KERNEL_ERR_CHK(&err);
  191 +
  192 + CUDA_SAFE_FREE(srcPtr, &err);
  193 + }
  194 +
155 195 void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) {
156 196 cudaError_t err;
157 197 double* gpuInverse;
... ...