From d254fe2a76a807732520377334d745907fadd06b Mon Sep 17 00:00:00 2001 From: DepthDeluxe Date: Sun, 10 Apr 2016 19:20:19 -0400 Subject: [PATCH] fixed Affine transform bug --- openbr/plugins/cuda/cudaaffine.cpp | 16 +++++++++++++++- openbr/plugins/cuda/cudaaffine.cu | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/openbr/plugins/cuda/cudaaffine.cpp b/openbr/plugins/cuda/cudaaffine.cpp index a1963e7..8969248 100644 --- a/openbr/plugins/cuda/cudaaffine.cpp +++ b/openbr/plugins/cuda/cudaaffine.cpp @@ -38,6 +38,7 @@ using namespace cv; // definitions from the CUDA source file namespace br { namespace cuda { namespace affine { + void resizeWrapper(void* srcPtr, void** dstPtr, int src_rows, int src_cols, int dst_rows, int dst_cols); void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); }}} @@ -115,7 +116,20 @@ namespace br const QList landmarks = OpenCVUtils::toPoints(src.file.points()); if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { - resize(src, dst, Size(width, height)); + void* const* srcDataPtr = src.m().ptr(); + int rows = *((int*)srcDataPtr[1]); + int cols = *((int*)srcDataPtr[2]); + int type = *((int*)srcDataPtr[3]); + + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); + void** dstDataPtr = dstMat.ptr(); + + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols + dstDataPtr[3] = srcDataPtr[3]; + + cuda::affine::resizeWrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols, height, width); + dst = dstMat; return; } else { srcPoints[0] = landmarks[0]; diff --git a/openbr/plugins/cuda/cudaaffine.cu b/openbr/plugins/cuda/cudaaffine.cu index fc1baa7..b838b1d 100644 --- a/openbr/plugins/cuda/cudaaffine.cu +++ b/openbr/plugins/cuda/cudaaffine.cu @@ -127,6 +127,28 @@ namespace br { namespace cuda { namespace affine { 
*src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7];
   }
 
+  __global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) {  // resize kernel for a 2D launch: each thread writes at most one dst pixel
+    int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;  // grid y axis -> destination row
+    int dstColInd = blockIdx.x*blockDim.x+threadIdx.x;  // grid x axis -> destination column
+    int dstIndex = dstRowInd*dstCols + dstColInd;  // row-major offset into dstPtr, one uint8_t per pixel
+
+    // threads at or beyond dstRows/dstCols return without writing; the remaining threads on the outermost one-pixel frame store 0 and return
+    if (dstRowInd < 1 || dstRowInd >= dstRows-1 || dstColInd < 1 || dstColInd >= dstCols-1) {
+      if (dstRowInd >= dstRows || dstColInd >= dstCols) {
+        return;
+      } else{
+        dstPtr[dstIndex] = 0;
+        return;
+      }
+    }
+
+    double rowScaleFactor = (double)dstRows / (double)srcRows;  // destination/source size ratio along rows
+    double colScaleFactor = (double)dstCols / (double)srcCols;  // destination/source size ratio along columns
+    // dividing by the ratio maps this pixel to a fractional source position; getBilinearPixelValueDevice is defined outside this hunk (presumably bilinear interpolation - confirm)
+    uint8_t out = getBilinearPixelValueDevice(dstRowInd/rowScaleFactor, dstColInd/colScaleFactor, srcPtr, srcRows, srcCols);
+
+    dstPtr[dstIndex] = out;
+  }
 
   __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){
     int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;
@@ -152,6 +174,24 @@ namespace br { namespace cuda { namespace affine {
     dstPtr[dstIndex] = cval;
   }
 
+  void resizeWrapper(void* srcPtr, void** dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) {
+    // Resizes the image at srcPtr to dstRows x dstCols by launching bilinearKernel; the result buffer is requested through dstPtr and read back as *dstPtr.
+    // NOTE(review): srcPtr is handed to CUDA_SAFE_FREE before returning, so the caller presumably must not reuse it - confirm against the macro, which is defined elsewhere.
+    // request dstRows*dstCols bytes for the destination (one uint8_t per pixel)
+    cudaError_t err;
+    CUDA_SAFE_MALLOC(dstPtr, dstRows*dstCols*sizeof(uint8_t), &err);
+
+    // 8x8 threads per block; the "+ 1" block per axis covers a partial tile and the kernel's bounds check discards the surplus threads
+    dim3 threadsPerBlock(8, 8);
+    dim3 numBlocks(dstCols/threadsPerBlock.x + 1,
+                   dstRows/threadsPerBlock.y + 1);
+
+    bilinearKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)*dstPtr, srcRows, srcCols, dstRows, dstCols);
+    CUDA_KERNEL_ERR_CHK(&err);
+
+    CUDA_SAFE_FREE(srcPtr, &err);
+  }
+
   void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) {
     cudaError_t err;
     double* gpuInverse;
-- 
libgit2 0.21.4