Commit d254fe2a76a807732520377334d745907fadd06b

Authored by DepthDeluxe
1 parent 555b8f38

Fixed affine transform bug

openbr/plugins/cuda/cudaaffine.cpp
@@ -38,6 +38,7 @@ using namespace cv;

 // definitions from the CUDA source file
 namespace br { namespace cuda { namespace affine {
+  void resizeWrapper(void* srcPtr, void** dstPtr, int src_rows, int src_cols, int dst_rows, int dst_cols);
   void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols);
 }}}

@@ -115,7 +116,20 @@ namespace br
         const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points());

         if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) {
-            resize(src, dst, Size(width, height));
+            void* const* srcDataPtr = src.m().ptr<void*>();
+            int rows = *((int*)srcDataPtr[1]);
+            int cols = *((int*)srcDataPtr[2]);
+            int type = *((int*)srcDataPtr[3]);
+
+            Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
+            void** dstDataPtr = dstMat.ptr<void*>();
+
+            dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows
+            dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols
+            dstDataPtr[3] = srcDataPtr[3];
+
+            cuda::affine::resizeWrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols, height, width);
+            dst = dstMat;
             return;
         } else {
             srcPoints[0] = landmarks[0];
openbr/plugins/cuda/cudaaffine.cu
@@ -127,6 +127,28 @@ namespace br { namespace cuda { namespace affine {
     *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7];
   }

  130 + __global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) {
  131 + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;
  132 + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x;
  133 + int dstIndex = dstRowInd*dstCols + dstColInd;
  134 +
  135 + // don't do anything if the index is out of bounds
  136 + if (dstRowInd < 1 || dstRowInd >= dstRows-1 || dstColInd < 1 || dstColInd >= dstCols-1) {
  137 + if (dstRowInd >= dstRows || dstColInd >= dstCols) {
  138 + return;
  139 + } else{
  140 + dstPtr[dstIndex] = 0;
  141 + return;
  142 + }
  143 + }
  144 +
  145 + double rowScaleFactor = (double)dstRows / (double)srcRows;
  146 + double colScaleFactor = (double)dstCols / (double)srcCols;
  147 +
  148 + uint8_t out = getBilinearPixelValueDevice(dstRowInd/rowScaleFactor, dstColInd/colScaleFactor, srcPtr, srcRows, srcCols);
  149 +
  150 + dstPtr[dstIndex] = out;
  151 + }

   __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){
     int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;
@@ -152,6 +174,24 @@ namespace br { namespace cuda { namespace affine {
     dstPtr[dstIndex] = cval;
   }

  177 + void resizeWrapper(void* srcPtr, void** dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) {
  178 + // perform bilinear filtering
  179 +
  180 + // allocate space for destination
  181 + cudaError_t err;
  182 + CUDA_SAFE_MALLOC(dstPtr, dstRows*dstCols*sizeof(uint8_t), &err);
  183 +
  184 + // call the bilinear kernel function
  185 + dim3 threadsPerBlock(8, 8);
  186 + dim3 numBlocks(dstCols/threadsPerBlock.x + 1,
  187 + dstRows/threadsPerBlock.y + 1);
  188 +
  189 + bilinearKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)*dstPtr, srcRows, srcCols, dstRows, dstCols);
  190 + CUDA_KERNEL_ERR_CHK(&err);
  191 +
  192 + CUDA_SAFE_FREE(srcPtr, &err);
  193 + }
  194 +
   void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) {
     cudaError_t err;
     double* gpuInverse;