Commit d254fe2a76a807732520377334d745907fadd06b
1 parent
555b8f38
fixed Affine transform bug
Showing
2 changed files
with
55 additions
and
1 deletion
openbr/plugins/cuda/cudaaffine.cpp
| @@ -38,6 +38,7 @@ using namespace cv; | @@ -38,6 +38,7 @@ using namespace cv; | ||
| 38 | 38 | ||
| 39 | // definitions from the CUDA source file | 39 | // definitions from the CUDA source file |
| 40 | namespace br { namespace cuda { namespace affine { | 40 | namespace br { namespace cuda { namespace affine { |
| 41 | + void resizeWrapper(void* srcPtr, void** dstPtr, int src_rows, int src_cols, int dst_rows, int dst_cols); | ||
| 41 | void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); | 42 | void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); |
| 42 | }}} | 43 | }}} |
| 43 | 44 | ||
| @@ -115,7 +116,20 @@ namespace br | @@ -115,7 +116,20 @@ namespace br | ||
| 115 | const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points()); | 116 | const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points()); |
| 116 | 117 | ||
| 117 | if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { | 118 | if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { |
| 118 | - resize(src, dst, Size(width, height)); | 119 | + void* const* srcDataPtr = src.m().ptr<void*>(); |
| 120 | + int rows = *((int*)srcDataPtr[1]); | ||
| 121 | + int cols = *((int*)srcDataPtr[2]); | ||
| 122 | + int type = *((int*)srcDataPtr[3]); | ||
| 123 | + | ||
| 124 | + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | ||
| 125 | + void** dstDataPtr = dstMat.ptr<void*>(); | ||
| 126 | + | ||
| 127 | + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows | ||
| 128 | + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols | ||
| 129 | + dstDataPtr[3] = srcDataPtr[3]; | ||
| 130 | + | ||
| 131 | + cuda::affine::resizeWrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols, height, width); | ||
| 132 | + dst = dstMat; | ||
| 119 | return; | 133 | return; |
| 120 | } else { | 134 | } else { |
| 121 | srcPoints[0] = landmarks[0]; | 135 | srcPoints[0] = landmarks[0]; |
openbr/plugins/cuda/cudaaffine.cu
| @@ -127,6 +127,28 @@ namespace br { namespace cuda { namespace affine { | @@ -127,6 +127,28 @@ namespace br { namespace cuda { namespace affine { | ||
| 127 | *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; | 127 | *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; |
| 128 | } | 128 | } |
| 129 | 129 | ||
// Bilinear-interpolation resize kernel: one thread per destination pixel of
// a single-channel 8-bit image. Expects a 2D launch grid that covers at
// least dstRows x dstCols; threads that fall past the image do nothing.
// NOTE(review): the one-pixel border of the destination is written as 0
// rather than interpolated (presumably because a full 2x2 source
// neighborhood is not guaranteed there) — confirm this is intended.
__global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) {
  int dstRowInd = blockIdx.y*blockDim.y + threadIdx.y;
  int dstColInd = blockIdx.x*blockDim.x + threadIdx.x;
  int dstIndex  = dstRowInd*dstCols + dstColInd;

  // Threads fully outside the image exit; threads on the 1-pixel border
  // store 0 and exit.
  bool outsideImage = (dstRowInd >= dstRows) || (dstColInd >= dstCols);
  bool onBorder = (dstRowInd < 1) || (dstRowInd >= dstRows - 1)
               || (dstColInd < 1) || (dstColInd >= dstCols - 1);
  if (outsideImage) {
    return;
  }
  if (onBorder) {
    dstPtr[dstIndex] = 0;
    return;
  }

  // Map the destination pixel back into source coordinates by the inverse
  // of the dst/src scale factor along each axis.
  double rowScaleFactor = (double)dstRows / (double)srcRows;
  double colScaleFactor = (double)dstCols / (double)srcCols;

  dstPtr[dstIndex] = getBilinearPixelValueDevice(dstRowInd / rowScaleFactor,
                                                 dstColInd / colScaleFactor,
                                                 srcPtr, srcRows, srcCols);
}
| 130 | 152 | ||
| 131 | __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ | 153 | __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ |
| 132 | int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; | 154 | int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| @@ -152,6 +174,24 @@ namespace br { namespace cuda { namespace affine { | @@ -152,6 +174,24 @@ namespace br { namespace cuda { namespace affine { | ||
| 152 | dstPtr[dstIndex] = cval; | 174 | dstPtr[dstIndex] = cval; |
| 153 | } | 175 | } |
| 154 | 176 | ||
// Host wrapper: resizes a single-channel 8-bit device image from
// srcRows x srcCols to dstRows x dstCols using the bilinear kernel.
// Allocates *dstPtr on the device, and FREES srcPtr before returning —
// the caller must not reuse the source buffer after this call.
void resizeWrapper(void* srcPtr, void** dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) {
  cudaError_t err;

  // allocate space for the destination image
  CUDA_SAFE_MALLOC(dstPtr, dstRows*dstCols*sizeof(uint8_t), &err);

  // Launch one thread per destination pixel. Ceil-division sizes the grid
  // exactly; the previous "/x + 1" form launched a redundant extra block
  // row/column whenever the dimension was already a multiple of the block
  // size (harmless due to the kernel's bounds guard, but wasteful).
  dim3 threadsPerBlock(8, 8);
  dim3 numBlocks((dstCols + threadsPerBlock.x - 1) / threadsPerBlock.x,
                 (dstRows + threadsPerBlock.y - 1) / threadsPerBlock.y);

  bilinearKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)*dstPtr, srcRows, srcCols, dstRows, dstCols);
  CUDA_KERNEL_ERR_CHK(&err);

  // the source image is consumed by this call
  CUDA_SAFE_FREE(srcPtr, &err);
}
| 194 | + | ||
| 155 | void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { | 195 | void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { |
| 156 | cudaError_t err; | 196 | cudaError_t err; |
| 157 | double* gpuInverse; | 197 | double* gpuInverse; |