Commit d254fe2a76a807732520377334d745907fadd06b
1 parent
555b8f38
fixed Affine transform bug
Showing
2 changed files
with
55 additions
and
1 deletion
openbr/plugins/cuda/cudaaffine.cpp
| ... | ... | @@ -38,6 +38,7 @@ using namespace cv; |
| 38 | 38 | |
| 39 | 39 | // definitions from the CUDA source file |
| 40 | 40 | namespace br { namespace cuda { namespace affine { |
| 41 | + void resizeWrapper(void* srcPtr, void** dstPtr, int src_rows, int src_cols, int dst_rows, int dst_cols); | |
| 41 | 42 | void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols); |
| 42 | 43 | }}} |
| 43 | 44 | |
| ... | ... | @@ -115,7 +116,20 @@ namespace br |
| 115 | 116 | const QList<Point2f> landmarks = OpenCVUtils::toPoints(src.file.points()); |
| 116 | 117 | |
| 117 | 118 | if ((landmarks.size() < 2) || (!twoPoints && (landmarks.size() < 3))) { |
| 118 | - resize(src, dst, Size(width, height)); | |
| 119 | + void* const* srcDataPtr = src.m().ptr<void*>(); | |
| 120 | + int rows = *((int*)srcDataPtr[1]); | |
| 121 | + int cols = *((int*)srcDataPtr[2]); | |
| 122 | + int type = *((int*)srcDataPtr[3]); | |
| 123 | + | |
| 124 | + Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | |
| 125 | + void** dstDataPtr = dstMat.ptr<void*>(); | |
| 126 | + | |
| 127 | + dstDataPtr[1] = srcDataPtr[1]; *((int*)dstDataPtr[1]) = height; // rows | |
| 128 | + dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = width; // cols | |
| 129 | + dstDataPtr[3] = srcDataPtr[3]; | |
| 130 | + | |
| 131 | + cuda::affine::resizeWrapper(srcDataPtr[0], &dstDataPtr[0], rows, cols, height, width); | |
| 132 | + dst = dstMat; | |
| 119 | 133 | return; |
| 120 | 134 | } else { |
| 121 | 135 | srcPoints[0] = landmarks[0]; | ... | ... |
openbr/plugins/cuda/cudaaffine.cu
| ... | ... | @@ -127,6 +127,28 @@ namespace br { namespace cuda { namespace affine { |
| 127 | 127 | *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; |
| 128 | 128 | } |
| 129 | 129 | |
| 130 | + __global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) { | |
| 131 | + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; | |
| 132 | + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; | |
| 133 | + int dstIndex = dstRowInd*dstCols + dstColInd; | |
| 134 | + | |
| 135 | + // don't do anything if the index is out of bounds | |
| 136 | + if (dstRowInd < 1 || dstRowInd >= dstRows-1 || dstColInd < 1 || dstColInd >= dstCols-1) { | |
| 137 | + if (dstRowInd >= dstRows || dstColInd >= dstCols) { | |
| 138 | + return; | |
| 139 | + } else{ | |
| 140 | + dstPtr[dstIndex] = 0; | |
| 141 | + return; | |
| 142 | + } | |
| 143 | + } | |
| 144 | + | |
| 145 | + double rowScaleFactor = (double)dstRows / (double)srcRows; | |
| 146 | + double colScaleFactor = (double)dstCols / (double)srcCols; | |
| 147 | + | |
| 148 | + uint8_t out = getBilinearPixelValueDevice(dstRowInd/rowScaleFactor, dstColInd/colScaleFactor, srcPtr, srcRows, srcCols); | |
| 149 | + | |
| 150 | + dstPtr[dstIndex] = out; | |
| 151 | + } | |
| 130 | 152 | |
| 131 | 153 | __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ |
| 132 | 154 | int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| ... | ... | @@ -152,6 +174,24 @@ namespace br { namespace cuda { namespace affine { |
| 152 | 174 | dstPtr[dstIndex] = cval; |
| 153 | 175 | } |
| 154 | 176 | |
| 177 | + void resizeWrapper(void* srcPtr, void** dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) { | |
| 178 | + // perform bilinear filtering | |
| 179 | + | |
| 180 | + // allocate space for destination | |
| 181 | + cudaError_t err; | |
| 182 | + CUDA_SAFE_MALLOC(dstPtr, dstRows*dstCols*sizeof(uint8_t), &err); | |
| 183 | + | |
| 184 | + // call the bilinear kernel function | |
| 185 | + dim3 threadsPerBlock(8, 8); | |
| 186 | + dim3 numBlocks(dstCols/threadsPerBlock.x + 1, | |
| 187 | + dstRows/threadsPerBlock.y + 1); | |
| 188 | + | |
| 189 | + bilinearKernel<<<numBlocks, threadsPerBlock>>>((uint8_t*)srcPtr, (uint8_t*)*dstPtr, srcRows, srcCols, dstRows, dstCols); | |
| 190 | + CUDA_KERNEL_ERR_CHK(&err); | |
| 191 | + | |
| 192 | + CUDA_SAFE_FREE(srcPtr, &err); | |
| 193 | + } | |
| 194 | + | |
| 155 | 195 | void wrapper(void* srcPtr, void** dstPtr, Mat affineTransform, int src_rows, int src_cols, int dst_rows, int dst_cols) { |
| 156 | 196 | cudaError_t err; |
| 157 | 197 | double* gpuInverse; | ... | ... |