diff --git a/openbr/plugins/cuda/cudaaffine.cu b/openbr/plugins/cuda/cudaaffine.cu index 11f81d2..0c19dde 100644 --- a/openbr/plugins/cuda/cudaaffine.cu +++ b/openbr/plugins/cuda/cudaaffine.cu @@ -14,14 +14,88 @@ using namespace cv::gpu; namespace br { namespace cuda { __device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) { + if (row < 0 || row > rows || col < 0 || col > cols) { + if (row > rows || col > cols) { + return 0; + } else{ + return 0; + } + } + return (srcPtr + row*cols)[col]; + } + + + __device__ __forceinline__ uint8_t cudaaffine_kernel_get_bilinear_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) { + // don't do anything if the index is out of bounds + if (row < 0 || row > rows || col < 0 || col > cols) { + if (row > rows || col > cols) { + return 0; + } else{ + return 0; + } + } + + // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf + // Bilinear Transformation + // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy) + + int row1 = floor(row); + int row2 = row1+1; + + int col1 = floor(col); + int col2 = col1+1; + + double d_row = row - row1; + double d_col = col - col1; + + int Q11 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols); + int Q21 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols); + int Q12 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols); + int Q22 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols); + + double val = Q22*(d_row*d_col) + Q12*((1-d_row)*d_col) + Q21*(d_row*(1-d_col)) + Q11*((1-d_row)*(1-d_col)); + return ((uint8_t) round(val)); + } + + __device__ __forceinline__ uint8_t cudaaffine_kernel_get_distance_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) { // don't do anything if the index is out of bounds if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { if (row >= rows || col >= cols) { return 0; } else{ - return 0; } + return 0; + } } - return (srcPtr + row*cols)[col]; + + int row1 = floor(row); + int row2 = row1+1; + + int col1 = floor(col); + int col2 = col1+1; + + double m1 = row2 - row; + double m12 = m1*m1; + + double m2 = col - col1; + double m22 = m2*m2; + + double d1 = sqrt(m12 - 2*m1 + 1 + m22); + double d2 = sqrt(m12 + m22); + double d3 = sqrt(m12 - 2*m1 + 1 + m22 - 2*m2 + 1); + double d4 = sqrt(m12 + m22 - 2*m2 + 1); + double sum = d1 + d2 + d3 + d4; + + double w1 = d1/sum; + double w2 = d2/sum; + double w3 = d3/sum; + double w4 = d4/sum; + + uint8_t v1 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols); + uint8_t v2 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols); + uint8_t v3 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols); + uint8_t v4 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols); + + return round(w1*v1 + w2*v2 + w3*v3 + w4*v4); } /* @@ -31,9 +105,9 @@ namespace br { namespace cuda { * src_row - The computed source pixel row (mapping from this row) * src_col - The computed source pixel column (mapping from this col) */ - __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, int* src_row, int* src_col){ - *src_col = round(dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]); - *src_row = round(dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]); + __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){ + *src_col_pnt = dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]; + *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); @@ -47,8 +121,8 @@ namespace br { namespace cuda { //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); - int srcRowInd; - int srcColInd; + double srcRowPnt; + double srcColPnt; // don't do anything if the index is out of bounds if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) { @@ -60,8 +134,10 @@ namespace br { namespace cuda { } } - cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd); - const uint8_t cval = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols); // Get initial pixel value + cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); + //const uint8_t cval = cudaaffine_kernel_get_distance_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value + const uint8_t cval = cudaaffine_kernel_get_bilinear_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value + //const uint8_t cval = cudaaffine_kernel_get_pixel_value(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value dstPtr[dstIndex] = cval; }