Commit 3631db166b9992504463cf5f995b60e2d077589e
1 parent
e534ff9c
Working bilinear affine warp
Showing
1 changed file
with
85 additions
and
9 deletions
openbr/plugins/cuda/cudaaffine.cu
| ... | ... | @@ -14,14 +14,88 @@ using namespace cv::gpu; |
| 14 | 14 | namespace br { namespace cuda { |
| 15 | 15 | |
/*
 * Fetch one 8-bit pixel from a single-channel image stored row-major with a
 * row stride of exactly `cols` elements.
 *
 * row, col - Integer pixel coordinates to read
 * srcPtr   - Pointer to the first pixel of the image
 * rows     - Number of rows in the image
 * cols     - Number of columns in the image (also used as the row stride)
 *
 * Returns the pixel at (row, col), or 0 when the coordinate lies outside the
 * image.
 */
__device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) {
    // Valid indices are [0, rows-1] x [0, cols-1]. The upper bounds must be
    // exclusive: row == rows would read past the end of the buffer and
    // col == cols would silently return the first pixel of the next row.
    if (row < 0 || row >= rows || col < 0 || col >= cols) {
        return 0;
    }

    // Widen before multiplying so very large images cannot overflow int.
    return srcPtr[static_cast<size_t>(row) * static_cast<size_t>(cols) + static_cast<size_t>(col)];
}
| 26 | + | |
| 27 | + | |
/*
 * Sample an 8-bit single-channel image at a fractional position using
 * bilinear interpolation of the four surrounding pixels.
 *
 * row, col - Fractional source coordinates to sample
 * srcPtr   - Pointer to the first pixel of the image
 * rows     - Number of rows in the image
 * cols     - Number of columns in the image
 *
 * Returns the interpolated value rounded to the nearest integer. Positions
 * outside [0, rows) x [0, cols) (including NaN) yield 0. Neighbours that fall
 * just past the last row/column contribute 0, so samples in the last
 * fractional pixel fade towards 0.
 */
__device__ __forceinline__ uint8_t cudaaffine_kernel_get_bilinear_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) {
    // Written as a negated "inside" test so that NaN coordinates are rejected
    // as well. The upper bounds are exclusive: row == rows / col == cols would
    // make the top-left neighbour itself fall outside the image.
    if (!(row >= 0 && row < rows && col >= 0 && col < cols)) {
        return 0;
    }

    // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf
    // Bilinear Transformation
    // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy)

    const int row1 = (int) floor(row);
    const int row2 = row1 + 1;

    const int col1 = (int) floor(col);
    const int col2 = col1 + 1;

    // Fractional offsets from the top-left neighbour, each in [0, 1)
    const double d_row = row - row1;
    const double d_col = col - col1;

    // The "+1" neighbours can land exactly one past the last row/column.
    // Guard them here so this function never requests an out-of-range pixel.
    const bool row2_inside = row2 < rows;
    const bool col2_inside = col2 < cols;

    const int Q11 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols);
    const int Q21 = row2_inside ? cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols) : 0;
    const int Q12 = col2_inside ? cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols) : 0;
    const int Q22 = (row2_inside && col2_inside) ? cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols) : 0;

    // The four weights sum to 1, so val stays within [0, 255].
    const double val = Q22*(d_row*d_col) + Q12*((1-d_row)*d_col) + Q21*(d_row*(1-d_col)) + Q11*((1-d_row)*(1-d_col));
    return ((uint8_t) round(val));
}
| 59 | + | |
/*
 * Sample an 8-bit single-channel image at a fractional position using
 * inverse-distance weighting of the four surrounding pixels.
 *
 * row, col - Fractional source coordinates to sample
 * srcPtr   - Pointer to the first pixel of the image
 * rows     - Number of rows in the image
 * cols     - Number of columns in the image
 *
 * Returns the weighted value rounded to the nearest integer, or 0 when the
 * position is not inside the interior region [1, rows-1) x [1, cols-1)
 * (NaN coordinates are rejected as well).
 */
__device__ __forceinline__ uint8_t cudaaffine_kernel_get_distance_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) {
    // don't do anything if the index is out of bounds
    if (!(row >= 1 && row < rows-1 && col >= 1 && col < cols-1)) {
        return 0;
    }

    const int row1 = (int) floor(row);
    const int row2 = row1 + 1;

    const int col1 = (int) floor(col);
    const int col2 = col1 + 1;

    // Offsets to the top-left neighbour (d_*) and to the bottom-right one (u_*)
    const double d_row = row - row1;
    const double d_col = col - col1;
    const double u_row = 1.0 - d_row;
    const double u_col = 1.0 - d_col;

    const uint8_t v1 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols);
    const uint8_t v2 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols);
    const uint8_t v3 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols);
    const uint8_t v4 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols);

    // Euclidean distance from the sample point to each neighbour
    const double d1 = sqrt(d_row*d_row + d_col*d_col);   // (row1, col1)
    const double d2 = sqrt(u_row*u_row + d_col*d_col);   // (row2, col1)
    const double d3 = sqrt(d_row*d_row + u_col*u_col);   // (row1, col2)
    const double d4 = sqrt(u_row*u_row + u_col*u_col);   // (row2, col2)

    // Only d1 can reach 0 (d_row and d_col are in [0, 1)). A sample sitting on
    // a pixel takes that pixel's value; this also avoids dividing by zero.
    if (d1 < 1e-12) {
        return v1;
    }

    // Closer neighbours must contribute more, so weight by inverse distance.
    // Weighting by the distance itself would let the farthest pixel dominate.
    const double w1 = 1.0 / d1;
    const double w2 = 1.0 / d2;
    const double w3 = 1.0 / d3;
    const double w4 = 1.0 / d4;
    const double sum = w1 + w2 + w3 + w4;

    return ((uint8_t) round((w1*v1 + w2*v2 + w3*v3 + w4*v4) / sum));
}
| 26 | 100 | |
| 27 | 101 | /* |
| ... | ... | @@ -31,9 +105,9 @@ namespace br { namespace cuda { |
| 31 | 105 | * src_row_pnt - Output: the computed fractional (unrounded) source pixel row (mapping from this row) |
| 32 | 106 | * src_col_pnt - Output: the computed fractional (unrounded) source pixel column (mapping from this col) |
| 33 | 107 | */ |
| 34 | - __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, int* src_row, int* src_col){ | |
| 35 | - *src_col = round(dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]); | |
| 36 | - *src_row = round(dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]); | |
| 108 | + __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){ | |
| 109 | + *src_col_pnt = dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]; | |
| 110 | + *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; | |
| 37 | 111 | |
| 38 | 112 | //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); |
| 39 | 113 | |
| ... | ... | @@ -47,8 +121,8 @@ namespace br { namespace cuda { |
| 47 | 121 | |
| 48 | 122 | //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); |
| 49 | 123 | |
| 50 | - int srcRowInd; | |
| 51 | - int srcColInd; | |
| 124 | + double srcRowPnt; | |
| 125 | + double srcColPnt; | |
| 52 | 126 | |
| 53 | 127 | // don't do anything if the index is out of bounds |
| 54 | 128 | if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) { |
| ... | ... | @@ -60,8 +134,10 @@ namespace br { namespace cuda { |
| 60 | 134 | } |
| 61 | 135 | } |
| 62 | 136 | |
| 63 | - cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd); | |
| 64 | - const uint8_t cval = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 137 | + cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); | |
| 138 | + //const uint8_t cval = cudaaffine_kernel_get_distance_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 139 | + const uint8_t cval = cudaaffine_kernel_get_bilinear_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 140 | + //const uint8_t cval = cudaaffine_kernel_get_pixel_value(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value | |
| 65 | 141 | |
| 66 | 142 | dstPtr[dstIndex] = cval; |
| 67 | 143 | } | ... | ... |