Commit 3631db166b9992504463cf5f995b60e2d077589e
1 parent
e534ff9c
Working bilinear affine warp
Showing
1 changed file
with
85 additions
and
9 deletions
openbr/plugins/cuda/cudaaffine.cu
| @@ -14,14 +14,88 @@ using namespace cv::gpu; | @@ -14,14 +14,88 @@ using namespace cv::gpu; | ||
| 14 | namespace br { namespace cuda { | 14 | namespace br { namespace cuda { |
| 15 | 15 | ||
/*
 * Fetches a single pixel from a row-major, single-channel 8-bit image.
 *
 * row    - Row index of the requested pixel
 * col    - Column index of the requested pixel
 * srcPtr - Pointer to the first pixel; row r is read from srcPtr + r*cols
 * rows   - Number of rows in the image
 * cols   - Number of columns in the image
 *
 * Returns the pixel value, or 0 when (row, col) lies outside
 * [0, rows) x [0, cols).
 */
__device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) {
    // Valid indices are 0..rows-1 and 0..cols-1, so the upper bounds must be
    // exclusive: row == rows would read past the end of the buffer and
    // col == cols would silently return the first pixel of the next row.
    if (row < 0 || row >= rows || col < 0 || col >= cols) {
        return 0;
    }
    return (srcPtr + row*cols)[col];
}
| 26 | + | ||
| 27 | + | ||
/*
 * Samples the image at a fractional coordinate using bilinear interpolation
 * of the four surrounding pixels.
 *
 * row    - Fractional row coordinate of the sample point
 * col    - Fractional column coordinate of the sample point
 * srcPtr - Pointer to the source image, passed through to
 *          cudaaffine_kernel_get_pixel_value
 * rows   - Number of rows in the source image
 * cols   - Number of columns in the source image
 *
 * Returns the interpolated value rounded to the nearest integer, or 0 when
 * the sample point lies outside [0, rows) x [0, cols). Neighbours that fall
 * beyond the last row or column contribute 0.
 */
__device__ __forceinline__ uint8_t cudaaffine_kernel_get_bilinear_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) {
    // Reject anything outside the image. The test is written in the positive
    // form so that NaN coordinates are rejected too (every comparison with
    // NaN is false) instead of reaching floor() and the int conversion.
    if (!(row >= 0 && row < rows && col >= 0 && col < cols)) {
        return 0;
    }

    // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf
    // Bilinear Transformation
    // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy)

    const int row1 = static_cast<int>(floor(row));
    const int row2 = row1 + 1;

    const int col1 = static_cast<int>(floor(col));
    const int col2 = col1 + 1;

    // Fractional offsets of the sample point from its upper-left neighbour
    const double d_row = row - row1;
    const double d_col = col - col1;

    // row1/col1 are in range after the check above, but the +1 neighbours can
    // land on rows/cols when sampling inside the last row or column. Guard
    // them here so no out-of-range index is ever handed to the pixel fetch.
    const bool row2_ok = row2 < rows;
    const bool col2_ok = col2 < cols;

    const int Q11 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols);
    const int Q21 = row2_ok ? cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols) : 0;
    const int Q12 = col2_ok ? cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols) : 0;
    const int Q22 = (row2_ok && col2_ok) ? cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols) : 0;

    // The four weights sum to 1, so val stays within [0, 255]
    const double val = Q22*(d_row*d_col) + Q12*((1-d_row)*d_col) + Q21*(d_row*(1-d_col)) + Q11*((1-d_row)*(1-d_col));
    return ((uint8_t) round(val));
}
| 59 | + | ||
/*
 * Samples the image at a fractional coordinate by blending the four
 * surrounding pixels, each weighted by its Euclidean distance to the sample
 * point divided by the sum of all four distances.
 *
 * row    - Fractional row coordinate of the sample point
 * col    - Fractional column coordinate of the sample point
 * srcPtr - Pointer to the source image, passed through to
 *          cudaaffine_kernel_get_pixel_value
 * rows   - Number of rows in the source image
 * cols   - Number of columns in the source image
 *
 * Returns the rounded weighted sum, or 0 when the sample point is not
 * strictly inside the one-pixel border (row in [1, rows-1), col in [1, cols-1)).
 *
 * NOTE(review): each weight is distance/sum, so the farthest neighbour
 * contributes the most and a neighbour at distance 0 contributes nothing.
 * An inverse-distance weighting may have been intended -- confirm.
 */
__device__ __forceinline__ uint8_t cudaaffine_kernel_get_distance_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) {
    // Points on or outside the one-pixel border are not sampled
    if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) {
        return 0;
    }

    // Integer coordinates of the four neighbours
    int top = floor(row);
    int bottom = top+1;
    int left = floor(col);
    int right = left+1;

    // Offsets of the sample point: dr from the lower row, dc from the left column
    double dr = bottom - row;
    double dr_sq = dr*dr;
    double dc = col - left;
    double dc_sq = dc*dc;

    // (x - 1)^2 is kept in expanded form, x^2 - 2x + 1
    double dist_top_left     = sqrt(dr_sq - 2*dr + 1 + dc_sq);
    double dist_bottom_left  = sqrt(dr_sq + dc_sq);
    double dist_top_right    = sqrt(dr_sq - 2*dr + 1 + dc_sq - 2*dc + 1);
    double dist_bottom_right = sqrt(dr_sq + dc_sq - 2*dc + 1);
    double total = dist_top_left + dist_bottom_left + dist_top_right + dist_bottom_right;

    // Normalise the distances so the weights sum to 1
    double w_top_left     = dist_top_left/total;
    double w_bottom_left  = dist_bottom_left/total;
    double w_top_right    = dist_top_right/total;
    double w_bottom_right = dist_bottom_right/total;

    uint8_t p_top_left     = cudaaffine_kernel_get_pixel_value(top, left, srcPtr, rows, cols);
    uint8_t p_bottom_left  = cudaaffine_kernel_get_pixel_value(bottom, left, srcPtr, rows, cols);
    uint8_t p_top_right    = cudaaffine_kernel_get_pixel_value(top, right, srcPtr, rows, cols);
    uint8_t p_bottom_right = cudaaffine_kernel_get_pixel_value(bottom, right, srcPtr, rows, cols);

    return round(w_top_left*p_top_left + w_bottom_left*p_bottom_left + w_top_right*p_top_right + w_bottom_right*p_bottom_right);
}
| 26 | 100 | ||
| 27 | /* | 101 | /* |
| @@ -31,9 +105,9 @@ namespace br { namespace cuda { | @@ -31,9 +105,9 @@ namespace br { namespace cuda { | ||
| 31 | * src_row - The computed source pixel row (mapping from this row) | 105 | * src_row - The computed source pixel row (mapping from this row) |
| 32 | * src_col - The computed source pixel column (mapping from this col) | 106 | * src_col - The computed source pixel column (mapping from this col) |
| 33 | */ | 107 | */ |
| 34 | - __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, int* src_row, int* src_col){ | ||
| 35 | - *src_col = round(dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]); | ||
| 36 | - *src_row = round(dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]); | 108 | + __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){ |
| 109 | + *src_col_pnt = dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]; | ||
| 110 | + *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]; | ||
| 37 | 111 | ||
| 38 | //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); | 112 | //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); |
| 39 | 113 | ||
| @@ -47,8 +121,8 @@ namespace br { namespace cuda { | @@ -47,8 +121,8 @@ namespace br { namespace cuda { | ||
| 47 | 121 | ||
| 48 | //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); | 122 | //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]); |
| 49 | 123 | ||
| 50 | - int srcRowInd; | ||
| 51 | - int srcColInd; | 124 | + double srcRowPnt; |
| 125 | + double srcColPnt; | ||
| 52 | 126 | ||
| 53 | // don't do anything if the index is out of bounds | 127 | // don't do anything if the index is out of bounds |
| 54 | if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) { | 128 | if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) { |
| @@ -60,8 +134,10 @@ namespace br { namespace cuda { | @@ -60,8 +134,10 @@ namespace br { namespace cuda { | ||
| 60 | } | 134 | } |
| 61 | } | 135 | } |
| 62 | 136 | ||
| 63 | - cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd); | ||
| 64 | - const uint8_t cval = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols); // Get initial pixel value | 137 | + cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); |
| 138 | + //const uint8_t cval = cudaaffine_kernel_get_distance_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | ||
| 139 | + const uint8_t cval = cudaaffine_kernel_get_bilinear_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value | ||
| 140 | + //const uint8_t cval = cudaaffine_kernel_get_pixel_value(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value | ||
| 65 | 141 | ||
| 66 | dstPtr[dstIndex] = cval; | 142 | dstPtr[dstIndex] = cval; |
| 67 | } | 143 | } |