Commit 3631db166b9992504463cf5f995b60e2d077589e

Authored by Greg Schrock
1 parent e534ff9c

Working bilinear affine warp

openbr/plugins/cuda/cudaaffine.cu
... ... @@ -14,14 +14,88 @@ using namespace cv::gpu;
14 14 namespace br { namespace cuda {
15 15  
/*
 * Reads one 8-bit pixel from the source image, treating everything outside
 * the image as 0.
 *
 * row    - Row index of the requested pixel
 * col    - Column index of the requested pixel
 * srcPtr - Start of the image; the pixel is addressed as srcPtr[row*cols + col]
 * rows   - Number of rows in the image
 * cols   - Number of columns in the image
 *
 * Returns the pixel value, or 0 when (row, col) is outside the image.
 */
__device__ __forceinline__ uint8_t cudaaffine_kernel_get_pixel_value(int row, int col, uint8_t* srcPtr, int rows, int cols) {
    // Valid indices are [0, rows) x [0, cols). The upper bounds must be
    // exclusive: row == rows would read past the end of the buffer, and
    // col == cols would silently alias the first pixel of the next row.
    if (row < 0 || row >= rows || col < 0 || col >= cols) {
        return 0;
    }
    return (srcPtr + row*cols)[col];
}
  26 +
  27 +
/*
 * Samples the source image at a fractional coordinate using bilinear
 * interpolation of the four surrounding pixels. Pixels that fall outside
 * the image contribute 0.
 *
 * row    - Fractional row coordinate to sample
 * col    - Fractional column coordinate to sample
 * srcPtr - Start of the source image (passed through to the pixel fetch)
 * rows   - Number of rows in the source image
 * cols   - Number of columns in the source image
 *
 * Returns the interpolated value rounded to the nearest integer, or 0 when
 * (row, col) is outside [0, rows) x [0, cols).
 */
__device__ __forceinline__ uint8_t cudaaffine_kernel_get_bilinear_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) {
    // Upper bounds are exclusive: row == rows / col == cols is already
    // outside the image.
    if (row < 0 || row >= rows || col < 0 || col >= cols) {
        return 0;
    }

    // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf
    // Bilinear Transformation
    // f(Px, Py) = f(Q11)*(1-Rx)*(1-Sy) + f(Q21)*Rx*(1-Sy) + f(Q12)*(1-Rx)*Sy + f(Q22)*Rx*Sy

    const int row1 = floor(row);
    const int row2 = row1 + 1;

    const int col1 = floor(col);
    const int col2 = col1 + 1;

    // Fractional offsets from the top-left neighbour, each in [0, 1).
    const double d_row = row - row1;
    const double d_col = col - col1;

    // (row1, col1) is in range because of the guard above. On the last
    // row/column the "+1" neighbours land on rows/cols, so they are checked
    // here and replaced by 0 instead of being dereferenced.
    const bool row2_inside = row2 < rows;
    const bool col2_inside = col2 < cols;

    const int Q11 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols);
    const int Q21 = row2_inside ? cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols) : 0;
    const int Q12 = col2_inside ? cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols) : 0;
    const int Q22 = (row2_inside && col2_inside) ? cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols) : 0;

    // The four weights are non-negative and sum to 1, so val stays in [0, 255].
    const double val = Q22*(d_row*d_col) + Q12*((1-d_row)*d_col) + Q21*(d_row*(1-d_col)) + Q11*((1-d_row)*(1-d_col));
    return ((uint8_t) round(val));
}
  59 +
/*
 * Samples the source image at a fractional coordinate using inverse-distance
 * weighting of the four surrounding pixels: the closer a neighbour is to the
 * sample point, the more it contributes.
 *
 * row    - Fractional row coordinate to sample
 * col    - Fractional column coordinate to sample
 * srcPtr - Start of the source image (passed through to the pixel fetch)
 * rows   - Number of rows in the source image
 * cols   - Number of columns in the source image
 *
 * Returns the weighted value rounded to the nearest integer, or 0 when the
 * coordinate is outside [1, rows-1) x [1, cols-1).
 */
__device__ __forceinline__ uint8_t cudaaffine_kernel_get_distance_pixel_value(double row, double col, uint8_t* srcPtr, int rows, int cols) {
    // don't do anything if the index is out of bounds
    // (this also guarantees that all four neighbours below are inside the image)
    if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) {
        return 0;
    }

    const int row1 = floor(row);
    const int row2 = row1 + 1;

    const int col1 = floor(col);
    const int col2 = col1 + 1;

    // Fractional offsets from the top-left neighbour, each in [0, 1).
    const double d_row = row - row1;
    const double d_col = col - col1;

    const uint8_t v1 = cudaaffine_kernel_get_pixel_value(row1, col1, srcPtr, rows, cols);
    const uint8_t v2 = cudaaffine_kernel_get_pixel_value(row2, col1, srcPtr, rows, cols);
    const uint8_t v3 = cudaaffine_kernel_get_pixel_value(row1, col2, srcPtr, rows, cols);
    const uint8_t v4 = cudaaffine_kernel_get_pixel_value(row2, col2, srcPtr, rows, cols);

    // Euclidean distance from the sample point to each neighbour.
    const double d1 = sqrt(d_row*d_row + d_col*d_col);                 // (row1, col1)
    const double d2 = sqrt((1-d_row)*(1-d_row) + d_col*d_col);         // (row2, col1)
    const double d3 = sqrt(d_row*d_row + (1-d_col)*(1-d_col));         // (row1, col2)
    const double d4 = sqrt((1-d_row)*(1-d_row) + (1-d_col)*(1-d_col)); // (row2, col2)

    // Both offsets are < 1, so only d1 can be exactly zero. In that case the
    // sample sits on a pixel centre: return it directly and avoid dividing by 0.
    if (d1 == 0) {
        return v1;
    }

    // Weight by the reciprocal of the distance so nearer pixels dominate;
    // weighting by the distance itself would favour the farthest neighbour.
    const double i1 = 1.0/d1;
    const double i2 = 1.0/d2;
    const double i3 = 1.0/d3;
    const double i4 = 1.0/d4;
    const double sum = i1 + i2 + i3 + i4;

    // Normalised weights sum to 1, so the result stays in [0, 255].
    return ((uint8_t) round((i1*v1 + i2*v2 + i3*v3 + i4*v4) / sum));
}
26 100  
27 101 /*
... ... @@ -31,9 +105,9 @@ namespace br { namespace cuda {
31 105 * src_row - The computed source pixel row (mapping from this row)
32 106 * src_col - The computed source pixel column (mapping from this col)
33 107 */
34   - __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, int* src_row, int* src_col){
35   - *src_col = round(dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6]);
36   - *src_row = round(dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7]);
  108 + __device__ __forceinline__ void cudaaffine_kernel_get_src_coord(double *trans_inv, int dst_row, int dst_col, double* src_row_pnt, double* src_col_pnt){
  109 + *src_col_pnt = dst_col * trans_inv[0] + dst_row * trans_inv[3] + trans_inv[6];
  110 + *src_row_pnt = dst_col * trans_inv[1] + dst_row * trans_inv[4] + trans_inv[7];
37 111  
38 112 //printf("Dst: [%d, %d, 1] = [%d, %d, 1] \n[ %0.4f, %0.4f, %0.4f] \n[ %0.4f, %0.4f, %0.4f ]\n[ %0.4f, %0.4f, %0.4f ]\n\n", *src_col, *src_row, dst_col, dst_row, trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]);
39 113  
... ... @@ -47,8 +121,8 @@ namespace br { namespace cuda {
47 121  
48 122 //printf("Kernel Inv:\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n[%0.4f %0.4f %0.4f]\n\n", trans_inv[0], trans_inv[1], trans_inv[2], trans_inv[3], trans_inv[4], trans_inv[5], trans_inv[6], trans_inv[7], trans_inv[8]);
49 123  
50   - int srcRowInd;
51   - int srcColInd;
  124 + double srcRowPnt;
  125 + double srcColPnt;
52 126  
53 127 // don't do anything if the index is out of bounds
54 128 if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) {
... ... @@ -60,8 +134,10 @@ namespace br { namespace cuda {
60 134 }
61 135 }
62 136  
63   - cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowInd, &srcColInd);
64   - const uint8_t cval = cudaaffine_kernel_get_pixel_value(srcRowInd, srcColInd, srcPtr, src_rows, src_cols); // Get initial pixel value
  137 + cudaaffine_kernel_get_src_coord(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt);
  138 + //const uint8_t cval = cudaaffine_kernel_get_distance_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value
  139 + const uint8_t cval = cudaaffine_kernel_get_bilinear_pixel_value(srcRowPnt, srcColPnt, srcPtr, src_rows, src_cols); // Get initial pixel value
  140 + //const uint8_t cval = cudaaffine_kernel_get_pixel_value(round(srcRowPnt), round(srcColPnt), srcPtr, src_rows, src_cols); // Get initial pixel value
65 141  
66 142 dstPtr[dstIndex] = cval;
67 143 }
... ...