Commit 15c8a584ab7bbe182ff3006a2bd37165b7a97cba
1 parent: b8c2696b
fixed memory errors with affine
Showing 2 changed files with 53 additions and 52 deletions
openbr/plugins/cuda/cudaaffine.cpp
| ... | ... | @@ -95,6 +95,11 @@ namespace br |
| 95 | 95 | int cols = *((int*)srcDataPtr[2]); |
| 96 | 96 | int type = *((int*)srcDataPtr[3]); |
| 97 | 97 | |
| 98 | + if (type != CV_8UC1) { | |
| 99 | + cout << "ERR: Invalid image format!" << endl; | |
| 100 | + return; | |
| 101 | + } | |
| 102 | + | |
| 98 | 103 | Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); |
| 99 | 104 | void** dstDataPtr = dstMat.ptr<void*>(); |
| 100 | 105 | |
| ... | ... | @@ -119,6 +124,12 @@ namespace br |
| 119 | 124 | int cols = *((int*)srcDataPtr[2]); |
| 120 | 125 | int type = *((int*)srcDataPtr[3]); |
| 121 | 126 | |
| 127 | + if (type != CV_8UC1) { | |
| 128 | + cout << "ERR: Invalid image format!" << endl; | |
| 129 | + return; | |
| 130 | + } | |
| 131 | + | |
| 132 | + | |
| 122 | 133 | Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); |
| 123 | 134 | void** dstDataPtr = dstMat.ptr<void*>(); |
| 124 | 135 | ... | ... |
openbr/plugins/cuda/cudaaffine.cu
| ... | ... | @@ -31,27 +31,11 @@ using namespace cv::gpu; |
| 31 | 31 | namespace br { namespace cuda { namespace affine { |
| 32 | 32 | |
| 33 | 33 | __device__ __forceinline__ uint8_t getPixelValueDevice(int row, int col, uint8_t* srcPtr, int rows, int cols) { |
| 34 | - if (row < 0 || row > rows || col < 0 || col > cols) { | |
| 35 | - if (row > rows || col > cols) { | |
| 36 | - return 0; | |
| 37 | - } else{ | |
| 38 | - return 0; | |
| 39 | - } | |
| 40 | - } | |
| 41 | 34 | return (srcPtr + row*cols)[col]; |
| 42 | 35 | } |
| 43 | 36 | |
| 44 | 37 | |
| 45 | 38 | __device__ __forceinline__ uint8_t getBilinearPixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { |
| 46 | - // don't do anything if the index is out of bounds | |
| 47 | - if (row < 0 || row > rows || col < 0 || col > cols) { | |
| 48 | - if (row > rows || col > cols) { | |
| 49 | - return 0; | |
| 50 | - } else{ | |
| 51 | - return 0; | |
| 52 | - } | |
| 53 | - } | |
| 54 | - | |
| 55 | 39 | // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf |
| 56 | 40 | // Bilinear Transformation |
| 57 | 41 | // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy) |
| ... | ... | @@ -75,15 +59,6 @@ namespace br { namespace cuda { namespace affine { |
| 75 | 59 | } |
| 76 | 60 | |
| 77 | 61 | __device__ __forceinline__ uint8_t getDistancePixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { |
| 78 | - // don't do anything if the index is out of bounds | |
| 79 | - if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { | |
| 80 | - if (row >= rows || col >= cols) { | |
| 81 | - return 0; | |
| 82 | - } else{ | |
| 83 | - return 0; | |
| 84 | - } | |
| 85 | - } | |
| 86 | - | |
| 87 | 62 | int row1 = floor(row); |
| 88 | 63 | int row2 = row1+1; |
| 89 | 64 | |
| ... | ... | @@ -128,26 +103,42 @@ namespace br { namespace cuda { namespace affine { |
| 128 | 103 | } |
| 129 | 104 | |
| 130 | 105 | __global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) { |
| 131 | - int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; | |
| 132 | - int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; | |
| 133 | - int dstIndex = dstRowInd*dstCols + dstColInd; | |
| 134 | - | |
| 135 | - // don't do anything if the index is out of bounds | |
| 136 | - if (dstRowInd < 1 || dstRowInd >= dstRows-1 || dstColInd < 1 || dstColInd >= dstCols-1) { | |
| 137 | - if (dstRowInd >= dstRows || dstColInd >= dstCols) { | |
| 138 | - return; | |
| 139 | - } else{ | |
| 140 | - dstPtr[dstIndex] = 0; | |
| 141 | - return; | |
| 142 | - } | |
| 143 | - } | |
| 144 | - | |
| 145 | - double rowScaleFactor = (double)dstRows / (double)srcRows; | |
| 146 | - double colScaleFactor = (double)dstCols / (double)srcCols; | |
| 147 | - | |
| 148 | - uint8_t out = getBilinearPixelValueDevice(dstRowInd/rowScaleFactor, dstColInd/colScaleFactor, srcPtr, srcRows, srcCols); | |
| 149 | - | |
| 150 | - dstPtr[dstIndex] = out; | |
| 106 | + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; | |
| 107 | + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; | |
| 108 | + int dstIndex = dstRowInd*dstCols+dstColInd; | |
| 109 | + | |
| 110 | + // destination boundary checking | |
| 111 | + if (dstRowInd >= dstRows || dstColInd >= dstCols) { | |
| 112 | + return; | |
| 113 | + } | |
| 114 | + | |
| 115 | + // get the reference indices and relative amounts | |
| 116 | + float exactSrcRowInd = (float)dstRowInd / (float)dstRows * (float)srcRows; | |
| 117 | + int minSrcRowInd = (int)exactSrcRowInd; | |
| 118 | + int maxSrcRowInd = minSrcRowInd+1; | |
| 119 | + float relSrcRowInd = 1.-(exactSrcRowInd-(float)minSrcRowInd); | |
| 120 | + | |
| 121 | + // get the reference indices and relative amounts | |
| 122 | + double exactSrcColInd = (double)dstColInd / (double)dstCols * (double)srcCols; | |
| 123 | + int minSrcColInd = (int)exactSrcColInd; | |
| 124 | + int maxSrcColInd = minSrcColInd+1; | |
| 125 | + float relSrcColInd = 1.-(exactSrcColInd-(float)minSrcColInd); | |
| 126 | + | |
| 127 | + // perform boundary checking | |
| 128 | + if (minSrcRowInd < 0 || maxSrcRowInd >= srcRows || minSrcColInd < 0 || maxSrcColInd >= srcCols) { | |
| 129 | + dstPtr[dstIndex] = 0; | |
| 130 | + return; | |
| 131 | + } | |
| 132 | + | |
| 133 | + // get each of the pixel values | |
| 134 | + float topLeft = srcPtr[minSrcRowInd*srcCols+minSrcColInd]; | |
| 135 | + float topRight = srcPtr[minSrcRowInd*srcCols+maxSrcColInd]; | |
| 136 | + float bottomLeft = srcPtr[maxSrcRowInd*srcCols+minSrcColInd]; | |
| 137 | + float bottomRight = srcPtr[maxSrcRowInd*srcCols+maxSrcColInd]; | |
| 138 | + | |
| 139 | + float out = relSrcRowInd*relSrcColInd*topLeft + relSrcRowInd*(1.-relSrcColInd)*topRight + (1.-relSrcRowInd)*relSrcColInd*bottomLeft + (1.-relSrcRowInd)*(1.-relSrcColInd)*bottomRight; | |
| 140 | + | |
| 141 | + dstPtr[dstIndex] = (int)out; | |
| 151 | 142 | } |
| 152 | 143 | |
| 153 | 144 | __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ |
| ... | ... | @@ -159,13 +150,12 @@ namespace br { namespace cuda { namespace affine { |
| 159 | 150 | double srcColPnt; |
| 160 | 151 | |
| 161 | 152 | // don't do anything if the index is out of bounds |
| 162 | - if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) { | |
| 163 | - if (dstRowInd >= dst_rows || dstColInd >= dst_cols) { | |
| 164 | - return; | |
| 165 | - } else{ | |
| 166 | - dstPtr[dstIndex] = 0; | |
| 167 | - return; | |
| 168 | - } | |
| 153 | + if (dstRowInd >= dst_rows || dstColInd >= dst_cols) { | |
| 154 | + return; | |
| 155 | + } | |
| 156 | + if (dstRowInd == 0 || dstRowInd == dst_rows-1 || dstColInd ==0 || dstColInd == dst_cols-1) { | |
| 157 | + dstPtr[dstIndex] = 0; | |
| 158 | + return; | |
| 169 | 159 | } |
| 170 | 160 | |
| 171 | 161 | getSrcCoordDevice(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); | ... | ... |