Commit 15c8a584ab7bbe182ff3006a2bd37165b7a97cba
1 parent: b8c2696b
fixed memory errors with affine
Showing 2 changed files with 53 additions and 52 deletions.
openbr/plugins/cuda/cudaaffine.cpp
| @@ -95,6 +95,11 @@ namespace br | @@ -95,6 +95,11 @@ namespace br | ||
| 95 | int cols = *((int*)srcDataPtr[2]); | 95 | int cols = *((int*)srcDataPtr[2]); |
| 96 | int type = *((int*)srcDataPtr[3]); | 96 | int type = *((int*)srcDataPtr[3]); |
| 97 | 97 | ||
| 98 | + if (type != CV_8UC1) { | ||
| 99 | + cout << "ERR: Invalid image format!" << endl; | ||
| 100 | + return; | ||
| 101 | + } | ||
| 102 | + | ||
| 98 | Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | 103 | Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); |
| 99 | void** dstDataPtr = dstMat.ptr<void*>(); | 104 | void** dstDataPtr = dstMat.ptr<void*>(); |
| 100 | 105 | ||
| @@ -119,6 +124,12 @@ namespace br | @@ -119,6 +124,12 @@ namespace br | ||
| 119 | int cols = *((int*)srcDataPtr[2]); | 124 | int cols = *((int*)srcDataPtr[2]); |
| 120 | int type = *((int*)srcDataPtr[3]); | 125 | int type = *((int*)srcDataPtr[3]); |
| 121 | 126 | ||
| 127 | + if (type != CV_8UC1) { | ||
| 128 | + cout << "ERR: Invalid image format!" << endl; | ||
| 129 | + return; | ||
| 130 | + } | ||
| 131 | + | ||
| 132 | + | ||
| 122 | Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); | 133 | Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); |
| 123 | void** dstDataPtr = dstMat.ptr<void*>(); | 134 | void** dstDataPtr = dstMat.ptr<void*>(); |
| 124 | 135 |
openbr/plugins/cuda/cudaaffine.cu
| @@ -31,27 +31,11 @@ using namespace cv::gpu; | @@ -31,27 +31,11 @@ using namespace cv::gpu; | ||
| 31 | namespace br { namespace cuda { namespace affine { | 31 | namespace br { namespace cuda { namespace affine { |
| 32 | 32 | ||
| 33 | __device__ __forceinline__ uint8_t getPixelValueDevice(int row, int col, uint8_t* srcPtr, int rows, int cols) { | 33 | __device__ __forceinline__ uint8_t getPixelValueDevice(int row, int col, uint8_t* srcPtr, int rows, int cols) { |
| 34 | - if (row < 0 || row > rows || col < 0 || col > cols) { | ||
| 35 | - if (row > rows || col > cols) { | ||
| 36 | - return 0; | ||
| 37 | - } else{ | ||
| 38 | - return 0; | ||
| 39 | - } | ||
| 40 | - } | ||
| 41 | return (srcPtr + row*cols)[col]; | 34 | return (srcPtr + row*cols)[col]; |
| 42 | } | 35 | } |
| 43 | 36 | ||
| 44 | 37 | ||
| 45 | __device__ __forceinline__ uint8_t getBilinearPixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { | 38 | __device__ __forceinline__ uint8_t getBilinearPixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { |
| 46 | - // don't do anything if the index is out of bounds | ||
| 47 | - if (row < 0 || row > rows || col < 0 || col > cols) { | ||
| 48 | - if (row > rows || col > cols) { | ||
| 49 | - return 0; | ||
| 50 | - } else{ | ||
| 51 | - return 0; | ||
| 52 | - } | ||
| 53 | - } | ||
| 54 | - | ||
| 55 | // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf | 39 | // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf |
| 56 | // Bilinear Transformation | 40 | // Bilinear Transformation |
| 57 | // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy) | 41 | // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy) |
| @@ -75,15 +59,6 @@ namespace br { namespace cuda { namespace affine { | @@ -75,15 +59,6 @@ namespace br { namespace cuda { namespace affine { | ||
| 75 | } | 59 | } |
| 76 | 60 | ||
| 77 | __device__ __forceinline__ uint8_t getDistancePixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { | 61 | __device__ __forceinline__ uint8_t getDistancePixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { |
| 78 | - // don't do anything if the index is out of bounds | ||
| 79 | - if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) { | ||
| 80 | - if (row >= rows || col >= cols) { | ||
| 81 | - return 0; | ||
| 82 | - } else{ | ||
| 83 | - return 0; | ||
| 84 | - } | ||
| 85 | - } | ||
| 86 | - | ||
| 87 | int row1 = floor(row); | 62 | int row1 = floor(row); |
| 88 | int row2 = row1+1; | 63 | int row2 = row1+1; |
| 89 | 64 | ||
| @@ -128,26 +103,42 @@ namespace br { namespace cuda { namespace affine { | @@ -128,26 +103,42 @@ namespace br { namespace cuda { namespace affine { | ||
| 128 | } | 103 | } |
| 129 | 104 | ||
| 130 | __global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) { | 105 | __global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) { |
| 131 | - int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; | ||
| 132 | - int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; | ||
| 133 | - int dstIndex = dstRowInd*dstCols + dstColInd; | ||
| 134 | - | ||
| 135 | - // don't do anything if the index is out of bounds | ||
| 136 | - if (dstRowInd < 1 || dstRowInd >= dstRows-1 || dstColInd < 1 || dstColInd >= dstCols-1) { | ||
| 137 | - if (dstRowInd >= dstRows || dstColInd >= dstCols) { | ||
| 138 | - return; | ||
| 139 | - } else{ | ||
| 140 | - dstPtr[dstIndex] = 0; | ||
| 141 | - return; | ||
| 142 | - } | ||
| 143 | - } | ||
| 144 | - | ||
| 145 | - double rowScaleFactor = (double)dstRows / (double)srcRows; | ||
| 146 | - double colScaleFactor = (double)dstCols / (double)srcCols; | ||
| 147 | - | ||
| 148 | - uint8_t out = getBilinearPixelValueDevice(dstRowInd/rowScaleFactor, dstColInd/colScaleFactor, srcPtr, srcRows, srcCols); | ||
| 149 | - | ||
| 150 | - dstPtr[dstIndex] = out; | 106 | + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| 107 | + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x; | ||
| 108 | + int dstIndex = dstRowInd*dstCols+dstColInd; | ||
| 109 | + | ||
| 110 | + // destination boundary checking | ||
| 111 | + if (dstRowInd >= dstRows || dstColInd >= dstCols) { | ||
| 112 | + return; | ||
| 113 | + } | ||
| 114 | + | ||
| 115 | + // get the reference indices and relative amounts | ||
| 116 | + float exactSrcRowInd = (float)dstRowInd / (float)dstRows * (float)srcRows; | ||
| 117 | + int minSrcRowInd = (int)exactSrcRowInd; | ||
| 118 | + int maxSrcRowInd = minSrcRowInd+1; | ||
| 119 | + float relSrcRowInd = 1.-(exactSrcRowInd-(float)minSrcRowInd); | ||
| 120 | + | ||
| 121 | + // get the reference indices and relative amounts | ||
| 122 | + double exactSrcColInd = (double)dstColInd / (double)dstCols * (double)srcCols; | ||
| 123 | + int minSrcColInd = (int)exactSrcColInd; | ||
| 124 | + int maxSrcColInd = minSrcColInd+1; | ||
| 125 | + float relSrcColInd = 1.-(exactSrcColInd-(float)minSrcColInd); | ||
| 126 | + | ||
| 127 | + // perform boundary checking | ||
| 128 | + if (minSrcRowInd < 0 || maxSrcRowInd >= srcRows || minSrcColInd < 0 || maxSrcColInd >= srcCols) { | ||
| 129 | + dstPtr[dstIndex] = 0; | ||
| 130 | + return; | ||
| 131 | + } | ||
| 132 | + | ||
| 133 | + // get each of the pixel values | ||
| 134 | + float topLeft = srcPtr[minSrcRowInd*srcCols+minSrcColInd]; | ||
| 135 | + float topRight = srcPtr[minSrcRowInd*srcCols+maxSrcColInd]; | ||
| 136 | + float bottomLeft = srcPtr[maxSrcRowInd*srcCols+minSrcColInd]; | ||
| 137 | + float bottomRight = srcPtr[maxSrcRowInd*srcCols+maxSrcColInd]; | ||
| 138 | + | ||
| 139 | + float out = relSrcRowInd*relSrcColInd*topLeft + relSrcRowInd*(1.-relSrcColInd)*topRight + (1.-relSrcRowInd)*relSrcColInd*bottomLeft + (1.-relSrcRowInd)*(1.-relSrcColInd)*bottomRight; | ||
| 140 | + | ||
| 141 | + dstPtr[dstIndex] = (int)out; | ||
| 151 | } | 142 | } |
| 152 | 143 | ||
| 153 | __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ | 144 | __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ |
| @@ -159,13 +150,12 @@ namespace br { namespace cuda { namespace affine { | @@ -159,13 +150,12 @@ namespace br { namespace cuda { namespace affine { | ||
| 159 | double srcColPnt; | 150 | double srcColPnt; |
| 160 | 151 | ||
| 161 | // don't do anything if the index is out of bounds | 152 | // don't do anything if the index is out of bounds |
| 162 | - if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) { | ||
| 163 | - if (dstRowInd >= dst_rows || dstColInd >= dst_cols) { | ||
| 164 | - return; | ||
| 165 | - } else{ | ||
| 166 | - dstPtr[dstIndex] = 0; | ||
| 167 | - return; | ||
| 168 | - } | 153 | + if (dstRowInd >= dst_rows || dstColInd >= dst_cols) { |
| 154 | + return; | ||
| 155 | + } | ||
| 156 | + if (dstRowInd == 0 || dstRowInd == dst_rows-1 || dstColInd ==0 || dstColInd == dst_cols-1) { | ||
| 157 | + dstPtr[dstIndex] = 0; | ||
| 158 | + return; | ||
| 169 | } | 159 | } |
| 170 | 160 | ||
| 171 | getSrcCoordDevice(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); | 161 | getSrcCoordDevice(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); |