Commit 15c8a584ab7bbe182ff3006a2bd37165b7a97cba

Authored by Colin Heinzmann
1 parent b8c2696b

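Fixed memory errors with the CUDA affine transform: added a CV_8UC1 image-type check in cudaaffine.cpp, removed the bounds checks from the device pixel helpers, and rewrote the boundary checks in bilinearKernel and affineKernel in cudaaffine.cu.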

openbr/plugins/cuda/cudaaffine.cpp
@@ -95,6 +95,11 @@ namespace br @@ -95,6 +95,11 @@ namespace br
95 int cols = *((int*)srcDataPtr[2]); 95 int cols = *((int*)srcDataPtr[2]);
96 int type = *((int*)srcDataPtr[3]); 96 int type = *((int*)srcDataPtr[3]);
97 97
  98 + if (type != CV_8UC1) {
  99 + cout << "ERR: Invalid image format!" << endl;
  100 + return;
  101 + }
  102 +
98 Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); 103 Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
99 void** dstDataPtr = dstMat.ptr<void*>(); 104 void** dstDataPtr = dstMat.ptr<void*>();
100 105
@@ -119,6 +124,12 @@ namespace br @@ -119,6 +124,12 @@ namespace br
119 int cols = *((int*)srcDataPtr[2]); 124 int cols = *((int*)srcDataPtr[2]);
120 int type = *((int*)srcDataPtr[3]); 125 int type = *((int*)srcDataPtr[3]);
121 126
  127 + if (type != CV_8UC1) {
  128 + cout << "ERR: Invalid image format!" << endl;
  129 + return;
  130 + }
  131 +
  132 +
122 Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type()); 133 Mat dstMat = Mat(src.m().rows, src.m().cols, src.m().type());
123 void** dstDataPtr = dstMat.ptr<void*>(); 134 void** dstDataPtr = dstMat.ptr<void*>();
124 135
openbr/plugins/cuda/cudaaffine.cu
@@ -31,27 +31,11 @@ using namespace cv::gpu; @@ -31,27 +31,11 @@ using namespace cv::gpu;
31 namespace br { namespace cuda { namespace affine { 31 namespace br { namespace cuda { namespace affine {
32 32
33 __device__ __forceinline__ uint8_t getPixelValueDevice(int row, int col, uint8_t* srcPtr, int rows, int cols) { 33 __device__ __forceinline__ uint8_t getPixelValueDevice(int row, int col, uint8_t* srcPtr, int rows, int cols) {
34 - if (row < 0 || row > rows || col < 0 || col > cols) {  
35 - if (row > rows || col > cols) {  
36 - return 0;  
37 - } else{  
38 - return 0;  
39 - }  
40 - }  
41 return (srcPtr + row*cols)[col]; 34 return (srcPtr + row*cols)[col];
42 } 35 }
43 36
44 37
45 __device__ __forceinline__ uint8_t getBilinearPixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { 38 __device__ __forceinline__ uint8_t getBilinearPixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) {
46 - // don't do anything if the index is out of bounds  
47 - if (row < 0 || row > rows || col < 0 || col > cols) {  
48 - if (row > rows || col > cols) {  
49 - return 0;  
50 - } else{  
51 - return 0;  
52 - }  
53 - }  
54 -  
55 // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf 39 // http://www.sci.utah.edu/~acoste/uou/Image/project3/ArthurCOSTE_Project3.pdf
56 // Bilinear Transformation 40 // Bilinear Transformation
57 // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy) 41 // f(Px, Py) = f(Q11)×(1−Rx)×(1−Sy)+f(Q21)×(Rx)×(1−Sy)+f(Q12)×(1−Rx)×(Sy)+f(Q22)×(Rx)×(Sy)
@@ -75,15 +59,6 @@ namespace br { namespace cuda { namespace affine { @@ -75,15 +59,6 @@ namespace br { namespace cuda { namespace affine {
75 } 59 }
76 60
77 __device__ __forceinline__ uint8_t getDistancePixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) { 61 __device__ __forceinline__ uint8_t getDistancePixelValueDevice(double row, double col, uint8_t* srcPtr, int rows, int cols) {
78 - // don't do anything if the index is out of bounds  
79 - if (row < 1 || row >= rows-1 || col < 1 || col >= cols-1) {  
80 - if (row >= rows || col >= cols) {  
81 - return 0;  
82 - } else{  
83 - return 0;  
84 - }  
85 - }  
86 -  
87 int row1 = floor(row); 62 int row1 = floor(row);
88 int row2 = row1+1; 63 int row2 = row1+1;
89 64
@@ -128,26 +103,42 @@ namespace br { namespace cuda { namespace affine { @@ -128,26 +103,42 @@ namespace br { namespace cuda { namespace affine {
128 } 103 }
129 104
130 __global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) { 105 __global__ void bilinearKernel(uint8_t* srcPtr, uint8_t* dstPtr, int srcRows, int srcCols, int dstRows, int dstCols) {
131 - int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;  
132 - int dstColInd = blockIdx.x*blockDim.x+threadIdx.x;  
133 - int dstIndex = dstRowInd*dstCols + dstColInd;  
134 -  
135 - // don't do anything if the index is out of bounds  
136 - if (dstRowInd < 1 || dstRowInd >= dstRows-1 || dstColInd < 1 || dstColInd >= dstCols-1) {  
137 - if (dstRowInd >= dstRows || dstColInd >= dstCols) {  
138 - return;  
139 - } else{  
140 - dstPtr[dstIndex] = 0;  
141 - return;  
142 - }  
143 - }  
144 -  
145 - double rowScaleFactor = (double)dstRows / (double)srcRows;  
146 - double colScaleFactor = (double)dstCols / (double)srcCols;  
147 -  
148 - uint8_t out = getBilinearPixelValueDevice(dstRowInd/rowScaleFactor, dstColInd/colScaleFactor, srcPtr, srcRows, srcCols);  
149 -  
150 - dstPtr[dstIndex] = out; 106 + int dstRowInd = blockIdx.y*blockDim.y+threadIdx.y;
  107 + int dstColInd = blockIdx.x*blockDim.x+threadIdx.x;
  108 + int dstIndex = dstRowInd*dstCols+dstColInd;
  109 +
  110 + // destination boundary checking
  111 + if (dstRowInd >= dstRows || dstColInd >= dstCols) {
  112 + return;
  113 + }
  114 +
  115 + // get the reference indices and relative amounts
  116 + float exactSrcRowInd = (float)dstRowInd / (float)dstRows * (float)srcRows;
  117 + int minSrcRowInd = (int)exactSrcRowInd;
  118 + int maxSrcRowInd = minSrcRowInd+1;
  119 + float relSrcRowInd = 1.-(exactSrcRowInd-(float)minSrcRowInd);
  120 +
  121 + // get the reference indices and relative amounts
  122 + double exactSrcColInd = (double)dstColInd / (double)dstCols * (double)srcCols;
  123 + int minSrcColInd = (int)exactSrcColInd;
  124 + int maxSrcColInd = minSrcColInd+1;
  125 + float relSrcColInd = 1.-(exactSrcColInd-(float)minSrcColInd);
  126 +
  127 + // perform boundary checking
  128 + if (minSrcRowInd < 0 || maxSrcRowInd >= srcRows || minSrcColInd < 0 || maxSrcColInd >= srcCols) {
  129 + dstPtr[dstIndex] = 0;
  130 + return;
  131 + }
  132 +
  133 + // get each of the pixel values
  134 + float topLeft = srcPtr[minSrcRowInd*srcCols+minSrcColInd];
  135 + float topRight = srcPtr[minSrcRowInd*srcCols+maxSrcColInd];
  136 + float bottomLeft = srcPtr[maxSrcRowInd*srcCols+minSrcColInd];
  137 + float bottomRight = srcPtr[maxSrcRowInd*srcCols+maxSrcColInd];
  138 +
  139 + float out = relSrcRowInd*relSrcColInd*topLeft + relSrcRowInd*(1.-relSrcColInd)*topRight + (1.-relSrcRowInd)*relSrcColInd*bottomLeft + (1.-relSrcRowInd)*(1.-relSrcColInd)*bottomRight;
  140 +
  141 + dstPtr[dstIndex] = (int)out;
151 } 142 }
152 143
153 __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){ 144 __global__ void affineKernel(uint8_t* srcPtr, uint8_t* dstPtr, double* trans_inv, int src_rows, int src_cols, int dst_rows, int dst_cols){
@@ -159,13 +150,12 @@ namespace br { namespace cuda { namespace affine { @@ -159,13 +150,12 @@ namespace br { namespace cuda { namespace affine {
159 double srcColPnt; 150 double srcColPnt;
160 151
161 // don't do anything if the index is out of bounds 152 // don't do anything if the index is out of bounds
162 - if (dstRowInd < 1 || dstRowInd >= dst_rows-1 || dstColInd < 1 || dstColInd >= dst_cols-1) {  
163 - if (dstRowInd >= dst_rows || dstColInd >= dst_cols) {  
164 - return;  
165 - } else{  
166 - dstPtr[dstIndex] = 0;  
167 - return;  
168 - } 153 + if (dstRowInd >= dst_rows || dstColInd >= dst_cols) {
  154 + return;
  155 + }
  156 + if (dstRowInd == 0 || dstRowInd == dst_rows-1 || dstColInd ==0 || dstColInd == dst_cols-1) {
  157 + dstPtr[dstIndex] = 0;
  158 + return;
169 } 159 }
170 160
171 getSrcCoordDevice(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt); 161 getSrcCoordDevice(trans_inv, dstRowInd, dstColInd, &srcRowPnt, &srcColPnt);