Commit f633017d134aafeb46b1d866c8bc1abac42b1599
1 parent
edd8d3e4
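Added proper block filling (one extra block per grid dimension plus an in-kernel bounds check) so the leftover rows and columns at the image edges are processed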
Showing 1 changed file with 7 additions and 3 deletions
openbr/plugins/cuda/passthrough.cu
| @@ -12,10 +12,14 @@ namespace br { namespace cuda { | @@ -12,10 +12,14 @@ namespace br { namespace cuda { | ||
| 12 | int rowInd = blockIdx.y*blockDim.y+threadIdx.y; | 12 | int rowInd = blockIdx.y*blockDim.y+threadIdx.y; |
| 13 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; | 13 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| 14 | 14 | ||
| 15 | + // don't do anything if we are outside the allowable positions | ||
| 16 | + if (rowInd >= rows || colInd >= cols) | ||
| 17 | + return; | ||
| 18 | + | ||
| 15 | uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd]; | 19 | uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd]; |
| 16 | uint8_t* rowDstPtr = dstPtr + rowInd*dstStep; | 20 | uint8_t* rowDstPtr = dstPtr + rowInd*dstStep; |
| 17 | 21 | ||
| 18 | - rowDstPtr[colInd] = srcVal; | 22 | + rowDstPtr[colInd] = srcVal * 2; |
| 19 | } | 23 | } |
| 20 | 24 | ||
| 21 | void passthrough_wrapper(GpuMat& src, GpuMat& dst) { | 25 | void passthrough_wrapper(GpuMat& src, GpuMat& dst) { |
| @@ -28,8 +32,8 @@ namespace br { namespace cuda { | @@ -28,8 +32,8 @@ namespace br { namespace cuda { | ||
| 28 | 32 | ||
| 29 | // make 8 * 8 = 64 square block | 33 | // make 8 * 8 = 64 square block |
| 30 | dim3 threadsPerBlock(8, 8); | 34 | dim3 threadsPerBlock(8, 8); |
| 31 | - dim3 numBlocks(imageWidth / threadsPerBlock.x, | ||
| 32 | - imageHeight / threadsPerBlock.y); | 35 | + dim3 numBlocks(imageWidth / threadsPerBlock.x + 1, |
| 36 | + imageHeight / threadsPerBlock.y + 1); | ||
| 33 | 37 | ||
| 34 | passthrough_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight); | 38 | passthrough_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight); |
| 35 | } | 39 | } |