Commit f633017d134aafeb46b1d866c8bc1abac42b1599

Authored by DepthDeluxe
1 parent edd8d3e4

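Added proper block filling: launch extra blocks to cover the leftover rows/columns when the image size is not a multiple of the block size, and skip out-of-range threads in the kernel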

openbr/plugins/cuda/passthrough.cu
... ... @@ -12,10 +12,14 @@ namespace br { namespace cuda {
12 12 int rowInd = blockIdx.y*blockDim.y+threadIdx.y;
13 13 int colInd = blockIdx.x*blockDim.x+threadIdx.x;
14 14  
  15 + // don't do anything if we are outside the allowable positions
  16 + if (rowInd >= rows || colInd >= cols)
  17 + return;
  18 +
15 19 uint8_t srcVal = (srcPtr + rowInd*srcStep)[colInd];
16 20 uint8_t* rowDstPtr = dstPtr + rowInd*dstStep;
17 21  
18   - rowDstPtr[colInd] = srcVal;
  22 + rowDstPtr[colInd] = srcVal * 2;
19 23 }
20 24  
21 25 void passthrough_wrapper(GpuMat& src, GpuMat& dst) {
... ... @@ -28,8 +32,8 @@ namespace br { namespace cuda {
28 32  
29 33 // make 8 * 8 = 64 square block
30 34 dim3 threadsPerBlock(8, 8);
31   - dim3 numBlocks(imageWidth / threadsPerBlock.x,
32   - imageHeight / threadsPerBlock.y);
  35 + dim3 numBlocks(imageWidth / threadsPerBlock.x + 1,
  36 + imageHeight / threadsPerBlock.y + 1);
33 37  
34 38 passthrough_kernel<<<numBlocks, threadsPerBlock>>>(srcPtr, dstPtr, src.step, dst.step, imageWidth, imageHeight);
35 39 }
... ...