Commit b99b23d0b534302818cb2d39f7a9cb6cc8f01e68

Authored by Colin Heinzmann
1 parent 155b284d

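Increased parallelization of CUDAAffine: the thread-block size at both kernel launch sites changes from 8x8 (64 threads) to 32x16 (512 threads)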

openbr/plugins/cuda/cudaaffine.cu
... ... @@ -172,7 +172,7 @@ namespace br { namespace cuda { namespace affine {
172 172 CUDA_SAFE_MALLOC(dstPtr, dstRows*dstCols*sizeof(uint8_t), &err);
173 173  
174 174 // call the bilinear kernel function
175   - dim3 threadsPerBlock(8, 8);
  175 + dim3 threadsPerBlock(32, 16);
176 176 dim3 numBlocks(dstCols/threadsPerBlock.x + 1,
177 177 dstRows/threadsPerBlock.y + 1);
178 178  
... ... @@ -186,7 +186,7 @@ namespace br { namespace cuda { namespace affine {
186 186 cudaError_t err;
187 187 double* gpuInverse;
188 188  
189   - dim3 threadsPerBlock(8, 8);
  189 + dim3 threadsPerBlock(32, 16);
190 190 dim3 numBlocks(dst_cols/threadsPerBlock.x + 1,
191 191 dst_rows/threadsPerBlock.y + 1);
192 192  
... ...