Commit b99b23d0b534302818cb2d39f7a9cb6cc8f01e68
1 parent
155b284d
increased parallelization of CUDAAffine: threadsPerBlock raised from (8, 8) to (32, 16) at both changed sites
Showing 1 changed file with 2 additions and 2 deletions
openbr/plugins/cuda/cudaaffine.cu
| ... | ... | @@ -172,7 +172,7 @@ namespace br { namespace cuda { namespace affine { |
| 172 | 172 | CUDA_SAFE_MALLOC(dstPtr, dstRows*dstCols*sizeof(uint8_t), &err); |
| 173 | 173 | |
| 174 | 174 | // call the bilinear kernel function |
| 175 | - dim3 threadsPerBlock(8, 8); | |
| 175 | + dim3 threadsPerBlock(32, 16); | |
| 176 | 176 | dim3 numBlocks(dstCols/threadsPerBlock.x + 1, |
| 177 | 177 | dstRows/threadsPerBlock.y + 1); |
| 178 | 178 | |
| ... | ... | @@ -186,7 +186,7 @@ namespace br { namespace cuda { namespace affine { |
| 186 | 186 | cudaError_t err; |
| 187 | 187 | double* gpuInverse; |
| 188 | 188 | |
| 189 | - dim3 threadsPerBlock(8, 8); | |
| 189 | + dim3 threadsPerBlock(32, 16); | |
| 190 | 190 | dim3 numBlocks(dst_cols/threadsPerBlock.x + 1, |
| 191 | 191 | dst_rows/threadsPerBlock.y + 1); |
| 192 | 192 | ... | ... |