From b99b23d0b534302818cb2d39f7a9cb6cc8f01e68 Mon Sep 17 00:00:00 2001 From: Colin Heinzmann Date: Wed, 20 Apr 2016 10:56:05 -0400 Subject: [PATCH] increased parallelization of CUDAAffine --- openbr/plugins/cuda/cudaaffine.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openbr/plugins/cuda/cudaaffine.cu b/openbr/plugins/cuda/cudaaffine.cu index 6306cb2..6fa707b 100644 --- a/openbr/plugins/cuda/cudaaffine.cu +++ b/openbr/plugins/cuda/cudaaffine.cu @@ -172,7 +172,7 @@ namespace br { namespace cuda { namespace affine { CUDA_SAFE_MALLOC(dstPtr, dstRows*dstCols*sizeof(uint8_t), &err); // call the bilinear kernel function - dim3 threadsPerBlock(8, 8); + dim3 threadsPerBlock(32, 16); dim3 numBlocks(dstCols/threadsPerBlock.x + 1, dstRows/threadsPerBlock.y + 1); @@ -186,7 +186,7 @@ namespace br { namespace cuda { namespace affine { cudaError_t err; double* gpuInverse; - dim3 threadsPerBlock(8, 8); + dim3 threadsPerBlock(32, 16); dim3 numBlocks(dst_cols/threadsPerBlock.x + 1, dst_rows/threadsPerBlock.y + 1); -- libgit2 0.21.4