From 93a9cf84e9ba742af7fe4d579706cbd53b0197fc Mon Sep 17 00:00:00 2001 From: boolli Date: Mon, 29 Feb 2016 15:35:12 -0500 Subject: [PATCH] Optimize the GPU grayscale transformation a little bit, but it's still slower than the CPU version --- openbr/plugins/cuda/cudargb2grayscale.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/openbr/plugins/cuda/cudargb2grayscale.cu b/openbr/plugins/cuda/cudargb2grayscale.cu index ba10b80..266389c 100644 --- a/openbr/plugins/cuda/cudargb2grayscale.cu +++ b/openbr/plugins/cuda/cudargb2grayscale.cu @@ -19,22 +19,22 @@ namespace br{ namespace cuda { int rowInd = blockIdx.y*blockDim.y+threadIdx.y; int colInd = blockIdx.x*blockDim.x+threadIdx.x; int index = rowInd*cols + colInd; - if (rowInd < 1 || rowInd >= rows-1 || colInd < 1 || colInd >= cols-1) { - if (rowInd >= rows || colInd >= cols) { - return; - } else { - return; - } + if (rowInd < 0 || rowInd >= rows || colInd < 0 || colInd >= cols) { + return; } + int new_index = 3 * index; + float g = (float) srcPtr[new_index]; + float b = (float) srcPtr[new_index+1]; + float r = (float) srcPtr[new_index+2]; - dstPtr[index] = 0.299f * srcPtr[3*index] + 0.587f * srcPtr[3*index+1] + 0.114f * srcPtr[3*index+2]; + dstPtr[index] = (uint8_t) (0.299f * g + 0.587f * b + 0.114f * r); return; } void cudargb2grayscale_wrapper(void* srcPtr, void** dstPtr, int rows, int cols) { cudaError_t err; - dim3 threadsPerBlock(8, 8); + dim3 threadsPerBlock(9, 9); dim3 numBlocks(cols/threadsPerBlock.x + 1, rows/threadsPerBlock.y + 1); CUDA_SAFE_MALLOC(dstPtr, rows*cols*sizeof(uint8_t), &err); -- libgit2 0.21.4