diff --git a/openbr/plugins/cuda/cudapca.cpp b/openbr/plugins/cuda/cudapca.cpp index 1e5845c..c783f46 100644 --- a/openbr/plugins/cuda/cudapca.cpp +++ b/openbr/plugins/cuda/cudapca.cpp @@ -143,7 +143,7 @@ private: cout << "Meanbuffer first value: " << meanBuffer[0] << endl; // call the wrapper function - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows(), mean.cols(), keep); + cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); delete evBuffer; delete meanBuffer; diff --git a/openbr/plugins/cuda/cudapca.cu b/openbr/plugins/cuda/cudapca.cu index bd6f7fb..1d7d395 100644 --- a/openbr/plugins/cuda/cudapca.cu +++ b/openbr/plugins/cuda/cudapca.cu @@ -57,25 +57,23 @@ namespace br { namespace cuda { } float* cudaEvPtr; int _evRows; int _evCols; - float* cudaMeanPtr; int _meanRows; int _meanCols; - int _keep; + float* cudaMeanPtr; int _meanElems; void cudapca_initwrapper() { } - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep) { + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { _evRows = evRows; _evCols = evCols; - _meanRows = meanRows; _meanCols = meanCols; - _keep = keep; + _meanElems = meanElems; // copy the eigenvectors to the GPU cudaMalloc(&cudaEvPtr, evRows*evCols*sizeof(float)); cudaMemcpy(cudaEvPtr, evPtr, evRows*evCols*sizeof(float), cudaMemcpyHostToDevice); // copy the mean to the GPU - cudaMalloc(&cudaMeanPtr, meanRows*meanCols*sizeof(float)); - cudaMemcpy(cudaMeanPtr, meanPtr, meanRows*meanCols*sizeof(float), cudaMemcpyHostToDevice); + cudaMalloc(&cudaMeanPtr, meanElems*sizeof(float)); + cudaMemcpy(cudaMeanPtr, meanPtr, meanElems*sizeof(float), cudaMemcpyHostToDevice); } void cudapca_trainwrapper() { @@ -176,24 +174,24 @@ namespace br { namespace cuda { void cudapca_projectwrapper(float* src, float* dst) { // copy the image to the GPU float* cudaSrcPtr; - cudaMalloc(&cudaSrcPtr, _meanRows*_meanCols*sizeof(float)); - cudaMemcpy(cudaSrcPtr, src, _meanRows*_meanCols*sizeof(float), cudaMemcpyHostToDevice); + cudaMalloc(&cudaSrcPtr, _meanElems*sizeof(float)); + cudaMemcpy(cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice); float* cudaDstPtr; - cudaMalloc(&cudaDstPtr, _keep*sizeof(float)); + cudaMalloc(&cudaDstPtr, _evCols*sizeof(float)); // subtract out the mean of the image (mean is 1xpixels in size) int threadsPerBlock = 64; - int numBlocks = _meanRows*_meanCols / threadsPerBlock; - cudapca_project_subtractmean_kernel<<>>(cudaSrcPtr, cudaMeanPtr, _meanRows*_meanCols); + int numBlocks = _meanElems / threadsPerBlock; + cudapca_project_subtractmean_kernel<<>>(cudaSrcPtr, cudaMeanPtr, _meanElems); // perform the multiplication threadsPerBlock = 64; - numBlocks = _keep / threadsPerBlock; + numBlocks = _evCols / threadsPerBlock; cudapca_project_multiply_kernel<<>>(cudaSrcPtr, cudaDstPtr, cudaEvPtr, _evRows, _evCols); // copy the data back to the CPU - cudaMemcpy(dst, cudaDstPtr, _keep*sizeof(float), cudaMemcpyDeviceToHost); + cudaMemcpy(dst, cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); cudaFree(cudaSrcPtr); cudaFree(cudaDstPtr); diff --git a/openbr/plugins/cuda/cudapca.hpp b/openbr/plugins/cuda/cudapca.hpp index b655e3e..792fa4a 100644 --- a/openbr/plugins/cuda/cudapca.hpp +++ b/openbr/plugins/cuda/cudapca.hpp @@ -7,7 +7,7 @@ using namespace cv::gpu; namespace br { namespace cuda { void cudapca_initwrapper(); - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep); + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); void cudapca_trainwrapper(); void cudapca_projectwrapper(float* src, float* dst);