From c837c1d9df06d7d1708bb5661bf0c5556841df6e Mon Sep 17 00:00:00 2001 From: DepthDeluxe Date: Wed, 2 Mar 2016 09:36:56 -0500 Subject: [PATCH] cleaned up PCA --- openbr/plugins/cuda/copyfrom.cpp | 6 ++++++ openbr/plugins/cuda/cudapca.cpp | 21 ++++++++++++--------- openbr/plugins/cuda/cudapca.cu | 42 ++++++++---------------------------------- 3 files changed, 26 insertions(+), 43 deletions(-) diff --git a/openbr/plugins/cuda/copyfrom.cpp b/openbr/plugins/cuda/copyfrom.cpp index 44b6343..4bfc0bf 100644 --- a/openbr/plugins/cuda/copyfrom.cpp +++ b/openbr/plugins/cuda/copyfrom.cpp @@ -2,6 +2,8 @@ #include +//#include + #include using namespace std; @@ -22,6 +24,8 @@ namespace br private: void project(const Template &src, Template &dst) const { +// ProfilerStart("PROFILEME.log"); + // pull the data back out of the Mat void* const* dataPtr = src.m().ptr(); int rows = *((int*)dataPtr[1]); @@ -44,6 +48,8 @@ private: break; } dst = dstMat; + +// ProfilerStop(); } }; diff --git a/openbr/plugins/cuda/cudapca.cpp b/openbr/plugins/cuda/cudapca.cpp index 7f56070..cccd1f1 100644 --- a/openbr/plugins/cuda/cudapca.cpp +++ b/openbr/plugins/cuda/cudapca.cpp @@ -29,11 +29,10 @@ using namespace cv; #include #include -namespace br { namespace cuda { - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); - void cudapca_trainwrapper(void* cudaDataPtr, float* dataPtr, int rows, int cols); - void cudapca_projectwrapper(void* src, void** dst); -}} +namespace br { namespace cuda { namespace pca { + void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); + void wrapper(void* src, void** dst); +}}} namespace br { @@ -95,7 +94,6 @@ private: int type = *((int*)srcDataPtr[3]); Mat mat = Mat(rows, cols, type); - br::cuda::cudapca_trainwrapper(cudaMemPtr, mat.ptr(), rows, cols); trainingQlist.append(Template(mat)); } @@ -136,7 +134,7 @@ private: dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep; dstDataPtr[3] = srcDataPtr[3]; - br::cuda::cudapca_projectwrapper(srcDataPtr[0], &dstDataPtr[0]); + br::cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]); dst = dstMat; @@ -161,7 +159,11 @@ private: void load(QDataStream &stream) { - stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs; + Eigen::MatrixXf originalEVecs; + stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> originalEVecs; + + // perform transpose before copying over + eVecs = originalEVecs; //originalEVecs.transpose(); cout << "Mean Dimensions" << endl; cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl; @@ -173,6 +175,7 @@ private: cout << "Mean first value: " << mean(0, 0) << endl; + // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data // serialize the eigenvectors float* evBuffer = new float[eVecs.rows() * eVecs.cols()]; @@ -191,7 +194,7 @@ private: } // call the wrapper function - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); + br::cuda::pca::loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); delete evBuffer; delete meanBuffer; diff --git a/openbr/plugins/cuda/cudapca.cu b/openbr/plugins/cuda/cudapca.cu index 10a59fb..1f1fd12 100644 --- a/openbr/plugins/cuda/cudapca.cu +++ b/openbr/plugins/cuda/cudapca.cu @@ -9,28 +9,8 @@ using namespace std; using namespace cv; using namespace cv::gpu; -namespace br { namespace cuda { - __global__ void calculateCovariance_kernel(float* trainingSet, float* cov, int numRows, int numCols) { - int rowInd = blockIdx.y*blockDim.y + threadIdx.y; - int colInd = blockIdx.x*blockDim.x + threadIdx.x; - - // this calculates trainingSet' * trainingSet - if (rowInd >= numRows || colInd >= numCols) { - return; - } - - // get a reference the value we wish to write - float& out = cov[rowInd*numRows + colInd]; - - // calculate the value of this position - out = 0; - for (int i=0; i>>((float*)src, cudaMeanPtr, _meanElems); + subtractMeanKernel<<>>((float*)src, cudaMeanPtr, _meanElems); CUDA_KERNEL_ERR_CHK(&err); // perform the multiplication threadsPerBlock = 64; numBlocks = _evCols / threadsPerBlock + 1; - cudapca_project_multiply_kernel<<>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols); + multiplyKernel<<>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols); CUDA_KERNEL_ERR_CHK(&err); CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption... @@ -108,4 +82,4 @@ namespace br { namespace cuda { // copy the data back to the CPU //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); } -}} +}}} -- libgit2 0.21.4