Commit c837c1d9df06d7d1708bb5661bf0c5556841df6e
1 parent: fd23ffcd
cleaned up PCA
Showing 3 changed files with 26 additions and 43 deletions
openbr/plugins/cuda/copyfrom.cpp
| ... | ... | @@ -2,6 +2,8 @@ |
| 2 | 2 | |
| 3 | 3 | #include <opencv2/opencv.hpp> |
| 4 | 4 | |
| 5 | +//#include <gperftools/profiler.h> | |
| 6 | + | |
| 5 | 7 | #include <openbr/plugins/openbr_internal.h> |
| 6 | 8 | |
| 7 | 9 | using namespace std; |
| ... | ... | @@ -22,6 +24,8 @@ namespace br |
| 22 | 24 | private: |
| 23 | 25 | void project(const Template &src, Template &dst) const |
| 24 | 26 | { |
| 27 | +// ProfilerStart("PROFILEME.log"); | |
| 28 | + | |
| 25 | 29 | // pull the data back out of the Mat |
| 26 | 30 | void* const* dataPtr = src.m().ptr<void*>(); |
| 27 | 31 | int rows = *((int*)dataPtr[1]); |
| ... | ... | @@ -44,6 +48,8 @@ private: |
| 44 | 48 | break; |
| 45 | 49 | } |
| 46 | 50 | dst = dstMat; |
| 51 | + | |
| 52 | +// ProfilerStop(); | |
| 47 | 53 | } |
| 48 | 54 | }; |
| 49 | 55 | ... | ... |
openbr/plugins/cuda/cudapca.cpp
| ... | ... | @@ -29,11 +29,10 @@ using namespace cv; |
| 29 | 29 | #include <openbr/core/eigenutils.h> |
| 30 | 30 | #include <openbr/core/opencvutils.h> |
| 31 | 31 | |
| 32 | -namespace br { namespace cuda { | |
| 33 | - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); | |
| 34 | - void cudapca_trainwrapper(void* cudaDataPtr, float* dataPtr, int rows, int cols); | |
| 35 | - void cudapca_projectwrapper(void* src, void** dst); | |
| 36 | -}} | |
| 32 | +namespace br { namespace cuda { namespace pca { | |
| 33 | + void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); | |
| 34 | + void wrapper(void* src, void** dst); | |
| 35 | +}}} | |
| 37 | 36 | |
| 38 | 37 | namespace br |
| 39 | 38 | { |
| ... | ... | @@ -95,7 +94,6 @@ private: |
| 95 | 94 | int type = *((int*)srcDataPtr[3]); |
| 96 | 95 | |
| 97 | 96 | Mat mat = Mat(rows, cols, type); |
| 98 | - br::cuda::cudapca_trainwrapper(cudaMemPtr, mat.ptr<float>(), rows, cols); | |
| 99 | 97 | trainingQlist.append(Template(mat)); |
| 100 | 98 | } |
| 101 | 99 | |
| ... | ... | @@ -136,7 +134,7 @@ private: |
| 136 | 134 | dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep; |
| 137 | 135 | dstDataPtr[3] = srcDataPtr[3]; |
| 138 | 136 | |
| 139 | - br::cuda::cudapca_projectwrapper(srcDataPtr[0], &dstDataPtr[0]); | |
| 137 | + br::cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]); | |
| 140 | 138 | |
| 141 | 139 | dst = dstMat; |
| 142 | 140 | |
| ... | ... | @@ -161,7 +159,11 @@ private: |
| 161 | 159 | |
| 162 | 160 | void load(QDataStream &stream) |
| 163 | 161 | { |
| 164 | - stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs; | |
| 162 | + Eigen::MatrixXf originalEVecs; | |
| 163 | + stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> originalEVecs; | |
| 164 | + | |
| 165 | + // perform transpose before copying over | |
| 166 | + eVecs = originalEVecs; //originalEVecs.transpose(); | |
| 165 | 167 | |
| 166 | 168 | cout << "Mean Dimensions" << endl; |
| 167 | 169 | cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl; |
| ... | ... | @@ -173,6 +175,7 @@ private: |
| 173 | 175 | |
| 174 | 176 | cout << "Mean first value: " << mean(0, 0) << endl; |
| 175 | 177 | |
| 178 | + | |
| 176 | 179 | // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data |
| 177 | 180 | // serialize the eigenvectors |
| 178 | 181 | float* evBuffer = new float[eVecs.rows() * eVecs.cols()]; |
| ... | ... | @@ -191,7 +194,7 @@ private: |
| 191 | 194 | } |
| 192 | 195 | |
| 193 | 196 | // call the wrapper function |
| 194 | - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); | |
| 197 | + br::cuda::pca::loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); | |
| 195 | 198 | |
| 196 | 199 | delete evBuffer; |
| 197 | 200 | delete meanBuffer; | ... | ... |
openbr/plugins/cuda/cudapca.cu
| ... | ... | @@ -9,28 +9,8 @@ using namespace std; |
| 9 | 9 | using namespace cv; |
| 10 | 10 | using namespace cv::gpu; |
| 11 | 11 | |
| 12 | -namespace br { namespace cuda { | |
| 13 | - __global__ void calculateCovariance_kernel(float* trainingSet, float* cov, int numRows, int numCols) { | |
| 14 | - int rowInd = blockIdx.y*blockDim.y + threadIdx.y; | |
| 15 | - int colInd = blockIdx.x*blockDim.x + threadIdx.x; | |
| 16 | - | |
| 17 | - // this calculates trainingSet' * trainingSet | |
| 18 | - if (rowInd >= numRows || colInd >= numCols) { | |
| 19 | - return; | |
| 20 | - } | |
| 21 | - | |
| 22 | - // get a reference the value we wish to write | |
| 23 | - float& out = cov[rowInd*numRows + colInd]; | |
| 24 | - | |
| 25 | - // calculate the value of this position | |
| 26 | - out = 0; | |
| 27 | - for (int i=0; i<numRows; i++) { | |
| 28 | - out += trainingSet[rowInd*numCols + colInd] * trainingSet[rowInd*numCols + numRows]; // XXX(colin): not sure if this is correct | |
| 29 | - } | |
| 30 | - out = out / (numRows-1); | |
| 31 | - } | |
| 32 | - | |
| 33 | - __global__ void cudapca_project_multiply_kernel(float* src, float* dst, float* evPtr, int evRows, int evCols) { | |
| 12 | +namespace br { namespace cuda { namespace pca { | |
| 13 | + __global__ void multiplyKernel(float* src, float* dst, float* evPtr, int evRows, int evCols) { | |
| 34 | 14 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| 35 | 15 | |
| 36 | 16 | // check dimensions |
| ... | ... | @@ -44,7 +24,7 @@ namespace br { namespace cuda { |
| 44 | 24 | } |
| 45 | 25 | } |
| 46 | 26 | |
| 47 | - __global__ void cudapca_project_subtractmean_kernel(float* out, float* mean, int numCols) { | |
| 27 | + __global__ void subtractMeanKernel(float* out, float* mean, int numCols) { | |
| 48 | 28 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| 49 | 29 | |
| 50 | 30 | // perform bound checking |
| ... | ... | @@ -61,7 +41,7 @@ namespace br { namespace cuda { |
| 61 | 41 | float* _cudaSrcPtr; |
| 62 | 42 | float* _cudaDstPtr; |
| 63 | 43 | |
| 64 | - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { | |
| 44 | + void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { | |
| 65 | 45 | _evRows = evRows; _evCols = evCols; |
| 66 | 46 | _meanElems = meanElems; |
| 67 | 47 | |
| ... | ... | @@ -79,13 +59,7 @@ namespace br { namespace cuda { |
| 79 | 59 | CUDA_SAFE_MALLOC(&_cudaDstPtr, _evCols*sizeof(float), &err); |
| 80 | 60 | } |
| 81 | 61 | |
| 82 | - void cudapca_trainwrapper(void* cudaDataPtr, float* dataPtr, int rows, int cols) { | |
| 83 | - cudaError_t err; | |
| 84 | - CUDA_SAFE_MEMCPY(dataPtr, cudaDataPtr, rows*cols*sizeof(float), cudaMemcpyDeviceToHost, &err); | |
| 85 | - CUDA_SAFE_FREE(cudaDataPtr, &err); | |
| 86 | - } | |
| 87 | - | |
| 88 | - void cudapca_projectwrapper(void* src, void** dst) { | |
| 62 | + void wrapper(void* src, void** dst) { | |
| 89 | 63 | // copy the image to the GPU |
| 90 | 64 | //cudaMemcpy(_cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice); |
| 91 | 65 | cudaError_t err; |
| ... | ... | @@ -94,13 +68,13 @@ namespace br { namespace cuda { |
| 94 | 68 | // subtract out the mean of the image (mean is 1xpixels in size) |
| 95 | 69 | int threadsPerBlock = 64; |
| 96 | 70 | int numBlocks = _meanElems / threadsPerBlock + 1; |
| 97 | - cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>((float*)src, cudaMeanPtr, _meanElems); | |
| 71 | + subtractMeanKernel<<<numBlocks, threadsPerBlock>>>((float*)src, cudaMeanPtr, _meanElems); | |
| 98 | 72 | CUDA_KERNEL_ERR_CHK(&err); |
| 99 | 73 | |
| 100 | 74 | // perform the multiplication |
| 101 | 75 | threadsPerBlock = 64; |
| 102 | 76 | numBlocks = _evCols / threadsPerBlock + 1; |
| 103 | - cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols); | |
| 77 | + multiplyKernel<<<numBlocks, threadsPerBlock>>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols); | |
| 104 | 78 | CUDA_KERNEL_ERR_CHK(&err); |
| 105 | 79 | |
| 106 | 80 | CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption... |
| ... | ... | @@ -108,4 +82,4 @@ namespace br { namespace cuda { |
| 108 | 82 | // copy the data back to the CPU |
| 109 | 83 | //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); |
| 110 | 84 | } |
| 111 | -}} | |
| 85 | +}}} | ... | ... |