Commit c837c1d9df06d7d1708bb5661bf0c5556841df6e
1 parent fd23ffcd
cleaned up PCA
Showing 3 changed files with 26 additions and 43 deletions
openbr/plugins/cuda/copyfrom.cpp
| @@ -2,6 +2,8 @@ | @@ -2,6 +2,8 @@ | ||
| 2 | 2 | ||
| 3 | #include <opencv2/opencv.hpp> | 3 | #include <opencv2/opencv.hpp> |
| 4 | 4 | ||
| 5 | +//#include <gperftools/profiler.h> | ||
| 6 | + | ||
| 5 | #include <openbr/plugins/openbr_internal.h> | 7 | #include <openbr/plugins/openbr_internal.h> |
| 6 | 8 | ||
| 7 | using namespace std; | 9 | using namespace std; |
| @@ -22,6 +24,8 @@ namespace br | @@ -22,6 +24,8 @@ namespace br | ||
| 22 | private: | 24 | private: |
| 23 | void project(const Template &src, Template &dst) const | 25 | void project(const Template &src, Template &dst) const |
| 24 | { | 26 | { |
| 27 | +// ProfilerStart("PROFILEME.log"); | ||
| 28 | + | ||
| 25 | // pull the data back out of the Mat | 29 | // pull the data back out of the Mat |
| 26 | void* const* dataPtr = src.m().ptr<void*>(); | 30 | void* const* dataPtr = src.m().ptr<void*>(); |
| 27 | int rows = *((int*)dataPtr[1]); | 31 | int rows = *((int*)dataPtr[1]); |
| @@ -44,6 +48,8 @@ private: | @@ -44,6 +48,8 @@ private: | ||
| 44 | break; | 48 | break; |
| 45 | } | 49 | } |
| 46 | dst = dstMat; | 50 | dst = dstMat; |
| 51 | + | ||
| 52 | +// ProfilerStop(); | ||
| 47 | } | 53 | } |
| 48 | }; | 54 | }; |
| 49 | 55 |
openbr/plugins/cuda/cudapca.cpp
| @@ -29,11 +29,10 @@ using namespace cv; | @@ -29,11 +29,10 @@ using namespace cv; | ||
| 29 | #include <openbr/core/eigenutils.h> | 29 | #include <openbr/core/eigenutils.h> |
| 30 | #include <openbr/core/opencvutils.h> | 30 | #include <openbr/core/opencvutils.h> |
| 31 | 31 | ||
| 32 | -namespace br { namespace cuda { | ||
| 33 | - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); | ||
| 34 | - void cudapca_trainwrapper(void* cudaDataPtr, float* dataPtr, int rows, int cols); | ||
| 35 | - void cudapca_projectwrapper(void* src, void** dst); | ||
| 36 | -}} | 32 | +namespace br { namespace cuda { namespace pca { |
| 33 | + void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); | ||
| 34 | + void wrapper(void* src, void** dst); | ||
| 35 | +}}} | ||
| 37 | 36 | ||
| 38 | namespace br | 37 | namespace br |
| 39 | { | 38 | { |
| @@ -95,7 +94,6 @@ private: | @@ -95,7 +94,6 @@ private: | ||
| 95 | int type = *((int*)srcDataPtr[3]); | 94 | int type = *((int*)srcDataPtr[3]); |
| 96 | 95 | ||
| 97 | Mat mat = Mat(rows, cols, type); | 96 | Mat mat = Mat(rows, cols, type); |
| 98 | - br::cuda::cudapca_trainwrapper(cudaMemPtr, mat.ptr<float>(), rows, cols); | ||
| 99 | trainingQlist.append(Template(mat)); | 97 | trainingQlist.append(Template(mat)); |
| 100 | } | 98 | } |
| 101 | 99 | ||
| @@ -136,7 +134,7 @@ private: | @@ -136,7 +134,7 @@ private: | ||
| 136 | dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep; | 134 | dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep; |
| 137 | dstDataPtr[3] = srcDataPtr[3]; | 135 | dstDataPtr[3] = srcDataPtr[3]; |
| 138 | 136 | ||
| 139 | - br::cuda::cudapca_projectwrapper(srcDataPtr[0], &dstDataPtr[0]); | 137 | + br::cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]); |
| 140 | 138 | ||
| 141 | dst = dstMat; | 139 | dst = dstMat; |
| 142 | 140 | ||
| @@ -161,7 +159,11 @@ private: | @@ -161,7 +159,11 @@ private: | ||
| 161 | 159 | ||
| 162 | void load(QDataStream &stream) | 160 | void load(QDataStream &stream) |
| 163 | { | 161 | { |
| 164 | - stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs; | 162 | + Eigen::MatrixXf originalEVecs; |
| 163 | + stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> originalEVecs; | ||
| 164 | + | ||
| 165 | + // perform transpose before copying over | ||
| 166 | + eVecs = originalEVecs; //originalEVecs.transpose(); | ||
| 165 | 167 | ||
| 166 | cout << "Mean Dimensions" << endl; | 168 | cout << "Mean Dimensions" << endl; |
| 167 | cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl; | 169 | cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl; |
| @@ -173,6 +175,7 @@ private: | @@ -173,6 +175,7 @@ private: | ||
| 173 | 175 | ||
| 174 | cout << "Mean first value: " << mean(0, 0) << endl; | 176 | cout << "Mean first value: " << mean(0, 0) << endl; |
| 175 | 177 | ||
| 178 | + | ||
| 176 | // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data | 179 | // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data |
| 177 | // serialize the eigenvectors | 180 | // serialize the eigenvectors |
| 178 | float* evBuffer = new float[eVecs.rows() * eVecs.cols()]; | 181 | float* evBuffer = new float[eVecs.rows() * eVecs.cols()]; |
| @@ -191,7 +194,7 @@ private: | @@ -191,7 +194,7 @@ private: | ||
| 191 | } | 194 | } |
| 192 | 195 | ||
| 193 | // call the wrapper function | 196 | // call the wrapper function |
| 194 | - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); | 197 | + br::cuda::pca::loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); |
| 195 | 198 | ||
| 196 | delete evBuffer; | 199 | delete evBuffer; |
| 197 | delete meanBuffer; | 200 | delete meanBuffer; |
openbr/plugins/cuda/cudapca.cu
| @@ -9,28 +9,8 @@ using namespace std; | @@ -9,28 +9,8 @@ using namespace std; | ||
| 9 | using namespace cv; | 9 | using namespace cv; |
| 10 | using namespace cv::gpu; | 10 | using namespace cv::gpu; |
| 11 | 11 | ||
| 12 | -namespace br { namespace cuda { | ||
| 13 | - __global__ void calculateCovariance_kernel(float* trainingSet, float* cov, int numRows, int numCols) { | ||
| 14 | - int rowInd = blockIdx.y*blockDim.y + threadIdx.y; | ||
| 15 | - int colInd = blockIdx.x*blockDim.x + threadIdx.x; | ||
| 16 | - | ||
| 17 | - // this calculates trainingSet' * trainingSet | ||
| 18 | - if (rowInd >= numRows || colInd >= numCols) { | ||
| 19 | - return; | ||
| 20 | - } | ||
| 21 | - | ||
| 22 | - // get a reference the value we wish to write | ||
| 23 | - float& out = cov[rowInd*numRows + colInd]; | ||
| 24 | - | ||
| 25 | - // calculate the value of this position | ||
| 26 | - out = 0; | ||
| 27 | - for (int i=0; i<numRows; i++) { | ||
| 28 | - out += trainingSet[rowInd*numCols + colInd] * trainingSet[rowInd*numCols + numRows]; // XXX(colin): not sure if this is correct | ||
| 29 | - } | ||
| 30 | - out = out / (numRows-1); | ||
| 31 | - } | ||
| 32 | - | ||
| 33 | - __global__ void cudapca_project_multiply_kernel(float* src, float* dst, float* evPtr, int evRows, int evCols) { | 12 | +namespace br { namespace cuda { namespace pca { |
| 13 | + __global__ void multiplyKernel(float* src, float* dst, float* evPtr, int evRows, int evCols) { | ||
| 34 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; | 14 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| 35 | 15 | ||
| 36 | // check dimensions | 16 | // check dimensions |
| @@ -44,7 +24,7 @@ namespace br { namespace cuda { | @@ -44,7 +24,7 @@ namespace br { namespace cuda { | ||
| 44 | } | 24 | } |
| 45 | } | 25 | } |
| 46 | 26 | ||
| 47 | - __global__ void cudapca_project_subtractmean_kernel(float* out, float* mean, int numCols) { | 27 | + __global__ void subtractMeanKernel(float* out, float* mean, int numCols) { |
| 48 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; | 28 | int colInd = blockIdx.x*blockDim.x+threadIdx.x; |
| 49 | 29 | ||
| 50 | // perform bound checking | 30 | // perform bound checking |
| @@ -61,7 +41,7 @@ namespace br { namespace cuda { | @@ -61,7 +41,7 @@ namespace br { namespace cuda { | ||
| 61 | float* _cudaSrcPtr; | 41 | float* _cudaSrcPtr; |
| 62 | float* _cudaDstPtr; | 42 | float* _cudaDstPtr; |
| 63 | 43 | ||
| 64 | - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { | 44 | + void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { |
| 65 | _evRows = evRows; _evCols = evCols; | 45 | _evRows = evRows; _evCols = evCols; |
| 66 | _meanElems = meanElems; | 46 | _meanElems = meanElems; |
| 67 | 47 | ||
| @@ -79,13 +59,7 @@ namespace br { namespace cuda { | @@ -79,13 +59,7 @@ namespace br { namespace cuda { | ||
| 79 | CUDA_SAFE_MALLOC(&_cudaDstPtr, _evCols*sizeof(float), &err); | 59 | CUDA_SAFE_MALLOC(&_cudaDstPtr, _evCols*sizeof(float), &err); |
| 80 | } | 60 | } |
| 81 | 61 | ||
| 82 | - void cudapca_trainwrapper(void* cudaDataPtr, float* dataPtr, int rows, int cols) { | ||
| 83 | - cudaError_t err; | ||
| 84 | - CUDA_SAFE_MEMCPY(dataPtr, cudaDataPtr, rows*cols*sizeof(float), cudaMemcpyDeviceToHost, &err); | ||
| 85 | - CUDA_SAFE_FREE(cudaDataPtr, &err); | ||
| 86 | - } | ||
| 87 | - | ||
| 88 | - void cudapca_projectwrapper(void* src, void** dst) { | 62 | + void wrapper(void* src, void** dst) { |
| 89 | // copy the image to the GPU | 63 | // copy the image to the GPU |
| 90 | //cudaMemcpy(_cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice); | 64 | //cudaMemcpy(_cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice); |
| 91 | cudaError_t err; | 65 | cudaError_t err; |
| @@ -94,13 +68,13 @@ namespace br { namespace cuda { | @@ -94,13 +68,13 @@ namespace br { namespace cuda { | ||
| 94 | // subtract out the mean of the image (mean is 1xpixels in size) | 68 | // subtract out the mean of the image (mean is 1xpixels in size) |
| 95 | int threadsPerBlock = 64; | 69 | int threadsPerBlock = 64; |
| 96 | int numBlocks = _meanElems / threadsPerBlock + 1; | 70 | int numBlocks = _meanElems / threadsPerBlock + 1; |
| 97 | - cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>((float*)src, cudaMeanPtr, _meanElems); | 71 | + subtractMeanKernel<<<numBlocks, threadsPerBlock>>>((float*)src, cudaMeanPtr, _meanElems); |
| 98 | CUDA_KERNEL_ERR_CHK(&err); | 72 | CUDA_KERNEL_ERR_CHK(&err); |
| 99 | 73 | ||
| 100 | // perform the multiplication | 74 | // perform the multiplication |
| 101 | threadsPerBlock = 64; | 75 | threadsPerBlock = 64; |
| 102 | numBlocks = _evCols / threadsPerBlock + 1; | 76 | numBlocks = _evCols / threadsPerBlock + 1; |
| 103 | - cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols); | 77 | + multiplyKernel<<<numBlocks, threadsPerBlock>>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols); |
| 104 | CUDA_KERNEL_ERR_CHK(&err); | 78 | CUDA_KERNEL_ERR_CHK(&err); |
| 105 | 79 | ||
| 106 | CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption... | 80 | CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption... |
| @@ -108,4 +82,4 @@ namespace br { namespace cuda { | @@ -108,4 +82,4 @@ namespace br { namespace cuda { | ||
| 108 | // copy the data back to the CPU | 82 | // copy the data back to the CPU |
| 109 | //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); | 83 | //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); |
| 110 | } | 84 | } |
| 111 | -}} | 85 | +}}} |