Commit c837c1d9df06d7d1708bb5661bf0c5556841df6e

Authored by DepthDeluxe
1 parent fd23ffcd

cleaned up PCA

openbr/plugins/cuda/copyfrom.cpp
... ... @@ -2,6 +2,8 @@
2 2  
3 3 #include <opencv2/opencv.hpp>
4 4  
  5 +//#include <gperftools/profiler.h>
  6 +
5 7 #include <openbr/plugins/openbr_internal.h>
6 8  
7 9 using namespace std;
... ... @@ -22,6 +24,8 @@ namespace br
22 24 private:
23 25 void project(const Template &src, Template &dst) const
24 26 {
  27 +// ProfilerStart("PROFILEME.log");
  28 +
25 29 // pull the data back out of the Mat
26 30 void* const* dataPtr = src.m().ptr<void*>();
27 31 int rows = *((int*)dataPtr[1]);
... ... @@ -44,6 +48,8 @@ private:
44 48 break;
45 49 }
46 50 dst = dstMat;
  51 +
  52 +// ProfilerStop();
47 53 }
48 54 };
49 55  
... ...
openbr/plugins/cuda/cudapca.cpp
... ... @@ -29,11 +29,10 @@ using namespace cv;
29 29 #include <openbr/core/eigenutils.h>
30 30 #include <openbr/core/opencvutils.h>
31 31  
32   -namespace br { namespace cuda {
33   - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems);
34   - void cudapca_trainwrapper(void* cudaDataPtr, float* dataPtr, int rows, int cols);
35   - void cudapca_projectwrapper(void* src, void** dst);
36   -}}
  32 +namespace br { namespace cuda { namespace pca {
  33 + void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems);
  34 + void wrapper(void* src, void** dst);
  35 +}}}
37 36  
38 37 namespace br
39 38 {
... ... @@ -95,7 +94,6 @@ private:
95 94 int type = *((int*)srcDataPtr[3]);
96 95  
97 96 Mat mat = Mat(rows, cols, type);
98   - br::cuda::cudapca_trainwrapper(cudaMemPtr, mat.ptr<float>(), rows, cols);
99 97 trainingQlist.append(Template(mat));
100 98 }
101 99  
... ... @@ -136,7 +134,7 @@ private:
136 134 dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep;
137 135 dstDataPtr[3] = srcDataPtr[3];
138 136  
139   - br::cuda::cudapca_projectwrapper(srcDataPtr[0], &dstDataPtr[0]);
  137 + br::cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]);
140 138  
141 139 dst = dstMat;
142 140  
... ... @@ -161,7 +159,11 @@ private:
161 159  
162 160 void load(QDataStream &stream)
163 161 {
164   - stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs;
  162 + Eigen::MatrixXf originalEVecs;
  163 + stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> originalEVecs;
  164 +
  165 + // perform transpose before copying over
  166 + eVecs = originalEVecs; //originalEVecs.transpose();
165 167  
166 168 cout << "Mean Dimensions" << endl;
167 169 cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl;
... ... @@ -173,6 +175,7 @@ private:
173 175  
174 176 cout << "Mean first value: " << mean(0, 0) << endl;
175 177  
  178 +
176 179 // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data
177 180 // serialize the eigenvectors
178 181 float* evBuffer = new float[eVecs.rows() * eVecs.cols()];
... ... @@ -191,7 +194,7 @@ private:
191 194 }
192 195  
193 196 // call the wrapper function
194   - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols());
  197 + br::cuda::pca::loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols());
195 198  
196 199 delete evBuffer;
197 200 delete meanBuffer;
... ...
openbr/plugins/cuda/cudapca.cu
... ... @@ -9,28 +9,8 @@ using namespace std;
9 9 using namespace cv;
10 10 using namespace cv::gpu;
11 11  
12   -namespace br { namespace cuda {
13   - __global__ void calculateCovariance_kernel(float* trainingSet, float* cov, int numRows, int numCols) {
14   - int rowInd = blockIdx.y*blockDim.y + threadIdx.y;
15   - int colInd = blockIdx.x*blockDim.x + threadIdx.x;
16   -
17   - // this calculates trainingSet' * trainingSet
18   - if (rowInd >= numRows || colInd >= numCols) {
19   - return;
20   - }
21   -
22   - // get a reference the value we wish to write
23   - float& out = cov[rowInd*numRows + colInd];
24   -
25   - // calculate the value of this position
26   - out = 0;
27   - for (int i=0; i<numRows; i++) {
28   - out += trainingSet[rowInd*numCols + colInd] * trainingSet[rowInd*numCols + numRows]; // XXX(colin): not sure if this is correct
29   - }
30   - out = out / (numRows-1);
31   - }
32   -
33   - __global__ void cudapca_project_multiply_kernel(float* src, float* dst, float* evPtr, int evRows, int evCols) {
  12 +namespace br { namespace cuda { namespace pca {
  13 + __global__ void multiplyKernel(float* src, float* dst, float* evPtr, int evRows, int evCols) {
34 14 int colInd = blockIdx.x*blockDim.x+threadIdx.x;
35 15  
36 16 // check dimensions
... ... @@ -44,7 +24,7 @@ namespace br { namespace cuda {
44 24 }
45 25 }
46 26  
47   - __global__ void cudapca_project_subtractmean_kernel(float* out, float* mean, int numCols) {
  27 + __global__ void subtractMeanKernel(float* out, float* mean, int numCols) {
48 28 int colInd = blockIdx.x*blockDim.x+threadIdx.x;
49 29  
50 30 // perform bound checking
... ... @@ -61,7 +41,7 @@ namespace br { namespace cuda {
61 41 float* _cudaSrcPtr;
62 42 float* _cudaDstPtr;
63 43  
64   - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) {
  44 + void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) {
65 45 _evRows = evRows; _evCols = evCols;
66 46 _meanElems = meanElems;
67 47  
... ... @@ -79,13 +59,7 @@ namespace br { namespace cuda {
79 59 CUDA_SAFE_MALLOC(&_cudaDstPtr, _evCols*sizeof(float), &err);
80 60 }
81 61  
82   - void cudapca_trainwrapper(void* cudaDataPtr, float* dataPtr, int rows, int cols) {
83   - cudaError_t err;
84   - CUDA_SAFE_MEMCPY(dataPtr, cudaDataPtr, rows*cols*sizeof(float), cudaMemcpyDeviceToHost, &err);
85   - CUDA_SAFE_FREE(cudaDataPtr, &err);
86   - }
87   -
88   - void cudapca_projectwrapper(void* src, void** dst) {
  62 + void wrapper(void* src, void** dst) {
89 63 // copy the image to the GPU
90 64 //cudaMemcpy(_cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice);
91 65 cudaError_t err;
... ... @@ -94,13 +68,13 @@ namespace br { namespace cuda {
94 68 // subtract out the mean of the image (mean is 1xpixels in size)
95 69 int threadsPerBlock = 64;
96 70 int numBlocks = _meanElems / threadsPerBlock + 1;
97   - cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>((float*)src, cudaMeanPtr, _meanElems);
  71 + subtractMeanKernel<<<numBlocks, threadsPerBlock>>>((float*)src, cudaMeanPtr, _meanElems);
98 72 CUDA_KERNEL_ERR_CHK(&err);
99 73  
100 74 // perform the multiplication
101 75 threadsPerBlock = 64;
102 76 numBlocks = _evCols / threadsPerBlock + 1;
103   - cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols);
  77 + multiplyKernel<<<numBlocks, threadsPerBlock>>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols);
104 78 CUDA_KERNEL_ERR_CHK(&err);
105 79  
106 80 CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption...
... ... @@ -108,4 +82,4 @@ namespace br { namespace cuda {
108 82 // copy the data back to the CPU
109 83 //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost);
110 84 }
111   -}}
  85 +}}}
... ...