Commit c837c1d9df06d7d1708bb5661bf0c5556841df6e

Authored by DepthDeluxe
1 parent fd23ffcd

cleaned up PCA

openbr/plugins/cuda/copyfrom.cpp
@@ -2,6 +2,8 @@ @@ -2,6 +2,8 @@
2 2
3 #include <opencv2/opencv.hpp> 3 #include <opencv2/opencv.hpp>
4 4
  5 +//#include <gperftools/profiler.h>
  6 +
5 #include <openbr/plugins/openbr_internal.h> 7 #include <openbr/plugins/openbr_internal.h>
6 8
7 using namespace std; 9 using namespace std;
@@ -22,6 +24,8 @@ namespace br @@ -22,6 +24,8 @@ namespace br
22 private: 24 private:
23 void project(const Template &src, Template &dst) const 25 void project(const Template &src, Template &dst) const
24 { 26 {
  27 +// ProfilerStart("PROFILEME.log");
  28 +
25 // pull the data back out of the Mat 29 // pull the data back out of the Mat
26 void* const* dataPtr = src.m().ptr<void*>(); 30 void* const* dataPtr = src.m().ptr<void*>();
27 int rows = *((int*)dataPtr[1]); 31 int rows = *((int*)dataPtr[1]);
@@ -44,6 +48,8 @@ private: @@ -44,6 +48,8 @@ private:
44 break; 48 break;
45 } 49 }
46 dst = dstMat; 50 dst = dstMat;
  51 +
  52 +// ProfilerStop();
47 } 53 }
48 }; 54 };
49 55
openbr/plugins/cuda/cudapca.cpp
@@ -29,11 +29,10 @@ using namespace cv; @@ -29,11 +29,10 @@ using namespace cv;
29 #include <openbr/core/eigenutils.h> 29 #include <openbr/core/eigenutils.h>
30 #include <openbr/core/opencvutils.h> 30 #include <openbr/core/opencvutils.h>
31 31
32 -namespace br { namespace cuda {  
33 - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems);  
34 - void cudapca_trainwrapper(void* cudaDataPtr, float* dataPtr, int rows, int cols);  
35 - void cudapca_projectwrapper(void* src, void** dst);  
36 -}} 32 +namespace br { namespace cuda { namespace pca {
  33 + void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems);
  34 + void wrapper(void* src, void** dst);
  35 +}}}
37 36
38 namespace br 37 namespace br
39 { 38 {
@@ -95,7 +94,6 @@ private: @@ -95,7 +94,6 @@ private:
95 int type = *((int*)srcDataPtr[3]); 94 int type = *((int*)srcDataPtr[3]);
96 95
97 Mat mat = Mat(rows, cols, type); 96 Mat mat = Mat(rows, cols, type);
98 - br::cuda::cudapca_trainwrapper(cudaMemPtr, mat.ptr<float>(), rows, cols);  
99 trainingQlist.append(Template(mat)); 97 trainingQlist.append(Template(mat));
100 } 98 }
101 99
@@ -136,7 +134,7 @@ private: @@ -136,7 +134,7 @@ private:
136 dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep; 134 dstDataPtr[2] = srcDataPtr[2]; *((int*)dstDataPtr[2]) = keep;
137 dstDataPtr[3] = srcDataPtr[3]; 135 dstDataPtr[3] = srcDataPtr[3];
138 136
139 - br::cuda::cudapca_projectwrapper(srcDataPtr[0], &dstDataPtr[0]); 137 + br::cuda::pca::wrapper(srcDataPtr[0], &dstDataPtr[0]);
140 138
141 dst = dstMat; 139 dst = dstMat;
142 140
@@ -161,7 +159,11 @@ private: @@ -161,7 +159,11 @@ private:
161 159
162 void load(QDataStream &stream) 160 void load(QDataStream &stream)
163 { 161 {
164 - stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs; 162 + Eigen::MatrixXf originalEVecs;
  163 + stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> originalEVecs;
  164 +
  165 + // perform transpose before copying over (currently disabled: eVecs is copied as-is)
  166 + eVecs = originalEVecs; //originalEVecs.transpose();
165 167
166 cout << "Mean Dimensions" << endl; 168 cout << "Mean Dimensions" << endl;
167 cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl; 169 cout << "\tRows: " << mean.rows() << " Cols: " << mean.cols() << endl;
@@ -173,6 +175,7 @@ private: @@ -173,6 +175,7 @@ private:
173 175
174 cout << "Mean first value: " << mean(0, 0) << endl; 176 cout << "Mean first value: " << mean(0, 0) << endl;
175 177
  178 +
176 // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data 179 // TODO(colin): use Eigen Map class to generate map files so we don't have to copy the data
177 // serialize the eigenvectors 180 // serialize the eigenvectors
178 float* evBuffer = new float[eVecs.rows() * eVecs.cols()]; 181 float* evBuffer = new float[eVecs.rows() * eVecs.cols()];
@@ -191,7 +194,7 @@ private: @@ -191,7 +194,7 @@ private:
191 } 194 }
192 195
193 // call the wrapper function 196 // call the wrapper function
194 - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); 197 + br::cuda::pca::loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols());
195 198
196 delete evBuffer; 199 delete evBuffer;
197 delete meanBuffer; 200 delete meanBuffer;
openbr/plugins/cuda/cudapca.cu
@@ -9,28 +9,8 @@ using namespace std; @@ -9,28 +9,8 @@ using namespace std;
9 using namespace cv; 9 using namespace cv;
10 using namespace cv::gpu; 10 using namespace cv::gpu;
11 11
12 -namespace br { namespace cuda {  
13 - __global__ void calculateCovariance_kernel(float* trainingSet, float* cov, int numRows, int numCols) {  
14 - int rowInd = blockIdx.y*blockDim.y + threadIdx.y;  
15 - int colInd = blockIdx.x*blockDim.x + threadIdx.x;  
16 -  
17 - // this calculates trainingSet' * trainingSet  
18 - if (rowInd >= numRows || colInd >= numCols) {  
19 - return;  
20 - }  
21 -  
22 - // get a reference the value we wish to write  
23 - float& out = cov[rowInd*numRows + colInd];  
24 -  
25 - // calculate the value of this position  
26 - out = 0;  
27 - for (int i=0; i<numRows; i++) {  
28 - out += trainingSet[rowInd*numCols + colInd] * trainingSet[rowInd*numCols + numRows]; // XXX(colin): not sure if this is correct  
29 - }  
30 - out = out / (numRows-1);  
31 - }  
32 -  
33 - __global__ void cudapca_project_multiply_kernel(float* src, float* dst, float* evPtr, int evRows, int evCols) { 12 +namespace br { namespace cuda { namespace pca {
  13 + __global__ void multiplyKernel(float* src, float* dst, float* evPtr, int evRows, int evCols) {
34 int colInd = blockIdx.x*blockDim.x+threadIdx.x; 14 int colInd = blockIdx.x*blockDim.x+threadIdx.x;
35 15
36 // check dimensions 16 // check dimensions
@@ -44,7 +24,7 @@ namespace br { namespace cuda { @@ -44,7 +24,7 @@ namespace br { namespace cuda {
44 } 24 }
45 } 25 }
46 26
47 - __global__ void cudapca_project_subtractmean_kernel(float* out, float* mean, int numCols) { 27 + __global__ void subtractMeanKernel(float* out, float* mean, int numCols) {
48 int colInd = blockIdx.x*blockDim.x+threadIdx.x; 28 int colInd = blockIdx.x*blockDim.x+threadIdx.x;
49 29
50 // perform bound checking 30 // perform bound checking
@@ -61,7 +41,7 @@ namespace br { namespace cuda { @@ -61,7 +41,7 @@ namespace br { namespace cuda {
61 float* _cudaSrcPtr; 41 float* _cudaSrcPtr;
62 float* _cudaDstPtr; 42 float* _cudaDstPtr;
63 43
64 - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { 44 + void loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) {
65 _evRows = evRows; _evCols = evCols; 45 _evRows = evRows; _evCols = evCols;
66 _meanElems = meanElems; 46 _meanElems = meanElems;
67 47
@@ -79,13 +59,7 @@ namespace br { namespace cuda { @@ -79,13 +59,7 @@ namespace br { namespace cuda {
79 CUDA_SAFE_MALLOC(&_cudaDstPtr, _evCols*sizeof(float), &err); 59 CUDA_SAFE_MALLOC(&_cudaDstPtr, _evCols*sizeof(float), &err);
80 } 60 }
81 61
82 - void cudapca_trainwrapper(void* cudaDataPtr, float* dataPtr, int rows, int cols) {  
83 - cudaError_t err;  
84 - CUDA_SAFE_MEMCPY(dataPtr, cudaDataPtr, rows*cols*sizeof(float), cudaMemcpyDeviceToHost, &err);  
85 - CUDA_SAFE_FREE(cudaDataPtr, &err);  
86 - }  
87 -  
88 - void cudapca_projectwrapper(void* src, void** dst) { 62 + void wrapper(void* src, void** dst) {
89 // copy the image to the GPU 63 // copy the image to the GPU
90 //cudaMemcpy(_cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice); 64 //cudaMemcpy(_cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice);
91 cudaError_t err; 65 cudaError_t err;
@@ -94,13 +68,13 @@ namespace br { namespace cuda { @@ -94,13 +68,13 @@ namespace br { namespace cuda {
94 // subtract out the mean of the image (mean is 1xpixels in size) 68 // subtract out the mean of the image (mean is 1xpixels in size)
95 int threadsPerBlock = 64; 69 int threadsPerBlock = 64;
96 int numBlocks = _meanElems / threadsPerBlock + 1; 70 int numBlocks = _meanElems / threadsPerBlock + 1;
97 - cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>((float*)src, cudaMeanPtr, _meanElems); 71 + subtractMeanKernel<<<numBlocks, threadsPerBlock>>>((float*)src, cudaMeanPtr, _meanElems);
98 CUDA_KERNEL_ERR_CHK(&err); 72 CUDA_KERNEL_ERR_CHK(&err);
99 73
100 // perform the multiplication 74 // perform the multiplication
101 threadsPerBlock = 64; 75 threadsPerBlock = 64;
102 numBlocks = _evCols / threadsPerBlock + 1; 76 numBlocks = _evCols / threadsPerBlock + 1;
103 - cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols); 77 + multiplyKernel<<<numBlocks, threadsPerBlock>>>((float*)src, (float*)(*dst), cudaEvPtr, _evRows, _evCols);
104 CUDA_KERNEL_ERR_CHK(&err); 78 CUDA_KERNEL_ERR_CHK(&err);
105 79
106 CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption... 80 CUDA_SAFE_FREE(src, &err); // TODO(colin): figure out why adding this free causes memory corruption...
@@ -108,4 +82,4 @@ namespace br { namespace cuda { @@ -108,4 +82,4 @@ namespace br { namespace cuda {
108 // copy the data back to the CPU 82 // copy the data back to the CPU
109 //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); 83 //cudaMemcpy(dst, _cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost);
110 } 84 }
111 -}} 85 +}}}