Commit 53e99a0da22e7623fab22069b2a62d0318d31e80

Authored by DepthDeluxe
1 parent 39f67cda

fixed some part of the PCA

produced more and better results
openbr/plugins/cuda/cudapca.cpp
@@ -143,7 +143,7 @@ private: @@ -143,7 +143,7 @@ private:
143 cout << "Meanbuffer first value: " << meanBuffer[0] << endl; 143 cout << "Meanbuffer first value: " << meanBuffer[0] << endl;
144 144
145 // call the wrapper function 145 // call the wrapper function
146 - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows(), mean.cols(), keep); 146 + cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols());
147 147
148 delete evBuffer; 148 delete evBuffer;
149 delete meanBuffer; 149 delete meanBuffer;
openbr/plugins/cuda/cudapca.cu
@@ -57,25 +57,23 @@ namespace br { namespace cuda { @@ -57,25 +57,23 @@ namespace br { namespace cuda {
57 } 57 }
58 58
59 float* cudaEvPtr; int _evRows; int _evCols; 59 float* cudaEvPtr; int _evRows; int _evCols;
60 - float* cudaMeanPtr; int _meanRows; int _meanCols;  
61 - int _keep; 60 + float* cudaMeanPtr; int _meanElems;
62 61
63 void cudapca_initwrapper() { 62 void cudapca_initwrapper() {
64 63
65 } 64 }
66 65
67 - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep) { 66 + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) {
68 _evRows = evRows; _evCols = evCols; 67 _evRows = evRows; _evCols = evCols;
69 - _meanRows = meanRows; _meanCols = meanCols;  
70 - _keep = keep; 68 + _meanElems = meanElems;
71 69
72 // copy the eigenvectors to the GPU 70 // copy the eigenvectors to the GPU
73 cudaMalloc(&cudaEvPtr, evRows*evCols*sizeof(float)); 71 cudaMalloc(&cudaEvPtr, evRows*evCols*sizeof(float));
74 cudaMemcpy(cudaEvPtr, evPtr, evRows*evCols*sizeof(float), cudaMemcpyHostToDevice); 72 cudaMemcpy(cudaEvPtr, evPtr, evRows*evCols*sizeof(float), cudaMemcpyHostToDevice);
75 73
76 // copy the mean to the GPU 74 // copy the mean to the GPU
77 - cudaMalloc(&cudaMeanPtr, meanRows*meanCols*sizeof(float));  
78 - cudaMemcpy(cudaMeanPtr, meanPtr, meanRows*meanCols*sizeof(float), cudaMemcpyHostToDevice); 75 + cudaMalloc(&cudaMeanPtr, meanElems*sizeof(float));
  76 + cudaMemcpy(cudaMeanPtr, meanPtr, meanElems*sizeof(float), cudaMemcpyHostToDevice);
79 } 77 }
80 78
81 void cudapca_trainwrapper() { 79 void cudapca_trainwrapper() {
@@ -176,24 +174,24 @@ namespace br { namespace cuda { @@ -176,24 +174,24 @@ namespace br { namespace cuda {
176 void cudapca_projectwrapper(float* src, float* dst) { 174 void cudapca_projectwrapper(float* src, float* dst) {
177 // copy the image to the GPU 175 // copy the image to the GPU
178 float* cudaSrcPtr; 176 float* cudaSrcPtr;
179 - cudaMalloc(&cudaSrcPtr, _meanRows*_meanCols*sizeof(float));  
180 - cudaMemcpy(cudaSrcPtr, src, _meanRows*_meanCols*sizeof(float), cudaMemcpyHostToDevice); 177 + cudaMalloc(&cudaSrcPtr, _meanElems*sizeof(float));
  178 + cudaMemcpy(cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice);
181 179
182 float* cudaDstPtr; 180 float* cudaDstPtr;
183 - cudaMalloc(&cudaDstPtr, _keep*sizeof(float)); 181 + cudaMalloc(&cudaDstPtr, _evCols*sizeof(float));
184 182
185 // subtract out the mean of the image (mean is 1xpixels in size) 183 // subtract out the mean of the image (mean is 1xpixels in size)
186 int threadsPerBlock = 64; 184 int threadsPerBlock = 64;
187 - int numBlocks = _meanRows*_meanCols / threadsPerBlock;  
188 - cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaMeanPtr, _meanRows*_meanCols); 185 + int numBlocks = _meanElems / threadsPerBlock;
  186 + cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaMeanPtr, _meanElems);
189 187
190 // perform the multiplication 188 // perform the multiplication
191 threadsPerBlock = 64; 189 threadsPerBlock = 64;
192 - numBlocks = _keep / threadsPerBlock; 190 + numBlocks = _evCols / threadsPerBlock;
193 cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaDstPtr, cudaEvPtr, _evRows, _evCols); 191 cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaDstPtr, cudaEvPtr, _evRows, _evCols);
194 192
195 // copy the data back to the CPU 193 // copy the data back to the CPU
196 - cudaMemcpy(dst, cudaDstPtr, _keep*sizeof(float), cudaMemcpyDeviceToHost); 194 + cudaMemcpy(dst, cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost);
197 195
198 cudaFree(cudaSrcPtr); 196 cudaFree(cudaSrcPtr);
199 cudaFree(cudaDstPtr); 197 cudaFree(cudaDstPtr);
openbr/plugins/cuda/cudapca.hpp
@@ -7,7 +7,7 @@ using namespace cv::gpu; @@ -7,7 +7,7 @@ using namespace cv::gpu;
7 namespace br { namespace cuda { 7 namespace br { namespace cuda {
8 void cudapca_initwrapper(); 8 void cudapca_initwrapper();
9 9
10 - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep); 10 + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems);
11 void cudapca_trainwrapper(); 11 void cudapca_trainwrapper();
12 12
13 void cudapca_projectwrapper(float* src, float* dst); 13 void cudapca_projectwrapper(float* src, float* dst);