Commit 53e99a0da22e7623fab22069b2a62d0318d31e80

Authored by DepthDeluxe
1 parent 39f67cda

Fixed part of the PCA

This produces better results.
openbr/plugins/cuda/cudapca.cpp
... ... @@ -143,7 +143,7 @@ private:
143 143 cout << "Meanbuffer first value: " << meanBuffer[0] << endl;
144 144  
145 145 // call the wrapper function
146   - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows(), mean.cols(), keep);
  146 + cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols());
147 147  
148 148 delete evBuffer;
149 149 delete meanBuffer;
... ...
openbr/plugins/cuda/cudapca.cu
... ... @@ -57,25 +57,23 @@ namespace br { namespace cuda {
57 57 }
58 58  
59 59 float* cudaEvPtr; int _evRows; int _evCols;
60   - float* cudaMeanPtr; int _meanRows; int _meanCols;
61   - int _keep;
  60 + float* cudaMeanPtr; int _meanElems;
62 61  
63 62 void cudapca_initwrapper() {
64 63  
65 64 }
66 65  
67   - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep) {
  66 + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) {
68 67 _evRows = evRows; _evCols = evCols;
69   - _meanRows = meanRows; _meanCols = meanCols;
70   - _keep = keep;
  68 + _meanElems = meanElems;
71 69  
72 70 // copy the eigenvectors to the GPU
73 71 cudaMalloc(&cudaEvPtr, evRows*evCols*sizeof(float));
74 72 cudaMemcpy(cudaEvPtr, evPtr, evRows*evCols*sizeof(float), cudaMemcpyHostToDevice);
75 73  
76 74 // copy the mean to the GPU
77   - cudaMalloc(&cudaMeanPtr, meanRows*meanCols*sizeof(float));
78   - cudaMemcpy(cudaMeanPtr, meanPtr, meanRows*meanCols*sizeof(float), cudaMemcpyHostToDevice);
  75 + cudaMalloc(&cudaMeanPtr, meanElems*sizeof(float));
  76 + cudaMemcpy(cudaMeanPtr, meanPtr, meanElems*sizeof(float), cudaMemcpyHostToDevice);
79 77 }
80 78  
81 79 void cudapca_trainwrapper() {
... ... @@ -176,24 +174,24 @@ namespace br { namespace cuda {
176 174 void cudapca_projectwrapper(float* src, float* dst) {
177 175 // copy the image to the GPU
178 176 float* cudaSrcPtr;
179   - cudaMalloc(&cudaSrcPtr, _meanRows*_meanCols*sizeof(float));
180   - cudaMemcpy(cudaSrcPtr, src, _meanRows*_meanCols*sizeof(float), cudaMemcpyHostToDevice);
  177 + cudaMalloc(&cudaSrcPtr, _meanElems*sizeof(float));
  178 + cudaMemcpy(cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice);
181 179  
182 180 float* cudaDstPtr;
183   - cudaMalloc(&cudaDstPtr, _keep*sizeof(float));
  181 + cudaMalloc(&cudaDstPtr, _evCols*sizeof(float));
184 182  
185 183 // subtract out the mean of the image (mean is 1xpixels in size)
186 184 int threadsPerBlock = 64;
187   - int numBlocks = _meanRows*_meanCols / threadsPerBlock;
188   - cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaMeanPtr, _meanRows*_meanCols);
  185 + int numBlocks = _meanElems / threadsPerBlock;
  186 + cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaMeanPtr, _meanElems);
189 187  
190 188 // perform the multiplication
191 189 threadsPerBlock = 64;
192   - numBlocks = _keep / threadsPerBlock;
  190 + numBlocks = _evCols / threadsPerBlock;
193 191 cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaDstPtr, cudaEvPtr, _evRows, _evCols);
194 192  
195 193 // copy the data back to the CPU
196   - cudaMemcpy(dst, cudaDstPtr, _keep*sizeof(float), cudaMemcpyDeviceToHost);
  194 + cudaMemcpy(dst, cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost);
197 195  
198 196 cudaFree(cudaSrcPtr);
199 197 cudaFree(cudaDstPtr);
... ...
openbr/plugins/cuda/cudapca.hpp
... ... @@ -7,7 +7,7 @@ using namespace cv::gpu;
7 7 namespace br { namespace cuda {
8 8 void cudapca_initwrapper();
9 9  
10   - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep);
  10 + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems);
11 11 void cudapca_trainwrapper();
12 12  
13 13 void cudapca_projectwrapper(float* src, float* dst);
... ...