Commit 53e99a0da22e7623fab22069b2a62d0318d31e80
1 parent
39f67cda
Fixed part of the PCA
Now produces more and better results
Showing 3 changed files with 14 additions and 16 deletions
openbr/plugins/cuda/cudapca.cpp
| ... | ... | @@ -143,7 +143,7 @@ private: |
| 143 | 143 | cout << "Meanbuffer first value: " << meanBuffer[0] << endl; |
| 144 | 144 | |
| 145 | 145 | // call the wrapper function |
| 146 | - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows(), mean.cols(), keep); | |
| 146 | + cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); | |
| 147 | 147 | |
| 148 | 148 | delete evBuffer; |
| 149 | 149 | delete meanBuffer; | ... | ... |
openbr/plugins/cuda/cudapca.cu
| ... | ... | @@ -57,25 +57,23 @@ namespace br { namespace cuda { |
| 57 | 57 | } |
| 58 | 58 | |
| 59 | 59 | float* cudaEvPtr; int _evRows; int _evCols; |
| 60 | - float* cudaMeanPtr; int _meanRows; int _meanCols; | |
| 61 | - int _keep; | |
| 60 | + float* cudaMeanPtr; int _meanElems; | |
| 62 | 61 | |
| 63 | 62 | void cudapca_initwrapper() { |
| 64 | 63 | |
| 65 | 64 | } |
| 66 | 65 | |
| 67 | - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep) { | |
| 66 | + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { | |
| 68 | 67 | _evRows = evRows; _evCols = evCols; |
| 69 | - _meanRows = meanRows; _meanCols = meanCols; | |
| 70 | - _keep = keep; | |
| 68 | + _meanElems = meanElems; | |
| 71 | 69 | |
| 72 | 70 | // copy the eigenvectors to the GPU |
| 73 | 71 | cudaMalloc(&cudaEvPtr, evRows*evCols*sizeof(float)); |
| 74 | 72 | cudaMemcpy(cudaEvPtr, evPtr, evRows*evCols*sizeof(float), cudaMemcpyHostToDevice); |
| 75 | 73 | |
| 76 | 74 | // copy the mean to the GPU |
| 77 | - cudaMalloc(&cudaMeanPtr, meanRows*meanCols*sizeof(float)); | |
| 78 | - cudaMemcpy(cudaMeanPtr, meanPtr, meanRows*meanCols*sizeof(float), cudaMemcpyHostToDevice); | |
| 75 | + cudaMalloc(&cudaMeanPtr, meanElems*sizeof(float)); | |
| 76 | + cudaMemcpy(cudaMeanPtr, meanPtr, meanElems*sizeof(float), cudaMemcpyHostToDevice); | |
| 79 | 77 | } |
| 80 | 78 | |
| 81 | 79 | void cudapca_trainwrapper() { |
| ... | ... | @@ -176,24 +174,24 @@ namespace br { namespace cuda { |
| 176 | 174 | void cudapca_projectwrapper(float* src, float* dst) { |
| 177 | 175 | // copy the image to the GPU |
| 178 | 176 | float* cudaSrcPtr; |
| 179 | - cudaMalloc(&cudaSrcPtr, _meanRows*_meanCols*sizeof(float)); | |
| 180 | - cudaMemcpy(cudaSrcPtr, src, _meanRows*_meanCols*sizeof(float), cudaMemcpyHostToDevice); | |
| 177 | + cudaMalloc(&cudaSrcPtr, _meanElems*sizeof(float)); | |
| 178 | + cudaMemcpy(cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice); | |
| 181 | 179 | |
| 182 | 180 | float* cudaDstPtr; |
| 183 | - cudaMalloc(&cudaDstPtr, _keep*sizeof(float)); | |
| 181 | + cudaMalloc(&cudaDstPtr, _evCols*sizeof(float)); | |
| 184 | 182 | |
| 185 | 183 | // subtract out the mean of the image (mean is 1xpixels in size) |
| 186 | 184 | int threadsPerBlock = 64; |
| 187 | - int numBlocks = _meanRows*_meanCols / threadsPerBlock; | |
| 188 | - cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaMeanPtr, _meanRows*_meanCols); | |
| 185 | + int numBlocks = _meanElems / threadsPerBlock; | |
| 186 | + cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaMeanPtr, _meanElems); | |
| 189 | 187 | |
| 190 | 188 | // perform the multiplication |
| 191 | 189 | threadsPerBlock = 64; |
| 192 | - numBlocks = _keep / threadsPerBlock; | |
| 190 | + numBlocks = _evCols / threadsPerBlock; | |
| 193 | 191 | cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaDstPtr, cudaEvPtr, _evRows, _evCols); |
| 194 | 192 | |
| 195 | 193 | // copy the data back to the CPU |
| 196 | - cudaMemcpy(dst, cudaDstPtr, _keep*sizeof(float), cudaMemcpyDeviceToHost); | |
| 194 | + cudaMemcpy(dst, cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); | |
| 197 | 195 | |
| 198 | 196 | cudaFree(cudaSrcPtr); |
| 199 | 197 | cudaFree(cudaDstPtr); | ... | ... |
openbr/plugins/cuda/cudapca.hpp
| ... | ... | @@ -7,7 +7,7 @@ using namespace cv::gpu; |
| 7 | 7 | namespace br { namespace cuda { |
| 8 | 8 | void cudapca_initwrapper(); |
| 9 | 9 | |
| 10 | - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep); | |
| 10 | + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); | |
| 11 | 11 | void cudapca_trainwrapper(); |
| 12 | 12 | |
| 13 | 13 | void cudapca_projectwrapper(float* src, float* dst); | ... | ... |