Commit 53e99a0da22e7623fab22069b2a62d0318d31e80
1 parent
39f67cda
Fixed part of the PCA
Now produces better results
Showing 3 changed files with 14 additions and 16 deletions
openbr/plugins/cuda/cudapca.cpp
| @@ -143,7 +143,7 @@ private: | @@ -143,7 +143,7 @@ private: | ||
| 143 | cout << "Meanbuffer first value: " << meanBuffer[0] << endl; | 143 | cout << "Meanbuffer first value: " << meanBuffer[0] << endl; |
| 144 | 144 | ||
| 145 | // call the wrapper function | 145 | // call the wrapper function |
| 146 | - cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows(), mean.cols(), keep); | 146 | + cuda::cudapca_loadwrapper(evBuffer, eVecs.rows(), eVecs.cols(), meanBuffer, mean.rows()*mean.cols()); |
| 147 | 147 | ||
| 148 | delete evBuffer; | 148 | delete evBuffer; |
| 149 | delete meanBuffer; | 149 | delete meanBuffer; |
openbr/plugins/cuda/cudapca.cu
| @@ -57,25 +57,23 @@ namespace br { namespace cuda { | @@ -57,25 +57,23 @@ namespace br { namespace cuda { | ||
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | float* cudaEvPtr; int _evRows; int _evCols; | 59 | float* cudaEvPtr; int _evRows; int _evCols; |
| 60 | - float* cudaMeanPtr; int _meanRows; int _meanCols; | ||
| 61 | - int _keep; | 60 | + float* cudaMeanPtr; int _meanElems; |
| 62 | 61 | ||
| 63 | void cudapca_initwrapper() { | 62 | void cudapca_initwrapper() { |
| 64 | 63 | ||
| 65 | } | 64 | } |
| 66 | 65 | ||
| 67 | - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep) { | 66 | + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems) { |
| 68 | _evRows = evRows; _evCols = evCols; | 67 | _evRows = evRows; _evCols = evCols; |
| 69 | - _meanRows = meanRows; _meanCols = meanCols; | ||
| 70 | - _keep = keep; | 68 | + _meanElems = meanElems; |
| 71 | 69 | ||
| 72 | // copy the eigenvectors to the GPU | 70 | // copy the eigenvectors to the GPU |
| 73 | cudaMalloc(&cudaEvPtr, evRows*evCols*sizeof(float)); | 71 | cudaMalloc(&cudaEvPtr, evRows*evCols*sizeof(float)); |
| 74 | cudaMemcpy(cudaEvPtr, evPtr, evRows*evCols*sizeof(float), cudaMemcpyHostToDevice); | 72 | cudaMemcpy(cudaEvPtr, evPtr, evRows*evCols*sizeof(float), cudaMemcpyHostToDevice); |
| 75 | 73 | ||
| 76 | // copy the mean to the GPU | 74 | // copy the mean to the GPU |
| 77 | - cudaMalloc(&cudaMeanPtr, meanRows*meanCols*sizeof(float)); | ||
| 78 | - cudaMemcpy(cudaMeanPtr, meanPtr, meanRows*meanCols*sizeof(float), cudaMemcpyHostToDevice); | 75 | + cudaMalloc(&cudaMeanPtr, meanElems*sizeof(float)); |
| 76 | + cudaMemcpy(cudaMeanPtr, meanPtr, meanElems*sizeof(float), cudaMemcpyHostToDevice); | ||
| 79 | } | 77 | } |
| 80 | 78 | ||
| 81 | void cudapca_trainwrapper() { | 79 | void cudapca_trainwrapper() { |
| @@ -176,24 +174,24 @@ namespace br { namespace cuda { | @@ -176,24 +174,24 @@ namespace br { namespace cuda { | ||
| 176 | void cudapca_projectwrapper(float* src, float* dst) { | 174 | void cudapca_projectwrapper(float* src, float* dst) { |
| 177 | // copy the image to the GPU | 175 | // copy the image to the GPU |
| 178 | float* cudaSrcPtr; | 176 | float* cudaSrcPtr; |
| 179 | - cudaMalloc(&cudaSrcPtr, _meanRows*_meanCols*sizeof(float)); | ||
| 180 | - cudaMemcpy(cudaSrcPtr, src, _meanRows*_meanCols*sizeof(float), cudaMemcpyHostToDevice); | 177 | + cudaMalloc(&cudaSrcPtr, _meanElems*sizeof(float)); |
| 178 | + cudaMemcpy(cudaSrcPtr, src, _meanElems*sizeof(float), cudaMemcpyHostToDevice); | ||
| 181 | 179 | ||
| 182 | float* cudaDstPtr; | 180 | float* cudaDstPtr; |
| 183 | - cudaMalloc(&cudaDstPtr, _keep*sizeof(float)); | 181 | + cudaMalloc(&cudaDstPtr, _evCols*sizeof(float)); |
| 184 | 182 | ||
| 185 | // subtract out the mean of the image (mean is 1xpixels in size) | 183 | // subtract out the mean of the image (mean is 1xpixels in size) |
| 186 | int threadsPerBlock = 64; | 184 | int threadsPerBlock = 64; |
| 187 | - int numBlocks = _meanRows*_meanCols / threadsPerBlock; | ||
| 188 | - cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaMeanPtr, _meanRows*_meanCols); | 185 | + int numBlocks = _meanElems / threadsPerBlock; |
| 186 | + cudapca_project_subtractmean_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaMeanPtr, _meanElems); | ||
| 189 | 187 | ||
| 190 | // perform the multiplication | 188 | // perform the multiplication |
| 191 | threadsPerBlock = 64; | 189 | threadsPerBlock = 64; |
| 192 | - numBlocks = _keep / threadsPerBlock; | 190 | + numBlocks = _evCols / threadsPerBlock; |
| 193 | cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaDstPtr, cudaEvPtr, _evRows, _evCols); | 191 | cudapca_project_multiply_kernel<<<numBlocks, threadsPerBlock>>>(cudaSrcPtr, cudaDstPtr, cudaEvPtr, _evRows, _evCols); |
| 194 | 192 | ||
| 195 | // copy the data back to the CPU | 193 | // copy the data back to the CPU |
| 196 | - cudaMemcpy(dst, cudaDstPtr, _keep*sizeof(float), cudaMemcpyDeviceToHost); | 194 | + cudaMemcpy(dst, cudaDstPtr, _evCols*sizeof(float), cudaMemcpyDeviceToHost); |
| 197 | 195 | ||
| 198 | cudaFree(cudaSrcPtr); | 196 | cudaFree(cudaSrcPtr); |
| 199 | cudaFree(cudaDstPtr); | 197 | cudaFree(cudaDstPtr); |
openbr/plugins/cuda/cudapca.hpp
| @@ -7,7 +7,7 @@ using namespace cv::gpu; | @@ -7,7 +7,7 @@ using namespace cv::gpu; | ||
| 7 | namespace br { namespace cuda { | 7 | namespace br { namespace cuda { |
| 8 | void cudapca_initwrapper(); | 8 | void cudapca_initwrapper(); |
| 9 | 9 | ||
| 10 | - void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanRows, int meanCols, int keep); | 10 | + void cudapca_loadwrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); |
| 11 | void cudapca_trainwrapper(); | 11 | void cudapca_trainwrapper(); |
| 12 | 12 | ||
| 13 | void cudapca_projectwrapper(float* src, float* dst); | 13 | void cudapca_projectwrapper(float* src, float* dst); |