Commit c849ef1bd11d6b3ff78ce90efcc629d7af55c6cc
Committed by
GitHub
Merge pull request #493 from DepthDeluxe/master
Further performance enhancements of CUDAPCA
Showing
1 changed file
with
48 additions
and
1 deletion
openbr/plugins/cuda/cudapca.cpp
| ... | ... | @@ -297,7 +297,7 @@ protected: |
| 297 | 297 | for (int i=0; i<keep; i++) { |
| 298 | 298 | int index = i+drop; |
| 299 | 299 | eVals(i) = allEVals(index); |
| 300 | - eVecs.col(i) = allEVecs.col(index).cast<float>() / allEVecs.col(index).norm(); | |
| 300 | + eVecs.col(i) = allEVecs.col(index).cast<float>(); | |
| 301 | 301 | if (whiten) eVecs.col(i) /= sqrt(eVals(i)); |
| 302 | 302 | } |
| 303 | 303 | |
| ... | ... | @@ -519,6 +519,29 @@ protected: |
| 519 | 519 | dimsIn // ldc |
| 520 | 520 | ); |
| 521 | 521 | |
| 522 | + // normalize result then divide the column by the norm | |
| 523 | + for (int i=0; i < instances; i++) { | |
| 524 | + // compute the norm | |
| 525 | + double norm; | |
| 526 | + cublasDnrm2( | |
| 527 | + cublasHandle, | |
| 528 | + dimsIn, | |
| 529 | + cudaMultedAllEVecs+i*dimsIn, | |
| 530 | + 1, | |
| 531 | + &norm | |
| 532 | + ); | |
| 533 | + | |
| 534 | + // now divide by it | |
| 535 | + norm = 1.0/norm; | |
| 536 | + cublasDscal( | |
| 537 | + cublasHandle, | |
| 538 | + dimsIn, | |
| 539 | + &norm, | |
| 540 | + cudaMultedAllEVecs+i*dimsIn, | |
| 541 | + 1 | |
| 542 | + ); | |
| 543 | + } | |
| 544 | + | |
| 522 | 545 | // get the eigenvectors from the multiplied value |
| 523 | 546 | cublasGetMatrix( |
| 524 | 547 | dimsIn, |
| ... | ... | @@ -533,6 +556,30 @@ protected: |
| 533 | 556 | // free the memory used for multiplication |
| 534 | 557 | CUDA_SAFE_FREE(cudaMultedAllEVecs, &cudaError); |
| 535 | 558 | } else { |
| 559 | + // normalize result then divide the column by the norm | |
| 560 | + for (int i=0; i < instances; i++) { | |
| 561 | + // compute the norm | |
| 562 | + double norm; | |
| 563 | + cublasDnrm2( | |
| 564 | + cublasHandle, | |
| 565 | + covRows, | |
| 566 | + cudaUPtr+i*covRows, | |
| 567 | + 1, | |
| 568 | + &norm | |
| 569 | + ); | |
| 570 | + | |
| 571 | + // now divide by it | |
| 572 | + norm = 1.0/norm; | |
| 573 | + cublasDscal( | |
| 574 | + cublasHandle, | |
| 575 | + covRows, | |
| 576 | + &norm, | |
| 577 | + cudaUPtr+i*covRows, | |
| 578 | + 1 | |
| 579 | + ); | |
| 580 | + } | |
| 581 | + | |
| 582 | + | |
| 536 | 583 | // get the eigenvectors straight from the SVD |
| 537 | 584 | cublasGetMatrix( |
| 538 | 585 | covRows, | ... | ... |