Commit c849ef1bd11d6b3ff78ce90efcc629d7af55c6cc

Authored by Josh Klontz
Committed by GitHub
2 parents 6f8cc085 425433b4

Merge pull request #493 from DepthDeluxe/master

Further performance enhancements of CUDAPCA
Showing 1 changed file with 48 additions and 1 deletion
openbr/plugins/cuda/cudapca.cpp
@@ -297,7 +297,7 @@ protected: @@ -297,7 +297,7 @@ protected:
297 for (int i=0; i<keep; i++) { 297 for (int i=0; i<keep; i++) {
298 int index = i+drop; 298 int index = i+drop;
299 eVals(i) = allEVals(index); 299 eVals(i) = allEVals(index);
300 - eVecs.col(i) = allEVecs.col(index).cast<float>() / allEVecs.col(index).norm(); 300 + eVecs.col(i) = allEVecs.col(index).cast<float>();
301 if (whiten) eVecs.col(i) /= sqrt(eVals(i)); 301 if (whiten) eVecs.col(i) /= sqrt(eVals(i));
302 } 302 }
303 303
@@ -519,6 +519,29 @@ protected: @@ -519,6 +519,29 @@ protected:
519 dimsIn // ldc 519 dimsIn // ldc
520 ); 520 );
521 521
  522 + // normalize each column: compute its norm, then scale the column by 1/norm
  523 + for (int i=0; i < instances; i++) {
  524 + // compute the norm
  525 + double norm;
  526 + cublasDnrm2(
  527 + cublasHandle,
  528 + dimsIn,
  529 + cudaMultedAllEVecs+i*dimsIn,
  530 + 1,
  531 + &norm
  532 + );
  533 +
  534 + // now divide by it
  535 + norm = 1.0/norm;
  536 + cublasDscal(
  537 + cublasHandle,
  538 + dimsIn,
  539 + &norm,
  540 + cudaMultedAllEVecs+i*dimsIn,
  541 + 1
  542 + );
  543 + }
  544 +
522 // get the eigenvectors from the multiplied value 545 // get the eigenvectors from the multiplied value
523 cublasGetMatrix( 546 cublasGetMatrix(
524 dimsIn, 547 dimsIn,
@@ -533,6 +556,30 @@ protected: @@ -533,6 +556,30 @@ protected:
533 // free the memory used for multiplication 556 // free the memory used for multiplication
534 CUDA_SAFE_FREE(cudaMultedAllEVecs, &cudaError); 557 CUDA_SAFE_FREE(cudaMultedAllEVecs, &cudaError);
535 } else { 558 } else {
  559 + // normalize each column: compute its norm, then scale the column by 1/norm
  560 + for (int i=0; i < instances; i++) {
  561 + // compute the norm
  562 + double norm;
  563 + cublasDnrm2(
  564 + cublasHandle,
  565 + covRows,
  566 + cudaUPtr+i*covRows,
  567 + 1,
  568 + &norm
  569 + );
  570 +
  571 + // now divide by it
  572 + norm = 1.0/norm;
  573 + cublasDscal(
  574 + cublasHandle,
  575 + covRows,
  576 + &norm,
  577 + cudaUPtr+i*covRows,
  578 + 1
  579 + );
  580 + }
  581 +
  582 +
536 // get the eigenvectors straight from the SVD 583 // get the eigenvectors straight from the SVD
537 cublasGetMatrix( 584 cublasGetMatrix(
538 covRows, 585 covRows,