Commit eda3d2e71f28930744c60d5ac9d1b8adf7cef4e6

Authored by DepthDeluxe
1 parent f5f14649

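added CUDA matrix multiplication of eigenvectors (for dominant eigen estimation, the host-side `allEVecs = data * allEVecs` is replaced by a cublasDgemm on the GPU)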

openbr/plugins/cuda/cublaspca.cpp
@@ -253,7 +253,7 @@ protected: @@ -253,7 +253,7 @@ protected:
253 // allocate the eigenvectors 253 // allocate the eigenvectors
254 if (dominantEigenEstimation) { 254 if (dominantEigenEstimation) {
255 allEVals = Eigen::MatrixXd(instances, 1); 255 allEVals = Eigen::MatrixXd(instances, 1);
256 - allEVecs = Eigen::MatrixXd(instances, instances); 256 + allEVecs = Eigen::MatrixXd(dimsIn, instances);
257 } else { 257 } else {
258 allEVals = Eigen::MatrixXd(dimsIn, 1); 258 allEVals = Eigen::MatrixXd(dimsIn, 1);
259 allEVecs = Eigen::MatrixXd(dimsIn, dimsIn); 259 allEVecs = Eigen::MatrixXd(dimsIn, dimsIn);
@@ -261,7 +261,6 @@ protected: @@ -261,7 +261,6 @@ protected:
261 261
262 if (keep != 0) { 262 if (keep != 0) {
263 performCovarianceSVD(data, allEVals, allEVecs); 263 performCovarianceSVD(data, allEVals, allEVecs);
264 - if (dominantEigenEstimation) allEVecs = data * allEVecs;  
265 } else { 264 } else {
266 // null case 265 // null case
267 mean = Eigen::VectorXf::Zero(dimsIn); 266 mean = Eigen::VectorXf::Zero(dimsIn);
@@ -449,11 +448,7 @@ protected: @@ -449,11 +448,7 @@ protected:
449 ); 448 );
450 CUSOLVER_ERROR_CHECK(cusolverStatus); 449 CUSOLVER_ERROR_CHECK(cusolverStatus);
451 450
452 - // get devInfo for status  
453 - cublasGetVector(1, sizeof(svdDevInfo), cudaSvdDevInfoPtr, 1, &svdDevInfo, 1);  
454 - cout << "SVD devInfo: " << svdDevInfo << endl;  
455 -  
456 - // get the results 451 + // get the eigenvalues and free memory
457 cublasGetVector( 452 cublasGetVector(
458 covRows, 453 covRows,
459 sizeof(cudaSPtr[0]), 454 sizeof(cudaSPtr[0]),
@@ -462,27 +457,68 @@ protected: @@ -462,27 +457,68 @@ protected:
462 allEVals.data(), 457 allEVals.data(),
463 1 458 1
464 ); 459 );
465 - cublasGetMatrix(  
466 - covRows,  
467 - covRows,  
468 - sizeof(cudaUPtr[0]),  
469 - cudaUPtr,  
470 - covRows,  
471 - allEVecs.data(),  
472 - covRows  
473 - ); 460 + CUDA_SAFE_FREE(cudaSvdWork, &cudaError);
  461 + CUDA_SAFE_FREE(cudaSPtr, &cudaError);
  462 + CUDA_SAFE_FREE(cudaVTPtr, &cudaError);
  463 + CUDA_SAFE_FREE(cudaSvdDevInfoPtr, &cudaError);
  464 +
  465 + // if this is a dominant eigen estimation, then perform matrix multiplication again
  466 + // if (dominantEigenEstimation) allEVecs = data * allEVecs;
  467 + if (dominantEigenEstimation) {
  468 + double* cudaMultedAllEVecs;
  469 + CUDA_SAFE_MALLOC(&cudaMultedAllEVecs, dimsIn*instances*sizeof(cudaMultedAllEVecs[0]), &cudaError);
  470 + const double one = 1.0;
  471 + const double zero = 0;
  472 +
  473 + cublasDgemm(
  474 + cublasHandle, // handle
  475 + CUBLAS_OP_N, // transa
  476 + CUBLAS_OP_N, // transb
  477 + dimsIn, // m
  478 + instances, // n
  479 + instances, // k
  480 + &one, // alpha
  481 + cudaDataPtr, // A
  482 + dimsIn, // lda
  483 + cudaUPtr, // B
  484 + instances, // ldb
  485 + &zero, // beta
  486 + cudaMultedAllEVecs, // C
  487 + dimsIn // ldc
  488 + );
  489 +
  490 + // get the eigenvectors from the multiplied value
  491 + cublasGetMatrix(
  492 + dimsIn,
  493 + instances,
  494 + sizeof(cudaMultedAllEVecs[0]),
  495 + cudaMultedAllEVecs,
  496 + dimsIn,
  497 + allEVecs.data(),
  498 + dimsIn
  499 + );
  500 +
  501 + // free the memory used for multiplication
  502 + CUDA_SAFE_FREE(cudaMultedAllEVecs, &cudaError);
  503 + } else {
  504 + // get the eigenvectors straight from the SVD
  505 + cublasGetMatrix(
  506 + covRows,
  507 + covRows,
  508 + sizeof(cudaUPtr[0]),
  509 + cudaUPtr,
  510 + covRows,
  511 + allEVecs.data(),
  512 + covRows
  513 + );
  514 + }
  515 +
474 516
475 // free all the memory 517 // free all the memory
476 CUDA_SAFE_FREE(cudaDataPtr, &cudaError); 518 CUDA_SAFE_FREE(cudaDataPtr, &cudaError);
477 CUDA_SAFE_FREE(cudaCovariancePtr, &cudaError); 519 CUDA_SAFE_FREE(cudaCovariancePtr, &cudaError);
478 - CUDA_SAFE_FREE(cudaSvdWork, &cudaError);  
479 CUDA_SAFE_FREE(cudaUPtr, &cudaError); 520 CUDA_SAFE_FREE(cudaUPtr, &cudaError);
480 - CUDA_SAFE_FREE(cudaSPtr, &cudaError);  
481 - CUDA_SAFE_FREE(cudaVTPtr, &cudaError);  
482 - CUDA_SAFE_FREE(cudaSvdDevInfoPtr, &cudaError);  
483 cusolverDnDestroy(cusolverHandle); 521 cusolverDnDestroy(cusolverHandle);
484 -  
485 - cout << "Success!" << endl;  
486 } 522 }
487 }; 523 };
488 524