Commit eda3d2e71f28930744c60d5ac9d1b8adf7cef4e6 (1 parent: f5f14649)
added CUDA matrix multiplication of eigenvectors
Showing 1 changed file with 58 additions and 22 deletions
openbr/plugins/cuda/cublaspca.cpp
| @@ -253,7 +253,7 @@ protected: | @@ -253,7 +253,7 @@ protected: | ||
| 253 | // allocate the eigenvectors | 253 | // allocate the eigenvectors |
| 254 | if (dominantEigenEstimation) { | 254 | if (dominantEigenEstimation) { |
| 255 | allEVals = Eigen::MatrixXd(instances, 1); | 255 | allEVals = Eigen::MatrixXd(instances, 1); |
| 256 | - allEVecs = Eigen::MatrixXd(instances, instances); | 256 | + allEVecs = Eigen::MatrixXd(dimsIn, instances); |
| 257 | } else { | 257 | } else { |
| 258 | allEVals = Eigen::MatrixXd(dimsIn, 1); | 258 | allEVals = Eigen::MatrixXd(dimsIn, 1); |
| 259 | allEVecs = Eigen::MatrixXd(dimsIn, dimsIn); | 259 | allEVecs = Eigen::MatrixXd(dimsIn, dimsIn); |
| @@ -261,7 +261,6 @@ protected: | @@ -261,7 +261,6 @@ protected: | ||
| 261 | 261 | ||
| 262 | if (keep != 0) { | 262 | if (keep != 0) { |
| 263 | performCovarianceSVD(data, allEVals, allEVecs); | 263 | performCovarianceSVD(data, allEVals, allEVecs); |
| 264 | - if (dominantEigenEstimation) allEVecs = data * allEVecs; | ||
| 265 | } else { | 264 | } else { |
| 266 | // null case | 265 | // null case |
| 267 | mean = Eigen::VectorXf::Zero(dimsIn); | 266 | mean = Eigen::VectorXf::Zero(dimsIn); |
| @@ -449,11 +448,7 @@ protected: | @@ -449,11 +448,7 @@ protected: | ||
| 449 | ); | 448 | ); |
| 450 | CUSOLVER_ERROR_CHECK(cusolverStatus); | 449 | CUSOLVER_ERROR_CHECK(cusolverStatus); |
| 451 | 450 | ||
| 452 | - // get devInfo for status | ||
| 453 | - cublasGetVector(1, sizeof(svdDevInfo), cudaSvdDevInfoPtr, 1, &svdDevInfo, 1); | ||
| 454 | - cout << "SVD devInfo: " << svdDevInfo << endl; | ||
| 455 | - | ||
| 456 | - // get the results | 451 | + // get the eigenvalues and free memory |
| 457 | cublasGetVector( | 452 | cublasGetVector( |
| 458 | covRows, | 453 | covRows, |
| 459 | sizeof(cudaSPtr[0]), | 454 | sizeof(cudaSPtr[0]), |
| @@ -462,27 +457,68 @@ protected: | @@ -462,27 +457,68 @@ protected: | ||
| 462 | allEVals.data(), | 457 | allEVals.data(), |
| 463 | 1 | 458 | 1 |
| 464 | ); | 459 | ); |
| 465 | - cublasGetMatrix( | ||
| 466 | - covRows, | ||
| 467 | - covRows, | ||
| 468 | - sizeof(cudaUPtr[0]), | ||
| 469 | - cudaUPtr, | ||
| 470 | - covRows, | ||
| 471 | - allEVecs.data(), | ||
| 472 | - covRows | ||
| 473 | - ); | 460 | + CUDA_SAFE_FREE(cudaSvdWork, &cudaError); |
| 461 | + CUDA_SAFE_FREE(cudaSPtr, &cudaError); | ||
| 462 | + CUDA_SAFE_FREE(cudaVTPtr, &cudaError); | ||
| 463 | + CUDA_SAFE_FREE(cudaSvdDevInfoPtr, &cudaError); | ||
| 464 | + | ||
| 465 | + // if this is a dominant eigen estimation, then perform matrix multiplication again | ||
| 466 | + // if (dominantEigenEstimation) allEVecs = data * allEVecs; | ||
| 467 | + if (dominantEigenEstimation) { | ||
| 468 | + double* cudaMultedAllEVecs; | ||
| 469 | + CUDA_SAFE_MALLOC(&cudaMultedAllEVecs, dimsIn*instances*sizeof(cudaMultedAllEVecs[0]), &cudaError); | ||
| 470 | + const double one = 1.0; | ||
| 471 | + const double zero = 0; | ||
| 472 | + | ||
| 473 | + cublasDgemm( | ||
| 474 | + cublasHandle, // handle | ||
| 475 | + CUBLAS_OP_N, // transa | ||
| 476 | + CUBLAS_OP_N, // transb | ||
| 477 | + dimsIn, // m | ||
| 478 | + instances, // n | ||
| 479 | + instances, // k | ||
| 480 | + &one, // alpha | ||
| 481 | + cudaDataPtr, // A | ||
| 482 | + dimsIn, // lda | ||
| 483 | + cudaUPtr, // B | ||
| 484 | + instances, // ldb | ||
| 485 | + &zero, // beta | ||
| 486 | + cudaMultedAllEVecs, // C | ||
| 487 | + dimsIn // ldc | ||
| 488 | + ); | ||
| 489 | + | ||
| 490 | + // get the eigenvectors from the multiplied value | ||
| 491 | + cublasGetMatrix( | ||
| 492 | + dimsIn, | ||
| 493 | + instances, | ||
| 494 | + sizeof(cudaMultedAllEVecs[0]), | ||
| 495 | + cudaMultedAllEVecs, | ||
| 496 | + dimsIn, | ||
| 497 | + allEVecs.data(), | ||
| 498 | + dimsIn | ||
| 499 | + ); | ||
| 500 | + | ||
| 501 | + // free the memory used for multiplication | ||
| 502 | + CUDA_SAFE_FREE(cudaMultedAllEVecs, &cudaError); | ||
| 503 | + } else { | ||
| 504 | + // get the eigenvectors straight from the SVD | ||
| 505 | + cublasGetMatrix( | ||
| 506 | + covRows, | ||
| 507 | + covRows, | ||
| 508 | + sizeof(cudaUPtr[0]), | ||
| 509 | + cudaUPtr, | ||
| 510 | + covRows, | ||
| 511 | + allEVecs.data(), | ||
| 512 | + covRows | ||
| 513 | + ); | ||
| 514 | + } | ||
| 515 | + | ||
| 474 | 516 | ||
| 475 | // free all the memory | 517 | // free all the memory |
| 476 | CUDA_SAFE_FREE(cudaDataPtr, &cudaError); | 518 | CUDA_SAFE_FREE(cudaDataPtr, &cudaError); |
| 477 | CUDA_SAFE_FREE(cudaCovariancePtr, &cudaError); | 519 | CUDA_SAFE_FREE(cudaCovariancePtr, &cudaError); |
| 478 | - CUDA_SAFE_FREE(cudaSvdWork, &cudaError); | ||
| 479 | CUDA_SAFE_FREE(cudaUPtr, &cudaError); | 520 | CUDA_SAFE_FREE(cudaUPtr, &cudaError); |
| 480 | - CUDA_SAFE_FREE(cudaSPtr, &cudaError); | ||
| 481 | - CUDA_SAFE_FREE(cudaVTPtr, &cudaError); | ||
| 482 | - CUDA_SAFE_FREE(cudaSvdDevInfoPtr, &cudaError); | ||
| 483 | cusolverDnDestroy(cusolverHandle); | 521 | cusolverDnDestroy(cusolverHandle); |
| 484 | - | ||
| 485 | - cout << "Success!" << endl; | ||
| 486 | } | 522 | } |
| 487 | }; | 523 | }; |
| 488 | 524 |