Commit eda3d2e71f28930744c60d5ac9d1b8adf7cef4e6

Authored by DepthDeluxe
1 parent f5f14649

Added cuBLAS (CUDA) matrix multiplication of the data by the eigenvectors for dominant eigen estimation

openbr/plugins/cuda/cublaspca.cpp
... ... @@ -253,7 +253,7 @@ protected:
253 253 // allocate the eigenvectors
254 254 if (dominantEigenEstimation) {
255 255 allEVals = Eigen::MatrixXd(instances, 1);
256   - allEVecs = Eigen::MatrixXd(instances, instances);
  256 + allEVecs = Eigen::MatrixXd(dimsIn, instances);
257 257 } else {
258 258 allEVals = Eigen::MatrixXd(dimsIn, 1);
259 259 allEVecs = Eigen::MatrixXd(dimsIn, dimsIn);
... ... @@ -261,7 +261,6 @@ protected:
261 261  
262 262 if (keep != 0) {
263 263 performCovarianceSVD(data, allEVals, allEVecs);
264   - if (dominantEigenEstimation) allEVecs = data * allEVecs;
265 264 } else {
266 265 // null case
267 266 mean = Eigen::VectorXf::Zero(dimsIn);
... ... @@ -449,11 +448,7 @@ protected:
449 448 );
450 449 CUSOLVER_ERROR_CHECK(cusolverStatus);
451 450  
452   - // get devInfo for status
453   - cublasGetVector(1, sizeof(svdDevInfo), cudaSvdDevInfoPtr, 1, &svdDevInfo, 1);
454   - cout << "SVD devInfo: " << svdDevInfo << endl;
455   -
456   - // get the results
  451 + // get the eigenvalues and free memory
457 452 cublasGetVector(
458 453 covRows,
459 454 sizeof(cudaSPtr[0]),
... ... @@ -462,27 +457,68 @@ protected:
462 457 allEVals.data(),
463 458 1
464 459 );
465   - cublasGetMatrix(
466   - covRows,
467   - covRows,
468   - sizeof(cudaUPtr[0]),
469   - cudaUPtr,
470   - covRows,
471   - allEVecs.data(),
472   - covRows
473   - );
  460 + CUDA_SAFE_FREE(cudaSvdWork, &cudaError);
  461 + CUDA_SAFE_FREE(cudaSPtr, &cudaError);
  462 + CUDA_SAFE_FREE(cudaVTPtr, &cudaError);
  463 + CUDA_SAFE_FREE(cudaSvdDevInfoPtr, &cudaError);
  464 +
  465 + // for dominant eigen estimation, multiply the data by the SVD eigenvectors with cuBLAS;
  466 + // this replaces the host-side expression: allEVecs = data * allEVecs
  467 + if (dominantEigenEstimation) {
  468 + double* cudaMultedAllEVecs;
  469 + CUDA_SAFE_MALLOC(&cudaMultedAllEVecs, dimsIn*instances*sizeof(cudaMultedAllEVecs[0]), &cudaError);
  470 + const double one = 1.0;
  471 + const double zero = 0;
  472 +
  473 + cublasDgemm(
  474 + cublasHandle, // handle
  475 + CUBLAS_OP_N, // transa
  476 + CUBLAS_OP_N, // transb
  477 + dimsIn, // m
  478 + instances, // n
  479 + instances, // k
  480 + &one, // alpha
  481 + cudaDataPtr, // A
  482 + dimsIn, // lda
  483 + cudaUPtr, // B
  484 + instances, // ldb
  485 + &zero, // beta
  486 + cudaMultedAllEVecs, // C
  487 + dimsIn // ldc
  488 + );
  489 +
  490 + // copy the multiplied eigenvectors into allEVecs
  491 + cublasGetMatrix(
  492 + dimsIn,
  493 + instances,
  494 + sizeof(cudaMultedAllEVecs[0]),
  495 + cudaMultedAllEVecs,
  496 + dimsIn,
  497 + allEVecs.data(),
  498 + dimsIn
  499 + );
  500 +
  501 + // free the memory used for multiplication
  502 + CUDA_SAFE_FREE(cudaMultedAllEVecs, &cudaError);
  503 + } else {
  504 + // get the eigenvectors straight from the SVD
  505 + cublasGetMatrix(
  506 + covRows,
  507 + covRows,
  508 + sizeof(cudaUPtr[0]),
  509 + cudaUPtr,
  510 + covRows,
  511 + allEVecs.data(),
  512 + covRows
  513 + );
  514 + }
  515 +
474 516  
475 517 // free the remaining device memory
476 518 CUDA_SAFE_FREE(cudaDataPtr, &cudaError);
477 519 CUDA_SAFE_FREE(cudaCovariancePtr, &cudaError);
478   - CUDA_SAFE_FREE(cudaSvdWork, &cudaError);
479 520 CUDA_SAFE_FREE(cudaUPtr, &cudaError);
480   - CUDA_SAFE_FREE(cudaSPtr, &cudaError);
481   - CUDA_SAFE_FREE(cudaVTPtr, &cudaError);
482   - CUDA_SAFE_FREE(cudaSvdDevInfoPtr, &cudaError);
483 521 cusolverDnDestroy(cusolverHandle);
484   -
485   - cout << "Success!" << endl;
486 522 }
487 523 };
488 524  
... ...