Commit 233d59aefd4f351d8d9d4b14085fbdb0250e9cef

Authored by DepthDeluxe
1 parent b6adde60

pre-chopping

Showing 1 changed file with 392 additions and 96 deletions
openbr/plugins/cuda/cublaspca.cpp
@@ -32,6 +32,7 @@ using namespace cv; @@ -32,6 +32,7 @@ using namespace cv;
32 32
33 #include <cuda_runtime.h> 33 #include <cuda_runtime.h>
34 #include <cublas_v2.h> 34 #include <cublas_v2.h>
  35 +#include <cusolverDn.h>
35 #include "cudadefines.hpp" 36 #include "cudadefines.hpp"
36 37
37 namespace br 38 namespace br
@@ -58,7 +59,7 @@ protected: @@ -58,7 +59,7 @@ protected:
58 BR_PROPERTY(int, drop, 0) 59 BR_PROPERTY(int, drop, 0)
59 BR_PROPERTY(bool, whiten, false) 60 BR_PROPERTY(bool, whiten, false)
60 61
61 - Eigen::VectorXf mean, eVals; 62 + Eigen::VectorXf mean;
62 Eigen::MatrixXf eVecs; 63 Eigen::MatrixXf eVecs;
63 64
64 int originalRows; 65 int originalRows;
@@ -94,41 +95,40 @@ private: @@ -94,41 +95,40 @@ private:
94 95
95 void train(const TemplateList &cudaTrainingSet) 96 void train(const TemplateList &cudaTrainingSet)
96 { 97 {
97 - // copy the data back from the graphics card so the training can be done on the CPU  
98 - const int instances = cudaTrainingSet.size(); // get the number of training set instances  
99 - QList<Template> trainingQlist;  
100 - for(int i=0; i<instances; i++) {  
101 - Template currentTemplate = cudaTrainingSet[i];  
102 - void* const* srcDataPtr = currentTemplate.m().ptr<void*>();  
103 - void* cudaMemPtr = srcDataPtr[0];  
104 - int rows = *((int*)srcDataPtr[1]);  
105 - int cols = *((int*)srcDataPtr[2]);  
106 - int type = *((int*)srcDataPtr[3]);  
107 -  
108 - if (type != CV_32FC1) {  
109 - qFatal("Requires single channel 32-bit floating point matrices.");  
110 - } 98 + cublasStatus_t cublasStatus;
  99 + cudaError_t cudaError;
111 100
112 - // copy GPU mat data back to the CPU so we can do the training on the CPU  
113 - Mat mat = Mat(rows, cols, type);  
114 - cudaError_t err;  
115 - CUDA_SAFE_MEMCPY(mat.ptr<float>(), cudaMemPtr, rows*cols*sizeof(float), cudaMemcpyDeviceToHost, &err);  
116 - trainingQlist.append(Template(mat));  
117 - } 101 + // put all the data into a single matrix to perform PCA
  102 + const int instances = cudaTrainingSet.size();
  103 + const int instanceSize = *(int*)cudaTrainingSet.first().m().ptr<void*>()[1]
  104 + * *(int*)cudaTrainingSet.first().m().ptr<void*>()[2];
118 105
119 - // assemble a TemplateList from the list of data  
120 - TemplateList trainingSet(trainingQlist); 106 + // get all the vectors from memory
  107 + Eigen::MatrixXf data(instanceSize, instances);
  108 + for (int i=0; i < instances; i++) {
  109 + float* currentCudaMatPtr = (float*)cudaTrainingSet[i].m().ptr<void*>()[0];
121 110
122 - originalRows = trainingSet.first().m().rows; // get number of rows of first image  
123 - int dimsIn = trainingSet.first().m().rows * trainingSet.first().m().cols; // get the size of the first image 111 + cublasGetVector(
  112 + instanceSize,
  113 + sizeof(float),
  114 + currentCudaMatPtr,
  115 + 1,
  116 + data.data()+i*instanceSize,
  117 + 1
  118 + );
  119 + }
124 120
125 - // Map into 64-bit Eigen matrix - perform the column major conversion  
126 - Eigen::MatrixXd data(dimsIn, instances); // create a mat  
127 - for (int i=0; i<instances; i++) {  
128 - data.col(i) = Eigen::Map<const Eigen::MatrixXf>(trainingSet[i].m().ptr<float>(), dimsIn, 1).cast<double>();  
129 - } 121 + Eigen::MatrixXd dataDouble(instanceSize, instances);
  122 + for (int i=0; i < instanceSize*instances; i++) {
  123 + dataDouble.data()[i] = (double)data.data()[i];
  124 + }
  125 +
  126 + // XXX: remove me
  127 + Eigen::MatrixXd test(3,3);
  128 + test << 1,2,3,4,5,6,7,8,9;
  129 + trainCore(test);
130 130
131 - trainCore(data); 131 + // trainCore(dataDouble);
132 } 132 }
133 133
134 void project(const Template &src, Template &dst) const 134 void project(const Template &src, Template &dst) const
@@ -164,8 +164,6 @@ private: @@ -164,8 +164,6 @@ private:
164 cudaMemset(*dstGpuMatPtrPtr, 0, dstRows*sizeof(float)); 164 cudaMemset(*dstGpuMatPtrPtr, 0, dstRows*sizeof(float));
165 165
166 { 166 {
167 - //cout << "Ax + y" << endl;  
168 - // subtract out the average  
169 float negativeOne = -1.0f; 167 float negativeOne = -1.0f;
170 status = cublasSaxpy( 168 status = cublasSaxpy(
171 cublasHandle, // handle 169 cublasHandle, // handle
@@ -180,8 +178,6 @@ private: @@ -180,8 +178,6 @@ private:
180 } 178 }
181 179
182 { 180 {
183 - //cout << "Matrix-Vector multiplication" << endl;  
184 -  
185 float one = 1.0f; 181 float one = 1.0f;
186 float zero = 0.0f; 182 float zero = 0.0f;
187 status = cublasSgemv( 183 status = cublasSgemv(
@@ -207,12 +203,12 @@ private: @@ -207,12 +203,12 @@ private:
207 203
208 void store(QDataStream &stream) const 204 void store(QDataStream &stream) const
209 { 205 {
210 - stream << keep << drop << whiten << originalRows << mean << eVals << eVecs; 206 + stream << keep << drop << whiten << originalRows << mean << eVecs;
211 } 207 }
212 208
213 void load(QDataStream &stream) 209 void load(QDataStream &stream)
214 { 210 {
215 - stream >> keep >> drop >> whiten >> originalRows >> mean >> eVals >> eVecs; 211 + stream >> keep >> drop >> whiten >> originalRows >> mean >> eVecs;
216 212
217 //cout << "Starting load process" << endl; 213 //cout << "Starting load process" << endl;
218 214
@@ -248,73 +244,373 @@ private: @@ -248,73 +244,373 @@ private:
248 } 244 }
249 245
250 protected: 246 protected:
251 - void trainCore(Eigen::MatrixXd data)  
252 - {  
253 - int dimsIn = data.rows();  
254 - int instances = data.cols();  
255 - const bool dominantEigenEstimation = (dimsIn > instances);  
256 -  
257 - Eigen::MatrixXd allEVals, allEVecs;  
258 - if (keep != 0) {  
259 - // Compute and remove mean  
260 - mean = Eigen::VectorXf(dimsIn);  
261 - for (int i=0; i<dimsIn; i++) mean(i) = data.row(i).sum() / (float)instances;  
262 - for (int i=0; i<dimsIn; i++) data.row(i).array() -= mean(i);  
263 -  
264 - // Calculate covariance matrix  
265 - Eigen::MatrixXd cov;  
266 - if (dominantEigenEstimation) cov = data.transpose() * data / (instances-1.0);  
267 - else cov = data * data.transpose() / (instances-1.0);  
268 -  
269 - // Compute eigendecomposition. Returns eigenvectors/eigenvalues in increasing order by eigenvalue.  
270 - Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eSolver(cov);  
271 - allEVals = eSolver.eigenvalues();  
272 - allEVecs = eSolver.eigenvectors();  
273 - if (dominantEigenEstimation) allEVecs = data * allEVecs;  
274 - } else {  
275 - // Null case  
276 - mean = Eigen::VectorXf::Zero(dimsIn);  
277 - allEVecs = Eigen::MatrixXd::Identity(dimsIn, dimsIn);  
278 - allEVals = Eigen::VectorXd::Ones(dimsIn); 247 + void trainCore(Eigen::MatrixXd data) {
  248 + cudaError_t cudaError;
  249 +
  250 + // utility variables
  251 + const double one = 1.0;
  252 + const double negativeOne = -1.0;
  253 + const double zero = 0.0;
  254 +
  255 + static int numTimesThrough = 0;
  256 + numTimesThrough++;
  257 +
  258 + int dimsIn = data.rows(); // the number of rows of the covariance matrix
  259 + int instances = data.cols(); // the number of columns of the covariance matrix
  260 + const bool dominantEigenEstimation = (dimsIn > instances);
  261 +
  262 + // Compute and remove mean
  263 + //mean = Eigen::VectorXf(dimsIn);
  264 + //for (int i=0; i<dimsIn; i++) mean(i) = data.row(i).sum() / (float)instances;
  265 + //for (int i=0; i<dimsIn; i++) data.row(i).array() -= mean(i);
  266 +
  267 + // allocate and place data in GPU memory
  268 + double* cudaDataPtr;
  269 + CUDA_SAFE_MALLOC(&cudaDataPtr, data.rows()*data.cols()*sizeof(cudaDataPtr[0]), &cudaError);
  270 + cublasSetMatrix(
  271 + data.rows(),
  272 + data.cols(),
  273 + sizeof(cudaDataPtr[0]),
  274 + data.data(),
  275 + data.rows(),
  276 + cudaDataPtr,
  277 + data.rows()
  278 + );
  279 +
  280 + // allocate space for the covariance matrix
  281 + double* cudaCovariancePtr;
  282 + int covRows = data.cols();
  283 + CUDA_SAFE_MALLOC(&cudaCovariancePtr, covRows*covRows*sizeof(cudaCovariancePtr[0]), &cudaError);
  284 +
  285 + // compute the covariance matrix
  286 + // cov = data.transpose() * data / (instances-1.0);
  287 + {
  288 + double scaleFactor = 1.0/(instances-1.0);
  289 + cublasDgemm(
  290 + cublasHandle,
  291 + CUBLAS_OP_T,
  292 + CUBLAS_OP_N,
  293 + data.cols(),
  294 + data.cols(),
  295 + data.rows(),
  296 + &scaleFactor,
  297 + cudaDataPtr,
  298 + data.rows(),
  299 + cudaDataPtr,
  300 + data.rows(),
  301 + &zero,
  302 + cudaCovariancePtr,
  303 + covRows
  304 + );
  305 + }
  306 +
  307 + // XXX: download the covariace matrix for debugging
  308 + Eigen::MatrixXd cov(covRows, covRows);
  309 + cublasGetMatrix(
  310 + covRows,
  311 + covRows,
  312 + sizeof(cov.data()[0]),
  313 + cudaCovariancePtr,
  314 + covRows,
  315 + cov.data(),
  316 + covRows
  317 + );
  318 +
  319 + // initialize cuSolver for the next part
  320 + cusolverDnHandle_t cusolverHandle;
  321 + cusolverDnCreate(&cusolverHandle);
  322 +
  323 + double* cudaEigenvaluesPtr;
  324 + {
  325 + double* cudaDiagonalPtr;
  326 + CUDA_SAFE_MALLOC(&cudaDiagonalPtr, covRows*sizeof(double), &cudaError);
  327 + double* cudaOffdiagonalPtr;
  328 + CUDA_SAFE_MALLOC(&cudaOffdiagonalPtr, covRows*sizeof(double), &cudaError);
  329 + double* cudaTauqPtr;
  330 + CUDA_SAFE_MALLOC(&cudaTauqPtr, covRows*sizeof(double), &cudaError);
  331 + double* cudaTaupPtr;
  332 + CUDA_SAFE_MALLOC(&cudaTaupPtr, covRows*sizeof(double), &cudaError);
  333 +
  334 + // calculate lwork
  335 + int lwork;
  336 + cusolverDnSgebrd_bufferSize(
  337 + cusolverHandle,
  338 + covRows,
  339 + covRows,
  340 + &lwork
  341 + );
  342 + double* cudaWorkBufferPtr;
  343 + CUDA_SAFE_MALLOC(&cudaWorkBufferPtr, lwork, &cudaError);
  344 +
  345 + int* cudaDevInfoPtr;
  346 + CUDA_SAFE_MALLOC(&cudaDevInfoPtr, sizeof(int), &cudaError);
  347 +
  348 + // call the eigenvalue decomposer
  349 + cusolverDnDgebrd(
  350 + cusolverHandle,
  351 + covRows,
  352 + covRows,
  353 + cudaCovariancePtr,
  354 + covRows,
  355 + cudaDiagonalPtr,
  356 + cudaOffdiagonalPtr,
  357 + cudaTauqPtr,
  358 + cudaTaupPtr,
  359 + cudaWorkBufferPtr,
  360 + lwork,
  361 + cudaDevInfoPtr
  362 + );
  363 +
  364 + // the eigenvalues are on the diagonal
  365 + cudaEigenvaluesPtr = cudaOffdiagonalPtr;
  366 +
  367 + /*
  368 + // initialize the result buffers
  369 + double* cudaOffdiagonalPtr;
  370 + CUDA_SAFE_MALLOC(&cudaEigenvaluesPtr, covRows*sizeof(cudaEigenvaluesPtr[0]), &cudaError);
  371 + CUDA_SAFE_MALLOC(&cudaOffdiagonalPtr, covRows*sizeof(cudaOffdiagonalPtr[0]), &cudaError);
  372 +
  373 + // initialize the tauq and taup buffers
  374 + double* cudaTauqPtr;
  375 + double* cudaTaupPtr;
  376 + CUDA_SAFE_MALLOC(&cudaTauqPtr, covRows*sizeof(cudaTauqPtr[0]), &cudaError);
  377 + CUDA_SAFE_MALLOC(&cudaTaupPtr, covRows*sizeof(cudaTaupPtr[0]), &cudaError);
  378 +
  379 + // build the work buffer
  380 + double* cudaWorkPtr;
  381 + int workBufferSize;
  382 + cusolverDnSgesvd_bufferSize(cusolverHandle, covRows, covRows, &workBufferSize);
  383 + CUDA_SAFE_MALLOC(&cudaWorkPtr, workBufferSize, &cudaError);
  384 +
  385 + int* cudaDevInfoPtr;
  386 + CUDA_SAFE_MALLOC(&cudaDevInfoPtr, sizeof(*cudaDevInfoPtr), &cudaError);
  387 +
  388 + // now pull the eigenvalues out
  389 + cusolverStatus_t cusolverStatus;
  390 + cusolverStatus = cusolverDnDgebrd(
  391 + cusolverHandle, // handle
  392 + covRows, // rows of Matrix A
  393 + covRows, // cols of Matrix A
  394 + cudaCovariancePtr, // CUDA pointer to matrix A
  395 + covRows, // leading dimension of A
  396 + cudaEigenvaluesPtr, // diagonal elements of bidiagonal matrix
  397 + cudaOffdiagonalPtr, // off-diagonal elements of matrix
  398 + cudaTauqPtr,
  399 + cudaTaupPtr,
  400 + cudaWorkPtr,
  401 + workBufferSize,
  402 + cudaDevInfoPtr
  403 + );
  404 +
  405 + // print out the devInfo
  406 + int devInfo;
  407 + cudaMemcpy(&devInfo, cudaDevInfoPtr, sizeof(devInfo), cudaMemcpyDeviceToHost);
  408 +
  409 + // now we have the eigenvalues
  410 +
  411 + // XXX: the off diagonal values
  412 + Eigen::VectorXd offDiagonal(covRows);
  413 + cublasGetVector(
  414 + covRows,
  415 + sizeof(offDiagonal.data()[0]),
  416 + cudaOffdiagonalPtr,
  417 + 1,
  418 + offDiagonal.data(),
  419 + 1
  420 + );
  421 +
  422 + // clean up
  423 + CUDA_SAFE_FREE(cudaOffdiagonalPtr, &cudaError);
  424 + CUDA_SAFE_FREE(cudaTauqPtr, &cudaError);
  425 + CUDA_SAFE_FREE(cudaTaupPtr, &cudaError);
  426 + CUDA_SAFE_FREE(cudaWorkPtr, &cudaError);
  427 + CUDA_SAFE_FREE(cudaDevInfoPtr, &cudaError);
  428 + */
  429 + }
  430 +
  431 + // copy the eigenvalues back to the CPU
  432 + Eigen::VectorXd allEVals(covRows);
  433 + cublasGetVector(
  434 + covRows,
  435 + sizeof(allEVals.data()[0]),
  436 + cudaEigenvaluesPtr,
  437 + 1,
  438 + allEVals.data(),
  439 + 1
  440 + );
  441 + CUDA_SAFE_FREE(cudaEigenvaluesPtr, &cudaError);
  442 +
  443 + // now find the eigenvectors
  444 + Eigen::MatrixXd allEVecs(covRows, covRows);
  445 + double* cudaCoefficientMatrix;
  446 + CUDA_SAFE_MALLOC(&cudaCoefficientMatrix, covRows*covRows*sizeof(cudaCoefficientMatrix[0]), &cudaError);
  447 + for (int i=0; i < covRows; i++) {
  448 + // load cov into matrix
  449 + cublasSetMatrix(
  450 + covRows,
  451 + covRows,
  452 + sizeof(cov.data()[0]),
  453 + cov.data(),
  454 + covRows,
  455 + cudaCoefficientMatrix,
  456 + covRows
  457 + );
  458 +
  459 + // subtract out the Eigenvalue from the center of the matrix
  460 + // first copy the eigenvalue into a single buffer
  461 + double* cudaEigenvalueSubtractBuffer;
  462 + CUDA_SAFE_MALLOC(&cudaEigenvalueSubtractBuffer, covRows*sizeof(cudaEigenvalueSubtractBuffer[0]), &cudaError);
  463 + for(int j = 0; j < covRows; j++) {
  464 + cublasSetVector(
  465 + 1,
  466 + sizeof(cudaEigenvalueSubtractBuffer[0]),
  467 + &allEVals.data()[i],
  468 + 1,
  469 + &cudaEigenvalueSubtractBuffer[j],
  470 + 1
  471 + );
279 } 472 }
280 473
281 - if (keep <= 0) {  
282 - keep = dimsIn - drop;  
283 - } else if (keep < 1) {  
284 - // Keep eigenvectors that retain a certain energy percentage.  
285 - const double totalEnergy = allEVals.sum();  
286 - if (totalEnergy == 0) {  
287 - keep = 0;  
288 - } else {  
289 - double currentEnergy = 0;  
290 - int i=0;  
291 - while ((currentEnergy / totalEnergy < keep) && (i < allEVals.rows())) {  
292 - currentEnergy += allEVals(allEVals.rows()-(i+1));  
293 - i++;  
294 - }  
295 - keep = i - drop;  
296 - }  
297 - } else {  
298 - if (keep + drop > allEVals.rows()) {  
299 - qWarning("Insufficient samples, needed at least %d but only got %d.", (int)keep + drop, (int)allEVals.rows());  
300 - keep = allEVals.rows() - drop;  
301 - } 474 + // perform the subtraction
  475 + cublasDaxpy(
  476 + cublasHandle,
  477 + covRows,
  478 + &negativeOne,
  479 + cudaEigenvalueSubtractBuffer,
  480 + 1,
  481 + cudaCoefficientMatrix,
  482 + covRows+1 // move across the diagonal
  483 + );
  484 +
  485 + // perform the Cholesky factorization of the coefficient matrix
  486 + double* cudaWorkBufferPtr;
  487 + int lwork;
  488 + cusolverDnDpotrf_bufferSize(
  489 + cusolverHandle,
  490 + CUBLAS_FILL_MODE_UPPER,
  491 + covRows,
  492 + cudaCoefficientMatrix,
  493 + covRows,
  494 + &lwork
  495 + );
  496 + CUDA_SAFE_MALLOC(&cudaWorkBufferPtr, lwork, &cudaError);
  497 +
  498 + int* cudaDevInfoPtr;
  499 + CUDA_SAFE_MALLOC(&cudaDevInfoPtr, sizeof(*cudaDevInfoPtr), &cudaError);
  500 +
  501 + cusolverDnDpotrf(
  502 + cusolverHandle,
  503 + CUBLAS_FILL_MODE_UPPER,
  504 + covRows,
  505 + cudaCoefficientMatrix,
  506 + covRows,
  507 + cudaWorkBufferPtr,
  508 + lwork,
  509 + cudaDevInfoPtr
  510 + );
  511 + int devInfo;
  512 + CUDA_SAFE_MEMCPY(&devInfo, cudaDevInfoPtr, sizeof(devInfo), cudaMemcpyDeviceToHost, &cudaError);
  513 + cout << "DevInfo: " << devInfo << endl;
  514 +
  515 +
  516 + // XXX: remove after dbugging
  517 + Eigen::MatrixXd anotherMatrix(covRows, covRows);
  518 + cublasGetMatrix(
  519 + covRows,
  520 + covRows,
  521 + sizeof(anotherMatrix.data()[0]),
  522 + cudaCoefficientMatrix,
  523 + 1,
  524 + anotherMatrix.data(),
  525 + 1
  526 + );
  527 +
  528 + // the first element of B is equal to the covariance matrix, the rest are zeroes
  529 + double* cudaBVector;
  530 + CUDA_SAFE_MALLOC(&cudaBVector, covRows*sizeof(cudaBVector[0]), &cudaError);
  531 + // load the top element to be the same as first of coefficient
  532 + // this results in the first variable being zero and assigning
  533 + // values for the rest of the matrix
  534 + cublasDcopy(
  535 + cublasHandle,
  536 + 1,
  537 + cudaCoefficientMatrix,
  538 + 1,
  539 + cudaBVector,
  540 + 1
  541 + );
  542 + // load the rest 0's
  543 + for (int j = 1; j < covRows; j++) {
  544 + cublasSetVector(
  545 + 1,
  546 + sizeof(cudaBVector[0]),
  547 + &zero,
  548 + 1,
  549 + &cudaBVector[j],
  550 + 1
  551 + );
302 } 552 }
303 553
304 - // Keep highest energy vectors  
305 - eVals = Eigen::VectorXf((int)keep, 1);  
306 - eVecs = Eigen::MatrixXf(allEVecs.rows(), (int)keep);  
307 - for (int i=0; i<keep; i++) {  
308 - int index = allEVals.rows()-(i+drop+1);  
309 - eVals(i) = allEVals(index);  
310 - eVecs.col(i) = allEVecs.col(index).cast<float>() / allEVecs.col(index).norm();  
311 - if (whiten) eVecs.col(i) /= sqrt(eVals(i)); 554 + // solve the system of linear equations
  555 + cusolverDnDpotrs(
  556 + cusolverHandle,
  557 + CUBLAS_FILL_MODE_LOWER,
  558 + covRows,
  559 + 1, // we are solving a single system of equations
  560 + cudaCoefficientMatrix,
  561 + covRows,
  562 + cudaBVector,
  563 + covRows,
  564 + cudaDevInfoPtr
  565 + );
  566 + CUDA_SAFE_MEMCPY(&devInfo, cudaDevInfoPtr, sizeof(devInfo), cudaMemcpyDeviceToHost, &cudaError);
  567 + cout << "DevInfo: " << devInfo << endl;
  568 +
  569 + // should have the solution
  570 + Eigen::VectorXd solutionVector(covRows);
  571 + cublasGetVector(
  572 + covRows,
  573 + sizeof(solutionVector.data()[0]),
  574 + solutionVector.data(),
  575 + 1,
  576 + cudaBVector,
  577 + 1
  578 + );
  579 +
  580 + cout << "solution: [";
  581 + for (int i=0; i < covRows; i++) {
  582 + cout << solutionVector.data()[i] << ", ";
312 } 583 }
  584 + cout << "];" << endl;
313 585
314 - // Debug output  
315 - if (Globals->verbose) qDebug() << "PCA Training:\n\tDimsIn =" << dimsIn << "\n\tKeep =" << keep; 586 + }
  587 +
  588 + // Keep eigenvectors that retain a certain energy percentage.
  589 + const float totalEnergy = allEVals.sum();
  590 + if (totalEnergy == 0) {
  591 + keep = 0;
  592 + } else {
  593 + float currentEnergy = 0;
  594 + int i=0;
  595 + while ((currentEnergy / totalEnergy < keep) && (i < allEVals.rows())) {
  596 + currentEnergy += allEVals(allEVals.rows()-(i+1));
  597 + i++;
  598 + }
  599 + keep = i - drop;
  600 + }
  601 +
  602 + // Keep highest energy vectors
  603 + Eigen::VectorXf eVals = Eigen::VectorXf((int)keep, 1);
  604 + for (int i=0; i<keep; i++) {
  605 + int index = allEVals.rows()-(i+drop+1);
  606 + eVals(i) = allEVals(index);
  607 + }
  608 +
  609 + cusolverDnDestroy(cusolverHandle);
  610 + cout << "DONE" << endl;
316 } 611 }
317 612
  613 +
318 void writeEigenVectors(const Eigen::MatrixXd &allEVals, const Eigen::MatrixXd &allEVecs) const 614 void writeEigenVectors(const Eigen::MatrixXd &allEVals, const Eigen::MatrixXd &allEVecs) const
319 { 615 {
320 const int originalCols = mean.rows() / originalRows; 616 const int originalCols = mean.rows() / originalRows;