diff --git a/openbr/plugins/cuda/cudadefines.hpp b/openbr/plugins/cuda/cudadefines.hpp
index 4f78b00..9872158 100644
--- a/openbr/plugins/cuda/cudadefines.hpp
+++ b/openbr/plugins/cuda/cudadefines.hpp
@@ -19,6 +19,9 @@ using namespace std;
 #include
+#include
+
+
 #define CUDA_SAFE_FREE(cudaPtr, errPtr) \
   /*cout << pthread_self() << ": CUDA Free: " << cudaPtr << endl;*/ \
   *errPtr = cudaFree(cudaPtr); \
@@ -48,3 +51,76 @@ using namespace std;
     cout << pthread_self() << ": Kernel Call Err(" << *errPtr << "): " << cudaGetErrorString(*errPtr) << endl; \
     throw 0; \
   }
+
+// Prints the name of any non-success cublasStatus_t to stdout; it only reports
+// and never throws, so execution continues after a failure. The argument is
+// evaluated exactly once, so a cuBLAS call may be passed directly. The body
+// stays a plain brace block so call sites with or without a trailing ';' build.
+#define CUBLAS_ERROR_CHECK(error) { \
+  const cublasStatus_t cublasCheckStatus_ = (error); \
+  switch (cublasCheckStatus_) { \
+    case CUBLAS_STATUS_SUCCESS: \
+      break; \
+    case CUBLAS_STATUS_NOT_INITIALIZED: \
+      cout << "CUBLAS_STATUS_NOT_INITIALIZED" << endl; \
+      break; \
+    case CUBLAS_STATUS_ALLOC_FAILED: \
+      cout << "CUBLAS_STATUS_ALLOC_FAILED" << endl; \
+      break; \
+    case CUBLAS_STATUS_INVALID_VALUE: \
+      cout << "CUBLAS_STATUS_INVALID_VALUE" << endl; \
+      break; \
+    case CUBLAS_STATUS_ARCH_MISMATCH: \
+      cout << "CUBLAS_STATUS_ARCH_MISMATCH" << endl; \
+      break; \
+    case CUBLAS_STATUS_MAPPING_ERROR: \
+      cout << "CUBLAS_STATUS_MAPPING_ERROR" << endl; \
+      break; \
+    case CUBLAS_STATUS_EXECUTION_FAILED: \
+      cout << "CUBLAS_STATUS_EXECUTION_FAILED" << endl; \
+      break; \
+    case CUBLAS_STATUS_INTERNAL_ERROR: \
+      cout << "CUBLAS_STATUS_INTERNAL_ERROR" << endl; \
+      break; \
+    case CUBLAS_STATUS_NOT_SUPPORTED: \
+      cout << "CUBLAS_STATUS_NOT_SUPPORTED" << endl; \
+      break; \
+    default: \
+      cout << "CUBLAS unknown status: " << cublasCheckStatus_ << endl; \
+      break; \
+  } \
+}
+
+// cuSOLVER counterpart of CUBLAS_ERROR_CHECK: report-only, never throws, and
+// evaluates its cusolverStatus_t argument exactly once.
+#define CUSOLVER_ERROR_CHECK(error) { \
+  const cusolverStatus_t cusolverCheckStatus_ = (error); \
+  switch (cusolverCheckStatus_) { \
+    case CUSOLVER_STATUS_SUCCESS: \
+      break; \
+    case CUSOLVER_STATUS_NOT_INITIALIZED: \
+      cout << "CUSOLVER_STATUS_NOT_INITIALIZED" << endl; \
+      break; \
+    case CUSOLVER_STATUS_ALLOC_FAILED: \
+      cout << "CUSOLVER_STATUS_ALLOC_FAILED" << endl; \
+      break; \
+    case CUSOLVER_STATUS_INVALID_VALUE: \
+      cout << "CUSOLVER_STATUS_INVALID_VALUE" << endl; \
+      break; \
+    case CUSOLVER_STATUS_ARCH_MISMATCH: \
+      cout << "CUSOLVER_STATUS_ARCH_MISMATCH" << endl; \
+      break; \
+    case CUSOLVER_STATUS_EXECUTION_FAILED: \
+      cout << "CUSOLVER_STATUS_EXECUTION_FAILED" << endl; \
+      break; \
+    case CUSOLVER_STATUS_INTERNAL_ERROR: \
+      cout << "CUSOLVER_STATUS_INTERNAL_ERROR" << endl; \
+      break; \
+    case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: \
+      cout << "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED" << endl; \
+      break; \
+    default: \
+      cout << "CUSOLVER unknown status: " << cusolverCheckStatus_ << endl; \
+      break; \
+  } \
+}
diff --git a/openbr/plugins/cuda/cudapca.cpp 
b/openbr/plugins/cuda/cudapca.cpp index dd51a81..b03f6a8 100644 --- a/openbr/plugins/cuda/cudapca.cpp +++ b/openbr/plugins/cuda/cudapca.cpp @@ -30,11 +30,14 @@ using namespace cv; #include #include -// definitions from the CUDA source file +#include +#include +#include +#include "cudadefines.hpp" + namespace br { namespace cuda { namespace pca { - void initializeWrapper(float* evPtr, int evRows, int evCols, float* meanPtr, int meanElems); - void trainWrapper(void* cudaSrc, float* dst, int rows, int cols); - void wrapper(void* src, void** dst, int imgRows, int imgCols); + void castFloatToDouble(float* a, int inca, double* b, int incb, int numElems); + void castDoubleToFloat(double* a, int inca, float* b, int incb, int numElems); }}} namespace br @@ -61,13 +64,26 @@ protected: BR_PROPERTY(int, drop, 0) BR_PROPERTY(bool, whiten, false) - Eigen::VectorXf mean, eVals; + Eigen::VectorXf mean; + Eigen::VectorXf eVals; Eigen::MatrixXf eVecs; - int originalRows; + cublasHandle_t cublasHandle; + float* cudaMeanPtr; // holds the "keep" long vector + float* cudaEvPtr; // holds all the eigenvectors public: - CUDAPCATransform() : keep(0.95), drop(0), whiten(false) {} + CUDAPCATransform() : keep(0.95), drop(0), whiten(false) { + // try to initialize CUBLAS + cublasStatus_t status; + status = cublasCreate(&cublasHandle); + CUBLAS_ERROR_CHECK(status); + } + + ~CUDAPCATransform() { + // tear down CUBLAS + cublasDestroy(cublasHandle); + } private: double residualReconstructionError(const Template &src) const @@ -83,45 +99,38 @@ private: void train(const TemplateList &cudaTrainingSet) { - // copy the data back from the graphics card so the training can be done on the CPU - const int instances = cudaTrainingSet.size(); // get the number of training set instances - QList