diff --git a/openbr/plugins/liblinear.cmake b/openbr/plugins/liblinear.cmake new file mode 100644 index 0000000..3845f08 --- /dev/null +++ b/openbr/plugins/liblinear.cmake @@ -0,0 +1,7 @@ +set(BR_WITH_LIBLINEAR OFF CACHE BOOL "Build with LibLinear") + +if(${BR_WITH_LIBLINEAR}) + find_package(LibLinear REQUIRED) + set(BR_THIRDPARTY_SRC ${BR_THIRDPARTY_SRC} ${LibLinear_SRC}) + set(BR_THIRDPARTY_SRC ${BR_THIRDPARTY_SRC} plugins/liblinear.cpp) +endif() diff --git a/openbr/plugins/liblinear.cpp b/openbr/plugins/liblinear.cpp new file mode 100644 index 0000000..1b9649e --- /dev/null +++ b/openbr/plugins/liblinear.cpp @@ -0,0 +1,206 @@ +#include +#include +#include + +#include "openbr_internal.h" +#include "openbr/core/opencvutils.h" + +#include + +using namespace cv; + +namespace br +{ + +/* Serializes m into stream: save_model() writes it to a temporary file, and the bytes of that file are then appended to stream as a QByteArray. */ static void storeModel(const model &m, QDataStream &stream) +{ + // Create local file + QTemporaryFile tempFile; + tempFile.open(); + tempFile.close(); + + // Save LibLinear model to local file + save_model(qPrintable(tempFile.fileName()),&m); + + // Copy local file contents to stream + tempFile.open(); + QByteArray data = tempFile.readAll(); + tempFile.close(); + stream << data; +} + +/* Restores m from stream: the QByteArray read from stream is written to a temporary file and the struct returned by load_model() is copied into m. NOTE(review): the load_model() result is dereferenced without a null check and the pointer itself is not kept -- confirm who releases it. */ static void loadModel(model &m, QDataStream &stream) +{ + // Copy local file contents from stream + QByteArray data; + stream >> data; + + // Create local file + QTemporaryFile tempFile(QDir::tempPath()+"/model"); + tempFile.open(); + tempFile.write(data); + tempFile.close(); + + // Load LibLinear model from local file + m = *load_model(qPrintable(tempFile.fileName())); +} + +/* Transform that wraps a LibLinear model: train() fits it with train_svm(), project() scores one sample, store() and load() serialize it through storeModel() and loadModel(). */ class Linear : public Transform +{ + Q_OBJECT + Q_ENUMS(Solver) + Q_PROPERTY(Solver solver READ get_solver WRITE set_solver RESET reset_solver STORED false) + Q_PROPERTY(float C READ get_C WRITE set_C RESET reset_C STORED false) + Q_PROPERTY(QString inputVariable READ get_inputVariable WRITE set_inputVariable RESET reset_inputVariable STORED false) + Q_PROPERTY(QString outputVariable READ get_outputVariable WRITE set_outputVariable RESET 
reset_outputVariable STORED false) + Q_PROPERTY(bool returnDFVal READ get_returnDFVal WRITE set_returnDFVal RESET reset_returnDFVal STORED false) + Q_PROPERTY(bool overwriteMat READ get_overwriteMat WRITE set_overwriteMat RESET reset_overwriteMat STORED false) + Q_PROPERTY(bool weight READ get_weight WRITE set_weight RESET reset_weight STORED false) + +public: + enum Solver { L2R_LR = ::L2R_LR, + L2R_L2LOSS_SVC_DUAL = ::L2R_L2LOSS_SVC_DUAL, + L2R_L2LOSS_SVC = ::L2R_L2LOSS_SVC, + L2R_L1LOSS_SVC_DUAL = ::L2R_L1LOSS_SVC_DUAL, + MCSVM_CS = ::MCSVM_CS, + L1R_L2LOSS_SVC = ::L1R_L2LOSS_SVC, + L1R_LR = ::L1R_LR, + L2R_LR_DUAL = ::L2R_LR_DUAL, + L2R_L2LOSS_SVR = ::L2R_L2LOSS_SVR, + L2R_L2LOSS_SVR_DUAL = ::L2R_L2LOSS_SVR_DUAL, + L2R_L1LOSS_SVR_DUAL = ::L2R_L1LOSS_SVR_DUAL }; + +private: + BR_PROPERTY(Solver, solver, L2R_L2LOSS_SVC_DUAL) + BR_PROPERTY(float, C, 1) + BR_PROPERTY(QString, inputVariable, "Label") + BR_PROPERTY(QString, outputVariable, "") + BR_PROPERTY(bool, returnDFVal, false) + BR_PROPERTY(bool, overwriteMat, true) + BR_PROPERTY(bool, weight, false) + + model m; + + /* Fits the model: builds a LibLinear problem from the template matrices and the inputVariable labels (bias = -1, every feature row terminated by a node with index -1), trains with the configured solver and C, and copies the result into m. When weight is set, label 1 is weighted by (rows - nonZero) / nonZero. NOTE(review): nonZero is not checked against 0 before that division. */ void train(const TemplateList &data) + { + Mat samples = OpenCVUtils::toMat(data.data()); + Mat labels = OpenCVUtils::toMat(File::get(data, inputVariable)); + + problem prob; + prob.n = samples.cols; + prob.l = samples.rows; + prob.bias = -1; + prob.y = new double[prob.l]; + + for (int i=0; i(i,0); + + // Allocate enough memory for l feature_nodes pointers + prob.x = new feature_node*[prob.l]; + feature_node *x_space = new feature_node[(prob.n+1)*prob.l]; + + int k = 0; + for (int i=0; i(i,j); + k++; + } + x_space[k++].index = -1; + } + + parameter param; + + // TODO: Support grid search + param.C = C; + param.p = 1; + param.eps = FLT_EPSILON; + param.solver_type = solver; + + if (weight) { + param.nr_weight = 2; + param.weight_label = new int[2]; + param.weight = new double[2]; + param.weight_label[0] = 0; + param.weight_label[1] = 1; + int nonZero = countNonZero(labels); + param.weight[0] = 1; + 
param.weight[1] = (double)(prob.l-nonZero)/nonZero; + qDebug() << param.weight[0] << param.weight[1]; + } else { + param.nr_weight = 0; + param.weight_label = NULL; + param.weight = NULL; + } + + /* NOTE(review): `¶m` on this line looks like a mis-encoded `&param` (the parameter struct filled in above) -- confirm against the upstream patch. The struct returned by train_svm() is copied into m and the returned pointer is dropped. */ m = *train_svm(&prob, ¶m); + + delete[] param.weight; + delete[] param.weight_label; + delete[] prob.y; + delete[] prob.x; + delete[] x_space; + } + + /* Scores one sample: src.m() is flattened to a single row and loaded into a feature_node array of sample.cols+1 entries whose last index is -1. The SVC and SVR solvers go through predict_values(), the LR solvers through predict_probability(); with returnDFVal the first entry of prob_estimates replaces the returned prediction. The value is written to a 1x1 CV_32F matrix when overwriteMat is set, otherwise to dst.file under outputVariable. NOTE(review): prob_estimates is sized at run time from m.nr_class -- presumably relies on a compiler extension; confirm. */ void project(const Template &src, Template &dst) const + { + dst = src; + + Mat sample = src.m().reshape(1,1); + feature_node *x_space = new feature_node[sample.cols+1]; + + for (int j=0; j(0,j); + } + x_space[sample.cols].index = -1; + + float prediction; + double prob_estimates[m.nr_class]; + + if (solver == L2R_L2LOSS_SVR || + solver == L2R_L1LOSS_SVR_DUAL || + solver == L2R_L2LOSS_SVR_DUAL || + solver == L2R_L2LOSS_SVC_DUAL || + solver == L2R_L2LOSS_SVC || + solver == L2R_L1LOSS_SVC_DUAL || + solver == MCSVM_CS || + solver == L1R_L2LOSS_SVC) + { + prediction = predict_values(&m,x_space,prob_estimates); + if (returnDFVal) prediction = prob_estimates[0]; + } else if (solver == L2R_LR || + solver == L2R_LR_DUAL || + solver == L1R_LR) + { + prediction = predict_probability(&m,x_space,prob_estimates); + if (returnDFVal) prediction = prob_estimates[0]; + } + + if (overwriteMat) { + dst.m() = Mat(1, 1, CV_32F); + dst.m().at(0, 0) = prediction; + } else { + dst.file.set(outputVariable,prediction); + } + + delete[] x_space; + } + + void store(QDataStream &stream) const + { + storeModel(m,stream); + } + + void load(QDataStream &stream) + { + loadModel(m,stream); + } +}; + +BR_REGISTER(Transform, Linear) + +} // namespace br + +#include "liblinear.moc" diff --git a/openbr/plugins/tree.cpp b/openbr/plugins/tree.cpp index 9cba347..61a4e1f 100644 --- a/openbr/plugins/tree.cpp +++ b/openbr/plugins/tree.cpp @@ -35,7 +35,7 @@ static void loadModel(CvStatModel &model, QDataStream &stream) stream >> data; // Create local file - QTemporaryFile tempFile(QDir::tempPath()+"/model"); + QTemporaryFile 
tempFile(QDir::tempPath()+"/"+QString::number(rand())); tempFile.open(); tempFile.write(data); tempFile.close(); @@ -53,6 +53,50 @@ static void loadModel(CvStatModel &model, QDataStream &stream) class ForestTransform : public Transform { Q_OBJECT + + /* Delegates to trainForest(), which ForestInductionTransform::train() calls as well. */ void train(const TemplateList &data) + { + trainForest(data); + } + + /* Predicts on the flattened sample: predict_prob() when both classification and returnConfidence are set, predict() otherwise. The response replaces the matrix (1x1 CV_32F) when overwriteMat is set, else it is stored in dst.file under outputVariable. */ void project(const Template &src, Template &dst) const + { + dst = src; + + float response; + if (classification && returnConfidence) { + // Fuzzy class label + response = forest.predict_prob(src.m().reshape(1,1)); + } else { + response = forest.predict(src.m().reshape(1,1)); + } + + if (overwriteMat) { + dst.m() = Mat(1, 1, CV_32F); + dst.m().at(0, 0) = response; + } else { + dst.file.set(outputVariable, response); + } + } + + void load(QDataStream &stream) + { + loadModel(forest,stream); + } + + void store(QDataStream &stream) const + { + storeModel(forest,stream); + } + + /* Falls back to inputVariable when no outputVariable was given. */ void init() + { + if (outputVariable.isEmpty()) + outputVariable = inputVariable; + } + +protected: + Q_ENUMS(TerminationCriteria) Q_PROPERTY(bool classification READ get_classification WRITE set_classification RESET reset_classification STORED false) Q_PROPERTY(float splitPercentage READ get_splitPercentage WRITE set_splitPercentage RESET reset_splitPercentage STORED false) Q_PROPERTY(int maxDepth READ get_maxDepth WRITE set_maxDepth RESET reset_maxDepth STORED false) @@ -62,6 +106,15 @@ class ForestTransform : public Transform Q_PROPERTY(bool overwriteMat READ get_overwriteMat WRITE set_overwriteMat RESET reset_overwriteMat STORED false) Q_PROPERTY(QString inputVariable READ get_inputVariable WRITE set_inputVariable RESET reset_inputVariable STORED false) Q_PROPERTY(QString outputVariable READ get_outputVariable WRITE set_outputVariable RESET reset_outputVariable STORED false) + Q_PROPERTY(bool weight READ get_weight WRITE set_weight RESET reset_weight STORED false) + Q_PROPERTY(TerminationCriteria termCrit READ get_termCrit WRITE set_termCrit RESET reset_termCrit STORED false) 
+ +public: + /* Values for the termCrit property, defined from the CV_TERMCRIT_ITER and CV_TERMCRIT_EPS flags. */ enum TerminationCriteria { Iter = CV_TERMCRIT_ITER, + EPS = CV_TERMCRIT_EPS, + Both = CV_TERMCRIT_EPS | CV_TERMCRIT_ITER}; + +protected: BR_PROPERTY(bool, classification, true) BR_PROPERTY(float, splitPercentage, .01) BR_PROPERTY(int, maxDepth, std::numeric_limits::max()) @@ -71,10 +124,12 @@ class ForestTransform : public Transform BR_PROPERTY(bool, overwriteMat, true) BR_PROPERTY(QString, inputVariable, "Label") BR_PROPERTY(QString, outputVariable, "") + BR_PROPERTY(bool, weight, false) + BR_PROPERTY(TerminationCriteria, termCrit, Iter) CvRTrees forest; - void train(const TemplateList &data) + void trainForest(const TemplateList &data) { Mat samples = OpenCVUtils::toMat(data.data()); Mat labels = OpenCVUtils::toMat(File::get(data, inputVariable)); @@ -88,6 +143,14 @@ class ForestTransform : public Transform types.at(samples.cols, 0) = CV_VAR_NUMERICAL; } + /* Optional class priors, used only for classification with weight set: label 1 gets (rows - nonZero) / nonZero. NOTE(review): nonZero is not checked against 0 before the division. */ bool usePrior = classification && weight; + float priors[2]; + if (usePrior) { + int nonZero = countNonZero(labels); + priors[0] = 1; + priors[1] = (float)(samples.rows-nonZero)/nonZero; + } + int minSamplesForSplit = data.size()*splitPercentage; forest.train( samples, CV_ROW_SAMPLE, labels, Mat(), Mat(), types, Mat(), CvRTParams(maxDepth, @@ -95,54 +158,133 @@ class ForestTransform : public Transform 0, false, 2, - 0, // priors + usePrior ? 
priors : 0, false, 0, maxTrees, forestAccuracy, - CV_TERMCRIT_ITER | CV_TERMCRIT_EPS)); + termCrit)); + + /* Diagnostics printed only in verbose mode: tree count and, for classification, timing, accuracy and MAE over the samples used for training. */ if (Globals->verbose) { + qDebug() << "Number of trees:" << forest.get_tree_count(); + + if (classification) { + QTime timer; + timer.start(); + int correctClassification = 0; + float regressionError = 0; + for (int i=0; i(i,0)) { + correctClassification++; + } + regressionError += fabs(prediction-labels.at(i,0)); + } + + qDebug("Time to classify %d samples: %d ms\n \ + Classification Accuracy: %f\n \ + MAE: %f\n \ + Sample dimensionality: %d", + samples.rows,timer.elapsed(),(float)correctClassification/samples.rows,regressionError/samples.rows,samples.cols); + } + } + } +}; + +BR_REGISTER(Transform, ForestTransform) + +/*! + * \ingroup transforms + * \brief Wraps OpenCV's random trees framework to induce features + * \author Scott Klum \cite sklum + * \brief https://lirias.kuleuven.be/bitstream/123456789/316661/1/icdm11-camready.pdf + */ +class ForestInductionTransform : public ForestTransform +{ + Q_OBJECT + Q_PROPERTY(bool useRegressionValue READ get_useRegressionValue WRITE set_useRegressionValue RESET reset_useRegressionValue STORED false) + BR_PROPERTY(bool, useRegressionValue, false) + + int totalSize; + QList< QList > nodes; + + /* For every tree, walks from the root and appends each node that has no left child to nodes.last(), then adds the size of that list to totalSize. NOTE(review): no initializer for totalSize and no reset of nodes is visible in this patch -- confirm, since train() and load() both call this. */ void fillNodes() + { + for (int i=0; i()); + const CvDTreeNode* node = forest.get_tree(i)->get_root(); + + // traverse the tree depth-first and save every node that has no left child + for(;;) + { + CvDTreeNode* parent; + for(;;) + { + if( !node->left ) + break; + node = node->left; + } + + nodes.last().append(node); + + for( parent = node->parent; parent && parent->right == node; + node = parent, parent = parent->parent ) + ; + + if( !parent ) + break; + + node = parent->right; + } + + totalSize += nodes.last().size(); + } + } - qDebug() << "Number of trees:" << forest.get_tree_count(); + void train(const TemplateList &data) + { + trainForest(data); + if (!useRegressionValue) fillNodes(); } void project(const Template &src, Template &dst) 
const { dst = src; - float response; - if (classification && returnConfidence) { - // Fuzzy class label - response = forest.predict_prob(src.m().reshape(1,1)); - } else { - response = forest.predict(src.m().reshape(1,1)); - } + /* Output vector: with useRegressionValue it has one row per tree, filled from the value of the node that tree predicts; otherwise it has totalSize rows, zero except for the entries set to 1 at offset+index. It replaces dst.m(). */ Mat responses; - if (overwriteMat) { - dst.m() = Mat(1, 1, CV_32F); - dst.m().at(0, 0) = response; + if (useRegressionValue) { + responses = Mat::zeros(forest.get_tree_count(),1,CV_32F); + for (int i=0; i(i,0) = forest.get_tree(i)->predict(src.m().reshape(1,1))->value; + } } else { - dst.file.set(outputVariable, response); + responses = Mat::zeros(totalSize,1,CV_32F); + int offset = 0; + for (int i=0; ipredict(src.m().reshape(1,1))); + responses.at(offset+index,0) = 1; + offset += nodes[i].size(); + } } + + dst.m() = responses; } void load(QDataStream &stream) { loadModel(forest,stream); + if (!useRegressionValue) fillNodes(); } void store(QDataStream &stream) const { storeModel(forest,stream); } - - void init() - { - if (outputVariable.isEmpty()) - outputVariable = inputVariable; - } }; -BR_REGISTER(Transform, ForestTransform) +BR_REGISTER(Transform, ForestInductionTransform) /*! * \ingroup transforms diff --git a/share/openbr/cmake/FindLibLinear.cmake b/share/openbr/cmake/FindLibLinear.cmake new file mode 100644 index 0000000..795f4dc --- /dev/null +++ b/share/openbr/cmake/FindLibLinear.cmake @@ -0,0 +1,13 @@ +find_path(LibLinear_DIR linear.h ${CMAKE_SOURCE_DIR}/3rdparty/*) + +message(${LibLinear_DIR}) +mark_as_advanced(LibLinear_DIR) +include_directories(${LibLinear_DIR}) +include_directories(${LibLinear_DIR}/blas) + +set(LibLinear_SRC ${LibLinear_DIR}/linear.cpp + ${LibLinear_DIR}/tron.cpp + ${LibLinear_DIR}/blas/daxpy.c + ${LibLinear_DIR}/blas/ddot.c + ${LibLinear_DIR}/blas/dnrm2.c + ${LibLinear_DIR}/blas/dscal.c)