diff --git a/openbr/plugins/classification/caffe.cpp b/openbr/plugins/classification/caffe.cpp index f51fe1d..01ad1bb 100644 --- a/openbr/plugins/classification/caffe.cpp +++ b/openbr/plugins/classification/caffe.cpp @@ -52,21 +52,21 @@ private: }; /*! - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. - * The output of the Caffe network is treated as a feature vector and is stored in dst. Batch processing is possible. For a given batch size set in - * the memory data layer, src is expected to have an equal number of mats. Dst will always have the same size (number of mats) as src and the ordering - * will be preserved, so dst[1] is the output of src[1] after it passes through the neural net. - * \author Jordan Cheney \cite jcheney + * \brief The base transform for wrapping the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. + * The output of the forward pass of the Caffe network is stored in dst as a list of matrices, the size of which is equal to the batch_size of the network. + * Children of this transform should process dst to achieve specific use cases. + * \author Jordan Cheney \cite JordanCheney * \br_property QString model path to prototxt model file * \br_property QString weights path to caffemodel file * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. 
* \br_link Caffe Integration Tutorial ../tutorials.md#caffe * \br_link Caffe website http://caffe.berkeleyvision.org */ -class CaffeFVTransform : public UntrainableMetaTransform +class CaffeBaseTransform : public UntrainableMetaTransform { Q_OBJECT +public: Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) @@ -76,6 +76,7 @@ class CaffeFVTransform : public UntrainableMetaTransform Resource caffeResource; +protected: void init() { caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); @@ -98,8 +99,6 @@ class CaffeFVTransform : public UntrainableMetaTransform if (src.size() != dataLayer->batch_size()) qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); - dst.file = src.file; - dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector(src.size(), 0)); Blob *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). @@ -112,77 +111,53 @@ class CaffeFVTransform : public UntrainableMetaTransform } }; -BR_REGISTER(Transform, CaffeFVTransform) - /*! - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. - * The output of the Caffe network is treated as a classifier with one node per class. Dst is set equal to src and a list of labels and confidences - * is stored in the metadata using the tags "Labels" and "Confidences". The size of the lists is equal to the batch size of the network. If the batch - * size is 1 the lists are converted to an integer and floating point value respectively and are stored in metadata using "Label" and "Confidence" instead. 
- * \author Jordan Cheney \cite jcheney - * \br_property QString model path to prototxt model file - * \br_property QString weights path to caffemodel file - * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. - * \br_link Caffe Integration Tutorial ../tutorials.md#caffe - * \br_link Caffe website http://caffe.berkeleyvision.org + * \brief This transform treats the output of the network as a feature vector and appends it unchanged to dst. Dst will have + * length equal to the batch size of the network. + * \author Jordan Cheney \cite JordanCheney */ -class CaffeClassifierTransform : public UntrainableMetaTransform +class CaffeFVTransform : public CaffeBaseTransform { Q_OBJECT - Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) - Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) - Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) - BR_PROPERTY(QString, model, "") - BR_PROPERTY(QString, weights, "") - BR_PROPERTY(int, gpuDevice, -1) - - Resource caffeResource; - - void init() + void project(const Template &src, Template &dst) const { - caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); - } + Template caffeOutput; + CaffeBaseTransform::project(src, caffeOutput); - bool timeVarying() const - { - return gpuDevice < 0 ? false : true; + dst.file = src.file; + dst.append(caffeOutput); } +}; - void project(const Template &src, Template &dst) const - { - CaffeNet *net = caffeResource.acquire(); - - if (net->layers()[0]->layer_param().type() != "MemoryData") - qFatal("Integrating OpenBr with caffe requires the first layer in the network to be a MemoryDataLayer"); +BR_REGISTER(Transform, CaffeFVTransform) - MemoryDataLayer *dataLayer = static_cast *>(net->layers()[0].get()); +/*! 
+ * \brief This transform treats the output of the network as a score distribution for an arbitrary number of classes. + * The maximum score and location for each input image is determined and stored in the template metadata. The template + * matrix is not changed. If the network batch size is > 1, the results are stored as lists in the dst template's metadata + * using the keys "Labels" and "Confidences" respectively. The length of these lists is equal to the provided batch size. + * If batch size == 1, the results are stored as an int and float using the keys "Label" and "Confidence" respectively. + * \author Jordan Cheney \cite JordanCheney + */ +class CaffeClassifierTransform : public CaffeBaseTransform +{ + Q_OBJECT - if (src.size() != dataLayer->batch_size()) - qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); + void project(const Template &src, Template &dst) const + { + Template caffeOutput; + CaffeBaseTransform::project(src, caffeOutput); dst = src; - dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector(src.size(), 0)); - - Blob *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). 
- // index 1 is the ouput of the final layer, which is what we want - QList labels; QList confidences; - int dimFeatures = output->count() / dataLayer->batch_size(); - for (int n = 0; n < dataLayer->batch_size(); n++) { - const float *data = output->cpu_data() + output->offset(n); - - int maxIdx = -1; float maxVal = -std::numeric_limits::max(); - for (int d = 0; d < dimFeatures; d++) { - if (data[d] > maxVal) { - maxVal = data[d]; - maxIdx = d; - } - } - - labels.append(maxIdx); + foreach (const Mat &m, caffeOutput) { + double maxVal; int maxLoc; + minMaxIdx(m, NULL, &maxVal, NULL, &maxLoc); + + labels.append(maxLoc); confidences.append(maxVal); } @@ -193,8 +168,6 @@ class CaffeClassifierTransform : public UntrainableMetaTransform dst.file.setList("Labels", labels); dst.file.setList("Confidences", confidences); } - - caffeResource.release(net); } };