diff --git a/openbr/plugins/classification/caffe.cpp b/openbr/plugins/classification/caffe.cpp index e35577d..7445391 100644 --- a/openbr/plugins/classification/caffe.cpp +++ b/openbr/plugins/classification/caffe.cpp @@ -1,8 +1,6 @@ #include #include -#include -#include #include using caffe::Caffe; @@ -52,21 +50,21 @@ private: }; /*! - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. - * The output of the Caffe network is treated as a feature vector and is stored in dst. Batch processing is possible. For a given batch size set in - * the memory data layer, src is expected to have an equal number of mats. Dst will always have the same size (number of mats) as src and the ordering - * will be preserved, so dst[1] is the output of src[1] after it passes through the neural net. - * \author Jordan Cheney \cite jcheney + * \brief The base transform for wrapping the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. + * The output of the forward pass of the Caffe network is stored in dst as a list of matrices, the size of which is equal to the batch_size of the network. + * Children of this transform should process dst to acheieve specifc use cases. + * \author Jordan Cheney \cite JordanCheney * \br_property QString model path to prototxt model file * \br_property QString weights path to caffemodel file * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. * \br_link Caffe Integration Tutorial ../tutorials.md#caffe * \br_link Caffe website http://caffe.berkeleyvision.org */ -class CaffeFVTransform : public UntrainableTransform +class CaffeBaseTransform : public UntrainableMetaTransform { Q_OBJECT +public: Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) @@ -76,6 +74,7 @@ class CaffeFVTransform : public UntrainableTransform Resource caffeResource; +protected: void init() { caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); @@ -90,27 +89,95 @@ class CaffeFVTransform : public UntrainableTransform { CaffeNet *net = caffeResource.acquire(); - MemoryDataLayer *data_layer = static_cast *>(net->layers()[0].get()); + if (net->layers()[0]->layer_param().type() != "MemoryData") + qFatal("OpenBR requires the first layer in the network to be a MemoryDataLayer"); - if (src.size() != data_layer->batch_size()) - qFatal("src should have %d (batch size) mats. It has %d mats.", data_layer->batch_size(), src.size()); + MemoryDataLayer *dataLayer = static_cast *>(net->layers()[0].get()); - dst.file = src.file; + if (src.size() != dataLayer->batch_size()) + qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); - data_layer->AddMatVector(src.toVector().toStdVector(), std::vector(src.size(), 0)); + dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector(src.size(), 0)); - Blob *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). - // index 1 is the ouput of the final layer, which is what we want - int dim_features = output->count() / data_layer->batch_size(); - for (int n = 0; n < data_layer->batch_size(); n++) - dst += Mat(1, dim_features, CV_32FC1, output->mutable_cpu_data() + output->offset(n)); + net->ForwardPrefilled(); + Blob *output = net->blobs().back().get(); + + int dimFeatures = output->count() / dataLayer->batch_size(); + for (int n = 0; n < dataLayer->batch_size(); n++) + dst += Mat(1, dimFeatures, CV_32FC1, output->mutable_cpu_data() + output->offset(n)); caffeResource.release(net); } }; +/*! + * \brief This transform treats the output of the network as a feature vector and appends it unchanged to dst. Dst will have + * length equal to the batch size of the network. + * \author Jordan Cheney \cite JordanCheney + * \br_property QString model path to prototxt model file + * \br_property QString weights path to caffemodel file + * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. + */ +class CaffeFVTransform : public CaffeBaseTransform +{ + Q_OBJECT + + void project(const Template &src, Template &dst) const + { + Template caffeOutput; + CaffeBaseTransform::project(src, caffeOutput); + + dst.file = src.file; + dst.append(caffeOutput); + } +}; + BR_REGISTER(Transform, CaffeFVTransform) +/*! + * \brief This transform treats the output of the network as a score distribution for an arbitrary number of classes. + * The maximum score and location for each input image is determined and stored in the template metadata. The template + * matrix is not changed. If the network batch size is > 1, the results are stored as lists in the dst template's metadata + * using the keys "Labels" and "Confidences" respectively. The length of these lists is equivalent to the provided batch size. + * If batch size == 1, the results are stored as a float and int using the keys "Label", and "Confidence" respectively. + * \author Jordan Cheney \cite jcheney + * \br_property QString model path to prototxt model file + * \br_property QString weights path to caffemodel file + * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. + */ +class CaffeClassifierTransform : public CaffeBaseTransform +{ + Q_OBJECT + + void project(const Template &src, Template &dst) const + { + Template caffeOutput; + CaffeBaseTransform::project(src, caffeOutput); + + dst = src; + + QList labels; QList confidences; + + foreach (const Mat &m, caffeOutput) { + double maxVal; int maxLoc; + minMaxIdx(m, NULL, &maxVal, NULL, &maxLoc); + + labels.append(maxLoc); + confidences.append(maxVal); + } + + if (labels.size() == 1) { + dst.file.set("Label", labels[0]); + dst.file.set("Confidence", confidences[0]); + } else { + dst.file.setList("Labels", labels); + dst.file.setList("Confidences", confidences); + } + } +}; + +BR_REGISTER(Transform, CaffeClassifierTransform) + } // namespace br #include "classification/caffe.moc"