Commit 66a1fdbf7af2b9867dbed9df2f67eac95eb3deaa
1 parent
205b60f8
Refactor of caffe transforms
Showing
1 changed file
with
38 additions
and
65 deletions
openbr/plugins/classification/caffe.cpp
| ... | ... | @@ -52,21 +52,21 @@ private: |
| 52 | 52 | }; |
| 53 | 53 | |
| 54 | 54 | /*! |
| 55 | - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. | |
| 56 | - * The output of the Caffe network is treated as a feature vector and is stored in dst. Batch processing is possible. For a given batch size set in | |
| 57 | - * the memory data layer, src is expected to have an equal number of mats. Dst will always have the same size (number of mats) as src and the ordering | |
| 58 | - * will be preserved, so dst[1] is the output of src[1] after it passes through the neural net. | |
| 59 | - * \author Jordan Cheney \cite jcheney | |
| 55 | + * \brief The base transform for wrapping the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. | |
| 56 | + * The output of the forward pass of the Caffe network is stored in dst as a list of matrices, the size of which is equal to the batch_size of the network. | |
| 57 | + * Children of this transform should process dst to achieve specific use cases. |
| 58 | + * \author Jordan Cheney \cite JordanCheney | |
| 60 | 59 | * \br_property QString model path to prototxt model file |
| 61 | 60 | * \br_property QString weights path to caffemodel file |
| 62 | 61 | * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. |
| 63 | 62 | * \br_link Caffe Integration Tutorial ../tutorials.md#caffe |
| 64 | 63 | * \br_link Caffe website http://caffe.berkeleyvision.org |
| 65 | 64 | */ |
| 66 | -class CaffeFVTransform : public UntrainableMetaTransform | |
| 65 | +class CaffeBaseTransform : public UntrainableMetaTransform | |
| 67 | 66 | { |
| 68 | 67 | Q_OBJECT |
| 69 | 68 | |
| 69 | +public: | |
| 70 | 70 | Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) |
| 71 | 71 | Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) |
| 72 | 72 | Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) |
| ... | ... | @@ -76,6 +76,7 @@ class CaffeFVTransform : public UntrainableMetaTransform |
| 76 | 76 | |
| 77 | 77 | Resource<CaffeNet> caffeResource; |
| 78 | 78 | |
| 79 | +protected: | |
| 79 | 80 | void init() |
| 80 | 81 | { |
| 81 | 82 | caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); |
| ... | ... | @@ -98,8 +99,6 @@ class CaffeFVTransform : public UntrainableMetaTransform |
| 98 | 99 | if (src.size() != dataLayer->batch_size()) |
| 99 | 100 | qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); |
| 100 | 101 | |
| 101 | - dst.file = src.file; | |
| 102 | - | |
| 103 | 102 | dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0)); |
| 104 | 103 | |
| 105 | 104 | Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). |
| ... | ... | @@ -112,77 +111,53 @@ class CaffeFVTransform : public UntrainableMetaTransform |
| 112 | 111 | } |
| 113 | 112 | }; |
| 114 | 113 | |
| 115 | -BR_REGISTER(Transform, CaffeFVTransform) | |
| 116 | - | |
| 117 | 114 | /*! |
| 118 | - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. | |
| 119 | - * The output of the Caffe network is treated as a classifier with one node per class. Dst is set equal to src and a list of labels and confidences | |
| 120 | - * is stored in the metadata using the tags "Labels" and "Confidences". The size of the lists is equal to the batch size of the network. If the batch | |
| 121 | - * size is 1 the lists are converted to an integer and floating point value respectively and are stored in metadata using "Label" and "Confidence" instead. | |
| 122 | - * \author Jordan Cheney \cite jcheney | |
| 123 | - * \br_property QString model path to prototxt model file | |
| 124 | - * \br_property QString weights path to caffemodel file | |
| 125 | - * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. | |
| 126 | - * \br_link Caffe Integration Tutorial ../tutorials.md#caffe | |
| 127 | - * \br_link Caffe website http://caffe.berkeleyvision.org | |
| 115 | + * \brief This transform treats the output of the network as a feature vector and appends it unchanged to dst. Dst will have | |
| 116 | + * length equal to the batch size of the network. | |
| 117 | + * \author Jordan Cheney \cite JordanCheney | |
| 128 | 118 | */ |
| 129 | -class CaffeClassifierTransform : public UntrainableMetaTransform | |
| 119 | +class CaffeFVTransform : public CaffeBaseTransform | |
| 130 | 120 | { |
| 131 | 121 | Q_OBJECT |
| 132 | 122 | |
| 133 | - Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) | |
| 134 | - Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) | |
| 135 | - Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) | |
| 136 | - BR_PROPERTY(QString, model, "") | |
| 137 | - BR_PROPERTY(QString, weights, "") | |
| 138 | - BR_PROPERTY(int, gpuDevice, -1) | |
| 139 | - | |
| 140 | - Resource<CaffeNet> caffeResource; | |
| 141 | - | |
| 142 | - void init() | |
| 123 | + void project(const Template &src, Template &dst) const | |
| 143 | 124 | { |
| 144 | - caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); | |
| 145 | - } | |
| 125 | + Template caffeOutput; | |
| 126 | + CaffeBaseTransform::project(src, caffeOutput); | |
| 146 | 127 | |
| 147 | - bool timeVarying() const | |
| 148 | - { | |
| 149 | - return gpuDevice < 0 ? false : true; | |
| 128 | + dst.file = src.file; | |
| 129 | + dst.append(caffeOutput); | |
| 150 | 130 | } |
| 131 | +}; | |
| 151 | 132 | |
| 152 | - void project(const Template &src, Template &dst) const | |
| 153 | - { | |
| 154 | - CaffeNet *net = caffeResource.acquire(); | |
| 155 | - | |
| 156 | - if (net->layers()[0]->layer_param().type() != "MemoryData") | |
| 157 | - qFatal("Integrating OpenBr with caffe requires the first layer in the network to be a MemoryDataLayer"); | |
| 133 | +BR_REGISTER(Transform, CaffeFVTransform) | |
| 158 | 134 | |
| 159 | - MemoryDataLayer<float> *dataLayer = static_cast<MemoryDataLayer<float> *>(net->layers()[0].get()); | |
| 135 | +/*! | |
| 136 | + * \brief This transform treats the output of the network as a score distribution for an arbitrary number of classes. | |
| 137 | + * The maximum score and location for each input image is determined and stored in the template metadata. The template | |
| 138 | + * matrix is not changed. If the network batch size is > 1, the results are stored as lists in the dst template's metadata | |
| 139 | + * using the keys "Labels" and "Confidences" respectively. The length of these lists is equivalent to the provided batch size. | |
| 140 | + * If batch size == 1, the results are stored as an int and a float using the keys "Label" and "Confidence" respectively. |
| 141 | + * \author Jordan Cheney \cite JordanCheney |
| 142 | + */ | |
| 143 | +class CaffeClassifierTransform : public CaffeBaseTransform | |
| 144 | +{ | |
| 145 | + Q_OBJECT | |
| 160 | 146 | |
| 161 | - if (src.size() != dataLayer->batch_size()) | |
| 162 | - qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); | |
| 147 | + void project(const Template &src, Template &dst) const | |
| 148 | + { | |
| 149 | + Template caffeOutput; | |
| 150 | + CaffeBaseTransform::project(src, caffeOutput); | |
| 163 | 151 | |
| 164 | 152 | dst = src; |
| 165 | 153 | |
| 166 | - dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0)); | |
| 167 | - | |
| 168 | - Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). | |
| 169 | - // index 1 is the ouput of the final layer, which is what we want | |
| 170 | - | |
| 171 | 154 | QList<int> labels; QList<float> confidences; |
| 172 | 155 | |
| 173 | - int dimFeatures = output->count() / dataLayer->batch_size(); | |
| 174 | - for (int n = 0; n < dataLayer->batch_size(); n++) { | |
| 175 | - const float *data = output->cpu_data() + output->offset(n); | |
| 176 | - | |
| 177 | - int maxIdx = -1; float maxVal = -std::numeric_limits<float>::max(); | |
| 178 | - for (int d = 0; d < dimFeatures; d++) { | |
| 179 | - if (data[d] > maxVal) { | |
| 180 | - maxVal = data[d]; | |
| 181 | - maxIdx = d; | |
| 182 | - } | |
| 183 | - } | |
| 184 | - | |
| 185 | - labels.append(maxIdx); | |
| 156 | + foreach (const Mat &m, caffeOutput) { | |
| 157 | + double maxVal; int maxLoc; | |
| 158 | + minMaxIdx(m, NULL, &maxVal, NULL, &maxLoc); | |
| 159 | + | |
| 160 | + labels.append(maxLoc); | |
| 186 | 161 | confidences.append(maxVal); |
| 187 | 162 | } |
| 188 | 163 | |
| ... | ... | @@ -193,8 +168,6 @@ class CaffeClassifierTransform : public UntrainableMetaTransform |
| 193 | 168 | dst.file.setList<int>("Labels", labels); |
| 194 | 169 | dst.file.setList<float>("Confidences", confidences); |
| 195 | 170 | } |
| 196 | - | |
| 197 | - caffeResource.release(net); | |
| 198 | 171 | } |
| 199 | 172 | }; |
| 200 | 173 | ... | ... |