Commit 66a1fdbf7af2b9867dbed9df2f67eac95eb3deaa

Authored by Jordan Cheney
1 parent 205b60f8

Refactor of caffe transforms

openbr/plugins/classification/caffe.cpp
@@ -52,21 +52,21 @@ private: @@ -52,21 +52,21 @@ private:
52 }; 52 };
53 53
54 /*! 54 /*!
55 - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer.  
56 - * The output of the Caffe network is treated as a feature vector and is stored in dst. Batch processing is possible. For a given batch size set in  
57 - * the memory data layer, src is expected to have an equal number of mats. Dst will always have the same size (number of mats) as src and the ordering  
58 - * will be preserved, so dst[1] is the output of src[1] after it passes through the neural net.  
59 - * \author Jordan Cheney \cite jcheney 55 + * \brief The base transform for wrapping the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer.
  56 + * The output of the forward pass of the Caffe network is stored in dst as a list of matrices, the size of which is equal to the batch_size of the network.
  57 + * Children of this transform should process dst to achieve specific use cases.
  58 + * \author Jordan Cheney \cite JordanCheney
60 * \br_property QString model path to prototxt model file 59 * \br_property QString model path to prototxt model file
61 * \br_property QString weights path to caffemodel file 60 * \br_property QString weights path to caffemodel file
62 * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. 61 * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only.
63 * \br_link Caffe Integration Tutorial ../tutorials.md#caffe 62 * \br_link Caffe Integration Tutorial ../tutorials.md#caffe
64 * \br_link Caffe website http://caffe.berkeleyvision.org 63 * \br_link Caffe website http://caffe.berkeleyvision.org
65 */ 64 */
66 -class CaffeFVTransform : public UntrainableMetaTransform 65 +class CaffeBaseTransform : public UntrainableMetaTransform
67 { 66 {
68 Q_OBJECT 67 Q_OBJECT
69 68
  69 +public:
70 Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) 70 Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false)
71 Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) 71 Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false)
72 Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) 72 Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false)
@@ -76,6 +76,7 @@ class CaffeFVTransform : public UntrainableMetaTransform @@ -76,6 +76,7 @@ class CaffeFVTransform : public UntrainableMetaTransform
76 76
77 Resource<CaffeNet> caffeResource; 77 Resource<CaffeNet> caffeResource;
78 78
  79 +protected:
79 void init() 80 void init()
80 { 81 {
81 caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); 82 caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice));
@@ -98,8 +99,6 @@ class CaffeFVTransform : public UntrainableMetaTransform @@ -98,8 +99,6 @@ class CaffeFVTransform : public UntrainableMetaTransform
98 if (src.size() != dataLayer->batch_size()) 99 if (src.size() != dataLayer->batch_size())
99 qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); 100 qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size());
100 101
101 - dst.file = src.file;  
102 -  
103 dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0)); 102 dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0));
104 103
105 Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). 104 Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above).
@@ -112,77 +111,53 @@ class CaffeFVTransform : public UntrainableMetaTransform @@ -112,77 +111,53 @@ class CaffeFVTransform : public UntrainableMetaTransform
112 } 111 }
113 }; 112 };
114 113
115 -BR_REGISTER(Transform, CaffeFVTransform)  
116 -  
117 /*! 114 /*!
118 - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer.  
119 - * The output of the Caffe network is treated as a classifier with one node per class. Dst is set equal to src and a list of labels and confidences  
120 - * is stored in the metadata using the tags "Labels" and "Confidences". The size of the lists is equal to the batch size of the network. If the batch  
121 - * size is 1 the lists are converted to an integer and floating point value respectively and are stored in metadata using "Label" and "Confidence" instead.  
122 - * \author Jordan Cheney \cite jcheney  
123 - * \br_property QString model path to prototxt model file  
124 - * \br_property QString weights path to caffemodel file  
125 - * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only.  
126 - * \br_link Caffe Integration Tutorial ../tutorials.md#caffe  
127 - * \br_link Caffe website http://caffe.berkeleyvision.org 115 + * \brief This transform treats the output of the network as a feature vector and appends it unchanged to dst. Dst will have
  116 + * length equal to the batch size of the network.
  117 + * \author Jordan Cheney \cite JordanCheney
128 */ 118 */
129 -class CaffeClassifierTransform : public UntrainableMetaTransform 119 +class CaffeFVTransform : public CaffeBaseTransform
130 { 120 {
131 Q_OBJECT 121 Q_OBJECT
132 122
133 - Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false)  
134 - Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false)  
135 - Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false)  
136 - BR_PROPERTY(QString, model, "")  
137 - BR_PROPERTY(QString, weights, "")  
138 - BR_PROPERTY(int, gpuDevice, -1)  
139 -  
140 - Resource<CaffeNet> caffeResource;  
141 -  
142 - void init() 123 + void project(const Template &src, Template &dst) const
143 { 124 {
144 - caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice));  
145 - } 125 + Template caffeOutput;
  126 + CaffeBaseTransform::project(src, caffeOutput);
146 127
147 - bool timeVarying() const  
148 - {  
149 - return gpuDevice < 0 ? false : true; 128 + dst.file = src.file;
  129 + dst.append(caffeOutput);
150 } 130 }
  131 +};
151 132
152 - void project(const Template &src, Template &dst) const  
153 - {  
154 - CaffeNet *net = caffeResource.acquire();  
155 -  
156 - if (net->layers()[0]->layer_param().type() != "MemoryData")  
157 - qFatal("Integrating OpenBr with caffe requires the first layer in the network to be a MemoryDataLayer"); 133 +BR_REGISTER(Transform, CaffeFVTransform)
158 134
159 - MemoryDataLayer<float> *dataLayer = static_cast<MemoryDataLayer<float> *>(net->layers()[0].get()); 135 +/*!
  136 + * \brief This transform treats the output of the network as a score distribution for an arbitrary number of classes.
  137 + * The maximum score and location for each input image is determined and stored in the template metadata. The template
  138 + * matrix is not changed. If the network batch size is > 1, the results are stored as lists in the dst template's metadata
  139 + * using the keys "Labels" and "Confidences" respectively. The length of these lists is equivalent to the provided batch size.
  140 + * If batch size == 1, the results are stored as an int and a float using the keys "Label" and "Confidence" respectively.
  141 + * \author Jordan Cheney \cite JordanCheney
  142 + */
  143 +class CaffeClassifierTransform : public CaffeBaseTransform
  144 +{
  145 + Q_OBJECT
160 146
161 - if (src.size() != dataLayer->batch_size())  
162 - qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); 147 + void project(const Template &src, Template &dst) const
  148 + {
  149 + Template caffeOutput;
  150 + CaffeBaseTransform::project(src, caffeOutput);
163 151
164 dst = src; 152 dst = src;
165 153
166 - dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0));  
167 -  
168 - Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above).  
169 - // index 1 is the ouput of the final layer, which is what we want  
170 -  
171 QList<int> labels; QList<float> confidences; 154 QList<int> labels; QList<float> confidences;
172 155
173 - int dimFeatures = output->count() / dataLayer->batch_size();  
174 - for (int n = 0; n < dataLayer->batch_size(); n++) {  
175 - const float *data = output->cpu_data() + output->offset(n);  
176 -  
177 - int maxIdx = -1; float maxVal = -std::numeric_limits<float>::max();  
178 - for (int d = 0; d < dimFeatures; d++) {  
179 - if (data[d] > maxVal) {  
180 - maxVal = data[d];  
181 - maxIdx = d;  
182 - }  
183 - }  
184 -  
185 - labels.append(maxIdx); 156 + foreach (const Mat &m, caffeOutput) {
  157 + double maxVal; int maxLoc;
  158 + minMaxIdx(m, NULL, &maxVal, NULL, &maxLoc);
  159 +
  160 + labels.append(maxLoc);
186 confidences.append(maxVal); 161 confidences.append(maxVal);
187 } 162 }
188 163
@@ -193,8 +168,6 @@ class CaffeClassifierTransform : public UntrainableMetaTransform @@ -193,8 +168,6 @@ class CaffeClassifierTransform : public UntrainableMetaTransform
193 dst.file.setList<int>("Labels", labels); 168 dst.file.setList<int>("Labels", labels);
194 dst.file.setList<float>("Confidences", confidences); 169 dst.file.setList<float>("Confidences", confidences);
195 } 170 }
196 -  
197 - caffeResource.release(net);  
198 } 171 }
199 }; 172 };
200 173