Commit 66a1fdbf7af2b9867dbed9df2f67eac95eb3deaa
1 parent
205b60f8
Refactor of caffe transforms
Showing
1 changed file
with
38 additions
and
65 deletions
openbr/plugins/classification/caffe.cpp
| ... | ... | @@ -52,21 +52,21 @@ private: |
| 52 | 52 | }; |
| 53 | 53 | |
| 54 | 54 | /*! |
| 55 | - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. | |
| 56 | - * The output of the Caffe network is treated as a feature vector and is stored in dst. Batch processing is possible. For a given batch size set in | |
| 57 | - * the memory data layer, src is expected to have an equal number of mats. Dst will always have the same size (number of mats) as src and the ordering | |
| 58 | - * will be preserved, so dst[1] is the output of src[1] after it passes through the neural net. | |
| 59 | - * \author Jordan Cheney \cite jcheney | |
| 55 | + * \brief The base transform for wrapping the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. | |
| 56 | + * The output of the forward pass of the Caffe network is stored in dst as a list of matrices, the size of which is equal to the batch_size of the network. | |
| 57 | + * Children of this transform should process dst to achieve specific use cases. |
| 58 | + * \author Jordan Cheney \cite JordanCheney | |
| 60 | 59 | * \br_property QString model path to prototxt model file |
| 61 | 60 | * \br_property QString weights path to caffemodel file |
| 62 | 61 | * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. |
| 63 | 62 | * \br_link Caffe Integration Tutorial ../tutorials.md#caffe |
| 64 | 63 | * \br_link Caffe website http://caffe.berkeleyvision.org |
| 65 | 64 | */ |
| 66 | -class CaffeFVTransform : public UntrainableMetaTransform | |
| 65 | +class CaffeBaseTransform : public UntrainableMetaTransform | |
| 67 | 66 | { |
| 68 | 67 | Q_OBJECT |
| 69 | 68 | |
| 69 | +public: | |
| 70 | 70 | Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) |
| 71 | 71 | Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) |
| 72 | 72 | Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) |
| ... | ... | @@ -76,6 +76,7 @@ class CaffeFVTransform : public UntrainableMetaTransform |
| 76 | 76 | |
| 77 | 77 | Resource<CaffeNet> caffeResource; |
| 78 | 78 | |
| 79 | +protected: | |
| 79 | 80 | void init() |
| 80 | 81 | { |
| 81 | 82 | caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); |
| ... | ... | @@ -98,8 +99,6 @@ class CaffeFVTransform : public UntrainableMetaTransform |
| 98 | 99 | if (src.size() != dataLayer->batch_size()) |
| 99 | 100 | qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); |
| 100 | 101 | |
| 101 | - dst.file = src.file; | |
| 102 | - | |
| 103 | 102 | dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0)); |
| 104 | 103 | |
| 105 | 104 | Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). |
| ... | ... | @@ -112,77 +111,53 @@ class CaffeFVTransform : public UntrainableMetaTransform |
| 112 | 111 | } |
| 113 | 112 | }; |
| 114 | 113 | |
| 115 | -BR_REGISTER(Transform, CaffeFVTransform) | |
| 116 | - | |
| 117 | 114 | /*! |
| 118 | - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. | |
| 119 | - * The output of the Caffe network is treated as a classifier with one node per class. Dst is set equal to src and a list of labels and confidences | |
| 120 | - * is stored in the metadata using the tags "Labels" and "Confidences". The size of the lists is equal to the batch size of the network. If the batch | |
| 121 | - * size is 1 the lists are converted to an integer and floating point value respectively and are stored in metadata using "Label" and "Confidence" instead. | |
| 122 | - * \author Jordan Cheney \cite jcheney | |
| 123 | - * \br_property QString model path to prototxt model file | |
| 124 | - * \br_property QString weights path to caffemodel file | |
| 125 | - * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. | |
| 126 | - * \br_link Caffe Integration Tutorial ../tutorials.md#caffe | |
| 127 | - * \br_link Caffe website http://caffe.berkeleyvision.org | |
| 115 | + * \brief This transform treats the output of the network as a feature vector and appends it unchanged to dst. Dst will have | |
| 116 | + * length equal to the batch size of the network. | |
| 117 | + * \author Jordan Cheney \cite JordanCheney | |
| 128 | 118 | */ |
| 129 | -class CaffeClassifierTransform : public UntrainableMetaTransform | |
| 119 | +class CaffeFVTransform : public CaffeBaseTransform | |
| 130 | 120 | { |
| 131 | 121 | Q_OBJECT |
| 132 | 122 | |
| 133 | - Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) | |
| 134 | - Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) | |
| 135 | - Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) | |
| 136 | - BR_PROPERTY(QString, model, "") | |
| 137 | - BR_PROPERTY(QString, weights, "") | |
| 138 | - BR_PROPERTY(int, gpuDevice, -1) | |
| 139 | - | |
| 140 | - Resource<CaffeNet> caffeResource; | |
| 141 | - | |
| 142 | - void init() | |
| 123 | + void project(const Template &src, Template &dst) const | |
| 143 | 124 | { |
| 144 | - caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); | |
| 145 | - } | |
| 125 | + Template caffeOutput; | |
| 126 | + CaffeBaseTransform::project(src, caffeOutput); | |
| 146 | 127 | |
| 147 | - bool timeVarying() const | |
| 148 | - { | |
| 149 | - return gpuDevice < 0 ? false : true; | |
| 128 | + dst.file = src.file; | |
| 129 | + dst.append(caffeOutput); | |
| 150 | 130 | } |
| 131 | +}; | |
| 151 | 132 | |
| 152 | - void project(const Template &src, Template &dst) const | |
| 153 | - { | |
| 154 | - CaffeNet *net = caffeResource.acquire(); | |
| 155 | - | |
| 156 | - if (net->layers()[0]->layer_param().type() != "MemoryData") | |
| 157 | - qFatal("Integrating OpenBr with caffe requires the first layer in the network to be a MemoryDataLayer"); | |
| 133 | +BR_REGISTER(Transform, CaffeFVTransform) | |
| 158 | 134 | |
| 159 | - MemoryDataLayer<float> *dataLayer = static_cast<MemoryDataLayer<float> *>(net->layers()[0].get()); | |
| 135 | +/*! | |
| 136 | + * \brief This transform treats the output of the network as a score distribution for an arbitrary number of classes. | |
| 137 | + * The maximum score and location for each input image is determined and stored in the template metadata. The template | |
| 138 | + * matrix is not changed. If the network batch size is > 1, the results are stored as lists in the dst template's metadata | |
| 139 | + * using the keys "Labels" and "Confidences" respectively. The length of these lists is equivalent to the provided batch size. | |
| 140 | + * If batch size == 1, the results are stored as an int and a float using the keys "Label" and "Confidence" respectively. |
| 141 | + * \author Jordan Cheney \cite JordanCheney |
| 142 | + */ | |
| 143 | +class CaffeClassifierTransform : public CaffeBaseTransform | |
| 144 | +{ | |
| 145 | + Q_OBJECT | |
| 160 | 146 | |
| 161 | - if (src.size() != dataLayer->batch_size()) | |
| 162 | - qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); | |
| 147 | + void project(const Template &src, Template &dst) const | |
| 148 | + { | |
| 149 | + Template caffeOutput; | |
| 150 | + CaffeBaseTransform::project(src, caffeOutput); | |
| 163 | 151 | |
| 164 | 152 | dst = src; |
| 165 | 153 | |
| 166 | - dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0)); | |
| 167 | - | |
| 168 | - Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). | |
| 169 | - // index 1 is the ouput of the final layer, which is what we want | |
| 170 | - | |
| 171 | 154 | QList<int> labels; QList<float> confidences; |
| 172 | 155 | |
| 173 | - int dimFeatures = output->count() / dataLayer->batch_size(); | |
| 174 | - for (int n = 0; n < dataLayer->batch_size(); n++) { | |
| 175 | - const float *data = output->cpu_data() + output->offset(n); | |
| 176 | - | |
| 177 | - int maxIdx = -1; float maxVal = -std::numeric_limits<float>::max(); | |
| 178 | - for (int d = 0; d < dimFeatures; d++) { | |
| 179 | - if (data[d] > maxVal) { | |
| 180 | - maxVal = data[d]; | |
| 181 | - maxIdx = d; | |
| 182 | - } | |
| 183 | - } | |
| 184 | - | |
| 185 | - labels.append(maxIdx); | |
| 156 | + foreach (const Mat &m, caffeOutput) { | |
| 157 | + double maxVal; int maxLoc; | |
| 158 | + minMaxIdx(m, NULL, &maxVal, NULL, &maxLoc); | |
| 159 | + | |
| 160 | + labels.append(maxLoc); | |
| 186 | 161 | confidences.append(maxVal); |
| 187 | 162 | } |
| 188 | 163 | |
| ... | ... | @@ -193,8 +168,6 @@ class CaffeClassifierTransform : public UntrainableMetaTransform |
| 193 | 168 | dst.file.setList<int>("Labels", labels); |
| 194 | 169 | dst.file.setList<float>("Confidences", confidences); |
| 195 | 170 | } |
| 196 | - | |
| 197 | - caffeResource.release(net); | |
| 198 | 171 | } |
| 199 | 172 | }; |
| 200 | 173 | ... | ... |