Commit 66a1fdbf7af2b9867dbed9df2f67eac95eb3deaa
1 parent
205b60f8
Refactor of caffe transforms
Showing
1 changed file
with
38 additions
and
65 deletions
openbr/plugins/classification/caffe.cpp
| @@ -52,21 +52,21 @@ private: | @@ -52,21 +52,21 @@ private: | ||
| 52 | }; | 52 | }; |
| 53 | 53 | ||
| 54 | /*! | 54 | /*! |
| 55 | - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. | ||
| 56 | - * The output of the Caffe network is treated as a feature vector and is stored in dst. Batch processing is possible. For a given batch size set in | ||
| 57 | - * the memory data layer, src is expected to have an equal number of mats. Dst will always have the same size (number of mats) as src and the ordering | ||
| 58 | - * will be preserved, so dst[1] is the output of src[1] after it passes through the neural net. | ||
| 59 | - * \author Jordan Cheney \cite jcheney | 55 | + * \brief The base transform for wrapping the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. |
| 56 | + * The output of the forward pass of the Caffe network is stored in dst as a list of matrices, the size of which is equal to the batch_size of the network. | ||
| 57 | + * Children of this transform should process dst to achieve specific use cases. | ||
| 58 | + * \author Jordan Cheney \cite JordanCheney | ||
| 60 | * \br_property QString model path to prototxt model file | 59 | * \br_property QString model path to prototxt model file |
| 61 | * \br_property QString weights path to caffemodel file | 60 | * \br_property QString weights path to caffemodel file |
| 62 | * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. | 61 | * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. |
| 63 | * \br_link Caffe Integration Tutorial ../tutorials.md#caffe | 62 | * \br_link Caffe Integration Tutorial ../tutorials.md#caffe |
| 64 | * \br_link Caffe website http://caffe.berkeleyvision.org | 63 | * \br_link Caffe website http://caffe.berkeleyvision.org |
| 65 | */ | 64 | */ |
| 66 | -class CaffeFVTransform : public UntrainableMetaTransform | 65 | +class CaffeBaseTransform : public UntrainableMetaTransform |
| 67 | { | 66 | { |
| 68 | Q_OBJECT | 67 | Q_OBJECT |
| 69 | 68 | ||
| 69 | +public: | ||
| 70 | Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) | 70 | Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) |
| 71 | Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) | 71 | Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) |
| 72 | Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) | 72 | Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) |
| @@ -76,6 +76,7 @@ class CaffeFVTransform : public UntrainableMetaTransform | @@ -76,6 +76,7 @@ class CaffeFVTransform : public UntrainableMetaTransform | ||
| 76 | 76 | ||
| 77 | Resource<CaffeNet> caffeResource; | 77 | Resource<CaffeNet> caffeResource; |
| 78 | 78 | ||
| 79 | +protected: | ||
| 79 | void init() | 80 | void init() |
| 80 | { | 81 | { |
| 81 | caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); | 82 | caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); |
| @@ -98,8 +99,6 @@ class CaffeFVTransform : public UntrainableMetaTransform | @@ -98,8 +99,6 @@ class CaffeFVTransform : public UntrainableMetaTransform | ||
| 98 | if (src.size() != dataLayer->batch_size()) | 99 | if (src.size() != dataLayer->batch_size()) |
| 99 | qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); | 100 | qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); |
| 100 | 101 | ||
| 101 | - dst.file = src.file; | ||
| 102 | - | ||
| 103 | dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0)); | 102 | dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0)); |
| 104 | 103 | ||
| 105 | Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). | 104 | Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). |
| @@ -112,77 +111,53 @@ class CaffeFVTransform : public UntrainableMetaTransform | @@ -112,77 +111,53 @@ class CaffeFVTransform : public UntrainableMetaTransform | ||
| 112 | } | 111 | } |
| 113 | }; | 112 | }; |
| 114 | 113 | ||
| 115 | -BR_REGISTER(Transform, CaffeFVTransform) | ||
| 116 | - | ||
| 117 | /*! | 114 | /*! |
| 118 | - * \brief A transform that wraps the Caffe deep learning library. This transform expects the input to a given Caffe model to be a MemoryDataLayer. | ||
| 119 | - * The output of the Caffe network is treated as a classifier with one node per class. Dst is set equal to src and a list of labels and confidences | ||
| 120 | - * is stored in the metadata using the tags "Labels" and "Confidences". The size of the lists is equal to the batch size of the network. If the batch | ||
| 121 | - * size is 1 the lists are converted to an integer and floating point value respectively and are stored in metadata using "Label" and "Confidence" instead. | ||
| 122 | - * \author Jordan Cheney \cite jcheney | ||
| 123 | - * \br_property QString model path to prototxt model file | ||
| 124 | - * \br_property QString weights path to caffemodel file | ||
| 125 | - * \br_property int gpuDevice ID of GPU to use. gpuDevice < 0 runs on the CPU only. | ||
| 126 | - * \br_link Caffe Integration Tutorial ../tutorials.md#caffe | ||
| 127 | - * \br_link Caffe website http://caffe.berkeleyvision.org | 115 | + * \brief This transform treats the output of the network as a feature vector and appends it unchanged to dst. Dst will have |
| 116 | + * length equal to the batch size of the network. | ||
| 117 | + * \author Jordan Cheney \cite JordanCheney | ||
| 128 | */ | 118 | */ |
| 129 | -class CaffeClassifierTransform : public UntrainableMetaTransform | 119 | +class CaffeFVTransform : public CaffeBaseTransform |
| 130 | { | 120 | { |
| 131 | Q_OBJECT | 121 | Q_OBJECT |
| 132 | 122 | ||
| 133 | - Q_PROPERTY(QString model READ get_model WRITE set_model RESET reset_model STORED false) | ||
| 134 | - Q_PROPERTY(QString weights READ get_weights WRITE set_weights RESET reset_weights STORED false) | ||
| 135 | - Q_PROPERTY(int gpuDevice READ get_gpuDevice WRITE set_gpuDevice RESET reset_gpuDevice STORED false) | ||
| 136 | - BR_PROPERTY(QString, model, "") | ||
| 137 | - BR_PROPERTY(QString, weights, "") | ||
| 138 | - BR_PROPERTY(int, gpuDevice, -1) | ||
| 139 | - | ||
| 140 | - Resource<CaffeNet> caffeResource; | ||
| 141 | - | ||
| 142 | - void init() | 123 | + void project(const Template &src, Template &dst) const |
| 143 | { | 124 | { |
| 144 | - caffeResource.setResourceMaker(new CaffeResourceMaker(model, weights, gpuDevice)); | ||
| 145 | - } | 125 | + Template caffeOutput; |
| 126 | + CaffeBaseTransform::project(src, caffeOutput); | ||
| 146 | 127 | ||
| 147 | - bool timeVarying() const | ||
| 148 | - { | ||
| 149 | - return gpuDevice < 0 ? false : true; | 128 | + dst.file = src.file; |
| 129 | + dst.append(caffeOutput); | ||
| 150 | } | 130 | } |
| 131 | +}; | ||
| 151 | 132 | ||
| 152 | - void project(const Template &src, Template &dst) const | ||
| 153 | - { | ||
| 154 | - CaffeNet *net = caffeResource.acquire(); | ||
| 155 | - | ||
| 156 | - if (net->layers()[0]->layer_param().type() != "MemoryData") | ||
| 157 | - qFatal("Integrating OpenBr with caffe requires the first layer in the network to be a MemoryDataLayer"); | 133 | +BR_REGISTER(Transform, CaffeFVTransform) |
| 158 | 134 | ||
| 159 | - MemoryDataLayer<float> *dataLayer = static_cast<MemoryDataLayer<float> *>(net->layers()[0].get()); | 135 | +/*! |
| 136 | + * \brief This transform treats the output of the network as a score distribution for an arbitrary number of classes. | ||
| 137 | + * The maximum score and location for each input image is determined and stored in the template metadata. The template | ||
| 138 | + * matrix is not changed. If the network batch size is > 1, the results are stored as lists in the dst template's metadata | ||
| 139 | + * using the keys "Labels" and "Confidences" respectively. The length of these lists is equivalent to the provided batch size. | ||
| 140 | + * If batch size == 1, the results are stored as an int and a float using the keys "Label" and "Confidence" respectively. | ||
| 141 | + * \author Jordan Cheney \cite jcheney | ||
| 142 | + */ | ||
| 143 | +class CaffeClassifierTransform : public CaffeBaseTransform | ||
| 144 | +{ | ||
| 145 | + Q_OBJECT | ||
| 160 | 146 | ||
| 161 | - if (src.size() != dataLayer->batch_size()) | ||
| 162 | - qFatal("src should have %d (batch size) mats. It has %d mats.", dataLayer->batch_size(), src.size()); | 147 | + void project(const Template &src, Template &dst) const |
| 148 | + { | ||
| 149 | + Template caffeOutput; | ||
| 150 | + CaffeBaseTransform::project(src, caffeOutput); | ||
| 163 | 151 | ||
| 164 | dst = src; | 152 | dst = src; |
| 165 | 153 | ||
| 166 | - dataLayer->AddMatVector(src.toVector().toStdVector(), std::vector<int>(src.size(), 0)); | ||
| 167 | - | ||
| 168 | - Blob<float> *output = net->ForwardPrefilled()[1]; // index 0 is the labels from the data layer (in this case the 0 array we passed in above). | ||
| 169 | - // index 1 is the ouput of the final layer, which is what we want | ||
| 170 | - | ||
| 171 | QList<int> labels; QList<float> confidences; | 154 | QList<int> labels; QList<float> confidences; |
| 172 | 155 | ||
| 173 | - int dimFeatures = output->count() / dataLayer->batch_size(); | ||
| 174 | - for (int n = 0; n < dataLayer->batch_size(); n++) { | ||
| 175 | - const float *data = output->cpu_data() + output->offset(n); | ||
| 176 | - | ||
| 177 | - int maxIdx = -1; float maxVal = -std::numeric_limits<float>::max(); | ||
| 178 | - for (int d = 0; d < dimFeatures; d++) { | ||
| 179 | - if (data[d] > maxVal) { | ||
| 180 | - maxVal = data[d]; | ||
| 181 | - maxIdx = d; | ||
| 182 | - } | ||
| 183 | - } | ||
| 184 | - | ||
| 185 | - labels.append(maxIdx); | 156 | + foreach (const Mat &m, caffeOutput) { |
| 157 | + double maxVal; int maxLoc; | ||
| 158 | + minMaxIdx(m, NULL, &maxVal, NULL, &maxLoc); | ||
| 159 | + | ||
| 160 | + labels.append(maxLoc); | ||
| 186 | confidences.append(maxVal); | 161 | confidences.append(maxVal); |
| 187 | } | 162 | } |
| 188 | 163 | ||
| @@ -193,8 +168,6 @@ class CaffeClassifierTransform : public UntrainableMetaTransform | @@ -193,8 +168,6 @@ class CaffeClassifierTransform : public UntrainableMetaTransform | ||
| 193 | dst.file.setList<int>("Labels", labels); | 168 | dst.file.setList<int>("Labels", labels); |
| 194 | dst.file.setList<float>("Confidences", confidences); | 169 | dst.file.setList<float>("Confidences", confidences); |
| 195 | } | 170 | } |
| 196 | - | ||
| 197 | - caffeResource.release(net); | ||
| 198 | } | 171 | } |
| 199 | }; | 172 | }; |
| 200 | 173 |