diff --git a/.gitignore b/.gitignore index 25abe9f..4727cfa 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,6 @@ scripts/results ### vim ### *.swp + +### autogenerated sigsets ### +data/INRIAPerson/sigset diff --git a/data/INRIAPerson/README.md b/data/INRIAPerson/README.md new file mode 100644 index 0000000..61d79f6 --- /dev/null +++ b/data/INRIAPerson/README.md @@ -0,0 +1,3 @@ +## INRIA Person Database +Dataset for human detection in several formats: original positive and negative images with bounding box annotations and normalized positive images (just the bounding box). +* [Website](http://pascal.inrialpes.fr/data/human/) diff --git a/openbr/core/bee.cpp b/openbr/core/bee.cpp index e23c9b2..d9b6e46 100644 --- a/openbr/core/bee.cpp +++ b/openbr/core/bee.cpp @@ -75,6 +75,21 @@ FileList BEE::readSigset(const File &sigset, bool ignoreMetadata) else if (!ignoreMetadata) file.set(key, value); } + // add bounding boxes, if they exist (will be child elements of ) + if (fileNode.hasChildNodes()) { + QList rects; + QDomNodeList bboxes = fileNode.childNodes(); + for (int i=0; i | List of unnamed rects * Age | float | Age used for demographic filtering * Gender | QString | Subject gender + * Train | bool | The data is for training, as opposed to enrollment * _* | * | Reserved for internal use */ struct BR_EXPORT File diff --git a/openbr/plugins/slidingwindow.cpp b/openbr/plugins/slidingwindow.cpp new file mode 100644 index 0000000..27a37a2 --- /dev/null +++ b/openbr/plugins/slidingwindow.cpp @@ -0,0 +1,105 @@ +#include "openbr_internal.h" +#include "openbr/core/opencvutils.h" +#include "openbr/core/common.h" + +using namespace cv; + +namespace br +{ + +/*! + * \ingroup transforms + * \brief Applies a transform to a sliding window. + * Discards negative detections. + * \author Austin Blanton \cite imaus10 + */ +class SlidingWindowTransform : public Transform +{ + Q_OBJECT + Q_PROPERTY(br::Transform *transform READ get_transform WRITE set_transform RESET reset_transform STORED false) + Q_PROPERTY(int minSize READ get_minSize WRITE set_minSize RESET reset_minSize STORED false) + Q_PROPERTY(double scaleFactor READ get_scaleFactor WRITE set_scaleFactor RESET reset_scaleFactor STORED false) + Q_PROPERTY(double stepSize READ get_stepSize WRITE set_stepSize RESET reset_stepSize STORED false) + Q_PROPERTY(bool takeLargestScale READ get_takeLargestScale WRITE set_takeLargestScale RESET reset_takeLargestScale STORED false) + Q_PROPERTY(bool negSamples READ get_negSamples WRITE set_negSamples RESET reset_negSamples STORED false) + Q_PROPERTY(int negToPosRatio READ get_negToPosRatio WRITE set_negToPosRatio RESET reset_negToPosRatio STORED false) + Q_PROPERTY(double maxOverlap READ get_maxOverlap WRITE set_maxOverlap RESET reset_maxOverlap STORED false) + BR_PROPERTY(br::Transform *, transform, NULL) + BR_PROPERTY(int, minSize, 8) + BR_PROPERTY(double, scaleFactor, 0.75) + BR_PROPERTY(double, stepSize, 1) + BR_PROPERTY(bool, takeLargestScale, true) + BR_PROPERTY(bool, negSamples, true) + BR_PROPERTY(int, negToPosRatio, 1) + BR_PROPERTY(double, maxOverlap, 0) + +public: + SlidingWindowTransform() : Transform(false, true) {} + +private: + void train(const TemplateList &data) + { + if (transform->trainable) { + TemplateList full; + foreach (const Template &tmpl, data) { + foreach (const Rect &rect, OpenCVUtils::toRects(tmpl.file.rects())) { + Template pos(tmpl.file, Mat(tmpl, rect)); + full += pos; + + // add random negative samples + if (negSamples) { + Mat m = tmpl.m(); + int sample = 0; + while (sample < negToPosRatio) { + int x = Common::RandSample(1, m.cols)[0]; + int y = Common::RandSample(1, m.rows)[0]; + int maxWidth = m.cols - x, maxHeight = m.rows - y; + int maxSize = std::min(maxWidth, maxHeight); + int size = (maxSize <= minSize ? maxSize : Common::RandSample(1, maxSize, minSize)[0]); + Rect negRect(x, y, size, size); + Rect intersect = negRect & rect; + if (intersect.area() > maxOverlap*rect.area()) + continue; + Template neg(tmpl.file, Mat(tmpl, negRect)); + neg.file.set("Label", QString("neg")); + full += neg; + sample++; + } + } + } + } + transform->train(full); + } + } + + void project(const Template &src, Template &dst) const + { + dst = src; + // no need to slide a window over ground truth data + if (src.file.getBool("Train", false)) return; + + dst.file.clearRects(); + int rows = src.m().rows, cols = src.m().cols; + for (double size=std::min(rows, cols); size>=minSize; size*=scaleFactor) { + for (double y=0; y+sizeproject(windowMat, detect); + // the result will be in the Label + if (detect.file.get(QString("Label")) == "pos") { + dst.file.appendRect(OpenCVUtils::fromRect(window)); + if (takeLargestScale) return; + } + } + } + } + } +}; + +BR_REGISTER(Transform, SlidingWindowTransform) + +} // namespace br + +#include "slidingwindow.moc" diff --git a/scripts/downloadDatasets.sh b/scripts/downloadDatasets.sh index 37daff3..734573c 100755 --- a/scripts/downloadDatasets.sh +++ b/scripts/downloadDatasets.sh @@ -35,6 +35,24 @@ if [ ! -d ../data/BioID/img ]; then rm *.eye description.txt BioID-FaceDatabase-V1.2.zip fi +# INRIA person +if [ ! -d ../data/INRIAPerson/img ]; then + echo "Downloading INRIA person dataset..." + if hash curl 2>/dev/null; then + curl -OL http://pascal.inrialpes.fr/data/human/INRIAPerson.tar + else + wget http://pascal.inrialpes.fr/data/human/INRIAPerson.tar + fi + tar -xf INRIAPerson.tar + mkdir ../data/INRIAPerson/img ../data/INRIAPerson/sigset + ./writeINRIAPersonSigset.sh Train > ../data/INRIAPerson/sigset/train.xml + ./writeINRIAPersonSigset.sh Test > ../data/INRIAPerson/sigset/test.xml + ./writeINRIAPersonSigset.sh train_64x128_H96 > ../data/INRIAPerson/sigset/train_normalized.xml + ./writeINRIAPersonSigset.sh test_64x128_H96 > ../data/INRIAPerson/sigset/test_normalized.xml + mv INRIAPerson/* ../data/INRIAPerson/img + rm -r INRIAPerson* +fi + # KTH if [ ! -d ../data/KTH/vid ]; then echo "Downloading KTH..." diff --git a/scripts/writeINRIAPersonSigset.sh b/scripts/writeINRIAPersonSigset.sh new file mode 100755 index 0000000..b3aac66 --- /dev/null +++ b/scripts/writeINRIAPersonSigset.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# prints out a element from rectangle coordinates +# (from the ViPER standard: http://viper-toolkit.sourceforge.net/docs/file/) +printBBox() +{ + width=$(($3-$1)) + height=$(($4-$2)) + echo -e "\t\t\t" +} +# export printBBox so xargs can call it using bash -c below +export -f printBBox +SEDREGEX='s/.*(\([0-9]*\), \([0-9]*\)) - (\([0-9]*\), \([0-9]*\))/printBBox \1 \2 \3 \4/' + +echo '' +echo '' + +# print out the positive image sigs +for fullpath in INRIAPerson/$1/pos/*; do + # get just the filename, minus the path + filename=$(basename "$fullpath") + echo -e "\t" + + # if this folder has annotations, add bounding boxes + echo -en "\t\t" + annotation="INRIAPerson/$1/annotations/${filename%.*}.txt" + grep 'Bounding box' $annotation | sed "$SEDREGEX" | xargs -n 5 bash -c 'printBBox $@' + echo -e "\t\t" + # otherwise, just end the presentation + else + echo " />" + fi + + echo -e '\t' +done + +# print out the negative image sigs +for fullpath in INRIAPerson/$1/neg/*; do + filename=$(basename "$fullpath") + echo -e "\t" + echo -e "\t\t" + echo -e '\t' +done + +echo ''