diff --git a/openbr/core/cluster.cpp b/openbr/core/cluster.cpp index 00bfa0d..a16a3e0 100644 --- a/openbr/core/cluster.cpp +++ b/openbr/core/cluster.cpp @@ -82,7 +82,7 @@ float normalizedROD(const Neighborhood &neighborhood, int a, int b) return 1.f * (distanceA + distanceB) / std::min(indexA+1, indexB+1); } -Neighborhood getNeighborhood(const QList &simmats) +Neighborhood br::knnFromSimmat(const QList &simmats, int k) { Neighborhood neighborhood; @@ -130,36 +130,164 @@ Neighborhood getNeighborhood(const QList &simmats) // Keep the top matches for (int j=0; j::infinity()) - neighbor.second = 0; - else if (neighbor.second == std::numeric_limits::infinity()) - neighbor.second = 1; - else - neighbor.second = (neighbor.second - globalMin) / (globalMax - globalMin); + return neighborhood; +} + +// generate k-NN graph from pre-computed similarity matrices +Neighborhood br::knnFromSimmat(const QStringList &simmats, int k) +{ + QList mats; + foreach (const QString &simmat, simmats) { + QScopedPointer format(br::Factory::make(simmat)); + br::Template t = format->read(); + mats.append(t); + } + return knnFromSimmat(mats, k); +} + +TemplateList knnFromGallery(const QString & galleryName, bool inMemory, const QString & outFile, int k) +{ + QSharedPointer comparison = Transform::fromComparison(Globals->algorithm); + + Gallery *tempG = Gallery::make(galleryName); + qint64 total = tempG->totalSize(); + delete tempG; + comparison->setPropertyRecursive("galleryName", galleryName+"[dropMetadata=true]"); + + bool multiProcess = Globals->file.getBool("multiProcess", false); + if (multiProcess) + comparison = QSharedPointer (br::wrapTransform(comparison.data(), "ProcessWrapper")); + + QScopedPointer collect(Transform::make("CollectNN+ProgressCounter+Discard", NULL)); + collect->setPropertyRecursive("totalProgress", total); + collect->setPropertyRecursive("keep", k); + + QList tforms; + tforms.append(comparison.data()); + tforms.append(collect.data()); + + QScopedPointer 
compareCollect(br::pipeTransforms(tforms)); + + QSharedPointer<Transform> projector; + if (inMemory) + projector = QSharedPointer<Transform> (br::wrapTransform(compareCollect.data(), "Stream(readMode=StreamGallery, endPoint=Discard)")); + else + projector = QSharedPointer<Transform> (br::wrapTransform(compareCollect.data(), "Stream(readMode=StreamGallery, endPoint=LogNN("+outFile+")+DiscardTemplates)")); + + TemplateList input; + input.append(Template(galleryName)); + TemplateList output; + + projector->init(); + projector->projectUpdate(input, output); + + return output; +} + +// Generate k-NN graph from a gallery, using the current algorithm for comparison. +// Direct serialization to file system, k-NN graph is not retained in memory +void br::knnFromGallery(const QString &galleryName, const QString &outFile, int k) +{ + knnFromGallery(galleryName, false, outFile, k); +} + +// In-memory graph construction: one Neighbors list is collected per template in the result +Neighborhood br::knnFromGallery(const QString &gallery, int k) +{ + // Nearest neighbor data is currently stored as template metadata (key "neighbors"), so retrieve it + TemplateList res = knnFromGallery(gallery, true, "", k); + + Neighborhood neighborhood; + foreach (const Template &t, res) { + Neighbors neighbors = t.file.get<Neighbors>("neighbors"); + neighborhood.append(neighbors); + } + + return neighborhood; +} + +Neighborhood br::loadkNN(const QString &infile) +{ + Neighborhood neighborhood; + QFile file(infile); + bool success = file.open(QFile::ReadOnly); + if (!success) qFatal("Failed to open %s for reading.", qPrintable(infile)); + QStringList lines = QString(file.readAll()).split("\n"); + file.close(); + int min_idx = INT_MAX; + int max_idx = -1; + int count = 0; + + foreach (const QString &line, lines) { + Neighbors neighbors; + count++; + if (line.trimmed().isEmpty()) { + neighborhood.append(neighbors); + continue; + } + bool off = false; + QStringList list = line.trimmed().split(",", QString::SkipEmptyParts); + foreach (const QString &item, list) { + QStringList parts = item.trimmed().split(":",
QString::SkipEmptyParts); + bool intOK = true; + bool floatOK = true; + int idx = parts.value(0).toInt(&intOK); // value() yields an empty string for a missing field, so the conversion reports failure + float score = parts.value(1).toFloat(&floatOK); + + if (idx > max_idx) + max_idx = idx; + if (idx < min_idx) + min_idx = idx; + if (idx >= lines.size()) { + off = true; + continue; + } + neighbors.append(qMakePair(idx, score)); + + + if (!intOK || !floatOK) // reject the token when either half of "index:score" failed to parse + qFatal("Failed to parse word: %s", qPrintable(item)); } + neighborhood.append(neighbors); } } return neighborhood; } -// Zhu et al. "A Rank-Order Distance based Clustering Algorithm for Face Tagging", CVPR 2011 -br::Clusters br::ClusterGallery(const QList &simmats, float aggressiveness) +bool br::savekNN(const Neighborhood &neighborhood, const QString &outfile) +{ + QFile file(outfile); + bool success = file.open(QFile::WriteOnly); + if (!success) qFatal("Failed to open %s for writing.", qPrintable(outfile)); + + foreach (Neighbors neighbors, neighborhood) { + QString aLine; + if (!neighbors.empty()) + { + aLine.append(QString::number(neighbors[0].first)+":"+QString::number(neighbors[0].second)); + for (int i=1; i < neighbors.size();i++) { + aLine.append(","+QString::number(neighbors[i].first)+":"+QString::number(neighbors[i].second)); + } + } + aLine += "\n"; + file.write(qPrintable(aLine)); + } + file.close(); + return true; +} + + +// Rank-order clustering on a pre-computed k-NN graph +Clusters br::ClusterGraph(Neighborhood neighborhood, float aggressiveness, const QString &csv) { - qDebug("Clustering %d simmat(s), aggressiveness %f", simmats.size(), aggressiveness); - // Read in gallery parts, keeping top neighbors of each template - Neighborhood neighborhood = getNeighborhood(simmats); const int cutoff = neighborhood.first().size(); const float threshold = 3*cutoff/4 * aggressiveness/5; @@ -235,10 +363,31 @@ br::Clusters br::ClusterGallery(const QList &simmats, float aggressiven clusters = newClusters; neighborhood = newNeighborhood; } + + if (!csv.isEmpty()) + WriteClusters(clusters, csv); + return clusters; } -br::Clusters
br::ClusterGallery(const QStringList &simmats, float aggressiveness, const QString &csv) +Clusters br::ClusterGraph(const QString & knnName, float aggressiveness, const QString &csv) +{ + Neighborhood neighbors = loadkNN(knnName); + return ClusterGraph(neighbors, aggressiveness, csv); +} + +// Zhu et al. "A Rank-Order Distance based Clustering Algorithm for Face Tagging", CVPR 2011 +br::Clusters br::ClusterSimmat(const QList &simmats, float aggressiveness, const QString &csv) +{ + qDebug("Clustering %d simmat(s), aggressiveness %f", simmats.size(), aggressiveness); + + // Read in gallery parts, keeping top neighbors of each template + Neighborhood neighborhood = knnFromSimmat(simmats); + + return ClusterGraph(neighborhood, aggressiveness, csv); +} + +br::Clusters br::ClusterSimmat(const QStringList &simmats, float aggressiveness, const QString &csv) { QList mats; foreach (const QString &simmat, simmats) { @@ -247,11 +396,7 @@ br::Clusters br::ClusterGallery(const QStringList &simmats, float aggressiveness mats.append(t); } - Clusters clusters = ClusterGallery(mats, aggressiveness); - - // Save clusters - if (!csv.isEmpty()) - WriteClusters(clusters, csv); + Clusters clusters = ClusterSimmat(mats, aggressiveness, csv); return clusters; } diff --git a/openbr/core/cluster.h b/openbr/core/cluster.h index c1e49ec..3def28d 100644 --- a/openbr/core/cluster.h +++ b/openbr/core/cluster.h @@ -22,16 +22,45 @@ #include #include #include +#include namespace br { typedef QList Cluster; // List of indices into galleries typedef QVector Clusters; - Clusters ClusterGallery(const QList &simmats, float aggressiveness); - Clusters ClusterGallery(const QStringList &simmats, float aggressiveness, const QString &csv); + // generate k-NN graph from pre-computed similarity matrices + Neighborhood knnFromSimmat(const QStringList &simmats, int k = 20); + Neighborhood knnFromSimmat(const QList &simmats, int k = 20); + + // Generate k-NN graph from a gallery, using the current algorithm for 
comparison. + // Direct serialization to file system; the graph is not returned. + void knnFromGallery(const QString &galleryName, const QString & outFile, int k = 20); + // In-memory graph computation. + Neighborhood knnFromGallery(const QString &gallery, int k = 20); + + // Load k-NN graph from a file with the following ASCII format: + // One line per sample, each line lists the top k neighbors for the sample as follows: + // index1:score1,index2:score2,...,indexk:scorek + Neighborhood loadkNN(const QString &fname); + + // Save k-NN graph to file, using the same format that loadkNN reads. + bool savekNN(const Neighborhood &neighborhood, const QString &outfile); + + // Rank-order clustering on a pre-computed k-NN graph; clusters are also written to csv when it is non-empty. + Clusters ClusterGraph(Neighborhood neighbors, float aggressiveness, const QString &csv = ""); + Clusters ClusterGraph(const QString & knnName, float aggressiveness, const QString &csv = ""); + + // Given a similarity matrix, compute the k-NN graph, then perform rank-order clustering. + Clusters ClusterSimmat(const QList &simmats, float aggressiveness, const QString &csv = ""); + Clusters ClusterSimmat(const QStringList &simmats, float aggressiveness, const QString &csv = ""); + + // Evaluate clustering results in csv, reading ground truth data from gallery input, using truth_property + // as the key for ground truth labels. void EvalClustering(const QString &csv, const QString &input, QString truth_property); + // Read/write clusters from a text format, 1 line = 1 cluster, each line contains comma separated list + // of assigned indices.
Clusters ReadClusters(const QString &csv); void WriteClusters(const Clusters &clusters, const QString &csv); } diff --git a/openbr/openbr.cpp b/openbr/openbr.cpp index 3162371..fd4814c 100644 --- a/openbr/openbr.cpp +++ b/openbr/openbr.cpp @@ -59,7 +59,7 @@ void br_cat(int num_input_galleries, const char *input_galleries[], const char * void br_cluster(int num_simmats, const char *simmats[], float aggressiveness, const char *csv) { - ClusterGallery(QtUtils::toStringList(num_simmats, simmats), aggressiveness, csv); + ClusterSimmat(QtUtils::toStringList(num_simmats, simmats), aggressiveness, csv); } void br_combine_masks(int num_input_masks, const char *input_masks[], const char *output_mask, const char *method)