diff --git a/openbr/core/core.cpp b/openbr/core/core.cpp index 86586ec..28b213d 100644 --- a/openbr/core/core.cpp +++ b/openbr/core/core.cpp @@ -263,33 +263,42 @@ struct AlgorithmCore TemplateList t = i->read(); - QList duplicates; + Output *o = Output::make(QString("buffer.tail[selfSimilar,threshold=%1,atLeast=0]").arg(QString::number(threshold)),inputFiles,inputFiles); - Globals->totalSteps = inputFiles.size(); - Globals->currentStep = 0; + // Compare to global tail output + distance->compare(t,t,o); - for (int i=0; icompare(t[i], t[j]); - if (score >= threshold) { - duplicates.append(i); - break; - } - } - Globals->currentStep++; - Globals->printStatus(); - } + delete o; + + QString buffer(Globals->buffer); + + QStringList tail = buffer.split("\n"); + + // Remove header + tail.removeFirst(); + + QStringList toRemove; + foreach(const QString &s, tail) + toRemove.append(s.split(',').at(1)); + + QSet duplicates = QSet::fromList(toRemove); + + QStringList fileNames = inputFiles.names(); + + QList indices; + foreach(const QString &d, duplicates) + indices.append(fileNames.indexOf(d)); - std::sort(duplicates.begin(),duplicates.end(),std::greater()); + std::sort(indices.begin(),indices.end(),std::greater()); - qDebug("\n%d duplicates removed.", duplicates.size()); + qDebug("\n%d duplicates removed.", indices.size()); - for (int i=0; i o(Gallery::make(outputGallery)); + QScopedPointer og(Gallery::make(outputGallery)); - o->writeBlock(inputFiles); + og->writeBlock(inputFiles); } void compare(File targetGallery, File queryGallery, File output) diff --git a/openbr/openbr.cpp b/openbr/openbr.cpp index 58bbeb7..7e22278 100644 --- a/openbr/openbr.cpp +++ b/openbr/openbr.cpp @@ -471,7 +471,7 @@ void br_close_gallery(br_gallery gallery) delete gal; } -void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold) +void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold) { - br::Deduplicate(inputGallery, outputGallery, threshold); + br::Deduplicate(input_gallery, output_gallery, threshold); } diff --git a/openbr/openbr.h b/openbr/openbr.h index 52437ed..da3fd13 100644 --- a/openbr/openbr.h +++ b/openbr/openbr.h @@ -67,10 +67,15 @@ BR_EXPORT const char *br_about(); BR_EXPORT void br_cat(int num_input_galleries, const char *input_galleries[], const char *output_gallery); /*! - * \brief Wraps br::Deduplicate() + * \brief Removes duplicate templates in a gallery. + * \param input_gallery Gallery to be deduplicated. + * \param output_gallery Deduplicated gallery. + * \param threshold Comparisons with a match score >= this value are designated to be duplicates. + * \note If a gallery contains n duplicates, the first n-1 duplicates in the gallery will be removed and the nth will be kept. + * \note Users are encouraged to use binary gallery formats as the entire gallery is read into memory in one call to Gallery::read. */ -BR_EXPORT void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold); +BR_EXPORT void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold); /*! * \brief Clusters one or more similarity matrices into a list of subjects. diff --git a/openbr/plugins/output.cpp b/openbr/plugins/output.cpp index 1ad0995..512a7bb 100644 --- a/openbr/plugins/output.cpp +++ b/openbr/plugins/output.cpp @@ -528,7 +528,7 @@ class tailOutput : public Output } else { // General case for (int k=0; k atLeast) && (comparisons.last().value < threshold)) comparisons.removeLast(); + lastValue = comparisons.last().value; comparisonsLock.unlock(); }