Commit e1b598362b071deded00cafd830db4d34d1ea3dc
1 parent: d5486fb5
Updated deduplicate to use tailOutput
Showing 4 changed files with 40 additions and 25 deletions
openbr/core/core.cpp
| @@ -263,33 +263,42 @@ struct AlgorithmCore | @@ -263,33 +263,42 @@ struct AlgorithmCore | ||
| 263 | 263 | ||
| 264 | TemplateList t = i->read(); | 264 | TemplateList t = i->read(); |
| 265 | 265 | ||
| 266 | - QList<int> duplicates; | 266 | + Output *o = Output::make(QString("buffer.tail[selfSimilar,threshold=%1,atLeast=0]").arg(QString::number(threshold)),inputFiles,inputFiles); |
| 267 | 267 | ||
| 268 | - Globals->totalSteps = inputFiles.size(); | ||
| 269 | - Globals->currentStep = 0; | 268 | + // Compare to global tail output |
| 269 | + distance->compare(t,t,o); | ||
| 270 | 270 | ||
| 271 | - for (int i=0; i<t.size(); i++) { | ||
| 272 | - for (int j=0; j<i; j++) { | ||
| 273 | - float score = distance->compare(t[i], t[j]); | ||
| 274 | - if (score >= threshold) { | ||
| 275 | - duplicates.append(i); | ||
| 276 | - break; | ||
| 277 | - } | ||
| 278 | - } | ||
| 279 | - Globals->currentStep++; | ||
| 280 | - Globals->printStatus(); | ||
| 281 | - } | 271 | + delete o; |
| 272 | + | ||
| 273 | + QString buffer(Globals->buffer); | ||
| 274 | + | ||
| 275 | + QStringList tail = buffer.split("\n"); | ||
| 276 | + | ||
| 277 | + // Remove header | ||
| 278 | + tail.removeFirst(); | ||
| 279 | + | ||
| 280 | + QStringList toRemove; | ||
| 281 | + foreach(const QString &s, tail) | ||
| 282 | + toRemove.append(s.split(',').at(1)); | ||
| 283 | + | ||
| 284 | + QSet<QString> duplicates = QSet<QString>::fromList(toRemove); | ||
| 285 | + | ||
| 286 | + QStringList fileNames = inputFiles.names(); | ||
| 287 | + | ||
| 288 | + QList<int> indices; | ||
| 289 | + foreach(const QString &d, duplicates) | ||
| 290 | + indices.append(fileNames.indexOf(d)); | ||
| 282 | 291 | ||
| 283 | - std::sort(duplicates.begin(),duplicates.end(),std::greater<float>()); | 292 | + std::sort(indices.begin(),indices.end(),std::greater<float>()); |
| 284 | 293 | ||
| 285 | - qDebug("\n%d duplicates removed.", duplicates.size()); | 294 | + qDebug("\n%d duplicates removed.", indices.size()); |
| 286 | 295 | ||
| 287 | - for (int i=0; i<duplicates.size(); i++) | ||
| 288 | - inputFiles.removeAt(duplicates[i]); | 296 | + for (int i=0; i<indices.size(); i++) |
| 297 | + inputFiles.removeAt(indices[i]); | ||
| 289 | 298 | ||
| 290 | - QScopedPointer<Gallery> o(Gallery::make(outputGallery)); | 299 | + QScopedPointer<Gallery> og(Gallery::make(outputGallery)); |
| 291 | 300 | ||
| 292 | - o->writeBlock(inputFiles); | 301 | + og->writeBlock(inputFiles); |
| 293 | } | 302 | } |
| 294 | 303 | ||
| 295 | void compare(File targetGallery, File queryGallery, File output) | 304 | void compare(File targetGallery, File queryGallery, File output) |
openbr/openbr.cpp
| @@ -471,7 +471,7 @@ void br_close_gallery(br_gallery gallery) | @@ -471,7 +471,7 @@ void br_close_gallery(br_gallery gallery) | ||
| 471 | delete gal; | 471 | delete gal; |
| 472 | } | 472 | } |
| 473 | 473 | ||
| 474 | -void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold) | 474 | +void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold) |
| 475 | { | 475 | { |
| 476 | - br::Deduplicate(inputGallery, outputGallery, threshold); | 476 | + br::Deduplicate(input_gallery, output_gallery, threshold); |
| 477 | } | 477 | } |
openbr/openbr.h
| @@ -67,10 +67,15 @@ BR_EXPORT const char *br_about(); | @@ -67,10 +67,15 @@ BR_EXPORT const char *br_about(); | ||
| 67 | BR_EXPORT void br_cat(int num_input_galleries, const char *input_galleries[], const char *output_gallery); | 67 | BR_EXPORT void br_cat(int num_input_galleries, const char *input_galleries[], const char *output_gallery); |
| 68 | 68 | ||
| 69 | /*! | 69 | /*! |
| 70 | - * \brief Wraps br::Deduplicate() | 70 | + * \brief Removes duplicate templates in a gallery. |
| 71 | + * \param input_gallery Gallery to be deduplicated. | ||
| 72 | + * \param output_gallery Deduplicated gallery. | ||
| 73 | + * \param threshold Comparisons with a match score >= this value are designated to be duplicates. | ||
| 74 | + * \note If a gallery contains n duplicates, the first n-1 duplicates in the gallery will be removed and the nth will be kept. | ||
| 75 | + * \note Users are encouraged to use binary gallery formats as the entire gallery is read into memory in one call to Gallery::read. | ||
| 71 | */ | 76 | */ |
| 72 | 77 | ||
| 73 | -BR_EXPORT void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold); | 78 | +BR_EXPORT void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold); |
| 74 | 79 | ||
| 75 | /*! | 80 | /*! |
| 76 | * \brief Clusters one or more similarity matrices into a list of subjects. | 81 | * \brief Clusters one or more similarity matrices into a list of subjects. |
openbr/plugins/output.cpp
| @@ -528,7 +528,7 @@ class tailOutput : public Output | @@ -528,7 +528,7 @@ class tailOutput : public Output | ||
| 528 | } else { | 528 | } else { |
| 529 | // General case | 529 | // General case |
| 530 | for (int k=0; k<comparisons.size(); k++) { | 530 | for (int k=0; k<comparisons.size(); k++) { |
| 531 | - if (comparisons[k].value < value) { | 531 | + if (comparisons[k].value <= value) { |
| 532 | comparisons.insert(k, Comparison(queryFiles[i], targetFiles[j], value)); | 532 | comparisons.insert(k, Comparison(queryFiles[i], targetFiles[j], value)); |
| 533 | break; | 533 | break; |
| 534 | } | 534 | } |
| @@ -539,6 +539,7 @@ class tailOutput : public Output | @@ -539,6 +539,7 @@ class tailOutput : public Output | ||
| 539 | comparisons.removeLast(); | 539 | comparisons.removeLast(); |
| 540 | while ((comparisons.size() > atLeast) && (comparisons.last().value < threshold)) | 540 | while ((comparisons.size() > atLeast) && (comparisons.last().value < threshold)) |
| 541 | comparisons.removeLast(); | 541 | comparisons.removeLast(); |
| 542 | + | ||
| 542 | lastValue = comparisons.last().value; | 543 | lastValue = comparisons.last().value; |
| 543 | comparisonsLock.unlock(); | 544 | comparisonsLock.unlock(); |
| 544 | } | 545 | } |