Commit e1b598362b071deded00cafd830db4d34d1ea3dc
1 parent d5486fb5
Updated deduplicate to use tailOutput
Showing 4 changed files with 40 additions and 25 deletions
openbr/core/core.cpp
| ... | ... | @@ -263,33 +263,42 @@ struct AlgorithmCore |
| 263 | 263 | |
| 264 | 264 | TemplateList t = i->read(); |
| 265 | 265 | |
| 266 | - QList<int> duplicates; | |
| 266 | + Output *o = Output::make(QString("buffer.tail[selfSimilar,threshold=%1,atLeast=0]").arg(QString::number(threshold)),inputFiles,inputFiles); | |
| 267 | 267 | |
| 268 | - Globals->totalSteps = inputFiles.size(); | |
| 269 | - Globals->currentStep = 0; | |
| 268 | + // Compare to global tail output | |
| 269 | + distance->compare(t,t,o); | |
| 270 | 270 | |
| 271 | - for (int i=0; i<t.size(); i++) { | |
| 272 | - for (int j=0; j<i; j++) { | |
| 273 | - float score = distance->compare(t[i], t[j]); | |
| 274 | - if (score >= threshold) { | |
| 275 | - duplicates.append(i); | |
| 276 | - break; | |
| 277 | - } | |
| 278 | - } | |
| 279 | - Globals->currentStep++; | |
| 280 | - Globals->printStatus(); | |
| 281 | - } | |
| 271 | + delete o; | |
| 272 | + | |
| 273 | + QString buffer(Globals->buffer); | |
| 274 | + | |
| 275 | + QStringList tail = buffer.split("\n"); | |
| 276 | + | |
| 277 | + // Remove header | |
| 278 | + tail.removeFirst(); | |
| 279 | + | |
| 280 | + QStringList toRemove; | |
| 281 | + foreach(const QString &s, tail) | |
| 282 | + toRemove.append(s.split(',').at(1)); | |
| 283 | + | |
| 284 | + QSet<QString> duplicates = QSet<QString>::fromList(toRemove); | |
| 285 | + | |
| 286 | + QStringList fileNames = inputFiles.names(); | |
| 287 | + | |
| 288 | + QList<int> indices; | |
| 289 | + foreach(const QString &d, duplicates) | |
| 290 | + indices.append(fileNames.indexOf(d)); | |
| 282 | 291 | |
| 283 | - std::sort(duplicates.begin(),duplicates.end(),std::greater<float>()); | |
| 292 | + std::sort(indices.begin(),indices.end(),std::greater<float>()); | |
| 284 | 293 | |
| 285 | - qDebug("\n%d duplicates removed.", duplicates.size()); | |
| 294 | + qDebug("\n%d duplicates removed.", indices.size()); | |
| 286 | 295 | |
| 287 | - for (int i=0; i<duplicates.size(); i++) | |
| 288 | - inputFiles.removeAt(duplicates[i]); | |
| 296 | + for (int i=0; i<indices.size(); i++) | |
| 297 | + inputFiles.removeAt(indices[i]); | |
| 289 | 298 | |
| 290 | - QScopedPointer<Gallery> o(Gallery::make(outputGallery)); | |
| 299 | + QScopedPointer<Gallery> og(Gallery::make(outputGallery)); | |
| 291 | 300 | |
| 292 | - o->writeBlock(inputFiles); | |
| 301 | + og->writeBlock(inputFiles); | |
| 293 | 302 | } |
| 294 | 303 | |
| 295 | 304 | void compare(File targetGallery, File queryGallery, File output) | ... | ... |
openbr/openbr.cpp
| ... | ... | @@ -471,7 +471,7 @@ void br_close_gallery(br_gallery gallery) |
| 471 | 471 | delete gal; |
| 472 | 472 | } |
| 473 | 473 | |
| 474 | -void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold) | |
| 474 | +void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold) | |
| 475 | 475 | { |
| 476 | - br::Deduplicate(inputGallery, outputGallery, threshold); | |
| 476 | + br::Deduplicate(input_gallery, output_gallery, threshold); | |
| 477 | 477 | } | ... | ... |
openbr/openbr.h
| ... | ... | @@ -67,10 +67,15 @@ BR_EXPORT const char *br_about(); |
| 67 | 67 | BR_EXPORT void br_cat(int num_input_galleries, const char *input_galleries[], const char *output_gallery); |
| 68 | 68 | |
| 69 | 69 | /*! |
| 70 | - * \brief Wraps br::Deduplicate() | |
| 70 | + * \brief Removes duplicate templates in a gallery. | |
| 71 | + * \param input_gallery Gallery to be deduplicated. | |
| 72 | + * \param output_gallery Deduplicated gallery. | |
| 73 | + * \param threshold Comparisons with a match score >= this value are designated to be duplicates. | |
| 74 | + * \note If a gallery contains n duplicates, the first n-1 duplicates in the gallery will be removed and the nth will be kept. | |
| 75 | + * \note Users are encouraged to use binary gallery formats as the entire gallery is read into memory in one call to Gallery::read. | |
| 71 | 76 | */ |
| 72 | 77 | |
| 73 | -BR_EXPORT void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold); | |
| 78 | +BR_EXPORT void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold); | |
| 74 | 79 | |
| 75 | 80 | /*! |
| 76 | 81 | * \brief Clusters one or more similarity matrices into a list of subjects. | ... | ... |
openbr/plugins/output.cpp
| ... | ... | @@ -528,7 +528,7 @@ class tailOutput : public Output |
| 528 | 528 | } else { |
| 529 | 529 | // General case |
| 530 | 530 | for (int k=0; k<comparisons.size(); k++) { |
| 531 | - if (comparisons[k].value < value) { | |
| 531 | + if (comparisons[k].value <= value) { | |
| 532 | 532 | comparisons.insert(k, Comparison(queryFiles[i], targetFiles[j], value)); |
| 533 | 533 | break; |
| 534 | 534 | } |
| ... | ... | @@ -539,6 +539,7 @@ class tailOutput : public Output |
| 539 | 539 | comparisons.removeLast(); |
| 540 | 540 | while ((comparisons.size() > atLeast) && (comparisons.last().value < threshold)) |
| 541 | 541 | comparisons.removeLast(); |
| 542 | + | |
| 542 | 543 | lastValue = comparisons.last().value; |
| 543 | 544 | comparisonsLock.unlock(); |
| 544 | 545 | } | ... | ... |