Commit e1b598362b071deded00cafd830db4d34d1ea3dc

Authored by Scott Klum
1 parent d5486fb5

Updated deduplicate to use tailOutput

openbr/core/core.cpp
... ... @@ -263,33 +263,42 @@ struct AlgorithmCore
263 263  
264 264 TemplateList t = i->read();
265 265  
266   - QList<int> duplicates;
  266 + Output *o = Output::make(QString("buffer.tail[selfSimilar,threshold=%1,atLeast=0]").arg(QString::number(threshold)),inputFiles,inputFiles);
267 267  
268   - Globals->totalSteps = inputFiles.size();
269   - Globals->currentStep = 0;
  268 + // Compare to global tail output
  269 + distance->compare(t,t,o);
270 270  
271   - for (int i=0; i<t.size(); i++) {
272   - for (int j=0; j<i; j++) {
273   - float score = distance->compare(t[i], t[j]);
274   - if (score >= threshold) {
275   - duplicates.append(i);
276   - break;
277   - }
278   - }
279   - Globals->currentStep++;
280   - Globals->printStatus();
281   - }
  271 + delete o;
  272 +
  273 + QString buffer(Globals->buffer);
  274 +
  275 + QStringList tail = buffer.split("\n");
  276 +
  277 + // Remove header
  278 + tail.removeFirst();
  279 +
  280 + QStringList toRemove;
  281 + foreach(const QString &s, tail)
  282 + toRemove.append(s.split(',').at(1));
  283 +
  284 + QSet<QString> duplicates = QSet<QString>::fromList(toRemove);
  285 +
  286 + QStringList fileNames = inputFiles.names();
  287 +
  288 + QList<int> indices;
  289 + foreach(const QString &d, duplicates)
  290 + indices.append(fileNames.indexOf(d));
282 291  
283   - std::sort(duplicates.begin(),duplicates.end(),std::greater<float>());
  292 + std::sort(indices.begin(),indices.end(),std::greater<float>());
284 293  
285   - qDebug("\n%d duplicates removed.", duplicates.size());
  294 + qDebug("\n%d duplicates removed.", indices.size());
286 295  
287   - for (int i=0; i<duplicates.size(); i++)
288   - inputFiles.removeAt(duplicates[i]);
  296 + for (int i=0; i<indices.size(); i++)
  297 + inputFiles.removeAt(indices[i]);
289 298  
290   - QScopedPointer<Gallery> o(Gallery::make(outputGallery));
  299 + QScopedPointer<Gallery> og(Gallery::make(outputGallery));
291 300  
292   - o->writeBlock(inputFiles);
  301 + og->writeBlock(inputFiles);
293 302 }
294 303  
295 304 void compare(File targetGallery, File queryGallery, File output)
... ...
openbr/openbr.cpp
... ... @@ -471,7 +471,7 @@ void br_close_gallery(br_gallery gallery)
471 471 delete gal;
472 472 }
473 473  
474   -void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold)
  474 +void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold)
475 475 {
476   - br::Deduplicate(inputGallery, outputGallery, threshold);
  476 + br::Deduplicate(input_gallery, output_gallery, threshold);
477 477 }
... ...
openbr/openbr.h
... ... @@ -67,10 +67,15 @@ BR_EXPORT const char *br_about();
67 67 BR_EXPORT void br_cat(int num_input_galleries, const char *input_galleries[], const char *output_gallery);
68 68  
69 69 /*!
70   - * \brief Wraps br::Deduplicate()
  70 + * \brief Removes duplicate templates in a gallery.
  71 + * \param input_gallery Gallery to be deduplicated.
  72 + * \param output_gallery Deduplicated gallery.
  73 + * \param threshold Comparisons with a match score >= this value are considered duplicates.
  74 + * \note If n templates in a gallery are duplicates of one another, the first n-1 of them will be removed and the nth will be kept.
  75 + * \note Users are encouraged to use binary gallery formats as the entire gallery is read into memory in one call to Gallery::read.
71 76 */
72 77  
73   -BR_EXPORT void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold);
  78 +BR_EXPORT void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold);
74 79  
75 80 /*!
76 81 * \brief Clusters one or more similarity matrices into a list of subjects.
... ...
openbr/plugins/output.cpp
... ... @@ -528,7 +528,7 @@ class tailOutput : public Output
528 528 } else {
529 529 // General case
530 530 for (int k=0; k<comparisons.size(); k++) {
531   - if (comparisons[k].value < value) {
  531 + if (comparisons[k].value <= value) {
532 532 comparisons.insert(k, Comparison(queryFiles[i], targetFiles[j], value));
533 533 break;
534 534 }
... ... @@ -539,6 +539,7 @@ class tailOutput : public Output
539 539 comparisons.removeLast();
540 540 while ((comparisons.size() > atLeast) && (comparisons.last().value < threshold))
541 541 comparisons.removeLast();
  542 +
542 543 lastValue = comparisons.last().value;
543 544 comparisonsLock.unlock();
544 545 }
... ...