Commit e1b598362b071deded00cafd830db4d34d1ea3dc

Authored by Scott Klum
1 parent d5486fb5

Updated deduplicate to use tailOutput

openbr/core/core.cpp
@@ -263,33 +263,42 @@ struct AlgorithmCore @@ -263,33 +263,42 @@ struct AlgorithmCore
263 263
264 TemplateList t = i->read(); 264 TemplateList t = i->read();
265 265
266 - QList<int> duplicates; 266 + Output *o = Output::make(QString("buffer.tail[selfSimilar,threshold=%1,atLeast=0]").arg(QString::number(threshold)),inputFiles,inputFiles);
267 267
268 - Globals->totalSteps = inputFiles.size();  
269 - Globals->currentStep = 0; 268 + // Compare to global tail output
  269 + distance->compare(t,t,o);
270 270
271 - for (int i=0; i<t.size(); i++) {  
272 - for (int j=0; j<i; j++) {  
273 - float score = distance->compare(t[i], t[j]);  
274 - if (score >= threshold) {  
275 - duplicates.append(i);  
276 - break;  
277 - }  
278 - }  
279 - Globals->currentStep++;  
280 - Globals->printStatus();  
281 - } 271 + delete o;
  272 +
  273 + QString buffer(Globals->buffer);
  274 +
  275 + QStringList tail = buffer.split("\n");
  276 +
  277 + // Remove header
  278 + tail.removeFirst();
  279 +
  280 + QStringList toRemove;
  281 + foreach(const QString &s, tail)
  282 + toRemove.append(s.split(',').at(1));
  283 +
  284 + QSet<QString> duplicates = QSet<QString>::fromList(toRemove);
  285 +
  286 + QStringList fileNames = inputFiles.names();
  287 +
  288 + QList<int> indices;
  289 + foreach(const QString &d, duplicates)
  290 + indices.append(fileNames.indexOf(d));
282 291
283 - std::sort(duplicates.begin(),duplicates.end(),std::greater<float>()); 292 + std::sort(indices.begin(),indices.end(),std::greater<float>());
284 293
285 - qDebug("\n%d duplicates removed.", duplicates.size()); 294 + qDebug("\n%d duplicates removed.", indices.size());
286 295
287 - for (int i=0; i<duplicates.size(); i++)  
288 - inputFiles.removeAt(duplicates[i]); 296 + for (int i=0; i<indices.size(); i++)
  297 + inputFiles.removeAt(indices[i]);
289 298
290 - QScopedPointer<Gallery> o(Gallery::make(outputGallery)); 299 + QScopedPointer<Gallery> og(Gallery::make(outputGallery));
291 300
292 - o->writeBlock(inputFiles); 301 + og->writeBlock(inputFiles);
293 } 302 }
294 303
295 void compare(File targetGallery, File queryGallery, File output) 304 void compare(File targetGallery, File queryGallery, File output)
openbr/openbr.cpp
@@ -471,7 +471,7 @@ void br_close_gallery(br_gallery gallery) @@ -471,7 +471,7 @@ void br_close_gallery(br_gallery gallery)
471 delete gal; 471 delete gal;
472 } 472 }
473 473
474 -void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold) 474 +void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold)
475 { 475 {
476 - br::Deduplicate(inputGallery, outputGallery, threshold); 476 + br::Deduplicate(input_gallery, output_gallery, threshold);
477 } 477 }
openbr/openbr.h
@@ -67,10 +67,15 @@ BR_EXPORT const char *br_about(); @@ -67,10 +67,15 @@ BR_EXPORT const char *br_about();
67 BR_EXPORT void br_cat(int num_input_galleries, const char *input_galleries[], const char *output_gallery); 67 BR_EXPORT void br_cat(int num_input_galleries, const char *input_galleries[], const char *output_gallery);
68 68
69 /*! 69 /*!
70 - * \brief Wraps br::Deduplicate() 70 + * \brief Removes duplicate templates in a gallery.
  71 + * \param input_gallery Gallery to be deduplicated.
  72 + * \param output_gallery Deduplicated gallery.
  73 + * \param threshold Comparisons with a match score >= this value are considered duplicates.
  74 + * \note If a gallery contains n duplicates, the first n-1 duplicates in the gallery will be removed and the nth will be kept.
  75 + * \note Users are encouraged to use binary gallery formats as the entire gallery is read into memory in one call to Gallery::read.
71 */ 76 */
72 77
73 -BR_EXPORT void br_deduplicate(const char *inputGallery, const char *outputGallery, const char *threshold); 78 +BR_EXPORT void br_deduplicate(const char *input_gallery, const char *output_gallery, const char *threshold);
74 79
75 /*! 80 /*!
76 * \brief Clusters one or more similarity matrices into a list of subjects. 81 * \brief Clusters one or more similarity matrices into a list of subjects.
openbr/plugins/output.cpp
@@ -528,7 +528,7 @@ class tailOutput : public Output @@ -528,7 +528,7 @@ class tailOutput : public Output
528 } else { 528 } else {
529 // General case 529 // General case
530 for (int k=0; k<comparisons.size(); k++) { 530 for (int k=0; k<comparisons.size(); k++) {
531 - if (comparisons[k].value < value) { 531 + if (comparisons[k].value <= value) {
532 comparisons.insert(k, Comparison(queryFiles[i], targetFiles[j], value)); 532 comparisons.insert(k, Comparison(queryFiles[i], targetFiles[j], value));
533 break; 533 break;
534 } 534 }
@@ -539,6 +539,7 @@ class tailOutput : public Output @@ -539,6 +539,7 @@ class tailOutput : public Output
539 comparisons.removeLast(); 539 comparisons.removeLast();
540 while ((comparisons.size() > atLeast) && (comparisons.last().value < threshold)) 540 while ((comparisons.size() > atLeast) && (comparisons.last().value < threshold))
541 comparisons.removeLast(); 541 comparisons.removeLast();
  542 +
542 lastValue = comparisons.last().value; 543 lastValue = comparisons.last().value;
543 comparisonsLock.unlock(); 544 comparisonsLock.unlock();
544 } 545 }