Commit d0937dd7dfc90d6d8d2fd8b1a745056f66f384da

Authored by Scott Klum
1 parent 16e9fb65

Switched back to read all after discovering bug

app/br/br.cpp
@@ -159,6 +159,7 @@ public: @@ -159,6 +159,7 @@ public:
159 check(parc >= 2, "Incorrect parameter count for 'plotMetadata'."); 159 check(parc >= 2, "Incorrect parameter count for 'plotMetadata'.");
160 br_plot_metadata(parc-1, parv, parv[parc-1], true); 160 br_plot_metadata(parc-1, parv, parv[parc-1], true);
161 } else if (!strcmp(fun, "deduplicate")) { 161 } else if (!strcmp(fun, "deduplicate")) {
  162 + check(parc == 3, "Incorrect parameter count for 'deduplicate'.");
162 br_deduplicate(parv[0], parv[1], parv[2]); 163 br_deduplicate(parv[0], parv[1], parv[2]);
163 } 164 }
164 165
openbr/core/core.cpp
@@ -251,13 +251,9 @@ struct AlgorithmCore @@ -251,13 +251,9 @@ struct AlgorithmCore
251 Globals->blockSize = old_block_size; 251 Globals->blockSize = old_block_size;
252 } 252 }
253 253
254 - void deduplicate(const QString &inputGallery, const QString &outputGallery, const float threshold) 254 + void deduplicate(const File &inputGallery, const File &outputGallery, const float threshold)
255 { 255 {
256 - /*qDebug("Deduplicating %s to %s%s", qPrintable(inputGallery.flat()),  
257 - qPrintable(outputGallery.flat()),  
258 - output.isNull() ? "" : qPrintable(" to " + output.flat()));*/  
259 -  
260 - //Globals->blockSize = INT_MAX; 256 + qDebug("Deduplicating %s to %s with a score threshold of %f", qPrintable(inputGallery.flat()), qPrintable(outputGallery.flat()), threshold);
261 257
262 if (distance.isNull()) qFatal("Null distance."); 258 if (distance.isNull()) qFatal("Null distance.");
263 259
@@ -265,67 +261,37 @@ struct AlgorithmCore @@ -265,67 +261,37 @@ struct AlgorithmCore
265 FileList inputFiles; 261 FileList inputFiles;
266 retrieveOrEnroll(inputGallery, i, inputFiles); 262 retrieveOrEnroll(inputGallery, i, inputFiles);
267 263
268 - //TemplateList t = i->read(); 264 + TemplateList t = i->read();
269 265
270 QList<int> duplicates; 266 QList<int> duplicates;
271 267
272 - /* 268 + Globals->totalSteps = inputFiles.size();
  269 + Globals->currentStep = 0;
  270 +
273 for (int i=0; i<t.size(); i++) { 271 for (int i=0; i<t.size(); i++) {
274 - fprintf(stderr, "%05.2f%% duplicates considered.\r", (float)i/(float)t.size()*100.);  
275 for (int j=0; j<i; j++) { 272 for (int j=0; j<i; j++) {
276 float score = distance->compare(t[i], t[j]); 273 float score = distance->compare(t[i], t[j]);
277 if (score >= threshold) { 274 if (score >= threshold) {
  275 + qDebug() << t[i].file.baseName() << t[j].file.baseName() << score << i;
278 duplicates.append(i); 276 duplicates.append(i);
279 - break;  
280 - }  
281 - }  
282 - }*/  
283 -  
284 - int queryBlock = -1;  
285 - bool queryDone = false;  
286 - while (!queryDone) {  
287 - queryBlock++;  
288 - TemplateList queries = i->readBlock(&queryDone);  
289 -  
290 - QList<TemplateList> queryPartitions;  
291 - if (!partitionSizes.empty()) queryPartitions = queries.partition(partitionSizes);  
292 - else queryPartitions.append(queries);  
293 -  
294 - for (int i=0; i<queryPartitions.size(); i++) {  
295 - int targetBlock = -1;  
296 - bool targetDone = false;  
297 - while (!targetDone) {  
298 - targetBlock++;  
299 -  
300 - TemplateList targets = t->readBlock(&targetDone);  
301 -  
302 - QList<TemplateList> targetPartitions;  
303 - if (!partitionSizes.empty()) targetPartitions = targets.partition(partitionSizes);  
304 - else targetPartitions.append(targets);  
305 -  
306 - outputs[i]->setBlock(queryBlock, targetBlock);  
307 -  
308 - distance->compare(targetPartitions[i], queryPartitions[i], outputs[i]);  
309 -  
310 - Globals->currentStep += double(targets.size()) * double(queries.size());  
311 - Globals->printStatus();  
312 } 277 }
313 } 278 }
314 } 279 }
315 280
316 - qDebug("\n%d duplicates removed.", duplicates.size()); 281 + std::sort(duplicates.begin(),duplicates.end(),std::greater<float>());
317 282
318 - std::sort(duplicates.begin(), duplicates.end(),std::greater<float>()); 283 + qDebug("\n%d duplicates removed.", duplicates.size());
319 284
320 - foreach(int index, duplicates)  
321 - qDebug() << t.at(index).file.baseName(); 285 + qDebug() << duplicates;
322 286
323 - for (int i=duplicates.size()-1; i>=0; i--)  
324 - t.removeAt(duplicates[i]); 287 + for (int i=0; i<duplicates.size(); i++) {
  288 + qDebug() << "Removing" << inputFiles.at(duplicates[i]);
  289 + inputFiles.removeAt(duplicates[i]);
  290 + }
325 291
326 QScopedPointer<Gallery> o(Gallery::make(outputGallery)); 292 QScopedPointer<Gallery> o(Gallery::make(outputGallery));
327 293
328 - o->writeBlock(t); 294 + o->writeBlock(inputFiles);
329 } 295 }
330 296
331 void compare(File targetGallery, File queryGallery, File output) 297 void compare(File targetGallery, File queryGallery, File output)
@@ -573,10 +539,12 @@ void br::Cat(const QStringList &inputGalleries, const QString &outputGallery) @@ -573,10 +539,12 @@ void br::Cat(const QStringList &inputGalleries, const QString &outputGallery)
573 } 539 }
574 } 540 }
575 541
576 -void br::Deduplicate(const QString &inputGallery, const QString &outputGallery, const QString &threshold) 542 +void br::Deduplicate(const File &inputGallery, const File &outputGallery, const QString &threshold)
577 { 543 {
578 - File output;  
579 - AlgorithmManager::getAlgorithm(output.get<QString>("algorithm"))->deduplicate(inputGallery, outputGallery, threshold.toFloat()); 544 + bool ok;
  545 + float thresh = threshold.toFloat(&ok);
  546 + if (ok) AlgorithmManager::getAlgorithm(inputGallery.get<QString>("algorithm"))->deduplicate(inputGallery, outputGallery, thresh);
  547 + else qFatal("Unable to convert deduplication threshold to float.");
580 } 548 }
581 549
582 QSharedPointer<br::Transform> br::Transform::fromAlgorithm(const QString &algorithm, bool preprocess) 550 QSharedPointer<br::Transform> br::Transform::fromAlgorithm(const QString &algorithm, bool preprocess)
openbr/openbr_plugin.h
@@ -1371,7 +1371,13 @@ BR_EXPORT void Convert(const File &fileType, const File &inputFile, const File & @@ -1371,7 +1371,13 @@ BR_EXPORT void Convert(const File &fileType, const File &inputFile, const File &
1371 */ 1371 */
1372 BR_EXPORT void Cat(const QStringList &inputGalleries, const QString &outputGallery); 1372 BR_EXPORT void Cat(const QStringList &inputGalleries, const QString &outputGallery);
1373 1373
1374 -BR_EXPORT void Deduplicate(const QString &inputGallery, const QString &outputGallery, const QString &threshold); 1374 +/*!
  1375 + * \brief Deduplicate a gallery.
  1376 + * \param inputGallery Gallery to deduplicate.
  1377 + * \param outputGallery Gallery to store the deduplicated result.
  1378 + * \param threshold Match score threshold to determine duplicates.
  1379 + */
  1380 +BR_EXPORT void Deduplicate(const File &inputGallery, const File &outputGallery, const QString &threshold);
1375 1381
1376 /*! @}*/ 1382 /*! @}*/
1377 1383
openbr/plugins/gallery.cpp
@@ -120,7 +120,6 @@ class galGallery : public Gallery @@ -120,7 +120,6 @@ class galGallery : public Gallery
120 TemplateList templates; 120 TemplateList templates;
121 while ((templates.size() < Globals->blockSize) && !stream.atEnd()) { 121 while ((templates.size() < Globals->blockSize) && !stream.atEnd()) {
122 Template m; 122 Template m;
123 - qDebug() << templates.size();  
124 stream >> m; 123 stream >> m;
125 templates.append(m); 124 templates.append(m);
126 } 125 }