Commit 3550feb148dbed310d9132c40569e06ea386e4d6

Authored by Josh Klontz
2 parents 76e69ec6 5ffd5dc3

Merge pull request #175 from biometrics/stream_based_comparison

Stream based comparison
openbr/core/core.cpp
... ... @@ -47,12 +47,12 @@ struct AlgorithmCore
47 47 qDebug("Training on %s%s", qPrintable(input.flat()),
48 48 model.isEmpty() ? "" : qPrintable(" to " + model));
49 49  
50   - QScopedPointer<Transform> trainingWrapper(Transform::make("DirectStream([Identity], readMode=DistributeFrames)", NULL));
  50 + QScopedPointer<Transform> trainingWrapper(Transform::make("DirectStream(readMode=DistributeFrames)", NULL));
51 51  
52 52 CompositeTransform * downcast = dynamic_cast<CompositeTransform *>(trainingWrapper.data());
53 53 if (downcast == NULL)
54 54 qFatal("downcast failed?");
55   - downcast->transforms[0] = this->transform.data();
  55 + downcast->transforms.append(this->transform.data());
56 56  
57 57 downcast->init();
58 58  
... ... @@ -163,14 +163,14 @@ struct AlgorithmCore
163 163  
164 164 if (!multiProcess)
165 165 {
166   - QString pipeDesc = "Identity+GalleryOutput("+gallery.flat()+")+ProgressCounter("+QString::number(data.length())+")+Discard";
  166 + QString pipeDesc = "GalleryOutput("+gallery.flat()+")+ProgressCounter("+QString::number(data.length())+")+Discard";
167 167 basePipe.reset(Transform::make(pipeDesc,NULL));
168 168 CompositeTransform * downcast = dynamic_cast<CompositeTransform *>(basePipe.data());
169 169 if (downcast == NULL)
170 170 qFatal("downcast failed?");
171 171  
172 172 // replace that placeholder with the current algorithm
173   - downcast->transforms[0] = this->transform.data();
  173 + downcast->transforms.prepend(this->transform.data());
174 174  
175 175 // call init on the pipe to collapse the algorithm (if its top level is a pipe)
176 176 downcast->init();
... ... @@ -182,7 +182,7 @@ struct AlgorithmCore
182 182 }
183 183  
184 184 // Next, we make a Stream (with placeholder transform)
185   - QString streamDesc = "Stream(Identity, readMode=DistributeFrames)";
  185 + QString streamDesc = "Stream(readMode=DistributeFrames)";
186 186 QScopedPointer<Transform> baseStream(Transform::make(streamDesc, NULL));
187 187 WrapperTransform * wrapper = dynamic_cast<WrapperTransform *> (baseStream.data());
188 188  
... ... @@ -322,72 +322,194 @@ struct AlgorithmCore
322 322 if (distance->compare(targetGallery, queryGallery, output))
323 323 return;
324 324  
325   - if (output.exists() && output.get<bool>("cache")) return;
  325 + // Are we comparing the same gallery against itself?
  326 + bool selfCompare = targetGallery == queryGallery;
  327 +
  328 + // Should we use multiple processes to do enrollment/comparison? If not, we just do multi-threading.
  329 + bool multiProcess = Globals->file.getBool("multiProcess", false);
  330 +
  331 + // In comparing two galleries, we will keep the smaller one in memory, and load the larger one
  332 + // incrementally. If the gallery set is larger than the probe set, we operate in transpose mode
  333 + // i.e. we must transpose our output, to still write the output matrix in row-major order.
  334 + bool transposeMode = false;
  335 +
  336 + // Is the larger gallery already enrolled? If not, we will enroll those images in-line with their
  337 + // comparison against the smaller gallery (which will be enrolled, and stored in memory).
  338 + bool needEnrollRows = false;
  339 +
  340 +
  341 +
  342 +
  343 + if (output.exists() && output.get<bool>("cache", false)) return;
326 344 if (queryGallery == ".") queryGallery = targetGallery;
327 345  
328   - QScopedPointer<Gallery> t, q;
329   - FileList targetFiles, queryFiles;
330   - retrieveOrEnroll(targetGallery, t, targetFiles);
331   - retrieveOrEnroll(queryGallery, q, queryFiles);
332 346  
333   - QList<int> partitionSizes;
334   - QList<File> outputFiles;
335   - if (output.contains("split")) {
336   - if (!output.fileName().contains("%1")) qFatal("Output file name missing split number place marker (%%1)");
337   - partitionSizes = output.getList<int>("split");
338   - for (int i=0; i<partitionSizes.size(); i++) {
339   - File splitOutputFile = output.name.arg(i);
340   - outputFiles.append(splitOutputFile);
341   - }
342   - } else {
343   - outputFiles.append(output);
  347 + // To decide which gallery is larger, we need to read both, but at this point we just want the
  348 + // metadata, and don't need the enrolled matrices.
  349 + FileList targetMetadata;
  350 + FileList queryMetadata;
  351 +
  352 + // FileList::fromGallery reads a gallery and discards any matrices present, keeping only the metadata.
  353 + targetMetadata = FileList::fromGallery(targetGallery, true);
  354 + queryMetadata = FileList::fromGallery(queryGallery, true);
  355 +
  356 +
  357 + // Is the target or query set larger? We will use the larger as the rows of our comparison matrix (and transpose the output if necessary)
  358 + transposeMode = targetMetadata.size() > queryMetadata.size();
  359 +
  360 + File rowGallery = queryGallery;
  361 + File colGallery = targetGallery;
  362 + int rowSize = queryMetadata.size();
  363 +
  364 + if (transposeMode)
  365 + {
  366 + rowGallery = targetGallery;
  367 + colGallery = queryGallery;
  368 + rowSize = targetMetadata.size();
344 369 }
345 370  
346   - QList<Output*> outputs;
347   - foreach (const File &outputFile, outputFiles)
348   - outputs.append(Output::make(outputFile, targetFiles, queryFiles));
349 371  
350   - if (distance.isNull()) qFatal("Null distance.");
351   - Globals->currentStep = 0;
352   - Globals->totalSteps = double(targetFiles.size()) * double(queryFiles.size());
353   - Globals->startTime.start();
  372 + // Is the column gallery already enrolled? We keep the enrolled column gallery in memory, and in multi-process
  373 + // mode, every worker process retains a copy of this gallery in memory. When not in multi-process mode, we can
  374 + // simply make sure the enrolled data is stored in a memGallery, but in multi-process mode we save the enrolled
  375 + // data to disk (as a .gal file) so that each worker process can read it without re-doing enrollment.
  376 + File colEnrolledGallery = colGallery;
  377 + QString targetExtension = multiProcess ? "gal" : "mem";
  378 +
  379 + // If the column gallery is not already of the appropriate type, we need to do something
  380 + if (colGallery.suffix() != targetExtension)
  381 + {
  382 + // Build the name of a gallery containing the enrolled data, of the appropriate type.
  383 + colEnrolledGallery = colGallery.baseName() + colGallery.hash() + (multiProcess ? ".gal" : ".mem");
354 384  
355   - int queryBlock = -1;
356   - bool queryDone = false;
357   - while (!queryDone) {
358   - queryBlock++;
359   - TemplateList queries = q->readBlock(&queryDone);
  385 + // Check if we have to do real enrollment, and not just convert the gallery's type.
  386 + if (!(QStringList() << "gal" << "template" << "mem").contains(colGallery.suffix()))
  387 + {
  388 + enroll(colGallery, colEnrolledGallery);
  389 + }
  390 + // If the gallery does have enrolled templates, but is not the right type, we do a simple
  391 + // type conversion for it.
  392 + else
  393 + {
  394 + QScopedPointer<Gallery> readColGallery(Gallery::make(colGallery));
  395 + TemplateList templates = readColGallery->read();
  396 + QScopedPointer<Gallery> enrolledColOutput(Gallery::make(colEnrolledGallery));
  397 + enrolledColOutput->writeBlock(templates);
  398 + }
  399 + }
  400 +
  401 + // We have handled the column gallery, now decide whether or not we have to enroll the row gallery.
  402 + if (selfCompare)
  403 + {
  404 + // For self-comparisons, we just use the already enrolled column set.
  405 + rowGallery = colEnrolledGallery;
  406 + }
  407 + // Otherwise, we will need to enroll the row set. Since the actual comparison is defined via a transform
  408 + // which compares incoming templates against a gallery, we will handle enrollment of the row set by simply
  409 + // building a transform that does enrollment (using the current algorithm), then does the comparison in one
  410 + // step. This way, we don't have to retain the complete enrolled row gallery in memory, or on disk.
  411 + else if(!(QStringList() << "gal" << "mem" << "template").contains(rowGallery.suffix()))
  412 + {
  413 + needEnrollRows = true;
  414 + }
360 415  
361   - QList<TemplateList> queryPartitions;
362   - if (!partitionSizes.empty()) queryPartitions = queries.partition(partitionSizes);
363   - else queryPartitions.append(queries);
  416 + // At this point, we have decided how we will structure the comparison (either in transpose mode, or not),
  417 + // and have the column gallery enrolled, and have decided whether or not we need to enroll the row gallery.
  418 + // From this point, we will build a single algorithm that (optionally) does enrollment, then does comparisons
  419 + // and output, optionally using ProcessWrapper to do the enrollment and comparison in separate processes.
  420 + //
  421 + // There are two main components to this algorithm. The first is the (optional) enrollment and then the
  422 + // comparison step (built from a GalleryCompare transform), and the second is the sequential matrix output and
  423 + // progress counting step.
  424 + // After the base algorithm is built, the whole thing will be run in a stream, so that I/O can be handled sequentially.
364 425  
365   - for (int i=0; i<queryPartitions.size(); i++) {
366   - int targetBlock = -1;
367   - bool targetDone = false;
368   - while (!targetDone) {
369   - targetBlock++;
370 426  
371   - TemplateList targets = t->readBlock(&targetDone);
372 427  
373   - QList<TemplateList> targetPartitions;
374   - if (!partitionSizes.empty()) targetPartitions = targets.partition(partitionSizes);
375   - else targetPartitions.append(targets);
  428 + // The actual comparison step is done by a GalleryCompare transform, which has a Distance, and a gallery as data.
  429 + // Incoming templates are compared against the templates in the gallery, and the output is the resulting score
  430 + // vector.
  431 + QString compareRegionDesc = "Pipe([GalleryCompare("+Globals->algorithm + "," + colEnrolledGallery.flat() + ")])";
376 432  
377   - outputs[i]->setBlock(queryBlock, targetBlock);
378   - distance->compare(targetPartitions[i], queryPartitions[i], outputs[i]);
379 433  
380   - Globals->currentStep += double(targets.size()) * double(queries.size());
381   - Globals->printStatus();
382   - }
  434 + QScopedPointer<Transform> compareRegion;
  435 + // If we need to enroll the row set, we add the current algorithm's enrollment transform before the
  436 + // GalleryCompare in a pipe.
  437 + if (needEnrollRows)
  438 + {
  439 + if (!multiProcess)
  440 + {
  441 + compareRegionDesc = compareRegionDesc;
  442 + compareRegion.reset(Transform::make(compareRegionDesc,NULL));
  443 + CompositeTransform * downcast = dynamic_cast<CompositeTransform *> (compareRegion.data());
  444 + if (downcast == NULL)
  445 + qFatal("Pipe downcast failed in compare");
  446 +
  447 + downcast->transforms.prepend(this->transform.data());
  448 + downcast->init();
383 449 }
  450 + else
  451 + {
  452 + compareRegionDesc = "ProcessWrapper(" + this->transformString + "+" + compareRegionDesc + ")";
  453 + compareRegion.reset(Transform::make(compareRegionDesc, NULL));
  454 + }
  455 + }
  456 + else {
  457 + if (multiProcess)
  458 + compareRegionDesc = "ProcessWrapper(" + compareRegionDesc + ")";
  459 + compareRegion.reset(Transform::make(compareRegionDesc,NULL));
384 460 }
385 461  
386   - qDeleteAll(outputs);
  462 + // At this point, compareRegion is a transform, which optionally does enrollment, then compares the row
  463 + // set against the column set. If in multi-process mode, the enrollment and comparison are wrapped in a
  464 + // ProcessWrapper transform, and will be transparently run in multiple processes.
  465 + compareRegion->init();
  466 +
  467 +
  468 + // We also need to add Output and progress counting to the algorithm we are building, so we will assign them to
  469 + // two stages of a pipe.
  470 + QString joinDesc = "Pipe()";
  471 + QScopedPointer<Transform> join(Transform::make(joinDesc, NULL));
  472 +
  473 + // The output transform takes the metadata memGalleries we set up previously as input, along with the
  474 + // output specification we were passed. Gallery metadata is necessary for some Outputs to function correctly.
  475 + QString outputString = output.flat().isEmpty() ? "Empty" : output.flat();
  476 + QString outputRegionDesc = "Output("+ outputString +"," + targetGallery.flat() +"," + queryGallery.flat() + ","+ QString::number(transposeMode ? 1 : 0) + ")";
  477 + // The ProgressCounter transform will simply provide a display about the number of rows completed.
  478 + outputRegionDesc += "+ProgressCounter("+QString::number(rowSize)+")+Discard";
  479 + QScopedPointer<Transform> outputTform(Transform::make(outputRegionDesc, NULL));
  480 +
  481 + // Assign the comparison transform we previously built, and the output transform we just built to
  482 + // two stages of a pipe.
  483 + CompositeTransform * downcast = dynamic_cast<CompositeTransform *> (join.data());
  484 + downcast->transforms.append(compareRegion.data());
  485 + downcast->transforms.append(outputTform.data());
  486 +
  487 + // With this, we have set up a transform which (optionally) enrolls templates, compares them
  488 + // against a gallery, and outputs them.
  489 + join->init();
  490 +
  491 + // Now, we will give that base transform to a stream, which will incrementally read the row gallery
  492 + // and pass the templates it reads through the base algorithm.
  493 + QString streamDesc = "Stream(readMode=StreamGallery)";
  494 + QScopedPointer<Transform> streamBase(Transform::make(streamDesc, NULL));
  495 + WrapperTransform * streamWrapper = dynamic_cast<WrapperTransform *> (streamBase.data());
  496 + streamWrapper->transform = join.data();
  497 +
  498 + // The transform we will use is now complete.
  499 + streamWrapper->init();
  500 +
  501 + // We set up a template containing the rowGallery we want to compare.
  502 + TemplateList rowGalleryTemplate;
  503 + rowGalleryTemplate.append(Template(rowGallery));
  504 + TemplateList outputGallery;
  505 +
  506 + // Set up progress counting variables
  507 + Globals->currentStep = 0;
  508 + Globals->totalSteps = rowSize;
  509 + Globals->startTime.start();
387 510  
388   - const float speed = 1000 * Globals->totalSteps / Globals->startTime.elapsed() / std::max(1, abs(Globals->parallelism));
389   - if (!Globals->quiet && (Globals->totalSteps > 1)) fprintf(stderr, "\rSPEED=%.1e \n", speed);
390   - Globals->totalSteps = 0;
  511 + // Do the actual comparisons
  512 + streamWrapper->projectUpdate(rowGalleryTemplate, outputGallery);
391 513 }
392 514  
393 515 private:
... ... @@ -584,7 +706,7 @@ QSharedPointer<br::Transform> br::Transform::fromAlgorithm(const QString &algori
584 706 return AlgorithmManager::getAlgorithm(algorithm)->transform;
585 707 else {
586 708 QSharedPointer<Transform> orig_tform = AlgorithmManager::getAlgorithm(algorithm)->transform;
587   - QSharedPointer<Transform> newRoot = QSharedPointer<Transform>(Transform::make("Stream(Identity)", NULL));
  709 + QSharedPointer<Transform> newRoot = QSharedPointer<Transform>(Transform::make("Stream(readMode=DistributeFrames)", NULL));
588 710 WrapperTransform * downcast = dynamic_cast<WrapperTransform *> (newRoot.data());
589 711 downcast->transform = orig_tform.data();
590 712 downcast->init();
... ... @@ -597,4 +719,5 @@ QSharedPointer<br::Distance> br::Distance::fromAlgorithm(const QString &algorith
597 719 return AlgorithmManager::getAlgorithm(algorithm)->distance;
598 720 }
599 721  
  722 +
600 723 #include "core.moc"
... ...
openbr/openbr_plugin.h
... ... @@ -369,6 +369,8 @@ struct BR_EXPORT FileList : public QList<File>
369 369  
370 370 QList<int> crossValidationPartitions() const; /*!< \brief Returns the cross-validation partition (default=0) for each file in the list. */
371 371 int failures() const; /*!< \brief Returns the number of files with br::File::failed(). */
  372 +
  373 + static FileList fromGallery(const File &gallery, bool cache = false); /*!< \brief Create a file list from a br::Gallery. */
372 374 };
373 375  
374 376 /*!
... ...
openbr/plugins/distance.cpp
... ... @@ -488,6 +488,8 @@ class GalleryCompareTransform : public Transform
488 488 distance = Distance::fromAlgorithm(distanceAlgorithm);
489 489 }
490 490 }
  491 +public:
  492 + GalleryCompareTransform() : Transform(false, false) {}
491 493 };
492 494  
493 495 BR_REGISTER(Transform, GalleryCompareTransform)
... ...
openbr/plugins/gallery.cpp
... ... @@ -393,6 +393,51 @@ class memGallery : public Gallery
393 393  
394 394 BR_REGISTER(Gallery, memGallery)
395 395  
  396 +FileList FileList::fromGallery(const File & file, bool cache)
  397 +{
  398 + File targetMeta = file;
  399 + targetMeta.name = targetMeta.path() + targetMeta.baseName() + "_meta" + targetMeta.hash() + ".mem";
  400 +
  401 + FileList fileData;
  402 +
  403 + // Did we already read the data?
  404 + if (MemoryGalleries::galleries.contains(targetMeta))
  405 + {
  406 + return MemoryGalleries::galleries[targetMeta].files();
  407 + }
  408 +
  409 + TemplateList templates;
  410 + // We need to read the data in some form. Does this gallery type contain matrices?
  411 + if ((QStringList() << "gal" << "mem" << "template").contains(file.suffix())) {
  412 + // Retrieve it block by block, dropping matrices from read templates.
  413 + QScopedPointer<Gallery> gallery(Gallery::make(file));
  414 + gallery->set_readBlockSize(10);
  415 + bool done = false;
  416 + while (!done)
  417 + {
  418 + TemplateList tList = gallery->readBlock(&done);
  419 + for (int i=0; i < tList.size();i++)
  420 + {
  421 + tList[i].clear();
  422 + templates.append(tList[i].file);
  423 + }
  424 + }
  425 + }
  426 + else {
  427 + // this is a gallery format that doesn't include matrices, so we can just read it
  428 + QScopedPointer<Gallery> gallery(Gallery::make(file));
  429 + templates= gallery->read();
  430 + }
  431 +
  432 + if (cache)
  433 + {
  434 + QScopedPointer<Gallery> memOutput(Gallery::make(targetMeta));
  435 + memOutput->writeBlock(templates);
  436 + }
  437 + fileData = templates.files();
  438 + return fileData;
  439 +}
  440 +
396 441 /*!
397 442 * \ingroup galleries
398 443 * \brief Treats each line as a file.
... ...
openbr/plugins/misc.cpp
... ... @@ -636,11 +636,8 @@ class OutputTransform : public TimeVaryingTransform
636 636 if (targetName.isEmpty() || queryName.isEmpty() || outputString.isEmpty())
637 637 return;
638 638  
639   - QScopedPointer<Gallery> tGallery(Gallery::make(targetName));
640   - QScopedPointer<Gallery> qGallery(Gallery::make(queryName));
641   -
642   - FileList targetFiles = tGallery->files();
643   - FileList queryFiles = qGallery->files();
  639 + FileList targetFiles = FileList::fromGallery(targetName);
  640 + FileList queryFiles = FileList::fromGallery(queryName);
644 641  
645 642 currentBlockRow = 0;
646 643 currentBlockCol = 0;
... ...