Commit 3550feb148dbed310d9132c40569e06ea386e4d6
Merge pull request #175 from biometrics/stream_based_comparison
Stream based comparison
Showing
5 changed files
with
229 additions
and
60 deletions
openbr/core/core.cpp
| ... | ... | @@ -47,12 +47,12 @@ struct AlgorithmCore |
| 47 | 47 | qDebug("Training on %s%s", qPrintable(input.flat()), |
| 48 | 48 | model.isEmpty() ? "" : qPrintable(" to " + model)); |
| 49 | 49 | |
| 50 | - QScopedPointer<Transform> trainingWrapper(Transform::make("DirectStream([Identity], readMode=DistributeFrames)", NULL)); | |
| 50 | + QScopedPointer<Transform> trainingWrapper(Transform::make("DirectStream(readMode=DistributeFrames)", NULL)); | |
| 51 | 51 | |
| 52 | 52 | CompositeTransform * downcast = dynamic_cast<CompositeTransform *>(trainingWrapper.data()); |
| 53 | 53 | if (downcast == NULL) |
| 54 | 54 | qFatal("downcast failed?"); |
| 55 | - downcast->transforms[0] = this->transform.data(); | |
| 55 | + downcast->transforms.append(this->transform.data()); | |
| 56 | 56 | |
| 57 | 57 | downcast->init(); |
| 58 | 58 | |
| ... | ... | @@ -163,14 +163,14 @@ struct AlgorithmCore |
| 163 | 163 | |
| 164 | 164 | if (!multiProcess) |
| 165 | 165 | { |
| 166 | - QString pipeDesc = "Identity+GalleryOutput("+gallery.flat()+")+ProgressCounter("+QString::number(data.length())+")+Discard"; | |
| 166 | + QString pipeDesc = "GalleryOutput("+gallery.flat()+")+ProgressCounter("+QString::number(data.length())+")+Discard"; | |
| 167 | 167 | basePipe.reset(Transform::make(pipeDesc,NULL)); |
| 168 | 168 | CompositeTransform * downcast = dynamic_cast<CompositeTransform *>(basePipe.data()); |
| 169 | 169 | if (downcast == NULL) |
| 170 | 170 | qFatal("downcast failed?"); |
| 171 | 171 | |
| 172 | 172 | // replace that placeholder with the current algorithm |
| 173 | - downcast->transforms[0] = this->transform.data(); | |
| 173 | + downcast->transforms.prepend(this->transform.data()); | |
| 174 | 174 | |
| 175 | 175 | // call init on the pipe to collapse the algorithm (if its top level is a pipe) |
| 176 | 176 | downcast->init(); |
| ... | ... | @@ -182,7 +182,7 @@ struct AlgorithmCore |
| 182 | 182 | } |
| 183 | 183 | |
| 184 | 184 | // Next, we make a Stream (with placeholder transform) |
| 185 | - QString streamDesc = "Stream(Identity, readMode=DistributeFrames)"; | |
| 185 | + QString streamDesc = "Stream(readMode=DistributeFrames)"; | |
| 186 | 186 | QScopedPointer<Transform> baseStream(Transform::make(streamDesc, NULL)); |
| 187 | 187 | WrapperTransform * wrapper = dynamic_cast<WrapperTransform *> (baseStream.data()); |
| 188 | 188 | |
| ... | ... | @@ -322,72 +322,194 @@ struct AlgorithmCore |
| 322 | 322 | if (distance->compare(targetGallery, queryGallery, output)) |
| 323 | 323 | return; |
| 324 | 324 | |
| 325 | - if (output.exists() && output.get<bool>("cache")) return; | |
| 325 | + // Are we comparing the same gallery against itself? | |
| 326 | + bool selfCompare = targetGallery == queryGallery; | |
| 327 | + | |
| 328 | + // Should we use multiple processes to do enrollment/comparison? If not, we just do multi-threading. | |
| 329 | + bool multiProcess = Globals->file.getBool("multiProcess", false); | |
| 330 | + | |
| 331 | + // In comparing two galleries, we will keep the smaller one in memory, and load the larger one | |
| 332 | + // incrementally. If the gallery set is larger than the probe set, we operate in transpose mode | |
| 333 | + // i.e. we must transpose our output, to still write the output matrix in row-major order. | |
| 334 | + bool transposeMode = false; | |
| 335 | + | |
| 336 | + // Is the larger gallery already enrolled? If not, we will enroll those images in-line with their | |
| 337 | + // comparison against the smaller gallery (which will be enrolled, and stored in memory). | |
| 338 | + bool needEnrollRows = false; | |
| 339 | + | |
| 340 | + | |
| 341 | + | |
| 342 | + | |
| 343 | + if (output.exists() && output.get<bool>("cache", false)) return; | |
| 326 | 344 | if (queryGallery == ".") queryGallery = targetGallery; |
| 327 | 345 | |
| 328 | - QScopedPointer<Gallery> t, q; | |
| 329 | - FileList targetFiles, queryFiles; | |
| 330 | - retrieveOrEnroll(targetGallery, t, targetFiles); | |
| 331 | - retrieveOrEnroll(queryGallery, q, queryFiles); | |
| 332 | 346 | |
| 333 | - QList<int> partitionSizes; | |
| 334 | - QList<File> outputFiles; | |
| 335 | - if (output.contains("split")) { | |
| 336 | - if (!output.fileName().contains("%1")) qFatal("Output file name missing split number place marker (%%1)"); | |
| 337 | - partitionSizes = output.getList<int>("split"); | |
| 338 | - for (int i=0; i<partitionSizes.size(); i++) { | |
| 339 | - File splitOutputFile = output.name.arg(i); | |
| 340 | - outputFiles.append(splitOutputFile); | |
| 341 | - } | |
| 342 | - } else { | |
| 343 | - outputFiles.append(output); | |
| 347 | + // To decide which gallery is larger, we need to read both, but at this point we just want the | |
| 348 | + // metadata, and don't need the enrolled matrices. | |
| 349 | + FileList targetMetadata; | |
| 350 | + FileList queryMetadata; | |
| 351 | + | |
| 352 | + // Emptyread reads a gallery, and discards any matrices present, keeping only the metadata. | |
| 353 | + targetMetadata = FileList::fromGallery(targetGallery, true); | |
| 354 | + queryMetadata = FileList::fromGallery(queryGallery, true); | |
| 355 | + | |
| 356 | + | |
| 357 | + // Is the target or query set larger? We will use the larger as the rows of our comparison matrix (and transpose the output if necessary) | |
| 358 | + transposeMode = targetMetadata.size() > queryMetadata.size(); | |
| 359 | + | |
| 360 | + File rowGallery = queryGallery; | |
| 361 | + File colGallery = targetGallery; | |
| 362 | + int rowSize = queryMetadata.size(); | |
| 363 | + | |
| 364 | + if (transposeMode) | |
| 365 | + { | |
| 366 | + rowGallery = targetGallery; | |
| 367 | + colGallery = queryGallery; | |
| 368 | + rowSize = targetMetadata.size(); | |
| 344 | 369 | } |
| 345 | 370 | |
| 346 | - QList<Output*> outputs; | |
| 347 | - foreach (const File &outputFile, outputFiles) | |
| 348 | - outputs.append(Output::make(outputFile, targetFiles, queryFiles)); | |
| 349 | 371 | |
| 350 | - if (distance.isNull()) qFatal("Null distance."); | |
| 351 | - Globals->currentStep = 0; | |
| 352 | - Globals->totalSteps = double(targetFiles.size()) * double(queryFiles.size()); | |
| 353 | - Globals->startTime.start(); | |
| 372 | + // Is the column gallery already enrolled? We keep the enrolled column gallery in memory, and in multi-process | |
| 373 | + // mode, every worker process retains a copy of this gallery in memory. When not in multi-process mode, we can | |
| 374 | + // simply make sure the enrolled data is stored in a memGallery, but in multi-process mode we save the enrolled | |
| 375 | + // data to disk (as a .gal file) so that each worker process can read it without re-doing enrollment. | |
| 376 | + File colEnrolledGallery = colGallery; | |
| 377 | + QString targetExtension = multiProcess ? "gal" : "mem"; | |
| 378 | + | |
| 379 | + // If the column gallery is not already of the appropriate type, we need to do something | |
| 380 | + if (colGallery.suffix() != targetExtension) | |
| 381 | + { | |
| 382 | + // Build the name of a gallery containing the enrolled data, of the appropriate type. | |
| 383 | + colEnrolledGallery = colGallery.baseName() + colGallery.hash() + (multiProcess ? ".gal" : ".mem"); | |
| 354 | 384 | |
| 355 | - int queryBlock = -1; | |
| 356 | - bool queryDone = false; | |
| 357 | - while (!queryDone) { | |
| 358 | - queryBlock++; | |
| 359 | - TemplateList queries = q->readBlock(&queryDone); | |
| 385 | + // Check if we have to do real enrollment, and not just convert the gallery's type. | |
| 386 | + if (!(QStringList() << "gal" << "template" << "mem").contains(colGallery.suffix())) | |
| 387 | + { | |
| 388 | + enroll(colGallery, colEnrolledGallery); | |
| 389 | + } | |
| 390 | + // If the gallery does have enrolled templates, but is not the right type, we do a simple | |
| 391 | + // type conversion for it. | |
| 392 | + else | |
| 393 | + { | |
| 394 | + QScopedPointer<Gallery> readColGallery(Gallery::make(colGallery)); | |
| 395 | + TemplateList templates = readColGallery->read(); | |
| 396 | + QScopedPointer<Gallery> enrolledColOutput(Gallery::make(colEnrolledGallery)); | |
| 397 | + enrolledColOutput->writeBlock(templates); | |
| 398 | + } | |
| 399 | + } | |
| 400 | + | |
| 401 | + // We have handled the column gallery, now decide whether or not we have to enroll the row gallery. | |
| 402 | + if (selfCompare) | |
| 403 | + { | |
| 404 | + // For self-comparisons, we just use the already enrolled column set. | |
| 405 | + rowGallery = colEnrolledGallery; | |
| 406 | + } | |
| 407 | + // Otherwise, we will need to enroll the row set. Since the actual comparison is defined via a transform | |
| 408 | + // which compares incoming templates against a gallery, we will handle enrollment of the row set by simply | |
| 409 | + // building a transform that does enrollment (using the current algorithm), then does the comparison in one | |
| 410 | + // step. This way, we don't have to retain the complete enrolled row gallery in memory, or on disk. | |
| 411 | + else if(!(QStringList() << "gal" << "mem" << "template").contains(rowGallery.suffix())) | |
| 412 | + { | |
| 413 | + needEnrollRows = true; | |
| 414 | + } | |
| 360 | 415 | |
| 361 | - QList<TemplateList> queryPartitions; | |
| 362 | - if (!partitionSizes.empty()) queryPartitions = queries.partition(partitionSizes); | |
| 363 | - else queryPartitions.append(queries); | |
| 416 | + // At this point, we have decided how we will structure the comparison (either in transpose mode, or not), | |
| 417 | + // and have the column gallery enrolled, and have decided whether or not we need to enroll the row gallery. | |
| 418 | + // From this point, we will build a single algorithm that (optionally) does enrollment, then does comparisons | |
| 419 | + // and output, optionally using ProcessWrapper to do the enrollment and comparison in separate processes. | |
| 420 | + // | |
| 421 | + // There are two main components to this algorithm. The first is the (optional) enrollment and then the | |
| 422 | + // comparison step (built from a GalleryCompare transform), and the second is the sequential matrix output and | |
| 423 | + // progress counting step. | |
| 424 | + // After the base algorithm is built, the whole thing will be run in a stream, so that I/O can be handled sequentially. | |
| 364 | 425 | |
| 365 | - for (int i=0; i<queryPartitions.size(); i++) { | |
| 366 | - int targetBlock = -1; | |
| 367 | - bool targetDone = false; | |
| 368 | - while (!targetDone) { | |
| 369 | - targetBlock++; | |
| 370 | 426 | |
| 371 | - TemplateList targets = t->readBlock(&targetDone); | |
| 372 | 427 | |
| 373 | - QList<TemplateList> targetPartitions; | |
| 374 | - if (!partitionSizes.empty()) targetPartitions = targets.partition(partitionSizes); | |
| 375 | - else targetPartitions.append(targets); | |
| 428 | + // The actual comparison step is done by a GalleryCompare transform, which has a Distance, and a gallery as data. | |
| 429 | + // Incoming templates are compared against the templates in the gallery, and the output is the resulting score | |
| 430 | + // vector. | |
| 431 | + QString compareRegionDesc = "Pipe([GalleryCompare("+Globals->algorithm + "," + colEnrolledGallery.flat() + ")])"; | |
| 376 | 432 | |
| 377 | - outputs[i]->setBlock(queryBlock, targetBlock); | |
| 378 | - distance->compare(targetPartitions[i], queryPartitions[i], outputs[i]); | |
| 379 | 433 | |
| 380 | - Globals->currentStep += double(targets.size()) * double(queries.size()); | |
| 381 | - Globals->printStatus(); | |
| 382 | - } | |
| 434 | + QScopedPointer<Transform> compareRegion; | |
| 435 | + // If we need to enroll the row set, we add the current algorithm's enrollment transform before the | |
| 436 | + // GalleryCompare in a pipe. | |
| 437 | + if (needEnrollRows) | |
| 438 | + { | |
| 439 | + if (!multiProcess) | |
| 440 | + { | |
| 441 | + compareRegionDesc = compareRegionDesc; | |
| 442 | + compareRegion.reset(Transform::make(compareRegionDesc,NULL)); | |
| 443 | + CompositeTransform * downcast = dynamic_cast<CompositeTransform *> (compareRegion.data()); | |
| 444 | + if (downcast == NULL) | |
| 445 | + qFatal("Pipe downcast failed in compare"); | |
| 446 | + | |
| 447 | + downcast->transforms.prepend(this->transform.data()); | |
| 448 | + downcast->init(); | |
| 383 | 449 | } |
| 450 | + else | |
| 451 | + { | |
| 452 | + compareRegionDesc = "ProcessWrapper(" + this->transformString + "+" + compareRegionDesc + ")"; | |
| 453 | + compareRegion.reset(Transform::make(compareRegionDesc, NULL)); | |
| 454 | + } | |
| 455 | + } | |
| 456 | + else { | |
| 457 | + if (multiProcess) | |
| 458 | + compareRegionDesc = "ProcessWrapper(" + compareRegionDesc + ")"; | |
| 459 | + compareRegion.reset(Transform::make(compareRegionDesc,NULL)); | |
| 384 | 460 | } |
| 385 | 461 | |
| 386 | - qDeleteAll(outputs); | |
| 462 | + // At this point, compareRegion is a transform, which optionally does enrollment, then compares the row | |
| 463 | + // set against the column set. If in multi-process mode, the enrollment and comparison are wrapped in a | |
| 464 | + // ProcessWrapper transform, and will be transparently run in multiple processes. | |
| 465 | + compareRegion->init(); | |
| 466 | + | |
| 467 | + | |
| 468 | + // We also need to add Output and progress counting to the algorithm we are building, so we will assign them to | |
| 469 | + // two stages of a pipe. | |
| 470 | + QString joinDesc = "Pipe()"; | |
| 471 | + QScopedPointer<Transform> join(Transform::make(joinDesc, NULL)); | |
| 472 | + | |
| 473 | + // The output transform takes the metadata memGalleries we set up previously as input, along with the | |
| 474 | + // output specification we were passed. Gallery metadata is necessary for some Outputs to function correctly. | |
| 475 | + QString outputString = output.flat().isEmpty() ? "Empty" : output.flat(); | |
| 476 | + QString outputRegionDesc = "Output("+ outputString +"," + targetGallery.flat() +"," + queryGallery.flat() + ","+ QString::number(transposeMode ? 1 : 0) + ")"; | |
| 477 | + // The ProgressCounter transform will simply provide a display about the number of rows completed. | |
| 478 | + outputRegionDesc += "+ProgressCounter("+QString::number(rowSize)+")+Discard"; | |
| 479 | + QScopedPointer<Transform> outputTform(Transform::make(outputRegionDesc, NULL)); | |
| 480 | + | |
| 481 | + // Assign the comparison transform we previously built, and the output transform we just built to | |
| 482 | + // two stages of a pipe. | |
| 483 | + CompositeTransform * downcast = dynamic_cast<CompositeTransform *> (join.data()); | |
| 484 | + downcast->transforms.append(compareRegion.data()); | |
| 485 | + downcast->transforms.append(outputTform.data()); | |
| 486 | + | |
| 487 | + // With this, we have set up a transform which (optionally) enrolls templates, compares them | |
| 488 | + // against a gallery, and outputs them. | |
| 489 | + join->init(); | |
| 490 | + | |
| 491 | + // Now, we will give that base transform to a stream, which will incrementally read the row gallery | |
| 492 | + // and pass the templates it reads through the base algorithm. | |
| 493 | + QString streamDesc = "Stream(readMode=StreamGallery)"; | |
| 494 | + QScopedPointer<Transform> streamBase(Transform::make(streamDesc, NULL)); | |
| 495 | + WrapperTransform * streamWrapper = dynamic_cast<WrapperTransform *> (streamBase.data()); | |
| 496 | + streamWrapper->transform = join.data(); | |
| 497 | + | |
| 498 | + // The transform we will use is now complete. | |
| 499 | + streamWrapper->init(); | |
| 500 | + | |
| 501 | + // We set up a template containing the rowGallery we want to compare. | |
| 502 | + TemplateList rowGalleryTemplate; | |
| 503 | + rowGalleryTemplate.append(Template(rowGallery)); | |
| 504 | + TemplateList outputGallery; | |
| 505 | + | |
| 506 | + // Set up progress counting variables | |
| 507 | + Globals->currentStep = 0; | |
| 508 | + Globals->totalSteps = rowSize; | |
| 509 | + Globals->startTime.start(); | |
| 387 | 510 | |
| 388 | - const float speed = 1000 * Globals->totalSteps / Globals->startTime.elapsed() / std::max(1, abs(Globals->parallelism)); | |
| 389 | - if (!Globals->quiet && (Globals->totalSteps > 1)) fprintf(stderr, "\rSPEED=%.1e \n", speed); | |
| 390 | - Globals->totalSteps = 0; | |
| 511 | + // Do the actual comparisons | |
| 512 | + streamWrapper->projectUpdate(rowGalleryTemplate, outputGallery); | |
| 391 | 513 | } |
| 392 | 514 | |
| 393 | 515 | private: |
| ... | ... | @@ -584,7 +706,7 @@ QSharedPointer<br::Transform> br::Transform::fromAlgorithm(const QString &algori |
| 584 | 706 | return AlgorithmManager::getAlgorithm(algorithm)->transform; |
| 585 | 707 | else { |
| 586 | 708 | QSharedPointer<Transform> orig_tform = AlgorithmManager::getAlgorithm(algorithm)->transform; |
| 587 | - QSharedPointer<Transform> newRoot = QSharedPointer<Transform>(Transform::make("Stream(Identity)", NULL)); | |
| 709 | + QSharedPointer<Transform> newRoot = QSharedPointer<Transform>(Transform::make("Stream(readMode=DistributeFrames)", NULL)); | |
| 588 | 710 | WrapperTransform * downcast = dynamic_cast<WrapperTransform *> (newRoot.data()); |
| 589 | 711 | downcast->transform = orig_tform.data(); |
| 590 | 712 | downcast->init(); |
| ... | ... | @@ -597,4 +719,5 @@ QSharedPointer<br::Distance> br::Distance::fromAlgorithm(const QString &algorith |
| 597 | 719 | return AlgorithmManager::getAlgorithm(algorithm)->distance; |
| 598 | 720 | } |
| 599 | 721 | |
| 722 | + | |
| 600 | 723 | #include "core.moc" | ... | ... |
openbr/openbr_plugin.h
| ... | ... | @@ -369,6 +369,8 @@ struct BR_EXPORT FileList : public QList<File> |
| 369 | 369 | |
| 370 | 370 | QList<int> crossValidationPartitions() const; /*!< \brief Returns the cross-validation partition (default=0) for each file in the list. */ |
| 371 | 371 | int failures() const; /*!< \brief Returns the number of files with br::File::failed(). */ |
| 372 | + | |
| 373 | + static FileList fromGallery(const File &gallery, bool cache = false); /*!< \brief Create a file list from a br::Gallery. */ | |
| 372 | 374 | }; |
| 373 | 375 | |
| 374 | 376 | /*! | ... | ... |
openbr/plugins/distance.cpp
| ... | ... | @@ -488,6 +488,8 @@ class GalleryCompareTransform : public Transform |
| 488 | 488 | distance = Distance::fromAlgorithm(distanceAlgorithm); |
| 489 | 489 | } |
| 490 | 490 | } |
| 491 | +public: | |
| 492 | + GalleryCompareTransform() : Transform(false, false) {} | |
| 491 | 493 | }; |
| 492 | 494 | |
| 493 | 495 | BR_REGISTER(Transform, GalleryCompareTransform) | ... | ... |
openbr/plugins/gallery.cpp
| ... | ... | @@ -393,6 +393,51 @@ class memGallery : public Gallery |
| 393 | 393 | |
| 394 | 394 | BR_REGISTER(Gallery, memGallery) |
| 395 | 395 | |
| 396 | +FileList FileList::fromGallery(const File & file, bool cache) // Returns the File metadata of every template in the given gallery. | |
| 397 | +{ | |
| 398 | + File targetMeta = file; | |
| 399 | + targetMeta.name = targetMeta.path() + targetMeta.baseName() + "_meta" + targetMeta.hash() + ".mem"; | |
| 400 | + | |
| 401 | + FileList fileData; | |
| 402 | + | |
| 403 | + // Did we already read the data? If a memory gallery is registered under this metadata name, return its file list. | |
| 404 | + if (MemoryGalleries::galleries.contains(targetMeta)) | |
| 405 | + { | |
| 406 | + return MemoryGalleries::galleries[targetMeta].files(); | |
| 407 | + } | |
| 408 | + | |
| 409 | + TemplateList templates; | |
| 410 | + // We have to read the data in some form; does this gallery type contain matrices? | |
| 411 | + if ((QStringList() << "gal" << "mem" << "template").contains(file.suffix())) { | |
| 412 | + // Retrieve it block by block, dropping matrices from the templates as they are read. | |
| 413 | + QScopedPointer<Gallery> gallery(Gallery::make(file)); | |
| 414 | + gallery->set_readBlockSize(10); | |
| 415 | + bool done = false; | |
| 416 | + while (!done) | |
| 417 | + { | |
| 418 | + TemplateList tList = gallery->readBlock(&done); | |
| 419 | + for (int i=0; i < tList.size();i++) | |
| 420 | + { | |
| 421 | + tList[i].clear(); | |
| 422 | + templates.append(tList[i].file); | |
| 423 | + } | |
| 424 | + } | |
| 425 | + } | |
| 426 | + else { | |
| 427 | + // This is a gallery format that doesn't include matrices, so we can just read it. | |
| 428 | + QScopedPointer<Gallery> gallery(Gallery::make(file)); | |
| 429 | + templates= gallery->read(); | |
| 430 | + } | |
| 431 | + | |
| 432 | + if (cache) // Optionally write the metadata-only templates to a gallery named by targetMeta. | |
| 433 | + { | |
| 434 | + QScopedPointer<Gallery> memOutput(Gallery::make(targetMeta)); | |
| 435 | + memOutput->writeBlock(templates); | |
| 436 | + } | |
| 437 | + fileData = templates.files(); | |
| 438 | + return fileData; | |
| 439 | +} | |
| 440 | + | |
| 396 | 441 | /*! |
| 397 | 442 | * \ingroup galleries |
| 398 | 443 | * \brief Treats each line as a file. | ... | ... |
openbr/plugins/misc.cpp
| ... | ... | @@ -636,11 +636,8 @@ class OutputTransform : public TimeVaryingTransform |
| 636 | 636 | if (targetName.isEmpty() || queryName.isEmpty() || outputString.isEmpty()) |
| 637 | 637 | return; |
| 638 | 638 | |
| 639 | - QScopedPointer<Gallery> tGallery(Gallery::make(targetName)); | |
| 640 | - QScopedPointer<Gallery> qGallery(Gallery::make(queryName)); | |
| 641 | - | |
| 642 | - FileList targetFiles = tGallery->files(); | |
| 643 | - FileList queryFiles = qGallery->files(); | |
| 639 | + FileList targetFiles = FileList::fromGallery(targetName); | |
| 640 | + FileList queryFiles = FileList::fromGallery(queryName); | |
| 644 | 641 | |
| 645 | 642 | currentBlockRow = 0; |
| 646 | 643 | currentBlockCol = 0; | ... | ... |