Commit 0fc6c503df0e3c8b4de9d4f1e85de2edc87ee1c1
1 parent
14026845
Add support for multi-threaded execution of time invariant stages of a stream
Showing
1 changed file
with
142 additions
and
30 deletions
sdk/plugins/stream.cpp
| @@ -2,6 +2,8 @@ | @@ -2,6 +2,8 @@ | ||
| 2 | #include <QReadWriteLock> | 2 | #include <QReadWriteLock> |
| 3 | #include <QWaitCondition> | 3 | #include <QWaitCondition> |
| 4 | #include <QThreadPool> | 4 | #include <QThreadPool> |
| 5 | +#include <QSemaphore> | ||
| 6 | +#include <QMap> | ||
| 5 | 7 | ||
| 6 | #include "core/common.h" | 8 | #include "core/common.h" |
| 7 | #include "core/opencvutils.h" | 9 | #include "core/opencvutils.h" |
| @@ -38,7 +40,7 @@ public: | @@ -38,7 +40,7 @@ public: | ||
| 38 | virtual void startInput() = 0; | 40 | virtual void startInput() = 0; |
| 39 | }; | 41 | }; |
| 40 | 42 | ||
| 41 | -// For 1 - 1 boundaries, a buffer class with a single shared buffer, a mutex | 43 | +// For 1 - n boundaries, a buffer class with a single shared buffer, a mutex |
| 42 | // is used to serialize all access to the buffer. | 44 | // is used to serialize all access to the buffer. |
| 43 | class SingleBuffer : public SharedBuffer | 45 | class SingleBuffer : public SharedBuffer |
| 44 | { | 46 | { |
| @@ -98,6 +100,85 @@ private: | @@ -98,6 +100,85 @@ private: | ||
| 98 | QList<FrameData *> buffer; | 100 | QList<FrameData *> buffer; |
| 99 | }; | 101 | }; |
| 100 | 102 | ||
| 103 | +// for n - 1 boundaries, multiple threads call addItem, the frames are | ||
| 104 | +// sequenced based on FrameData::sequence_number, and calls to getItem | ||
| 105 | +// receive them in that order | ||
| 106 | +class SequencingBuffer : public SharedBuffer | ||
| 107 | +{ | ||
| 108 | +public: | ||
| 109 | + SequencingBuffer() | ||
| 110 | + { | ||
| 111 | + no_input = false; | ||
| 112 | + next_target = 0; | ||
| 113 | + } | ||
| 114 | + | ||
| 115 | + void stoppedInput() | ||
| 116 | + { | ||
| 117 | + QMutexLocker bufferLock(&bufferGuard); | ||
| 118 | + no_input = true; | ||
| 119 | + // Release anything waiting for input items. | ||
| 120 | + availableInput.wakeAll(); | ||
| 121 | + } | ||
| 122 | + | ||
| 123 | + // There will be more input | ||
| 124 | + void startInput() | ||
| 125 | + { | ||
| 126 | + QMutexLocker bufferLock(&bufferGuard); | ||
| 127 | + no_input = false; | ||
| 128 | + } | ||
| 129 | + | ||
| 130 | + void addItem(FrameData * input) | ||
| 131 | + { | ||
| 132 | + QMutexLocker bufferLock(&bufferGuard); | ||
| 133 | + | ||
| 134 | + buffer.insert(input->sequenceNumber, input); | ||
| 135 | + | ||
| 136 | + if (input->sequenceNumber == next_target) { | ||
| 137 | + availableInput.wakeOne(); | ||
| 138 | + } | ||
| 139 | + } | ||
| 140 | + | ||
| 141 | + FrameData * getItem() | ||
| 142 | + { | ||
| 143 | + QMutexLocker bufferLock(&bufferGuard); | ||
| 144 | + | ||
| 145 | + if (buffer.empty() || buffer.begin().key() != this->next_target) { | ||
| 146 | + if (buffer.empty() && no_input) { | ||
| 147 | + next_target = 0; | ||
| 148 | + return NULL; | ||
| 149 | + } | ||
| 150 | + availableInput.wait(&bufferGuard); | ||
| 151 | + } | ||
| 152 | + | ||
| 153 | + // availableInput was signalled, but the buffer is empty? We're done here. | ||
| 154 | + if (buffer.empty()) { | ||
| 155 | + next_target = 0; | ||
| 156 | + return NULL; | ||
| 157 | + } | ||
| 158 | + | ||
| 159 | + QMap<int, FrameData *>::Iterator result = buffer.begin(); | ||
| 160 | + //next_target++; | ||
| 161 | + if (next_target != result.value()->sequenceNumber) { | ||
| 162 | + qWarning("mismatched targets!"); | ||
| 163 | + } | ||
| 164 | + | ||
| 165 | + next_target = next_target + 1; | ||
| 166 | + | ||
| 167 | + FrameData * output = result.value(); | ||
| 168 | + buffer.erase(result); | ||
| 169 | + return output; | ||
| 170 | + } | ||
| 171 | + | ||
| 172 | +private: | ||
| 173 | + QMutex bufferGuard; | ||
| 174 | + QWaitCondition availableInput; | ||
| 175 | + bool no_input; | ||
| 176 | + | ||
| 177 | + int next_target; | ||
| 178 | + | ||
| 179 | + QMap<int, FrameData *> buffer; | ||
| 180 | +}; | ||
| 181 | + | ||
| 101 | // For 1 - 1 boundaries, a double buffering scheme | 182 | // For 1 - 1 boundaries, a double buffering scheme |
| 102 | // Producer/consumer read/write from separate buffers, and switch if their | 183 | // Producer/consumer read/write from separate buffers, and switch if their |
| 103 | // buffer runs out/overflows. Synchronization is handled by a read/write lock | 184 | // buffer runs out/overflows. Synchronization is handled by a read/write lock |
| @@ -372,7 +453,6 @@ public: | @@ -372,7 +453,6 @@ public: | ||
| 372 | if (input.empty()) { | 453 | if (input.empty()) { |
| 373 | actualSource = new VideoDataSource(0); | 454 | actualSource = new VideoDataSource(0); |
| 374 | open_res = actualSource->open(input); | 455 | open_res = actualSource->open(input); |
| 375 | - qDebug("created video resource status %d", open_res); | ||
| 376 | } | 456 | } |
| 377 | else { | 457 | else { |
| 378 | // create frame dealer | 458 | // create frame dealer |
| @@ -402,37 +482,32 @@ class ProcessingStage : public QRunnable | @@ -402,37 +482,32 @@ class ProcessingStage : public QRunnable | ||
| 402 | { | 482 | { |
| 403 | friend class StreamTransform; | 483 | friend class StreamTransform; |
| 404 | public: | 484 | public: |
| 405 | - ProcessingStage() | 485 | + ProcessingStage(int nThreads = 1) |
| 406 | { | 486 | { |
| 487 | + thread_count = nThreads; | ||
| 488 | + activeThreads.release(thread_count); | ||
| 407 | setAutoDelete(false); | 489 | setAutoDelete(false); |
| 408 | } | 490 | } |
| 409 | 491 | ||
| 410 | void markStart() | 492 | void markStart() |
| 411 | { | 493 | { |
| 412 | - QMutexLocker lock(&stoppedGuard); | ||
| 413 | - stopped = false; | 494 | + activeThreads.acquire(); |
| 414 | } | 495 | } |
| 415 | 496 | ||
| 416 | void waitStop() | 497 | void waitStop() |
| 417 | { | 498 | { |
| 418 | - stoppedGuard.lock(); | ||
| 419 | - while (!stopped) | ||
| 420 | - { | ||
| 421 | - waitStopped.wait(&stoppedGuard); | ||
| 422 | - } | ||
| 423 | - stoppedGuard.unlock(); | 499 | + // Wait until all threads have stopped |
| 500 | + activeThreads.acquire(thread_count); | ||
| 501 | + activeThreads.release(thread_count); | ||
| 424 | } | 502 | } |
| 425 | 503 | ||
| 426 | protected: | 504 | protected: |
| 427 | void markStop() | 505 | void markStop() |
| 428 | { | 506 | { |
| 429 | - QMutexLocker lock(&stoppedGuard); | ||
| 430 | - stopped = true; | ||
| 431 | - this->waitStopped.wakeAll(); | 507 | + activeThreads.release(); |
| 432 | } | 508 | } |
| 433 | - QMutex stoppedGuard; | ||
| 434 | - QWaitCondition waitStopped; | ||
| 435 | - bool stopped; | 509 | + QSemaphore activeThreads; |
| 510 | + int thread_count; | ||
| 436 | 511 | ||
| 437 | SharedBuffer * inputBuffer; | 512 | SharedBuffer * inputBuffer; |
| 438 | SharedBuffer * outputBuffer; | 513 | SharedBuffer * outputBuffer; |
| @@ -443,6 +518,7 @@ public: | @@ -443,6 +518,7 @@ public: | ||
| 443 | // We should start, and enter a wait on input data | 518 | // We should start, and enter a wait on input data |
| 444 | void run() | 519 | void run() |
| 445 | { | 520 | { |
| 521 | + markStart(); | ||
| 446 | forever | 522 | forever |
| 447 | { | 523 | { |
| 448 | FrameData * currentItem = inputBuffer->getItem(); | 524 | FrameData * currentItem = inputBuffer->getItem(); |
| @@ -455,9 +531,9 @@ public: | @@ -455,9 +531,9 @@ public: | ||
| 455 | } | 531 | } |
| 456 | markStop(); | 532 | markStop(); |
| 457 | } | 533 | } |
| 458 | - | ||
| 459 | }; | 534 | }; |
| 460 | 535 | ||
| 536 | + | ||
| 461 | // No input buffer, instead we draw templates from some data source | 537 | // No input buffer, instead we draw templates from some data source |
| 462 | // Will be operated by the main thread for the stream | 538 | // Will be operated by the main thread for the stream |
| 463 | class FirstStage : public ProcessingStage | 539 | class FirstStage : public ProcessingStage |
| @@ -511,6 +587,7 @@ public: | @@ -511,6 +587,7 @@ public: | ||
| 511 | class StreamTransform : public CompositeTransform | 587 | class StreamTransform : public CompositeTransform |
| 512 | { | 588 | { |
| 513 | Q_OBJECT | 589 | Q_OBJECT |
| 590 | + int threads_per_multi_stage; | ||
| 514 | public: | 591 | public: |
| 515 | void train(const TemplateList & data) | 592 | void train(const TemplateList & data) |
| 516 | { | 593 | { |
| @@ -558,12 +635,11 @@ public: | @@ -558,12 +635,11 @@ public: | ||
| 558 | 635 | ||
| 559 | // Start our processing stages | 636 | // Start our processing stages |
| 560 | for (int i=0; i < this->processingStages.size(); i++) { | 637 | for (int i=0; i < this->processingStages.size(); i++) { |
| 561 | - processingStages[i]->markStart(); | ||
| 562 | - processingThreads.start(processingStages[i]); | 638 | + int count = stage_variance[i] ? 1 : threads_per_multi_stage; |
| 639 | + for (int j =0; j < count; j ++) processingThreads.start(processingStages[i]); | ||
| 563 | } | 640 | } |
| 564 | 641 | ||
| 565 | // Start the final stage | 642 | // Start the final stage |
| 566 | - collectionStage.markStart(); | ||
| 567 | processingThreads.start(&collectionStage); | 643 | processingThreads.start(&collectionStage); |
| 568 | 644 | ||
| 569 | // Run the read stage ourselves | 645 | // Run the read stage ourselves |
| @@ -597,18 +673,28 @@ public: | @@ -597,18 +673,28 @@ public: | ||
| 597 | // Create and link stages | 673 | // Create and link stages |
| 598 | void init() | 674 | void init() |
| 599 | { | 675 | { |
| 600 | - // Set up the thread pool, 1 stage for each transform, as well as first | ||
| 601 | - // and last stages, but the first stage is operated by the thread that | ||
| 602 | - // calls project so the pool only needs nTransforms+1 total. | ||
| 603 | - processingThreads.setMaxThreadCount(transforms.size() + 1); | ||
| 604 | - | 676 | + int thread_count = 0; |
| 677 | + threads_per_multi_stage = 4; | ||
| 605 | stage_variance.reserve(transforms.size()); | 678 | stage_variance.reserve(transforms.size()); |
| 606 | foreach (const br::Transform *transform, transforms) { | 679 | foreach (const br::Transform *transform, transforms) { |
| 607 | stage_variance.append(transform->timeVarying()); | 680 | stage_variance.append(transform->timeVarying()); |
| 681 | + thread_count += transform->timeVarying() ? 1 : threads_per_multi_stage; | ||
| 608 | } | 682 | } |
| 609 | 683 | ||
| 610 | - // buffer 0 -- output buffer for the read stage | ||
| 611 | - sharedBuffers.append(new DoubleBuffer()); | 684 | + // Set up the thread pool, 1 stage for each transform, as well as first |
| 685 | + // and last stages, but the first stage is operated by the thread that | ||
| 686 | + // calls project so the pool only needs nTransforms+1 total. | ||
| 687 | + processingThreads.setMaxThreadCount(thread_count + 1); | ||
| 688 | + | ||
| 689 | + | ||
| 690 | + // buffer 0 -- output buffer for the read stage, input buffer for | ||
| 691 | + // first transform. Is that transform time-varying? | ||
| 692 | + if (stage_variance[0]) | ||
| 693 | + sharedBuffers.append(new DoubleBuffer()); | ||
| 694 | + // If not, we can run multiple threads | ||
| 695 | + else | ||
| 696 | + sharedBuffers.append(new SingleBuffer()); | ||
| 697 | + | ||
| 612 | readStage.outputBuffer = sharedBuffers.last(); | 698 | readStage.outputBuffer = sharedBuffers.last(); |
| 613 | readStage.stage_id = 0; | 699 | readStage.stage_id = 0; |
| 614 | 700 | ||
| @@ -618,13 +704,39 @@ public: | @@ -618,13 +704,39 @@ public: | ||
| 618 | for (int i =0; i < transforms.size(); i++) | 704 | for (int i =0; i < transforms.size(); i++) |
| 619 | { | 705 | { |
| 620 | // Set up this stage | 706 | // Set up this stage |
| 621 | - processingStages.append(new ProcessingStage()); | 707 | + processingStages.append(new ProcessingStage(stage_variance[i] ? 1 : threads_per_multi_stage)); |
| 622 | 708 | ||
| 623 | processingStages.last()->stage_id = next_stage_id++; | 709 | processingStages.last()->stage_id = next_stage_id++; |
| 624 | processingStages.last()->inputBuffer = sharedBuffers[lastBufferIdx]; | 710 | processingStages.last()->inputBuffer = sharedBuffers[lastBufferIdx]; |
| 625 | lastBufferIdx++; | 711 | lastBufferIdx++; |
| 626 | 712 | ||
| 627 | - sharedBuffers.append(new DoubleBuffer()); | 713 | + // This stage's output buffer, next stage's input buffer. If this is |
| 714 | + // the last transform, the next stage is the (time varying) collection | ||
| 715 | + // stage | ||
| 716 | + bool next_variance = (i+1) < transforms.size() ? stage_variance[i+1] : true; | ||
| 717 | + bool current_variance = stage_variance[i]; | ||
| 718 | + // if this is a single threaded stage | ||
| 719 | + if (current_variance) | ||
| 720 | + { | ||
| 721 | + // 1 - 1 case | ||
| 722 | + if (next_variance) | ||
| 723 | + sharedBuffers.append(new DoubleBuffer()); | ||
| 724 | + // 1 - n case | ||
| 725 | + else | ||
| 726 | + sharedBuffers.append(new SingleBuffer()); | ||
| 727 | + } | ||
| 728 | + // This is a multi-threaded stage | ||
| 729 | + else | ||
| 730 | + { | ||
| 731 | + // If the next stage is single threaded, we need to sequence our | ||
| 732 | + // output (n - 1 case) | ||
| 733 | + if (next_variance) | ||
| 734 | + sharedBuffers.append(new SequencingBuffer()); | ||
| 735 | + // Otherwise, this is an n-n boundary and we don't need to | ||
| 736 | + // adhere to any particular sequence | ||
| 737 | + else | ||
| 738 | + sharedBuffers.append(new SingleBuffer()); | ||
| 739 | + } | ||
| 628 | processingStages.last()->outputBuffer = sharedBuffers.last(); | 740 | processingStages.last()->outputBuffer = sharedBuffers.last(); |
| 629 | processingStages.last()->transform = transforms[i]; | 741 | processingStages.last()->transform = transforms[i]; |
| 630 | } | 742 | } |