Commit 0fc6c503df0e3c8b4de9d4f1e85de2edc87ee1c1

Authored by Charles Otto
1 parent 14026845

Add support for multi-threaded execution of time invariant stages of a stream

Showing 1 changed file with 142 additions and 30 deletions
sdk/plugins/stream.cpp
@@ -2,6 +2,8 @@ @@ -2,6 +2,8 @@
2 #include <QReadWriteLock> 2 #include <QReadWriteLock>
3 #include <QWaitCondition> 3 #include <QWaitCondition>
4 #include <QThreadPool> 4 #include <QThreadPool>
  5 +#include <QSemaphore>
  6 +#include <QMap>
5 7
6 #include "core/common.h" 8 #include "core/common.h"
7 #include "core/opencvutils.h" 9 #include "core/opencvutils.h"
@@ -38,7 +40,7 @@ public: @@ -38,7 +40,7 @@ public:
38 virtual void startInput() = 0; 40 virtual void startInput() = 0;
39 }; 41 };
40 42
41 -// For 1 - 1 boundaries, a buffer class with a single shared buffer, a mutex 43 +// For 1 - n boundaries, a buffer class with a single shared buffer, a mutex
42 // is used to serialize all access to the buffer. 44 // is used to serialize all access to the buffer.
43 class SingleBuffer : public SharedBuffer 45 class SingleBuffer : public SharedBuffer
44 { 46 {
@@ -98,6 +100,85 @@ private: @@ -98,6 +100,85 @@ private:
98 QList<FrameData *> buffer; 100 QList<FrameData *> buffer;
99 }; 101 };
100 102
  103 +// for n - 1 boundaries, multiple threads call addItem, the frames are
  104 +// sequenced based on FrameData::sequence_number, and calls to getItem
  105 +// receive them in that order
  106 +class SequencingBuffer : public SharedBuffer
  107 +{
  108 +public:
  109 + SequencingBuffer()
  110 + {
  111 + no_input = false;
  112 + next_target = 0;
  113 + }
  114 +
  115 + void stoppedInput()
  116 + {
  117 + QMutexLocker bufferLock(&bufferGuard);
  118 + no_input = true;
  119 + // Release anything waiting for input items.
  120 + availableInput.wakeAll();
  121 + }
  122 +
  123 + // There will be more input
  124 + void startInput()
  125 + {
  126 + QMutexLocker bufferLock(&bufferGuard);
  127 + no_input = false;
  128 + }
  129 +
  130 + void addItem(FrameData * input)
  131 + {
  132 + QMutexLocker bufferLock(&bufferGuard);
  133 +
  134 + buffer.insert(input->sequenceNumber, input);
  135 +
  136 + if (input->sequenceNumber == next_target) {
  137 + availableInput.wakeOne();
  138 + }
  139 + }
  140 +
  141 + FrameData * getItem()
  142 + {
  143 + QMutexLocker bufferLock(&bufferGuard);
  144 +
  145 + if (buffer.empty() || buffer.begin().key() != this->next_target) {
  146 + if (buffer.empty() && no_input) {
  147 + next_target = 0;
  148 + return NULL;
  149 + }
  150 + availableInput.wait(&bufferGuard);
  151 + }
  152 +
  153 + // availableInput was signalled, but the buffer is empty? We're done here.
  154 + if (buffer.empty()) {
  155 + next_target = 0;
  156 + return NULL;
  157 + }
  158 +
  159 + QMap<int, FrameData *>::Iterator result = buffer.begin();
  160 + //next_target++;
  161 + if (next_target != result.value()->sequenceNumber) {
  162 + qWarning("mismatched targets!");
  163 + }
  164 +
  165 + next_target = next_target + 1;
  166 +
  167 + FrameData * output = result.value();
  168 + buffer.erase(result);
  169 + return output;
  170 + }
  171 +
  172 +private:
  173 + QMutex bufferGuard;
  174 + QWaitCondition availableInput;
  175 + bool no_input;
  176 +
  177 + int next_target;
  178 +
  179 + QMap<int, FrameData *> buffer;
  180 +};
  181 +
101 // For 1 - 1 boundaries, a double buffering scheme 182 // For 1 - 1 boundaries, a double buffering scheme
102 // Producer/consumer read/write from separate buffers, and switch if their 183 // Producer/consumer read/write from separate buffers, and switch if their
103 // buffer runs out/overflows. Synchronization is handled by a read/write lock 184 // buffer runs out/overflows. Synchronization is handled by a read/write lock
@@ -372,7 +453,6 @@ public: @@ -372,7 +453,6 @@ public:
372 if (input.empty()) { 453 if (input.empty()) {
373 actualSource = new VideoDataSource(0); 454 actualSource = new VideoDataSource(0);
374 open_res = actualSource->open(input); 455 open_res = actualSource->open(input);
375 - qDebug("created video resource status %d", open_res);  
376 } 456 }
377 else { 457 else {
378 // create frame dealer 458 // create frame dealer
@@ -402,37 +482,32 @@ class ProcessingStage : public QRunnable @@ -402,37 +482,32 @@ class ProcessingStage : public QRunnable
402 { 482 {
403 friend class StreamTransform; 483 friend class StreamTransform;
404 public: 484 public:
405 - ProcessingStage() 485 + ProcessingStage(int nThreads = 1)
406 { 486 {
  487 + thread_count = nThreads;
  488 + activeThreads.release(thread_count);
407 setAutoDelete(false); 489 setAutoDelete(false);
408 } 490 }
409 491
410 void markStart() 492 void markStart()
411 { 493 {
412 - QMutexLocker lock(&stoppedGuard);  
413 - stopped = false; 494 + activeThreads.acquire();
414 } 495 }
415 496
416 void waitStop() 497 void waitStop()
417 { 498 {
418 - stoppedGuard.lock();  
419 - while (!stopped)  
420 - {  
421 - waitStopped.wait(&stoppedGuard);  
422 - }  
423 - stoppedGuard.unlock(); 499 + // Wait until all threads have stopped
  500 + activeThreads.acquire(thread_count);
  501 + activeThreads.release(thread_count);
424 } 502 }
425 503
426 protected: 504 protected:
427 void markStop() 505 void markStop()
428 { 506 {
429 - QMutexLocker lock(&stoppedGuard);  
430 - stopped = true;  
431 - this->waitStopped.wakeAll(); 507 + activeThreads.release();
432 } 508 }
433 - QMutex stoppedGuard;  
434 - QWaitCondition waitStopped;  
435 - bool stopped; 509 + QSemaphore activeThreads;
  510 + int thread_count;
436 511
437 SharedBuffer * inputBuffer; 512 SharedBuffer * inputBuffer;
438 SharedBuffer * outputBuffer; 513 SharedBuffer * outputBuffer;
@@ -443,6 +518,7 @@ public: @@ -443,6 +518,7 @@ public:
443 // We should start, and enter a wait on input data 518 // We should start, and enter a wait on input data
444 void run() 519 void run()
445 { 520 {
  521 + markStart();
446 forever 522 forever
447 { 523 {
448 FrameData * currentItem = inputBuffer->getItem(); 524 FrameData * currentItem = inputBuffer->getItem();
@@ -455,9 +531,9 @@ public: @@ -455,9 +531,9 @@ public:
455 } 531 }
456 markStop(); 532 markStop();
457 } 533 }
458 -  
459 }; 534 };
460 535
  536 +
461 // No input buffer, instead we draw templates from some data source 537 // No input buffer, instead we draw templates from some data source
462 // Will be operated by the main thread for the stream 538 // Will be operated by the main thread for the stream
463 class FirstStage : public ProcessingStage 539 class FirstStage : public ProcessingStage
@@ -511,6 +587,7 @@ public: @@ -511,6 +587,7 @@ public:
511 class StreamTransform : public CompositeTransform 587 class StreamTransform : public CompositeTransform
512 { 588 {
513 Q_OBJECT 589 Q_OBJECT
  590 + int threads_per_multi_stage;
514 public: 591 public:
515 void train(const TemplateList & data) 592 void train(const TemplateList & data)
516 { 593 {
@@ -558,12 +635,11 @@ public: @@ -558,12 +635,11 @@ public:
558 635
559 // Start our processing stages 636 // Start our processing stages
560 for (int i=0; i < this->processingStages.size(); i++) { 637 for (int i=0; i < this->processingStages.size(); i++) {
561 - processingStages[i]->markStart();  
562 - processingThreads.start(processingStages[i]); 638 + int count = stage_variance[i] ? 1 : threads_per_multi_stage;
  639 + for (int j =0; j < count; j ++) processingThreads.start(processingStages[i]);
563 } 640 }
564 641
565 // Start the final stage 642 // Start the final stage
566 - collectionStage.markStart();  
567 processingThreads.start(&collectionStage); 643 processingThreads.start(&collectionStage);
568 644
569 // Run the read stage ourselves 645 // Run the read stage ourselves
@@ -597,18 +673,28 @@ public: @@ -597,18 +673,28 @@ public:
597 // Create and link stages 673 // Create and link stages
598 void init() 674 void init()
599 { 675 {
600 - // Set up the thread pool, 1 stage for each transform, as well as first  
601 - // and last stages, but the first stage is operated by the thread that  
602 - // calls project so the pool only needs nTransforms+1 total.  
603 - processingThreads.setMaxThreadCount(transforms.size() + 1);  
604 - 676 + int thread_count = 0;
  677 + threads_per_multi_stage = 4;
605 stage_variance.reserve(transforms.size()); 678 stage_variance.reserve(transforms.size());
606 foreach (const br::Transform *transform, transforms) { 679 foreach (const br::Transform *transform, transforms) {
607 stage_variance.append(transform->timeVarying()); 680 stage_variance.append(transform->timeVarying());
  681 + thread_count += transform->timeVarying() ? 1 : threads_per_multi_stage;
608 } 682 }
609 683
610 - // buffer 0 -- output buffer for the read stage  
611 - sharedBuffers.append(new DoubleBuffer()); 684 + // Set up the thread pool, 1 stage for each transform, as well as first
  685 + // and last stages, but the first stage is operated by the thread that
  686 + // calls project so the pool only needs nTransforms+1 total.
  687 + processingThreads.setMaxThreadCount(thread_count + 1);
  688 +
  689 +
  690 + // buffer 0 -- output buffer for the read stage, input buffer for
  691 + // first transform. Is that transform time-varying?
  692 + if (stage_variance[0])
  693 + sharedBuffers.append(new DoubleBuffer());
  694 + // If not, we can run multiple threads
  695 + else
  696 + sharedBuffers.append(new SingleBuffer());
  697 +
612 readStage.outputBuffer = sharedBuffers.last(); 698 readStage.outputBuffer = sharedBuffers.last();
613 readStage.stage_id = 0; 699 readStage.stage_id = 0;
614 700
@@ -618,13 +704,39 @@ public: @@ -618,13 +704,39 @@ public:
618 for (int i =0; i < transforms.size(); i++) 704 for (int i =0; i < transforms.size(); i++)
619 { 705 {
620 // Set up this stage 706 // Set up this stage
621 - processingStages.append(new ProcessingStage()); 707 + processingStages.append(new ProcessingStage(stage_variance[i] ? 1 : threads_per_multi_stage));
622 708
623 processingStages.last()->stage_id = next_stage_id++; 709 processingStages.last()->stage_id = next_stage_id++;
624 processingStages.last()->inputBuffer = sharedBuffers[lastBufferIdx]; 710 processingStages.last()->inputBuffer = sharedBuffers[lastBufferIdx];
625 lastBufferIdx++; 711 lastBufferIdx++;
626 712
627 - sharedBuffers.append(new DoubleBuffer()); 713 + // This stage's output buffer, next stage's input buffer. If this is
  714 + // the last transform, the next stage is the (time varying) collection
  715 + // stage
  716 + bool next_variance = (i+1) < transforms.size() ? stage_variance[i+1] : true;
  717 + bool current_variance = stage_variance[i];
  718 + // if this is a single threaded stage
  719 + if (current_variance)
  720 + {
  721 + // 1 - 1 case
  722 + if (next_variance)
  723 + sharedBuffers.append(new DoubleBuffer());
  724 + // 1 - n case
  725 + else
  726 + sharedBuffers.append(new SingleBuffer());
  727 + }
  728 + // This is a multi-threaded stage
  729 + else
  730 + {
  731 + // If the next stage is single threaded, we need to sequence our
  732 + // output (n - 1 case)
  733 + if (next_variance)
  734 + sharedBuffers.append(new SequencingBuffer());
  735 + // Otherwise, this is an n-n boundary and we don't need to
  736 + // adhere to any particular sequence
  737 + else
  738 + sharedBuffers.append(new SingleBuffer());
  739 + }
628 processingStages.last()->outputBuffer = sharedBuffers.last(); 740 processingStages.last()->outputBuffer = sharedBuffers.last();
629 processingStages.last()->transform = transforms[i]; 741 processingStages.last()->transform = transforms[i];
630 } 742 }