Commit 0fd33775723c650e7ff7105aec0b2536dcb3a49d

Authored by Scott Klum
2 parents 170384ce cf6ae36b

Merge branch 'cascade'

Showing 1 changed file with 254 additions and 251 deletions
openbr/core/boost.cpp
1 1 #include <opencv2/imgproc/imgproc.hpp>
  2 +#include <openbr/core/opencvutils.h>
  3 +#include <QDebug>
2 4  
3 5 #include "boost.h"
4 6 #include "cxmisc.h"
... ... @@ -131,10 +133,10 @@ static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, b
131 133  
132 134 //------------------------------------- FeatureEvaluator ---------------------------------------
133 135  
134   -void FeatureEvaluator::init(Representation *_representation, int _maxSampleCount)
  136 +void FeatureEvaluator::init(Representation *_representation, int maxSampleCount)
135 137 {
136 138 representation = _representation;
137   - cls.create( (int)_maxSampleCount, 1, CV_32FC1 );
  139 + cls.create(maxSampleCount, 1, CV_32FC1);
138 140 }
139 141  
140 142 void FeatureEvaluator::setImage(const Template &src, uchar clsLabel, int idx)
... ... @@ -173,10 +175,13 @@ struct CascadeBoostTrainData : CvDTreeTrainData
173 175 virtual void setData(const FeatureEvaluator* _featureEvaluator,
174 176 int _numSamples, int _precalcValBufSize, int _precalcIdxBufSize,
175 177 const CvDTreeParams& _params=CvDTreeParams());
  178 +
  179 + void initVarType();
176 180 void precalculate();
177 181  
178 182 virtual CvDTreeNode* subsample_data(const CvMat* _subsample_idx);
179 183  
  184 + virtual const int* getBufferValues(CvDTreeNode* n, int* labelsBuf, uint64_t vi);
180 185 virtual const int* get_class_labels(CvDTreeNode* n, int* labelsBuf);
181 186 virtual const int* get_cv_labels(CvDTreeNode* n, int* labelsBuf);
182 187 virtual const int* get_sample_indices(CvDTreeNode* n, int* indicesBuf);
... ... @@ -188,8 +193,10 @@ struct CascadeBoostTrainData : CvDTreeTrainData
188 193 virtual void free_train_data();
189 194  
190 195 const FeatureEvaluator* featureEvaluator;
  196 +
191 197 cv::Mat valCache; // precalculated feature values (CV_32FC1)
192 198 CvMat _resp; // for casting
  199 +
193 200 int numPrecalcVal, numPrecalcIdx, channels;
194 201 };
195 202  
... ... @@ -370,6 +377,19 @@ CascadeBoostTrainData::CascadeBoostTrainData(const FeatureEvaluator* _featureEva
370 377 shared = true;
371 378 set_params( _params );
372 379 max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
  380 +
  381 + initVarType();
  382 +
  383 + int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
  384 + int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
  385 + treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
  386 + tree_storage = cvCreateMemStorage( treeBlockSize );
  387 + node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
  388 + split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
  389 +}
  390 +
  391 +void CascadeBoostTrainData::initVarType()
  392 +{
373 393 var_type = cvCreateMat( 1, var_count + 2, CV_32SC(channels) );
374 394 if ( featureEvaluator->getMaxCatCount() > 0 )
375 395 {
... ... @@ -392,13 +412,6 @@ CascadeBoostTrainData::CascadeBoostTrainData(const FeatureEvaluator* _featureEva
392 412 }
393 413 var_type->data.i[var_count] = cat_var_count;
394 414 var_type->data.i[var_count+1] = cat_var_count+1;
395   -
396   - int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
397   - int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
398   - treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
399   - tree_storage = cvCreateMemStorage( treeBlockSize );
400   - node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
401   - split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
402 415 }
403 416  
404 417 CascadeBoostTrainData::CascadeBoostTrainData(const FeatureEvaluator* _featureEvaluator,
... ... @@ -449,14 +462,20 @@ void CascadeBoostTrainData::setData( const FeatureEvaluator* _featureEvaluator,
449 462 if (sample_count < 65536)
450 463 is_buf_16u = true;
451 464  
  465 + // 1048576 is the number of bytes in a megabyte
452 466 numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count );
453   - numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. /
454   - ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );
  467 + qDebug("MB required to cache all %d features: %d", var_count, (sizeof(float)*(uint64_t)sample_count*var_count)/1048576);
  468 + qDebug("Features cached: %.2f", float(numPrecalcVal)/var_count);
  469 +
  470 + numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. / ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );
  471 + qDebug("MB required to cache all %d sorted indices: %d", var_count, ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*(uint64_t)sample_count*var_count)/1048576);
  472 + qDebug("Indices cached: %.2f", float(numPrecalcIdx)/var_count);
455 473  
456 474 assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 );
457 475  
458 476 valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
459 477 var_type = cvCreateMat( 1, var_count + 2, CV_32SC(channels) );
  478 +
460 479 if ( featureEvaluator->getMaxCatCount() > 0 )
461 480 {
462 481 numPrecalcIdx = 0;
... ... @@ -478,15 +497,18 @@ void CascadeBoostTrainData::setData( const FeatureEvaluator* _featureEvaluator,
478 497 }
479 498 var_type->data.i[var_count] = cat_var_count;
480 499 var_type->data.i[var_count+1] = cat_var_count+1;
  500 +
  501 + initVarType();
  502 +
481 503 work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
482 504 buf_count = 2;
483 505  
484 506 buf_size = -1; // the member buf_size is obsolete
485 507  
486 508 effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
  509 +
487 510 effective_buf_width = sample_count;
488 511 effective_buf_height = work_var_count+1;
489   -
490 512 if (effective_buf_width >= effective_buf_height)
491 513 effective_buf_height *= buf_count;
492 514 else
... ... @@ -509,8 +531,7 @@ void CascadeBoostTrainData::setData( const FeatureEvaluator* _featureEvaluator,
509 531 // now calculate the maximum size of split,
510 532 // create memory storage that will keep nodes and splits of the decision tree
511 533 // allocate root node and the buffer for the whole training data
512   - int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
513   - (MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
  534 + int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
514 535 int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
515 536 treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
516 537 tree_storage = cvCreateMemStorage( treeBlockSize );
... ... @@ -527,9 +548,9 @@ void CascadeBoostTrainData::setData( const FeatureEvaluator* _featureEvaluator,
527 548  
528 549 // set sample labels
529 550 if (is_buf_16u)
530   - udst = (unsigned short*)(buf->data.s + work_var_count*sample_count);
  551 + udst = (unsigned short*)(buf->data.s + (uint64)work_var_count*sample_count);
531 552 else
532   - idst = buf->data.i + work_var_count*sample_count;
  553 + idst = buf->data.i + (uint64)work_var_count*sample_count;
533 554  
534 555 for (int si = 0; si < sample_count; si++)
535 556 {
... ... @@ -568,7 +589,7 @@ const int* CascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsB
568 589 int nodeSampleCount = n->sample_count;
569 590 int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );
570 591  
571   - int* sampleIndicesBuf = labelsBuf; //
  592 + int* sampleIndicesBuf = labelsBuf;
572 593 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
573 594 for( int si = 0; si < nodeSampleCount; si++ )
574 595 {
... ... @@ -578,79 +599,85 @@ const int* CascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsB
578 599 return labelsBuf;
579 600 }
580 601  
  602 +const int* CascadeBoostTrainData::getBufferValues(CvDTreeNode* n, int* indicesBuf, uint64_t vi)
  603 +{
  604 + const int* cat_values = 0;
  605 + if (!is_buf_16u)
  606 + cat_values = buf->data.i + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset;
  607 + else {
  608 + const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset);
  609 + for (int i = 0; i < n->sample_count; i++)
  610 + indicesBuf[i] = short_values[i];
  611 + cat_values = indicesBuf;
  612 + }
  613 +
  614 + return cat_values;
  615 +}
  616 +
581 617 const int* CascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
582 618 {
583   - return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
  619 + return getBufferValues(n,indicesBuf,get_work_var_count());
584 620 }
585 621  
586   -const int* CascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
  622 +const int* CascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* indicesBuf )
587 623 {
588   - return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
  624 + return getBufferValues(n,indicesBuf,get_work_var_count()-1);
589 625 }
590 626  
591   -void CascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
592   - const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
  627 +void CascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf, const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
593 628 {
594 629 int nodeSampleCount = n->sample_count;
  630 +
  631 + // For this node, get our sample indices
595 632 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
596 633  
597   - if ( vi < numPrecalcIdx )
598   - {
599   - if( !is_buf_16u )
600   - *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset;
601   - else
602   - {
603   - const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
604   - vi*sample_count + n->offset );
605   - for( int i = 0; i < nodeSampleCount; i++ )
  634 + // For this feature (this code refers to features as values, hence vi == value index),
  635 + // have we precalculated (presorted) the training samples by their feature response?
  636 + if (vi < numPrecalcIdx) {
  637 + if (!is_buf_16u)
  638 + *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + (uint64)vi*sample_count + n->offset;
  639 + else {
  640 + const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() + (uint64)vi*sample_count + n->offset );
  641 + for (int i = 0; i < nodeSampleCount; i++)
606 642 sortedIndicesBuf[i] = shortIndices[i];
607   -
608 643 *sortedIndices = sortedIndicesBuf;
609 644 }
610 645  
611   - if( vi < numPrecalcVal )
612   - {
613   - for( int i = 0; i < nodeSampleCount; i++ )
614   - {
  646 + // For this feature, have we precalculated all of the feature responses?
  647 + if (vi < numPrecalcVal) {
  648 + for (int i = 0; i < nodeSampleCount; i++) {
615 649 int idx = (*sortedIndices)[i];
616 650 idx = sampleIndices[idx];
617   - ordValuesBuf[i] = valCache.at<float>( vi, idx);
  651 + ordValuesBuf[i] = valCache.at<float>(vi, idx);
618 652 }
619   - }
620   - else
621   - {
622   - for( int i = 0; i < nodeSampleCount; i++ )
623   - {
  653 + } else {
  654 + for (int i = 0; i < nodeSampleCount; i++) {
624 655 int idx = (*sortedIndices)[i];
625 656 idx = sampleIndices[idx];
626   - ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
  657 + ordValuesBuf[i] = (*featureEvaluator)(vi, idx);
627 658 }
628 659 }
629   - }
630   - else // vi >= numPrecalcIdx
631   - {
  660 + } else {
632 661 cv::AutoBuffer<float> abuf(nodeSampleCount);
633 662 float* sampleValues = &abuf[0];
634 663  
635   - if ( vi < numPrecalcVal )
636   - {
637   - for( int i = 0; i < nodeSampleCount; i++ )
638   - {
  664 + if (vi < numPrecalcVal) {
  665 + for (int i = 0; i < nodeSampleCount; i++) {
639 666 sortedIndicesBuf[i] = i;
640 667 sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] );
641 668 }
642   - }
643   - else
644   - {
645   - for( int i = 0; i < nodeSampleCount; i++ )
646   - {
  669 + } else {
  670 + for (int i = 0; i < nodeSampleCount; i++) {
647 671 sortedIndicesBuf[i] = i;
648 672 sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]);
649 673 }
650 674 }
  675 +
651 676 icvSortIntAux( sortedIndicesBuf, nodeSampleCount, &sampleValues[0] );
652   - for( int i = 0; i < nodeSampleCount; i++ )
  677 +
  678 + for (int i = 0; i < nodeSampleCount; i++)
653 679 ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]];
  680 +
654 681 *sortedIndices = sortedIndicesBuf;
655 682 }
656 683  
... ... @@ -660,7 +687,8 @@ void CascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ord
660 687 const int* CascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
661 688 {
662 689 int nodeSampleCount = n->sample_count;
663   - int* sampleIndicesBuf = catValuesBuf; //
  690 + int* sampleIndicesBuf = catValuesBuf;
  691 +
664 692 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
665 693  
666 694 if ( vi < numPrecalcVal )
... ... @@ -691,112 +719,117 @@ float CascadeBoostTrainData::getVarValue( int vi, int si )
691 719 return (*featureEvaluator)( vi, si );
692 720 }
693 721  
694   -struct FeatureIdxOnlyPrecalc : ParallelLoopBody
  722 +struct Precalc : ParallelLoopBody
  723 +{
  724 + const FeatureEvaluator* featureEvaluator;
  725 + int sampleCount;
  726 +
  727 + Precalc(const FeatureEvaluator* featureEvaluator, int sampleCount) :
  728 + featureEvaluator(featureEvaluator),
  729 + sampleCount(sampleCount)
  730 + {}
  731 +
  732 + virtual void operator()(const Range& range) const = 0;
  733 +};
  734 +
  735 +struct IndexPrecalc : Precalc
695 736 {
696   - FeatureIdxOnlyPrecalc( const FeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
  737 + int* idst;
  738 + unsigned short* udst;
  739 + bool isBufShort;
  740 +
  741 + IndexPrecalc(const FeatureEvaluator* featureEvaluator, CvMat* buf, int sampleCount, bool isBufShort) :
  742 + Precalc(featureEvaluator, sampleCount),
  743 + isBufShort(isBufShort)
  744 + {
  745 + udst = (unsigned short*)buf->data.s;
  746 + idst = buf->data.i;
  747 + }
  748 +
  749 + void setBuffer(int fi, int si) const
  750 + {
  751 + if (isBufShort) *(udst + (uint64)fi*sampleCount + si) = (unsigned short)si;
  752 + else *(idst + (uint64)fi*sampleCount + si) = si;
  753 + }
  754 +
  755 + void sortBuffer(int fi, float *valCachePtr) const
697 756 {
698   - featureEvaluator = _featureEvaluator;
699   - sample_count = _sample_count;
700   - udst = (unsigned short*)_buf->data.s;
701   - idst = _buf->data.i;
702   - is_buf_16u = _is_buf_16u;
  757 + if (isBufShort) icvSortUShAux(udst + (uint64)fi*sampleCount, sampleCount, valCachePtr);
  758 + else icvSortIntAux(idst + (uint64)fi*sampleCount, sampleCount, valCachePtr);
703 759 }
704   - void operator()( const Range& range ) const
  760 +
  761 + virtual void operator()(const Range& range) const
705 762 {
706   - cv::AutoBuffer<float> valCache(sample_count);
  763 + cv::AutoBuffer<float> valCache(sampleCount);
707 764 float* valCachePtr = (float*)valCache;
708   - for ( int fi = range.start; fi < range.end; fi++)
709   - {
710   - for( int si = 0; si < sample_count; si++ )
711   - {
712   - valCachePtr[si] = (*featureEvaluator)( fi, si );
713   - if ( is_buf_16u )
714   - *(udst + fi*sample_count + si) = (unsigned short)si;
715   - else
716   - *(idst + fi*sample_count + si) = si;
  765 + for (int fi = range.start; fi < range.end; fi++) {
  766 + for (int si = 0; si < sampleCount; si++) {
  767 + valCachePtr[si] = (*featureEvaluator)(fi, si);
  768 + setBuffer(fi, si);
717 769 }
718   - if ( is_buf_16u )
719   - icvSortUShAux( udst + fi*sample_count, sample_count, valCachePtr );
720   - else
721   - icvSortIntAux( idst + fi*sample_count, sample_count, valCachePtr );
  770 + sortBuffer(fi, valCachePtr);
722 771 }
723 772 }
724   - const FeatureEvaluator* featureEvaluator;
725   - int sample_count;
726   - int* idst;
727   - unsigned short* udst;
728   - bool is_buf_16u;
729 773 };
730 774  
731   -struct FeatureValAndIdxPrecalc : ParallelLoopBody
  775 +struct FeatureAndIndexPrecalc : IndexPrecalc
732 776 {
733   - FeatureValAndIdxPrecalc( const FeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
734   - {
735   - featureEvaluator = _featureEvaluator;
736   - valCache = _valCache;
737   - sample_count = _sample_count;
738   - udst = (unsigned short*)_buf->data.s;
739   - idst = _buf->data.i;
740   - is_buf_16u = _is_buf_16u;
741   - }
742   - void operator()( const Range& range ) const
  777 + Mat *valCache;
  778 +
  779 + FeatureAndIndexPrecalc(const FeatureEvaluator* featureEvaluator, CvMat* buf, Mat* valCache, int sampleCount, bool isBufShort) :
  780 + IndexPrecalc(featureEvaluator, buf, sampleCount, isBufShort),
  781 + valCache(valCache)
  782 + {}
  783 +
  784 + virtual void operator()(const Range& range) const
743 785 {
744   - for ( int fi = range.start; fi < range.end; fi++)
745   - {
746   - for( int si = 0; si < sample_count; si++ )
747   - {
748   - valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
749   - if ( is_buf_16u )
750   - *(udst + fi*sample_count + si) = (unsigned short)si;
751   - else
752   - *(idst + fi*sample_count + si) = si;
  786 + for (int fi = range.start; fi < range.end; fi++) {
  787 + for (int si = 0; si < sampleCount; si++) {
  788 + valCache->at<float>(fi,si) = (*featureEvaluator)(fi, si);
  789 + setBuffer(fi, si);
753 790 }
754   - if ( is_buf_16u )
755   - icvSortUShAux( udst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
756   - else
757   - icvSortIntAux( idst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
  791 + sortBuffer(fi, valCache->ptr<float>(fi));
758 792 }
759 793 }
760   - const FeatureEvaluator* featureEvaluator;
761   - Mat* valCache;
762   - int sample_count;
763   - int* idst;
764   - unsigned short* udst;
765   - bool is_buf_16u;
766 794 };
767 795  
768   -struct FeatureValOnlyPrecalc : ParallelLoopBody
  796 +struct FeaturePrecalc : Precalc
769 797 {
770   - FeatureValOnlyPrecalc( const FeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
771   - {
772   - featureEvaluator = _featureEvaluator;
773   - valCache = _valCache;
774   - sample_count = _sample_count;
775   - }
776   - void operator()( const Range& range ) const
  798 + Mat *valCache;
  799 +
  800 + FeaturePrecalc(const FeatureEvaluator* featureEvaluator, Mat* valCache, int sampleCount) :
  801 + Precalc(featureEvaluator, sampleCount),
  802 + valCache(valCache)
  803 + {}
  804 +
  805 + virtual void operator()(const Range& range) const
777 806 {
778   - for ( int fi = range.start; fi < range.end; fi++)
779   - for( int si = 0; si < sample_count; si++ )
780   - valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
  807 + for (int fi = range.start; fi < range.end; fi++)
  808 + for (int si = 0; si < sampleCount; si++)
  809 + valCache->at<float>(fi,si) = (*featureEvaluator)(fi, si);
781 810 }
782   - const FeatureEvaluator* featureEvaluator;
783   - Mat* valCache;
784   - int sample_count;
785 811 };
786 812  
787 813 void CascadeBoostTrainData::precalculate()
788 814 {
789   - int minNum = MIN( numPrecalcVal, numPrecalcIdx);
  815 + int minPrecalc = std::min(numPrecalcVal, numPrecalcIdx);
  816 +
  817 + qDebug() << "Starting precalculation...";
790 818  
791 819 QTime time;
792 820 time.start();
793 821  
794   - parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
795   - FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
796   - parallel_for_( Range(0, minNum),
797   - FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
798   - parallel_for_( Range(minNum, numPrecalcVal),
799   - FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
  822 + // Compute features and sort training samples for feature indices we are not going to cache
  823 + parallel_for_(Range(numPrecalcVal, numPrecalcIdx),
  824 + IndexPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0));
  825 +
  826 +    // Compute features and sort training samples for feature indices we are going to cache
  827 + parallel_for_(Range(0, minPrecalc),
  828 + FeatureAndIndexPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0));
  829 +
  830 + // Compute feature values for feature indices for which we are not going to sort training samples
  831 + parallel_for_(Range(minPrecalc, numPrecalcVal),
  832 + FeaturePrecalc(featureEvaluator, &valCache, sample_count));
800 833  
801 834 cout << "Precalculation time (ms): " << time.elapsed() << endl;
802 835 }
... ... @@ -830,89 +863,84 @@ CvDTreeNode* CascadeBoostTree::predict( int sampleIdx ) const
830 863 return node;
831 864 }
832 865  
  866 +// This function splits the training data from the parent node into training
  867 +// data for both child nodes
833 868 void CascadeBoostTree::split_node_data( CvDTreeNode* node )
834 869 {
835   - int n = node->sample_count, nl, nr, scount = data->sample_count;
836   - char* dir = (char*)data->direction->data.ptr;
837 870 CvDTreeNode *left = 0, *right = 0;
  871 +
  872 + uint64_t nodeSampleCount = node->sample_count;
  873 + uint64_t sampleCount = data->sample_count;
  874 + uint64_t nLeft, nRight;
  875 +
  876 + int workVarCount = data->get_work_var_count();
  877 +
  878 + char* dir = (char*)data->direction->data.ptr;
838 879 int* newIdx = data->split_buf->data.i;
839 880 int newBufIdx = data->get_child_buf_idx( node );
840   - int workVarCount = data->get_work_var_count();
841 881 CvMat* buf = data->buf;
842 882 size_t length_buf_row = data->get_length_subbuf();
843   - cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
  883 + cv::AutoBuffer<uchar> inn_buf(nodeSampleCount*(3*sizeof(int)+sizeof(float)));
844 884 int* tempBuf = (int*)(uchar*)inn_buf;
845   - bool splitInputData;
846 885  
847 886 complete_node_dir(node);
848 887  
849   - for( int i = nl = nr = 0; i < n; i++ )
  888 + for (uint64_t i = nLeft = nRight = 0; i < nodeSampleCount; i++)
850 889 {
851 890 int d = dir[i];
852 891 // initialize new indices for splitting ordered variables
853   - newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
854   - nr += d;
855   - nl += d^1;
  892 + newIdx[i] = (nLeft & (d-1)) | (nRight & -d); // d ? ri : li
  893 + nRight += d;
  894 + nLeft += d^1;
856 895 }
857 896  
858   - node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
859   - node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );
  897 + node->left = left = data->new_node( node, nLeft, newBufIdx, node->offset );
  898 + node->right = right = data->new_node( node, nRight, newBufIdx, node->offset + nLeft );
860 899  
861   - splitInputData = node->depth + 1 < data->params.max_depth &&
862   - (node->left->sample_count > data->params.min_sample_count ||
863   - node->right->sample_count > data->params.min_sample_count);
  900 + bool splitInputData = node->depth + 1 < data->params.max_depth && (node->left->sample_count > data->params.min_sample_count || node->right->sample_count > data->params.min_sample_count);
864 901  
865   - // split ordered variables, keep both halves sorted.
866   - for( int vi = 0; vi < ((CascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
867   - {
  902 + const int numPreculatedIndices = ((CascadeBoostTrainData*)data)->numPrecalcIdx;
  903 + for (int vi = 0; vi < numPreculatedIndices; vi++) {
868 904 int ci = data->get_var_type(vi);
869 905 if( ci >= 0 || !splitInputData )
870 906 continue;
871 907  
872 908 int n1 = node->get_num_valid(vi);
873   - float *src_val_buf = (float*)(tempBuf + n);
874   - int *src_sorted_idx_buf = (int*)(src_val_buf + n);
875   - int *src_sample_idx_buf = src_sorted_idx_buf + n;
  909 + float *src_val_buf = (float*)(tempBuf + nodeSampleCount);
  910 + int *src_sorted_idx_buf = (int*)(src_val_buf + nodeSampleCount);
  911 + int *src_sample_idx_buf = src_sorted_idx_buf + nodeSampleCount;
876 912 const int* src_sorted_idx = 0;
877 913 const float* src_val = 0;
  914 +
878 915 data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);
879 916  
880   - for(int i = 0; i < n; i++)
  917 + for(uint64_t i = 0; i < nodeSampleCount; i++)
881 918 tempBuf[i] = src_sorted_idx[i];
882 919  
883   - if (data->is_buf_16u)
884   - {
  920 + if (data->is_buf_16u) {
885 921 ushort *ldst, *rdst;
886   - ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
887   - vi*scount + left->offset);
888   - rdst = (ushort*)(ldst + nl);
  922 + ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row + vi*sampleCount + left->offset);
  923 + rdst = (ushort*)(ldst + nLeft);
889 924  
890 925 // split sorted
891   - for( int i = 0; i < n1; i++ )
892   - {
  926 + for (int i = 0; i < n1; i++) {
893 927 int idx = tempBuf[i];
894 928 int d = dir[idx];
895 929 idx = newIdx[idx];
896   - if (d)
897   - {
  930 + if (d) {
898 931 *rdst = (ushort)idx;
899 932 rdst++;
900   - }
901   - else
902   - {
  933 + } else {
903 934 *ldst = (ushort)idx;
904 935 ldst++;
905 936 }
906 937 }
907   - CV_Assert( n1 == n );
908 938 }
909 939 else
910 940 {
911 941 int *ldst, *rdst;
912   - ldst = buf->data.i + left->buf_idx*length_buf_row +
913   - vi*scount + left->offset;
914   - rdst = buf->data.i + right->buf_idx*length_buf_row +
915   - vi*scount + right->offset;
  942 + ldst = buf->data.i + left->buf_idx*length_buf_row + vi*sampleCount + left->offset;
  943 + rdst = buf->data.i + right->buf_idx*length_buf_row + vi*sampleCount + right->offset;
916 944  
917 945 // split sorted
918 946 for( int i = 0; i < n1; i++ )
... ... @@ -931,34 +959,26 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node )
931 959 ldst++;
932 960 }
933 961 }
934   - CV_Assert( n1 == n );
935 962 }
936 963 }
937 964  
938 965 // split cv_labels using newIdx relocation table
939   - int *src_lbls_buf = tempBuf + n;
  966 + int *src_lbls_buf = tempBuf + nodeSampleCount;
940 967 const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);
941 968  
942   - for(int i = 0; i < n; i++)
  969 + for(uint64_t i = 0; i < nodeSampleCount; i++)
943 970 tempBuf[i] = src_lbls[i];
944 971  
945   - if (data->is_buf_16u)
946   - {
947   - unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
948   - (workVarCount-1)*scount + left->offset);
949   - unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
950   - (workVarCount-1)*scount + right->offset);
  972 + if (data->is_buf_16u) {
  973 + unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row + (workVarCount-1)*sampleCount + left->offset);
  974 + unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row + (workVarCount-1)*sampleCount + right->offset);
951 975  
952   - for( int i = 0; i < n; i++ )
953   - {
  976 + for( uint64_t i = 0; i < nodeSampleCount; i++ ) {
954 977 int idx = tempBuf[i];
955   - if (dir[i])
956   - {
  978 + if (dir[i]) {
957 979 *rdst = (unsigned short)idx;
958 980 rdst++;
959   - }
960   - else
961   - {
  981 + } else {
962 982 *ldst = (unsigned short)idx;
963 983 ldst++;
964 984 }
... ... @@ -967,12 +987,10 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node )
967 987 }
968 988 else
969 989 {
970   - int *ldst = buf->data.i + left->buf_idx*length_buf_row +
971   - (workVarCount-1)*scount + left->offset;
972   - int *rdst = buf->data.i + right->buf_idx*length_buf_row +
973   - (workVarCount-1)*scount + right->offset;
  990 + int *ldst = buf->data.i + left->buf_idx*length_buf_row + (workVarCount-1)*sampleCount + left->offset;
  991 + int *rdst = buf->data.i + right->buf_idx*length_buf_row + (workVarCount-1)*sampleCount + right->offset;
974 992  
975   - for( int i = 0; i < n; i++ )
  993 + for( uint64_t i = 0; i < nodeSampleCount; i++ )
976 994 {
977 995 int idx = tempBuf[i];
978 996 if (dir[i])
... ... @@ -989,28 +1007,21 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node )
989 1007 }
990 1008  
991 1009 // split sample indices
992   - int *sampleIdx_src_buf = tempBuf + n;
  1010 + int *sampleIdx_src_buf = tempBuf + nodeSampleCount;
993 1011 const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);
994 1012  
995   - for(int i = 0; i < n; i++)
  1013 + for(uint64_t i = 0; i < nodeSampleCount; i++)
996 1014 tempBuf[i] = sampleIdx_src[i];
997 1015  
998   - if (data->is_buf_16u)
999   - {
1000   - unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
1001   - workVarCount*scount + left->offset);
1002   - unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
1003   - workVarCount*scount + right->offset);
1004   - for (int i = 0; i < n; i++)
1005   - {
  1016 + if (data->is_buf_16u) {
  1017 + unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row + workVarCount*sampleCount + left->offset);
  1018 + unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row + workVarCount*sampleCount + right->offset);
  1019 + for (uint64_t i = 0; i < nodeSampleCount; i++) {
1006 1020 unsigned short idx = (unsigned short)tempBuf[i];
1007   - if (dir[i])
1008   - {
  1021 + if (dir[i]) {
1009 1022 *rdst = idx;
1010 1023 rdst++;
1011   - }
1012   - else
1013   - {
  1024 + } else {
1014 1025 *ldst = idx;
1015 1026 ldst++;
1016 1027 }
... ... @@ -1018,15 +1029,14 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node )
1018 1029 }
1019 1030 else
1020 1031 {
1021   - int* ldst = buf->data.i + left->buf_idx*length_buf_row +
1022   - workVarCount*scount + left->offset;
1023   - int* rdst = buf->data.i + right->buf_idx*length_buf_row +
1024   - workVarCount*scount + right->offset;
1025   - for (int i = 0; i < n; i++)
  1032 + int* ldst = buf->data.i + left->buf_idx*length_buf_row + workVarCount*sampleCount + left->offset;
  1033 + int* rdst = buf->data.i + right->buf_idx*length_buf_row + workVarCount*sampleCount + right->offset;
  1034 + for (uint64_t i = 0; i < nodeSampleCount; i++)
1026 1035 {
1027 1036 int idx = tempBuf[i];
1028 1037 if (dir[i])
1029 1038 {
  1039 +
1030 1040 *rdst = idx;
1031 1041 rdst++;
1032 1042 }
... ... @@ -1038,10 +1048,10 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node )
1038 1048 }
1039 1049 }
1040 1050  
1041   - for( int vi = 0; vi < data->var_count; vi++ )
1042   - {
1043   - left->set_num_valid(vi, (int)(nl));
1044   - right->set_num_valid(vi, (int)(nr));
  1051 + const int variableCount = data->var_count;
  1052 + for (int vi = 0; vi < variableCount; vi++) {
  1053 + left->set_num_valid(vi, nLeft);
  1054 + right->set_num_valid(vi, nRight);
1045 1055 }
1046 1056  
1047 1057 // deallocate the parent node data that is not needed anymore
... ... @@ -1052,7 +1062,8 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node )
1052 1062  
1053 1063 void CascadeBoost::train(const FeatureEvaluator* _featureEvaluator,
1054 1064 int _numSamples,
1055   - int _precalcValBufSize, int _precalcIdxBufSize,
  1065 + int _precalcValBufSize,
  1066 + int _precalcIdxBufSize,
1056 1067 int _channels,
1057 1068 const CascadeBoostParams& _params)
1058 1069 {
... ... @@ -1074,8 +1085,7 @@ void CascadeBoost::train(const FeatureEvaluator* _featureEvaluator,
1074 1085 cout << "| N | HR | FA |" << endl;
1075 1086 cout << "+----+---------+---------+" << endl;
1076 1087  
1077   - do
1078   - {
  1088 + do {
1079 1089 CascadeBoostTree* tree = new CascadeBoostTree;
1080 1090 if (!tree->train( data, subsample_mask, this)) {
1081 1091 delete tree;
... ... @@ -1085,12 +1095,13 @@ void CascadeBoost::train(const FeatureEvaluator* _featureEvaluator,
1085 1095 classifiers.append(tree);
1086 1096 update_weights(tree);
1087 1097 trim_weights();
1088   - if (cvCountNonZero(subsample_mask) == 0)
  1098 + if (cvCountNonZero(subsample_mask) == 0) {
1089 1099 return;
  1100 + }
1090 1101 }
1091 1102 while (!isErrDesired() && (classifiers.size() < params.weak_count));
1092 1103  
1093   - clear();
  1104 + //clear();
1094 1105 }
1095 1106  
1096 1107 float CascadeBoost::predict(int sampleIdx, bool returnSum) const
... ... @@ -1101,6 +1112,7 @@ float CascadeBoost::predict(int sampleIdx, bool returnSum) const
1101 1112  
1102 1113 if (!returnSum)
1103 1114 sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
  1115 +
1104 1116 return (float)sum;
1105 1117 }
1106 1118  
... ... @@ -1125,6 +1137,7 @@ void CascadeBoost::update_weights(CvBoostTree* tree)
1125 1137 ( !tree ? n*sizeof(int) : 0 );
1126 1138 cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
1127 1139 uchar* cur_inn_buf_pos = (uchar*)inn_buf;
  1140 +
1128 1141 if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
1129 1142 {
1130 1143 step = CV_IS_MAT_CONT(data->responses_copy->type) ?
... ... @@ -1133,6 +1146,7 @@ void CascadeBoost::update_weights(CvBoostTree* tree)
1133 1146 sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
1134 1147 sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
1135 1148 }
  1149 +
1136 1150 CvMat* buf = data->buf;
1137 1151 size_t length_buf_row = data->get_length_subbuf();
1138 1152 if( !tree ) // before training the first tree, initialize weights and other parameters
... ... @@ -1156,37 +1170,26 @@ void CascadeBoost::update_weights(CvBoostTree* tree)
1156 1170 weights = cvCreateMat( 1, n, CV_64F );
1157 1171 subtree_weights = cvCreateMat( 1, n + 2, CV_64F );
1158 1172  
1159   - if (data->is_buf_16u)
1160   - {
1161   - unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
1162   - data->data_root->offset + (data->work_var_count-1)*data->sample_count);
1163   - for( int i = 0; i < n; i++ )
1164   - {
1165   - // save original categorical responses {0,1}, convert them to {-1,1}
1166   - orig_response->data.i[i] = classLabels[i]*2 - 1;
1167   - // make all the samples active at start.
1168   - // later, in trim_weights() deactivate/reactive again some, if need
1169   - subsample_mask->data.ptr[i] = (uchar)1;
1170   - // make all the initial weights the same.
1171   - weights->data.db[i] = w0*p[classLabels[i]];
1172   - // set the labels to find (from within weak tree learning proc)
1173   - // the particular sample weight, and where to store the response.
  1173 + // set the labels to find (from within weak tree learning proc)
  1174 + // the particular sample weight, and where to store the response.
  1175 + if (data->is_buf_16u) {
  1176 + unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row + data->data_root->offset + (uint64)(data->work_var_count-1)*data->sample_count);
  1177 + for (int i = 0; i < n; i++)
1174 1178 labels[i] = (unsigned short)i;
1175   - }
1176   - }
1177   - else
1178   - {
1179   - int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
1180   - data->data_root->offset + (data->work_var_count-1)*data->sample_count;
1181   -
  1179 + } else {
  1180 + int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row + data->data_root->offset + (uint64)(data->work_var_count-1)*data->sample_count;
1182 1181 for( int i = 0; i < n; i++ )
1183   - {
1184   - // save original categorical responses {0,1}, convert them to {-1,1}
1185   - orig_response->data.i[i] = classLabels[i]*2 - 1;
1186   - subsample_mask->data.ptr[i] = (uchar)1;
1187   - weights->data.db[i] = w0*p[classLabels[i]];
1188 1182 labels[i] = i;
1189   - }
  1183 + }
  1184 +
  1185 + for (int i = 0; i < n; i++) {
  1186 + // save original categorical responses {0,1}, convert them to {-1,1}
  1187 + orig_response->data.i[i] = classLabels[i]*2 - 1;
  1188 + // make all the samples active at start.
  1189 + // later, in trim_weights() deactivate/reactive again some, if need
  1190 + subsample_mask->data.ptr[i] = (uchar)1;
  1191 + // make all the initial weights the same.
  1192 + weights->data.db[i] = w0*p[classLabels[i]];
1190 1193 }
1191 1194  
1192 1195 if( params.boost_type == LOGIT )
... ...