From 42c4fdadd5e1cb7937f1be3b66c5493e9098a2c1 Mon Sep 17 00:00:00 2001 From: Scott Klum Date: Tue, 29 Nov 2016 01:36:12 -0700 Subject: [PATCH] Code simplification by removing short type optimization --- openbr/core/boost.cpp | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 1 file changed, 64 insertions(+), 208 deletions(-) diff --git a/openbr/core/boost.cpp b/openbr/core/boost.cpp index 431439c..515b166 100644 --- a/openbr/core/boost.cpp +++ b/openbr/core/boost.cpp @@ -21,7 +21,6 @@ logRatio( double val ) #define CV_CMP_NUM_IDX(i,j) (aux[i] < aux[j]) static CV_IMPLEMENT_QSORT_EX( icvSortIntAux, int, CV_CMP_NUM_IDX, const float* ) -static CV_IMPLEMENT_QSORT_EX( icvSortUShAux, unsigned short, CV_CMP_NUM_IDX, const float* ) #define CV_THRESHOLD_EPS (0.00001F) @@ -186,7 +185,7 @@ struct CascadeBoostTrainData : CvDTreeTrainData return ((uint64_t)work_var_count + 1) * sample_count; } - virtual const int* getBufferValues(CvDTreeNode* n, int* labelsBuf, uint64_t vi); + virtual const int* getBufferValues(CvDTreeNode* n, uint64_t vi); virtual const int* get_class_labels(CvDTreeNode* n, int* labelsBuf); virtual const int* get_cv_labels(CvDTreeNode* n, int* labelsBuf); virtual const int* get_sample_indices(CvDTreeNode* n, int* indicesBuf); @@ -298,62 +297,29 @@ CvDTreeNode* CascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx int num_valid = data_root->get_num_valid(vi); CV_Assert( num_valid == sample_count ); - if (is_buf_16u) + int* idst_idx = buf->data.i + root->buf_idx*getLength() + (uint64_t)vi*sample_count + root->offset; + for( int i = 0; i < num_valid; i++ ) { - unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*getLength() + (uint64_t)vi*sample_count + data_root->offset); - for( int i = 0; i < num_valid; i++ ) - { - idx = src_idx[i]; - count_i = co[idx*2]; - if( count_i ) - for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ ) - udst_idx[j] = (unsigned short)cur_ofs; - } - } - else - { - int* idst_idx = buf->data.i + root->buf_idx*getLength() + (uint64_t)vi*sample_count + root->offset; - for( int i = 0; i < num_valid; i++ ) - { - idx = src_idx[i]; - count_i = co[idx*2]; - if( count_i ) - for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ ) - idst_idx[j] = cur_ofs; - } + idx = src_idx[i]; + count_i = co[idx*2]; + if( count_i ) + for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ ) + idst_idx[j] = cur_ofs; } } // subsample cv_lables const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf); - if (is_buf_16u) - { - unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*getLength() + (workVarCount-1)*sample_count + root->offset); - for( int i = 0; i < count; i++ ) - udst[i] = (unsigned short)src_lbls[sidx[i]]; - } - else - { - int* idst = buf->data.i + root->buf_idx*getLength() + - (workVarCount-1)*sample_count + root->offset; - for( int i = 0; i < count; i++ ) - idst[i] = src_lbls[sidx[i]]; - } + int* idst = buf->data.i + root->buf_idx*getLength() + + (workVarCount-1)*sample_count + root->offset; + for( int i = 0; i < count; i++ ) + idst[i] = src_lbls[sidx[i]]; // subsample sample_indices const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf); - if (is_buf_16u) - { - unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*getLength() + workVarCount*sample_count + root->offset); - for( int i = 0; i < count; i++ ) - sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]]; - } - else - { - int* sample_idx_dst = buf->data.i + root->buf_idx*getLength() + workVarCount*sample_count + root->offset; - for( int i = 0; i < count; i++ ) - sample_idx_dst[i] = sample_idx_src[sidx[i]]; - } + int* sample_idx_dst = buf->data.i + root->buf_idx*getLength() + workVarCount*sample_count + root->offset; + for( int i = 0; i < count; i++ ) + sample_idx_dst[i] = sample_idx_src[sidx[i]]; for( int vi = 0; vi < var_count; vi++ ) root->set_num_valid(vi, count); @@ -459,12 +425,10 @@ void CascadeBoostTrainData::setData( const FeatureEvaluator* _featureEvaluator, sample_count = _numSamples; is_buf_16u = false; - if (sample_count < 65536) - is_buf_16u = true; // 1048576 is the number of bytes in a megabyte numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count ); - numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. / ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count ); + numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. / (sizeof (int)*sample_count)), var_count ); assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 ); @@ -510,13 +474,9 @@ void CascadeBoostTrainData::setData( const FeatureEvaluator* _featureEvaluator, effective_buf_width *= buf_count; if (effective_buf_width * effective_buf_height != effective_buf_size) - { CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit"); - } - if ( is_buf_16u ) - buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC(channels) ); - else - buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC(channels) ); + + buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC(channels) ); cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC(channels) ); @@ -542,10 +502,7 @@ void CascadeBoostTrainData::setData( const FeatureEvaluator* _featureEvaluator, data_root = new_node( 0, sample_count, 0, 0 ); // set sample labels - if (is_buf_16u) - udst = (unsigned short*)(buf->data.s + (uint64_t)work_var_count*sample_count); - else - idst = buf->data.i + (uint64_t)work_var_count*sample_count; + idst = buf->data.i + (uint64_t)work_var_count*sample_count; for (int si = 0; si < sample_count; si++) { @@ -594,29 +551,21 @@ const int* CascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsB return labelsBuf; } -const int* CascadeBoostTrainData::getBufferValues(CvDTreeNode* n, int* indicesBuf, uint64_t vi) +const int* CascadeBoostTrainData::getBufferValues(CvDTreeNode* n, uint64_t vi) { - const int* cat_values = 0; - if (!is_buf_16u) - cat_values = buf->data.i + n->buf_idx*getLength() + vi*sample_count + n->offset; - else { - const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*getLength() + vi*sample_count + n->offset); - for (int i = 0; i < n->sample_count; i++) - indicesBuf[i] = short_values[i]; - cat_values = indicesBuf; - } - - return cat_values; + return buf->data.i + n->buf_idx*getLength() + vi*sample_count + n->offset; } -const int* CascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf ) +const int* CascadeBoostTrainData::get_sample_indices(CvDTreeNode* n, int* indicesBuf) { - return getBufferValues(n,indicesBuf,get_work_var_count()); + Q_UNUSED(indicesBuf) + return getBufferValues(n,get_work_var_count()); } -const int* CascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* indicesBuf ) +const int* CascadeBoostTrainData::get_cv_labels(CvDTreeNode* n, int* indicesBuf) { - return getBufferValues(n,indicesBuf,get_work_var_count()-1); + Q_UNUSED(indicesBuf) + return getBufferValues(n,get_work_var_count()-1); } void CascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf, const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf ) @@ -629,14 +578,7 @@ void CascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ord // For this feature (this code refers to features as values, hence vi == value index), // have we precalculated (presorted) the training samples by their feature response? if (vi < numPrecalcIdx) { - if (!is_buf_16u) - *sortedIndices = buf->data.i + n->buf_idx*getLength() + (uint64_t)vi*sample_count + n->offset; - else { - const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*getLength() + (uint64_t)vi*sample_count + n->offset ); - for (int i = 0; i < nodeSampleCount; i++) - sortedIndicesBuf[i] = shortIndices[i]; - *sortedIndices = sortedIndicesBuf; - } + *sortedIndices = getBufferValues(n, vi); // For this feature, have we precalculated all of the feature responses? if (vi < numPrecalcVal) { @@ -731,26 +673,20 @@ struct IndexPrecalc : Precalc { int* idst; unsigned short* udst; - bool isBufShort; - IndexPrecalc(const FeatureEvaluator* featureEvaluator, CvMat* buf, int sampleCount, bool isBufShort) : + IndexPrecalc(const FeatureEvaluator* featureEvaluator, CvMat* buf, int sampleCount) : Precalc(featureEvaluator, sampleCount), - isBufShort(isBufShort) - { - udst = (unsigned short*)buf->data.s; - idst = buf->data.i; - } + idst(buf->data.i) + {} void setBuffer(uint64_t fi, uint64_t si) const { - if (isBufShort) *(udst + fi*sampleCount + si) = (unsigned short)si; - else *(idst + fi*sampleCount + si) = si; + *(idst + fi*sampleCount + si) = si; } void sortBuffer(uint64_t fi, float *valCachePtr) const { - if (isBufShort) icvSortUShAux(udst + fi*sampleCount, sampleCount, valCachePtr); - else icvSortIntAux(idst + fi*sampleCount, sampleCount, valCachePtr); + icvSortIntAux(idst + fi*sampleCount, sampleCount, valCachePtr); } virtual void operator()(const Range& range) const @@ -771,8 +707,8 @@ struct FeatureAndIndexPrecalc : IndexPrecalc { Mat *valCache; - FeatureAndIndexPrecalc(const FeatureEvaluator* featureEvaluator, CvMat* buf, Mat* valCache, int sampleCount, bool isBufShort) : - IndexPrecalc(featureEvaluator, buf, sampleCount, isBufShort), + FeatureAndIndexPrecalc(const FeatureEvaluator* featureEvaluator, CvMat* buf, Mat* valCache, int sampleCount) : + IndexPrecalc(featureEvaluator, buf, sampleCount), valCache(valCache) {} @@ -816,11 +752,11 @@ void CascadeBoostTrainData::precalculate() // Compute features and sort training samples for feature indices we are not going to cache parallel_for_(Range(numPrecalcVal, numPrecalcIdx), - IndexPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0)); + IndexPrecalc(featureEvaluator, buf, sample_count)); // Compute features and sort training samples for features indices we are going to cache parallel_for_(Range(0, minPrecalc), - FeatureAndIndexPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0)); + FeatureAndIndexPrecalc(featureEvaluator, buf, &valCache, sample_count)); // Compute feature values for feature indices for which we are not going to sort training samples parallel_for_(Range(minPrecalc, numPrecalcVal), @@ -880,8 +816,7 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node ) complete_node_dir(node); - for (uint64_t i = nLeft = nRight = 0; i < nodeSampleCount; i++) - { + for (uint64_t i = nLeft = nRight = 0; i < nodeSampleCount; i++) { int d = dir[i]; // initialize new indices for splitting ordered variables newIdx[i] = (nLeft & (d-1)) | (nRight & -d); // d ? ri : li @@ -900,59 +835,27 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node ) if( ci >= 0 || !splitInputData ) continue; - int n1 = node->get_num_valid(vi); - float *src_val_buf = (float*)(tempBuf + nodeSampleCount); - int *src_sorted_idx_buf = (int*)(src_val_buf + nodeSampleCount); - int *src_sample_idx_buf = src_sorted_idx_buf + nodeSampleCount; - const int* src_sorted_idx = 0; - const float* src_val = 0; - - data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf); - + const int* src_sorted_idx = ((CascadeBoostTrainData*)data)->getBufferValues(node, vi); for(uint64_t i = 0; i < nodeSampleCount; i++) tempBuf[i] = src_sorted_idx[i]; - if (data->is_buf_16u) { - ushort *ldst, *rdst; - ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row + vi*sampleCount + left->offset); - rdst = (ushort*)(ldst + nLeft); - - // split sorted - for (int i = 0; i < n1; i++) { - int idx = tempBuf[i]; - int d = dir[idx]; - idx = newIdx[idx]; - if (d) { - *rdst = (ushort)idx; - rdst++; - } else { - *ldst = (ushort)idx; - ldst++; - } - } - } - else - { - int *ldst, *rdst; - ldst = buf->data.i + left->buf_idx*length_buf_row + vi*sampleCount + left->offset; - rdst = buf->data.i + right->buf_idx*length_buf_row + vi*sampleCount + right->offset; + int *ldst, *rdst; + ldst = buf->data.i + left->buf_idx*length_buf_row + vi*sampleCount + left->offset; + rdst = buf->data.i + right->buf_idx*length_buf_row + vi*sampleCount + right->offset; - // split sorted - for( int i = 0; i < n1; i++ ) - { - int idx = tempBuf[i]; - int d = dir[idx]; - idx = newIdx[idx]; - if (d) - { - *rdst = idx; - rdst++; - } - else - { - *ldst = idx; - ldst++; - } + int n1 = node->get_num_valid(vi); + + // split sorted + for (int i = 0; i < n1; i++) { + int idx = tempBuf[i]; + int d = dir[idx]; + idx = newIdx[idx]; + if (d) { + *rdst = idx; + rdst++; + } else { + *ldst = idx; + ldst++; } } } @@ -964,37 +867,16 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node ) for(uint64_t i = 0; i < nodeSampleCount; i++) tempBuf[i] = src_lbls[i]; - if (data->is_buf_16u) { - unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row + (workVarCount-1)*sampleCount + left->offset); - unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row + (workVarCount-1)*sampleCount + right->offset); - - for( uint64_t i = 0; i < nodeSampleCount; i++ ) { - int idx = tempBuf[i]; - if (dir[i]) { - *rdst = (unsigned short)idx; - rdst++; - } else { - *ldst = (unsigned short)idx; - ldst++; - } - } - - } - else { int *ldst = buf->data.i + left->buf_idx*length_buf_row + (workVarCount-1)*sampleCount + left->offset; int *rdst = buf->data.i + right->buf_idx*length_buf_row + (workVarCount-1)*sampleCount + right->offset; - for( uint64_t i = 0; i < nodeSampleCount; i++ ) - { + for (uint64_t i = 0; i < nodeSampleCount; i++) { int idx = tempBuf[i]; - if (dir[i]) - { + if (dir[i]) { *rdst = idx; rdst++; - } - else - { + } else { *ldst = idx; ldst++; } @@ -1008,35 +890,15 @@ void CascadeBoostTree::split_node_data( CvDTreeNode* node ) for(uint64_t i = 0; i < nodeSampleCount; i++) tempBuf[i] = sampleIdx_src[i]; - if (data->is_buf_16u) { - unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row + workVarCount*sampleCount + left->offset); - unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row + workVarCount*sampleCount + right->offset); - for (uint64_t i = 0; i < nodeSampleCount; i++) { - unsigned short idx = (unsigned short)tempBuf[i]; - if (dir[i]) { - *rdst = idx; - rdst++; - } else { - *ldst = idx; - ldst++; - } - } - } - else { int* ldst = buf->data.i + left->buf_idx*length_buf_row + workVarCount*sampleCount + left->offset; int* rdst = buf->data.i + right->buf_idx*length_buf_row + workVarCount*sampleCount + right->offset; - for (uint64_t i = 0; i < nodeSampleCount; i++) - { + for (uint64_t i = 0; i < nodeSampleCount; i++) { int idx = tempBuf[i]; - if (dir[i]) - { - + if (dir[i]) { *rdst = idx; rdst++; - } - else - { + } else { *ldst = idx; ldst++; } @@ -1167,15 +1029,9 @@ void CascadeBoost::update_weights(CvBoostTree* tree) // set the labels to find (from within weak tree learning proc) // the particular sample weight, and where to store the response. - if (data->is_buf_16u) { - unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row + data->data_root->offset + (uint64_t)(data->work_var_count-1)*data->sample_count); - for (int i = 0; i < n; i++) - labels[i] = (unsigned short)i; - } else { - int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row + data->data_root->offset + (uint64_t)(data->work_var_count-1)*data->sample_count; - for( int i = 0; i < n; i++ ) - labels[i] = i; - } + int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row + data->data_root->offset + (uint64_t)(data->work_var_count-1)*data->sample_count; + for( int i = 0; i < n; i++ ) + labels[i] = i; for (int i = 0; i < n; i++) { // save original categorical responses {0,1}, convert them to {-1,1} -- libgit2 0.21.4