/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Copyright 2012 The MITRE Corporation * * * * Licensed under the Apache License, Version 2.0 (the "License"); * * you may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, software * * distributed under the License is distributed on an "AS IS" BASIS, * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * * limitations under the License. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #ifndef __COMMON_H #define __COMMON_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace Common { /*! * \brief Round floating point to nearest integer. */ template int round(T r) { return (r > 0.0) ? floor(r + 0.5) : ceil(r - 0.5); } /*! * \brief Returns a list of pairs sorted by value where: * pair.first = original value * pair.second = original index */ template QList< QPair > Sort(const QList &vals, bool decending = false, int n = std::numeric_limits::max()) { const int size = vals.size(); QList< QPair > pairs; pairs.reserve(size); for (int i=0; i(vals[i], i)); if (n >= pairs.size()) { if (decending) std::sort(pairs.begin(), pairs.end(), std::greater< QPair >()); else std::sort(pairs.begin(), pairs.end(), std::less< QPair >()); } else { if (decending) std::partial_sort(pairs.begin(), pairs.begin()+n, pairs.end(), std::greater< QPair >()); else std::partial_sort(pairs.begin(), pairs.begin()+n, pairs.end(), std::less< QPair >()); pairs = pairs.mid(0, n); } return pairs; } /*! * \brief Returns the minimum, maximum, minimum index, and maximum index of a vector of values. */ template void MinMax(const QList &vals, T *min, T *max, int *min_index, int *max_index) { const int size = vals.size(); assert(size > 0); *min = *max = vals[0]; *min_index = *max_index = 0; for (int i=1; i *max) { *max = val; *max_index = i; } } } template void MinMax(const QList &vals, T *min, T *max) { int min_index, max_index; MinMax(vals, min, max, &min_index, &max_index); } template T Min(const QList &vals) { int min, max; MinMax(vals, &min, &max); return min; } template T Max(const QList &vals) { int min, max; MinMax(vals, &min, &max); return max; } /*! * \brief Returns the mean and standard deviation of a vector of values. */ template void Mean(const QList &vals, double *mean) { const int size = vals.size(); // Compute Mean double sum = 0; for (int i=0; i void MeanStdDev(const QList &vals, double *mean, double *stddev) { const int size = vals.size(); Mean(vals, mean); // Compute Standard Deviation double variance = 0; for (int i=0; i class C, typename T> T Median(C vals, T *q1 = 0, T *q3 = 0) { if (vals.isEmpty()) return std::numeric_limits::quiet_NaN(); qSort(vals); if (q1 != 0) *q1 = vals[1*vals.size()/4]; if (q3 != 0) *q3 = vals[3*vals.size()/4]; return vals[vals.size()/2]; } /*! * \brief Computes the mode of a list. */ template T Mode(const QList &vals) { QMap counts; foreach (const T &val, vals) { if (!counts.contains(val)) counts[val] = 0; counts[val]++; } return counts.key(Max(counts.values())); } /*! * \brief Returns the cumulative sum of a vector of values. */ template QList CumSum(const QList &vals) { QList cumsum; cumsum.reserve(vals.size()+1); cumsum.append(0); foreach (const T &val, vals) cumsum.append(cumsum.last()+val); return cumsum; } /*! * \brief Calculate DKE bandwidth parameter 'h' */ template double KernelDensityBandwidth(const QList &vals) { double mean, stddev; MeanStdDev(vals, &mean, &stddev); return pow(4 * pow(stddev, 5.0) / (3 * vals.size()), 0.2); } /*! * \brief Compute kernel density at value x with bandwidth h. */ template double KernelDensityEstimation(const QList &vals, double x, double h) { double y = 0; foreach (T val, vals) y += exp(-pow((val-x)/h,2)/2)/sqrt(2*3.1415926353898); return y / (vals.size() * h); } /*! * \brief Returns a vector of n integers sampled in the range RandSample(int n, int max, int min = 0, bool unique = false); QList RandSample(int n, const QSet &values, bool unique = false); /*! * \brief Weighted random sample, each entry in weights should be >= 0. */ template QList RandSample(int n, const QList &weights, bool unique = false) { static bool seeded = false; if (!seeded) { srand(time(NULL)); seeded = true; } QList cdf = CumSum(weights); for (int i=0; i samples; samples.reserve(n); while (samples.size() < n) { T r = (T)rand() / (T)RAND_MAX; for (int j=0; j= cdf[j]) && (r <= cdf[j+1])) { if (!unique || !samples.contains(j)) samples.append(j); break; } } } return samples; } /*! * \brief See Matlab function unique() for documentation. */ template void Unique(const QList &vals, QList &b, QList &m, QList &n) { const int size = vals.size(); assert(size > 0); b.reserve(size); m.reserve(size); n.reserve(size); // Compute b and m QList< QPair > sortedPairs = Sort(vals); b.append(sortedPairs[0].first); m.append(sortedPairs[0].second); for (size_t i=1; i void SplitPairs(const QList< QPair > &pairs, QList &first, QList &second) { first.reserve(pairs.size()); second.reserve(pairs.size()); typedef QPair pair_t; foreach (const pair_t &pair, pairs) { first.append(pair.first); second.append(pair.second); } } /*! * \brief Removes values outside of 1.5 * Inner Quartile Range. */ template QList RemoveOutliers(QList vals) { T q1, q3; Median(vals, &q1, &q3); T iqr = q3-q1; T min = q1 - 1.5*iqr; T max = q3 + 1.5*iqr; QList newVals; for (int i=0; i= min) && (vals[i] <= max)) newVals.append(vals[i]); return newVals; } /*! * \brief Sorts and evenly downsamples a vector to size k. */ template QList Downsample(QList vals, long k) { // Use 'long' instead of 'int' so multiplication doesn't overflow qSort(vals); long size = (long)vals.size(); if (size <= k) return vals; QList newVals; newVals.reserve(k); for (long i=0; i