RandomSelect.hpp
12.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
/**
* \file RandomSelect.hpp
* \brief Header for RandomSelect.
*
* An implementation of the Walker algorithm for selecting from a finite set.
*
* Copyright (c) Charles Karney (2006-2011) <charles@karney.com> and licensed
* under the MIT/X11 License. For more information, see
* http://randomlib.sourceforge.net/
**********************************************************************/
#if !defined(RANDOMLIB_RANDOMSELECT_HPP)
#define RANDOMLIB_RANDOMSELECT_HPP 1
#include <vector>
#include <limits>
#include <stdexcept>
#if defined(_MSC_VER)
// Squelch warnings about constant conditional expressions
# pragma warning (push)
# pragma warning (disable: 4127)
#endif
namespace RandomLib {
/**
* \brief Random selection from a discrete set.
*
* An implementation of Walker algorithm for selecting from a finite set
* (following Knuth, TAOCP, Vol 2, Sec 3.4.1.A). This provides a rapid way
* of selecting one of several choices depending on a discrete set weights.
* Original citation is\n A. J. Walker,\n An Efficient Method for Generating
* Discrete Random Variables and General Distributions,\n ACM TOMS 3,
* 253--256 (1977).
*
* There are two changes here in the setup algorithm as given by Knuth:
*
* - The probabilities aren't sorted at the beginning of the setup; nor are
* they maintained in a sorted order. Instead they are just partitioned on
* the mean. This improves the setup time from O(\e k<sup>2</sup>) to O(\e
* k).
*
* - The internal calculations are carried out with type \e NumericType. If
* the input weights are of integer type, then choosing an integer type for
* \e NumericType yields an exact solution for the returned distribution
* (assuming that the underlying random generator is exact.)
*
* Example:
* \code
#include <RandomLib/RandomSelect.hpp>
// Weights for throwing a pair of dice
unsigned w[] = { 0, 0, 1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1 };
// Initialize selection
RandomLib::RandomSelect<unsigned> sel(w, w + 13);
RandomLib::Random r; // Initialize random numbers
std::cout << "Seed set to " << r.SeedString() << "\n";
std::cout << "Throw a pair of dice 100 times:";
for (unsigned i = 0; i < 100; ++i)
std::cout << " " << sel(r);
std::cout << "\n";
\endcode
*
* @tparam NumericType the numeric type to use (default double).
**********************************************************************/
template<typename NumericType = double> class RandomSelect {
public:
/**
* Initialize in a cleared state (equivalent to having a single
* choice).
**********************************************************************/
RandomSelect() : _k(0), _wsum(0), _wmax(0) {}
/**
* Initialize with a weight vector \e w of elements of type \e WeightType.
* Internal calculations are carried out with type \e NumericType. \e
* NumericType needs to allow Choices() * MaxWeight() to be represented.
* Sensible combinations are:
* - \e WeightType integer, \e NumericType integer with
* digits(\e NumericType) ≥ digits(\e WeightType)
* - \e WeightType integer, \e NumericType real
* - \e WeightType real, \e NumericType real with digits(\e NumericType)
* ≥ digits(\e WeightType)
*
* @tparam WeightType the type of the weights.
* @param[in] w the vector of weights.
* @exception RandomErr if any of the weights are negative or if the total
* weight is not positive.
**********************************************************************/
template<typename WeightType>
RandomSelect(const std::vector<WeightType>& w) { Init(w.begin(), w.end()); }
/**
* Initialize with a weight given by a pair of iterators [\e a, \e b).
*
* @tparam InputIterator the type of the iterator.
* @param[in] a the beginning iterator.
* @param[in] b the ending iterator.
* @exception RandomErr if any of the weights are negative or if the total
* weight is not positive.
**********************************************************************/
template<typename InputIterator>
RandomSelect(InputIterator a, InputIterator b);
/**
* Clear the state (equivalent to having a single choice).
**********************************************************************/
void Init() throw()
{ _k = 0; _wsum = 0; _wmax = 0; _Q.clear(); _Y.clear(); }
/**
* Re-initialize with a weight vector \e w. Leave state unaltered in the
* case of an error.
*
* @tparam WeightType the type of the weights.
* @param[in] w the vector of weights.
**********************************************************************/
template<typename WeightType>
void Init(const std::vector<WeightType>& w) { Init(w.begin(), w.end()); }
/**
* Re-initialize with a weight given as a pair of iterators [\e a, \e b).
* Leave state unaltered in the case of an error.
*
* @tparam InputIterator the type of the iterator.
* @param[in] a the beginning iterator.
* @param[in] b the ending iterator.
**********************************************************************/
template<typename InputIterator>
void Init(InputIterator a, InputIterator b) {
RandomSelect<NumericType> t(a, b);
_Q.reserve(t._k);
_Y.reserve(t._k);
*this = t;
}
/**
* Return an index into the weight vector with probability proportional to
* the weight.
*
* @tparam Random the type of RandomCanonical generator.
* @param[in,out] r the RandomCanonical generator.
* @return the random index into the weight vector.
**********************************************************************/
template<class Random>
unsigned operator()(Random& r) const throw() {
if (_k <= 1)
return 0; // Special cases
const unsigned K = r.template Integer<unsigned>(_k);
// redundant casts to type NumericType to prevent warning from MS Project
return (std::numeric_limits<NumericType>::is_integer ?
r.template Prob<NumericType>(NumericType(_Q[K]),
NumericType(_wsum)) :
r.template Prob<NumericType>(NumericType(_Q[K]))) ?
K : _Y[K];
}
/**
* @return the sum of the weights.
**********************************************************************/
NumericType TotalWeight() const throw() { return _wsum; }
/**
* @return the maximum weight.
**********************************************************************/
NumericType MaxWeight() const throw() { return _wmax; }
/**
* @param[in] i the index in to the weight vector.
* @return the weight for sample \e i. Weight(i) / TotalWeight() gives the
* probability of sample \e i.
**********************************************************************/
NumericType Weight(unsigned i) const throw() {
if (i >= _k)
return NumericType(0);
else if (_k == 1)
return _wsum;
const NumericType n = std::numeric_limits<NumericType>::is_integer ?
_wsum : NumericType(1);
NumericType p = _Q[i];
for (unsigned j = _k; j;)
if (_Y[--j] == i)
p += n - _Q[j];
// If NumericType is integral, then p % _k == 0.
// assert(!std::numeric_limits<NumericType>::is_integer || p % _k == 0);
return (p / NumericType(_k)) * (_wsum / n);
}
/**
* @return the number of choices, i.e., the length of the weight vector.
**********************************************************************/
unsigned Choices() const throw() { return _k; }
private:
/**
* Size of weight vector
**********************************************************************/
unsigned _k;
/**
* Vector of cutoffs
**********************************************************************/
std::vector<NumericType> _Q;
/**
* Vector of aliases
**********************************************************************/
std::vector<unsigned> _Y;
/**
* The sum of the weights
**********************************************************************/
NumericType _wsum;
/**
* The maximum weight
**********************************************************************/
NumericType _wmax;
};
template<typename NumericType> template<typename InputIterator>
RandomSelect<NumericType>::RandomSelect(InputIterator a, InputIterator b) {
typedef typename std::iterator_traits<InputIterator>::value_type
WeightType;
// Disallow WeightType = real, NumericType = integer
STATIC_ASSERT(std::numeric_limits<WeightType>::is_integer ||
!std::numeric_limits<NumericType>::is_integer,
"RandomSelect: inconsistent WeightType and NumericType");
// If WeightType and NumericType are the same type, NumericType as precise
// as WeightType
STATIC_ASSERT(std::numeric_limits<WeightType>::is_integer !=
std::numeric_limits<NumericType>::is_integer ||
std::numeric_limits<NumericType>::digits >=
std::numeric_limits<WeightType>::digits,
"RandomSelect: NumericType insufficiently precise");
_wsum = 0;
_wmax = 0;
std::vector<NumericType> p;
for (InputIterator wptr = a; wptr != b; ++wptr) {
// Test *wptr < 0 without triggering compiler warning when *wptr =
// unsigned
if (!(*wptr > 0 || *wptr == 0))
// This also catches NaNs
throw RandomErr("RandomSelect: Illegal weight");
NumericType w = NumericType(*wptr);
if (w > (std::numeric_limits<NumericType>::max)() - _wsum)
throw RandomErr("RandomSelect: Overflow");
_wsum += w;
_wmax = w > _wmax ? w : _wmax;
p.push_back(w);
}
_k = unsigned(p.size());
if (_wsum <= 0)
throw RandomErr("RandomSelect: Zero total weight");
if (_k <= 1) {
// We treat k <= 1 as a special case in operator()
_Q.clear();
_Y.clear();
return;
}
if ((std::numeric_limits<NumericType>::max)()/NumericType(_k) <
NumericType(_wmax))
throw RandomErr("RandomSelect: Overflow");
std::vector<unsigned> j(_k);
_Q.resize(_k);
_Y.resize(_k);
// Pointers to the next empty low and high slots
unsigned u = 0;
unsigned v = _k - 1;
// Scale input and store in p and setup index array j. Note _wsum =
// mean(p). We could scale out _wsum here, but the following is exact when
// w[i] are low integers.
for (unsigned i = 0; i < _k; ++i) {
p[i] *= NumericType(_k);
j[p[i] > _wsum ? v-- : u++] = i;
}
// Pointers to the next low and high slots to use. Work towards the
// middle. This simplifies the loop exit test to u == v.
u = 0;
v = _k - 1;
// For integer NumericType, store the unnormalized probability in _Q and
// select using the exact Prob(_Q[k], _wsum). For real NumericType, store
// the normalized probability and select using Prob(_Q[k]). There will be
// a round off error in performing the division; but there is also the
// potential for round off errors in performing the arithmetic on p. There
// is therefore no point in simulating the division exactly using the
// slower Prob(real, real).
const NumericType n = std::numeric_limits<NumericType>::is_integer ?
NumericType(1) : _wsum;
while (true) {
// A loop invariant here is mean(p[j[u..v]]) == _wsum
_Q[j[u]] = p[j[u]] / n;
// If all arithmetic were exact this assignment could be:
// if (p[j[u]] < _wsum) _Y[j[u]] = j[v];
// But the following is safer:
_Y[j[u]] = j[p[j[u]] < _wsum ? v : u];
if (u == v) {
// The following assertion may fail because of roundoff errors
// assert( p[j[u]] == _wsum );
break;
}
// Update p, u, and v maintaining the loop invariant
p[j[v]] = p[j[v]] - (_wsum - p[j[u]]);
if (p[j[v]] > _wsum)
++u;
else
j[u] = j[v--];
}
return;
}
} // namespace RandomLib
#if defined(_MSC_VER)
# pragma warning (pop)
#endif
#endif // RANDOMLIB_RANDOMSELECT_HPP