]>
git.proxmox.com Git - rustc.git/blob - vendor/rand-0.6.1/src/seq/index.rs
1 // Copyright 2018 Developers of the Rand project.
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
11 #[cfg(feature="alloc")] use core::slice;
13 #[cfg(feature="std")] use std::vec;
14 #[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::{self, Vec};
15 // BTreeMap is not as fast in tests, but better than nothing.
16 #[cfg(feature="std")] use std::collections::{HashSet};
17 #[cfg(all(feature="alloc", not(feature="std")))] use alloc::collections::BTreeSet;
19 #[cfg(feature="alloc")] use distributions::{Distribution, Uniform};
22 /// A vector of indices.
24 /// Multiple internal representations are possible.
25 #[derive(Clone, Debug)]
27 #[doc(hidden)] U32(Vec<u32>),
28 #[doc(hidden)] USize(Vec<usize>),
32 /// Returns the number of indices
33 pub fn len(&self) -> usize {
35 &IndexVec
::U32(ref v
) => v
.len(),
36 &IndexVec
::USize(ref v
) => v
.len(),
40 /// Return the value at the given `index`.
42 /// (Note: we cannot implement `std::ops::Index` because of lifetime
44 pub fn index(&self, index
: usize) -> usize {
46 &IndexVec
::U32(ref v
) => v
[index
] as usize,
47 &IndexVec
::USize(ref v
) => v
[index
],
51 /// Return result as a `Vec<usize>`. Conversion may or may not be trivial.
52 pub fn into_vec(self) -> Vec
<usize> {
54 IndexVec
::U32(v
) => v
.into_iter().map(|i
| i
as usize).collect(),
55 IndexVec
::USize(v
) => v
,
59 /// Iterate over the indices as a sequence of `usize` values
60 pub fn iter
<'a
>(&'a
self) -> IndexVecIter
<'a
> {
62 &IndexVec
::U32(ref v
) => IndexVecIter
::U32(v
.iter()),
63 &IndexVec
::USize(ref v
) => IndexVecIter
::USize(v
.iter()),
67 /// Convert into an iterator over the indices as a sequence of `usize` values
68 pub fn into_iter(self) -> IndexVecIntoIter
{
70 IndexVec
::U32(v
) => IndexVecIntoIter
::U32(v
.into_iter()),
71 IndexVec
::USize(v
) => IndexVecIntoIter
::USize(v
.into_iter()),
76 impl PartialEq
for IndexVec
{
77 fn eq(&self, other
: &IndexVec
) -> bool
{
78 use self::IndexVec
::*;
80 (&U32(ref v1
), &U32(ref v2
)) => v1
== v2
,
81 (&USize(ref v1
), &USize(ref v2
)) => v1
== v2
,
82 (&U32(ref v1
), &USize(ref v2
)) => (v1
.len() == v2
.len())
83 && (v1
.iter().zip(v2
.iter()).all(|(x
, y
)| *x
as usize == *y
)),
84 (&USize(ref v1
), &U32(ref v2
)) => (v1
.len() == v2
.len())
85 && (v1
.iter().zip(v2
.iter()).all(|(x
, y
)| *x
== *y
as usize)),
90 impl From
<Vec
<u32>> for IndexVec
{
91 fn from(v
: Vec
<u32>) -> Self {
96 impl From
<Vec
<usize>> for IndexVec
{
97 fn from(v
: Vec
<usize>) -> Self {
102 /// Return type of `IndexVec::iter`.
104 pub enum IndexVecIter
<'a
> {
105 #[doc(hidden)] U32(slice::Iter<'a, u32>),
106 #[doc(hidden)] USize(slice::Iter<'a, usize>),
109 impl<'a
> Iterator
for IndexVecIter
<'a
> {
111 fn next(&mut self) -> Option
<usize> {
112 use self::IndexVecIter
::*;
114 &mut U32(ref mut iter
) => iter
.next().map(|i
| *i
as usize),
115 &mut USize(ref mut iter
) => iter
.next().cloned(),
119 fn size_hint(&self) -> (usize, Option
<usize>) {
121 &IndexVecIter
::U32(ref v
) => v
.size_hint(),
122 &IndexVecIter
::USize(ref v
) => v
.size_hint(),
127 impl<'a
> ExactSizeIterator
for IndexVecIter
<'a
> {}
129 /// Return type of `IndexVec::into_iter`.
130 #[derive(Clone, Debug)]
131 pub enum IndexVecIntoIter
{
132 #[doc(hidden)] U32(vec::IntoIter<u32>),
133 #[doc(hidden)] USize(vec::IntoIter<usize>),
136 impl Iterator
for IndexVecIntoIter
{
139 fn next(&mut self) -> Option
<Self::Item
> {
140 use self::IndexVecIntoIter
::*;
142 &mut U32(ref mut v
) => v
.next().map(|i
| i
as usize),
143 &mut USize(ref mut v
) => v
.next(),
147 fn size_hint(&self) -> (usize, Option
<usize>) {
148 use self::IndexVecIntoIter
::*;
150 &U32(ref v
) => v
.size_hint(),
151 &USize(ref v
) => v
.size_hint(),
156 impl ExactSizeIterator
for IndexVecIntoIter {}
159 /// Randomly sample exactly `amount` distinct indices from `0..length`, and
160 /// return them in random order (fully shuffled).
162 /// This method is used internally by the slice sampling methods, but it can
163 /// sometimes be useful to have the indices themselves so this is provided as
166 /// The implementation used is not specified; we automatically select the
167 /// fastest available algorithm for the `length` and `amount` parameters
168 /// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking,
169 /// complexity is `O(amount)`, except that when `amount` is small, performance
170 /// is closer to `O(amount^2)`, and when `length` is close to `amount` then
173 /// Note that performance is significantly better over `u32` indices than over
174 /// `u64` indices. Because of this we hide the underlying type behind an
175 /// abstraction, `IndexVec`.
177 /// If an allocation-free `no_std` function is required, it is suggested
178 /// to adapt the internal `sample_floyd` implementation.
180 /// Panics if `amount > length`.
181 pub fn sample
<R
>(rng
: &mut R
, length
: usize, amount
: usize) -> IndexVec
182 where R
: Rng
+ ?Sized
,
185 panic
!("`amount` of samples must be less than or equal to `length`");
187 if length
> (::core
::u32::MAX
as usize) {
188 // We never want to use inplace here, but could use floyd's alg
189 // Lazy version: always use the cache alg.
190 return sample_rejection(rng
, length
, amount
);
192 let amount
= amount
as u32;
193 let length
= length
as u32;
195 // Choice of algorithm here depends on both length and amount. See:
196 // https://github.com/rust-random/rand/pull/479
197 // We do some calculations with f32. Accuracy is not very important.
200 const C
: [[f32; 2]; 2] = [[1.6, 8.0/45.0], [10.0, 70.0/9.0]];
201 let j
= if length
< 500_000 { 0 }
else { 1 }
;
202 let amount_fp
= amount
as f32;
203 let m4
= C
[0][j
] * amount_fp
;
204 // Short-cut: when amount < 12, floyd's is always faster
205 if amount
> 11 && (length
as f32) < (C
[1][j
] + m4
) * amount_fp
{
206 sample_inplace(rng
, length
, amount
)
208 sample_floyd(rng
, length
, amount
)
211 const C
: [f32; 2] = [270.0, 330.0/9.0];
212 let j
= if length
< 500_000 { 0 }
else { 1 }
;
213 if (length
as f32) < C
[j
] * (amount
as f32) {
214 sample_inplace(rng
, length
, amount
)
216 // note: could have a specific u32 impl, but I'm lazy and
217 // generics don't have usable conversions
218 sample_rejection(rng
, length
as usize, amount
as usize)
223 /// Randomly sample exactly `amount` indices from `0..length`, using Floyd's
224 /// combination algorithm.
226 /// The output values are fully shuffled. (Overhead is under 50%.)
228 /// This implementation uses `O(amount)` memory and `O(amount^2)` time.
229 fn sample_floyd
<R
>(rng
: &mut R
, length
: u32, amount
: u32) -> IndexVec
230 where R
: Rng
+ ?Sized
,
232 // For small amount we use Floyd's fully-shuffled variant. For larger
233 // amounts this is slow due to Vec::insert performance, so we shuffle
234 // afterwards. Benchmarks show little overhead from extra logic.
235 let floyd_shuffle
= amount
< 50;
237 debug_assert
!(amount
<= length
);
238 let mut indices
= Vec
::with_capacity(amount
as usize);
239 for j
in length
- amount
.. length
{
240 let t
= rng
.gen_range(0, j
+ 1);
242 if let Some(pos
) = indices
.iter().position(|&x
| x
== t
) {
243 indices
.insert(pos
, j
);
247 if indices
.contains(&t
) {
255 // Reimplement SliceRandom::shuffle with smaller indices
256 for i
in (1..amount
).rev() {
257 // invariant: elements with index > i have been locked in place.
258 indices
.swap(i
as usize, rng
.gen_range(0, i
+ 1) as usize);
261 IndexVec
::from(indices
)
264 /// Randomly sample exactly `amount` indices from `0..length`, using an inplace
265 /// partial Fisher-Yates method.
266 /// Sample an amount of indices using an inplace partial fisher yates method.
268 /// This allocates the entire `length` of indices and randomizes only the first `amount`.
269 /// It then truncates to `amount` and returns.
271 /// This method is not appropriate for large `length` and potentially uses a lot
272 /// of memory; because of this we only implement for `u32` index (which improves
273 /// performance in all cases).
275 /// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
276 fn sample_inplace
<R
>(rng
: &mut R
, length
: u32, amount
: u32) -> IndexVec
277 where R
: Rng
+ ?Sized
,
279 debug_assert
!(amount
<= length
);
280 let mut indices
: Vec
<u32> = Vec
::with_capacity(length
as usize);
281 indices
.extend(0..length
);
283 let j
: u32 = rng
.gen_range(i
, length
);
284 indices
.swap(i
as usize, j
as usize);
286 indices
.truncate(amount
as usize);
287 debug_assert_eq
!(indices
.len(), amount
as usize);
288 IndexVec
::from(indices
)
291 /// Randomly sample exactly `amount` indices from `0..length`, using rejection
294 /// Since `amount <<< length` there is a low chance of a random sample in
295 /// `0..length` being a duplicate. We test for duplicates and resample where
296 /// necessary. The algorithm is `O(amount)` time and memory.
297 fn sample_rejection
<R
>(rng
: &mut R
, length
: usize, amount
: usize) -> IndexVec
298 where R
: Rng
+ ?Sized
,
300 debug_assert
!(amount
< length
);
301 #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount);
302 #[cfg(not(feature="std"))] let mut cache = BTreeSet::new();
303 let distr
= Uniform
::new(0, length
);
304 let mut indices
= Vec
::with_capacity(amount
);
306 let mut pos
= distr
.sample(rng
);
307 while !cache
.insert(pos
) {
308 pos
= distr
.sample(rng
);
313 debug_assert_eq
!(indices
.len(), amount
);
314 IndexVec
::from(indices
)
322 fn test_sample_boundaries() {
323 let mut r
= ::test
::rng(404);
325 assert_eq
!(sample_inplace(&mut r
, 0, 0).len(), 0);
326 assert_eq
!(sample_inplace(&mut r
, 1, 0).len(), 0);
327 assert_eq
!(sample_inplace(&mut r
, 1, 1).into_vec(), vec
![0]);
329 assert_eq
!(sample_rejection(&mut r
, 1, 0).len(), 0);
331 assert_eq
!(sample_floyd(&mut r
, 0, 0).len(), 0);
332 assert_eq
!(sample_floyd(&mut r
, 1, 0).len(), 0);
333 assert_eq
!(sample_floyd(&mut r
, 1, 1).into_vec(), vec
![0]);
335 // These algorithms should be fast with big numbers. Test average.
336 let sum
: usize = sample_rejection(&mut r
, 1 << 25, 10)
338 assert
!(1 << 25 < sum
&& sum
< (1 << 25) * 25);
340 let sum
: usize = sample_floyd(&mut r
, 1 << 25, 10)
342 assert
!(1 << 25 < sum
&& sum
< (1 << 25) * 25);
346 fn test_sample_alg() {
347 let seed_rng
= ::test
::rng
;
349 // We can't test which algorithm is used directly, but Floyd's alg
350 // should produce different results from the others. (Also, `inplace`
351 // and `cached` currently use different sizes thus produce different results.)
353 // A small length and relatively large amount should use inplace
354 let (length
, amount
): (usize, usize) = (100, 50);
355 let v1
= sample(&mut seed_rng(420), length
, amount
);
356 let v2
= sample_inplace(&mut seed_rng(420), length
as u32, amount
as u32);
357 assert
!(v1
.iter().all(|e
| e
< length
));
360 // Test Floyd's alg does produce different results
361 let v3
= sample_floyd(&mut seed_rng(420), length
as u32, amount
as u32);
364 // A large length and small amount should use Floyd
365 let (length
, amount
): (usize, usize) = (1<<20, 50);
366 let v1
= sample(&mut seed_rng(421), length
, amount
);
367 let v2
= sample_floyd(&mut seed_rng(421), length
as u32, amount
as u32);
368 assert
!(v1
.iter().all(|e
| e
< length
));
371 // A large length and larger amount should use cache
372 let (length
, amount
): (usize, usize) = (1<<20, 600);
373 let v1
= sample(&mut seed_rng(422), length
, amount
);
374 let v2
= sample_rejection(&mut seed_rng(422), length
, amount
);
375 assert
!(v1
.iter().all(|e
| e
< length
));