//! Slice sorting
//!
//! This module contains a sorting algorithm based on Orson Peters' pattern-defeating quicksort,
//! published at: <https://github.com/orlp/pdqsort>
//!
//! Unstable sorting is compatible with libcore because it doesn't allocate memory, unlike our
//! stable sorting implementation.

use crate::cmp;
use crate::mem::{self, MaybeUninit};
use crate::ptr;

/// When dropped, copies from `src` into `dest`.
struct CopyOnDrop<T> {
    src: *const T,
    dest: *mut T,
}

impl<T> Drop for CopyOnDrop<T> {
    fn drop(&mut self) {
        // SAFETY: This is a helper class.
        // Please refer to its usage for correctness.
        // Namely, one must be sure that `src` and `dest` do not overlap as required by
        // `ptr::copy_nonoverlapping`.
        unsafe {
            ptr::copy_nonoverlapping(self.src, self.dest, 1);
        }
    }
}
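
// Note (an illustrative addition, not part of the original comments): the guard is used like
//
//     let tmp = mem::ManuallyDrop::new(ptr::read(elem_ptr));
//     let hole = CopyOnDrop { src: &*tmp, dest: hole_ptr };
//     // ... run comparisons that may panic ...
//     // when `hole` is dropped, `tmp` is copied to `hole.dest`, so no element is lost
//
// where `elem_ptr` and `hole_ptr` are placeholder names for pointers into the slice.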

/// Shifts the first element to the right until it encounters a greater or equal element.
fn shift_head<T, F>(v: &mut [T], is_less: &mut F)
where
    F: FnMut(&T, &T) -> bool,
{
    let len = v.len();
    // SAFETY: The unsafe operations below involve indexing without a bounds check (by offsetting a
    // pointer) and copying memory (`ptr::copy_nonoverlapping`).
    //
    // a. Indexing:
    //  1. We checked that the length of the array is >= 2.
    //  2. All the indexing that we will do is always between `0 <= index < len` at most.
    //
    // b. Memory copying:
    //  1. We are obtaining pointers to references which are guaranteed to be valid.
    //  2. They cannot overlap because we obtain pointers to different indices of the slice.
    //     Namely, `i` and `i - 1`.
    //  3. If the slice is properly aligned, the elements are properly aligned.
    //     It is the caller's responsibility to make sure the slice is properly aligned.
    //
    // See comments below for further detail.
    unsafe {
        // If the first two elements are out-of-order...
        if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) {
            // Read the first element into a stack-allocated variable. If a following comparison
            // operation panics, `hole` will get dropped and automatically write the element back
            // into the slice.
            let tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(0)));
            let v = v.as_mut_ptr();
            let mut hole = CopyOnDrop { src: &*tmp, dest: v.add(1) };
            ptr::copy_nonoverlapping(v.add(1), v.add(0), 1);

            for i in 2..len {
                if !is_less(&*v.add(i), &*tmp) {
                    break;
                }

                // Move the `i`-th element one place to the left, thus shifting the hole to the right.
                ptr::copy_nonoverlapping(v.add(i), v.add(i - 1), 1);
                hole.dest = v.add(i);
            }
            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
        }
    }
}
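
// Illustrative trace (an addition, not from the original source): `shift_head` on
// `[5, 1, 2, 3, 9]` reads `5` into `tmp`, shifts `1`, `2` and `3` one slot to the left,
// and writes `5` into the vacated hole, giving `[1, 2, 3, 5, 9]`.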

/// Shifts the last element to the left until it encounters a smaller or equal element.
fn shift_tail<T, F>(v: &mut [T], is_less: &mut F)
where
    F: FnMut(&T, &T) -> bool,
{
    let len = v.len();
    // SAFETY: The unsafe operations below involve indexing without a bounds check (by offsetting a
    // pointer) and copying memory (`ptr::copy_nonoverlapping`).
    //
    // a. Indexing:
    //  1. We checked that the length of the array is >= 2.
    //  2. All the indexing that we will do is always between `0 <= index < len - 1` at most.
    //
    // b. Memory copying:
    //  1. We are obtaining pointers to references which are guaranteed to be valid.
    //  2. They cannot overlap because we obtain pointers to different indices of the slice.
    //     Namely, `i` and `i + 1`.
    //  3. If the slice is properly aligned, the elements are properly aligned.
    //     It is the caller's responsibility to make sure the slice is properly aligned.
    //
    // See comments below for further detail.
    unsafe {
        // If the last two elements are out-of-order...
        if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) {
            // Read the last element into a stack-allocated variable. If a following comparison
            // operation panics, `hole` will get dropped and automatically write the element back
            // into the slice.
            let tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(len - 1)));
            let v = v.as_mut_ptr();
            let mut hole = CopyOnDrop { src: &*tmp, dest: v.add(len - 2) };
            ptr::copy_nonoverlapping(v.add(len - 2), v.add(len - 1), 1);

            for i in (0..len - 2).rev() {
                if !is_less(&*tmp, &*v.add(i)) {
                    break;
                }

                // Move the `i`-th element one place to the right, thus shifting the hole to the left.
                ptr::copy_nonoverlapping(v.add(i), v.add(i + 1), 1);
                hole.dest = v.add(i);
            }
            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
        }
    }
}
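
// Illustrative trace (an addition, not from the original source): `shift_tail` on
// `[2, 4, 6, 3]` reads `3` into `tmp`, shifts `6` and `4` one slot to the right, and drops
// `3` into the hole, giving `[2, 3, 4, 6]`.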

/// Partially sorts a slice by shifting several out-of-order elements around.
///
/// Returns `true` if the slice is sorted at the end. This function is *O*(*n*) worst-case.
#[cold]
fn partial_insertion_sort<T, F>(v: &mut [T], is_less: &mut F) -> bool
where
    F: FnMut(&T, &T) -> bool,
{
    // Maximum number of adjacent out-of-order pairs that will get shifted.
    const MAX_STEPS: usize = 5;
    // If the slice is shorter than this, don't shift any elements.
    const SHORTEST_SHIFTING: usize = 50;

    let len = v.len();
    let mut i = 1;

    for _ in 0..MAX_STEPS {
        // SAFETY: We already explicitly did the bounds checking with `i < len`.
        // All our subsequent indexing is only in the range `0 <= index < len`.
        unsafe {
            // Find the next pair of adjacent out-of-order elements.
            while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {
                i += 1;
            }
        }

        // Are we done?
        if i == len {
            return true;
        }

        // Don't shift elements on short arrays; that has a performance cost.
        if len < SHORTEST_SHIFTING {
            return false;
        }

        // Swap the found pair of elements. This puts them in correct order.
        v.swap(i - 1, i);

        // Shift the smaller element to the left.
        shift_tail(&mut v[..i], is_less);
        // Shift the greater element to the right.
        shift_head(&mut v[i..], is_less);
    }

    // Didn't manage to sort the slice in the limited number of steps.
    false
}

/// Sorts a slice using insertion sort, which is *O*(*n*^2) worst-case.
fn insertion_sort<T, F>(v: &mut [T], is_less: &mut F)
where
    F: FnMut(&T, &T) -> bool,
{
    for i in 1..v.len() {
        shift_tail(&mut v[..i + 1], is_less);
    }
}
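
// Illustrative trace (an addition, not from the original source): on `[3, 1, 2]` the calls are
// `shift_tail(&mut v[..2])`, giving `[1, 3, 2]`, then `shift_tail(&mut v[..3])`, giving
// `[1, 2, 3]`; each call grows the sorted prefix by one element.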

/// Sorts `v` using heapsort, which guarantees *O*(*n* \* log(*n*)) worst-case.
#[cold]
#[unstable(feature = "sort_internals", reason = "internal to sort module", issue = "none")]
pub fn heapsort<T, F>(v: &mut [T], mut is_less: F)
where
    F: FnMut(&T, &T) -> bool,
{
    // This binary heap respects the invariant `parent >= child`.
    let mut sift_down = |v: &mut [T], mut node| {
        loop {
            // Children of `node`:
            let left = 2 * node + 1;
            let right = 2 * node + 2;

            // Choose the greater child.
            let greater =
                if right < v.len() && is_less(&v[left], &v[right]) { right } else { left };

            // Stop if the invariant holds at `node`.
            if greater >= v.len() || !is_less(&v[node], &v[greater]) {
                break;
            }

            // Swap `node` with the greater child, move one step down, and continue sifting.
            v.swap(node, greater);
            node = greater;
        }
    };

    // Build the heap in linear time.
    for i in (0..v.len() / 2).rev() {
        sift_down(v, i);
    }

    // Pop maximal elements from the heap.
    for i in (1..v.len()).rev() {
        v.swap(0, i);
        sift_down(&mut v[..i], 0);
    }
}
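
// A minimal usage sketch (an illustrative addition; the function is `pub` but gated behind the
// unstable `sort_internals` feature):
//
//     let mut v = [4, 1, 3, 2];
//     heapsort(&mut v, |a: &i32, b: &i32| a < b);
//     assert_eq!(v, [1, 2, 3, 4]);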

/// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal
/// to `pivot`.
///
/// Returns the number of elements smaller than `pivot`.
///
/// Partitioning is performed block-by-block in order to minimize the cost of branching operations.
/// This idea is presented in the [BlockQuicksort][pdf] paper.
///
/// [pdf]: https://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf
fn partition_in_blocks<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
where
    F: FnMut(&T, &T) -> bool,
{
    // Number of elements in a typical block.
    const BLOCK: usize = 128;

    // The partitioning algorithm repeats the following steps until completion:
    //
    // 1. Trace a block from the left side to identify elements greater than or equal to the pivot.
    // 2. Trace a block from the right side to identify elements smaller than the pivot.
    // 3. Exchange the identified elements between the left and right side.
    //
    // We keep the following variables for a block of elements:
    //
    // 1. `block` - Number of elements in the block.
    // 2. `start` - Start pointer into the `offsets` array.
    // 3. `end` - End pointer into the `offsets` array.
    // 4. `offsets` - Indices of out-of-order elements within the block.
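    //
    // As an illustrative trace (an addition, not part of the original comments): with
    // `pivot = 5`, scanning a left-side block `[8, 2, 9, 1]` records the offsets of the
    // elements `>= 5`, i.e. `offsets_l = [0, 2]` (for `8` and `9`). Scanning a right-side
    // block `[7, 3, 6, 4]` from its end records the offsets, counted backwards from `r`,
    // of the elements `< 5`, i.e. `offsets_r = [0, 2]` (for `4` and `3`). Swapping the
    // elements at paired offsets (`8 <-> 4`, then `9 <-> 3`) puts every identified element
    // on the correct side.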

    // The current block on the left side (from `l` to `l.add(block_l)`).
    let mut l = v.as_mut_ptr();
    let mut block_l = BLOCK;
    let mut start_l = ptr::null_mut();
    let mut end_l = ptr::null_mut();
    let mut offsets_l = [MaybeUninit::<u8>::uninit(); BLOCK];

    // The current block on the right side (from `r.sub(block_r)` to `r`).
    // SAFETY: The documentation for `.add()` specifically mentions that
    // `vec.as_ptr().add(vec.len())` is always safe.
    let mut r = unsafe { l.add(v.len()) };
    let mut block_r = BLOCK;
    let mut start_r = ptr::null_mut();
    let mut end_r = ptr::null_mut();
    let mut offsets_r = [MaybeUninit::<u8>::uninit(); BLOCK];

    // FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather
    // than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient.

    // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
    fn width<T>(l: *mut T, r: *mut T) -> usize {
        assert!(mem::size_of::<T>() > 0);
        // FIXME: this should *likely* use `offset_from`, but more
        // investigation is needed (including running tests in miri).
        (r.addr() - l.addr()) / mem::size_of::<T>()
    }

    loop {
        // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do
        // some patch-up work in order to partition the remaining elements in between.
        let is_done = width(l, r) <= 2 * BLOCK;

        if is_done {
            // Number of remaining elements (still not compared to the pivot).
            let mut rem = width(l, r);
            if start_l < end_l || start_r < end_r {
                rem -= BLOCK;
            }

            // Adjust block sizes so that the left and right block don't overlap, but get perfectly
            // aligned to cover the whole remaining gap.
            if start_l < end_l {
                block_r = rem;
            } else if start_r < end_r {
                block_l = rem;
            } else {
                // There were the same number of elements to switch on both blocks during the last
                // iteration, so there are no remaining elements on either block. Cover the remaining
                // items with roughly equally-sized blocks.
                block_l = rem / 2;
                block_r = rem - block_l;
            }
            debug_assert!(block_l <= BLOCK && block_r <= BLOCK);
            debug_assert!(width(l, r) == block_l + block_r);
        }

        if start_l == end_l {
            // Trace `block_l` elements from the left side.
            start_l = MaybeUninit::slice_as_mut_ptr(&mut offsets_l);
            end_l = start_l;
            let mut elem = l;

            for i in 0..block_l {
                // SAFETY: The unsafe operations below involve the usage of `offset`.
                // According to the conditions required by the function, we satisfy them because:
                // 1. `offsets_l` is stack-allocated, and thus considered a separate allocated object.
                // 2. The function `is_less` returns a `bool`.
                //    Casting a `bool` will never overflow `isize`.
                // 3. We have guaranteed that `block_l` will be `<= BLOCK`.
                //    Plus, `end_l` was initially set to the begin pointer of `offsets_l`, which was declared on the stack.
                //    Thus, we know that even in the worst case (all invocations of `is_less` return false) we will only be at most 1 byte past the end.
                // Another unsafe operation here is dereferencing `elem`.
                // However, `elem` was initially the begin pointer to the slice, which is always valid.
                unsafe {
                    // Branchless comparison.
                    *end_l = i as u8;
                    end_l = end_l.offset(!is_less(&*elem, pivot) as isize);
                    elem = elem.offset(1);
                }
            }
        }

        if start_r == end_r {
            // Trace `block_r` elements from the right side.
            start_r = MaybeUninit::slice_as_mut_ptr(&mut offsets_r);
            end_r = start_r;
            let mut elem = r;

            for i in 0..block_r {
                // SAFETY: The unsafe operations below involve the usage of `offset`.
                // According to the conditions required by the function, we satisfy them because:
                // 1. `offsets_r` is stack-allocated, and thus considered a separate allocated object.
                // 2. The function `is_less` returns a `bool`.
                //    Casting a `bool` will never overflow `isize`.
                // 3. We have guaranteed that `block_r` will be `<= BLOCK`.
                //    Plus, `end_r` was initially set to the begin pointer of `offsets_r`, which was declared on the stack.
                //    Thus, we know that even in the worst case (all invocations of `is_less` return true) we will only be at most 1 byte past the end.
                // Another unsafe operation here is dereferencing `elem`.
                // However, `elem` was initially `1 * sizeof(T)` past the end and we decrement it by `1 * sizeof(T)` before accessing it.
                // Plus, `block_r` was asserted to be less than `BLOCK`, and `elem` will therefore at most be pointing to the beginning of the slice.
                unsafe {
                    // Branchless comparison.
                    elem = elem.offset(-1);
                    *end_r = i as u8;
                    end_r = end_r.offset(is_less(&*elem, pivot) as isize);
                }
            }
        }

        // Number of out-of-order elements to swap between the left and right side.
        let count = cmp::min(width(start_l, end_l), width(start_r, end_r));

        if count > 0 {
            macro_rules! left {
                () => {
                    l.offset(*start_l as isize)
                };
            }
            macro_rules! right {
                () => {
                    r.offset(-(*start_r as isize) - 1)
                };
            }

            // Instead of swapping one pair at a time, it is more efficient to perform a cyclic
            // permutation. This is not strictly equivalent to swapping, but produces a similar
            // result using fewer memory operations.

            // SAFETY: The use of `ptr::read` is valid because there is at least one element in
            // both `offsets_l` and `offsets_r`, so `left!` is a valid pointer to read from.
            //
            // The uses of `left!` involve calls to `offset` on `l`, which points to the
            // beginning of `v`. All the offsets pointed-to by `start_l` are at most `block_l`, so
            // these `offset` calls are safe as all reads are within the block. The same argument
            // applies for the uses of `right!`.
            //
            // The calls to `start_l.offset` are valid because there are at most `count-1` of them,
            // plus the final one at the end of the unsafe block, where `count` is the minimum number
            // of collected offsets in `offsets_l` and `offsets_r`, so there is no risk of there not
            // being enough elements. The same reasoning applies to the calls to `start_r.offset`.
            //
            // The calls to `copy_nonoverlapping` are safe because `left!` and `right!` are guaranteed
            // not to overlap, and are valid because of the reasoning above.
            unsafe {
                let tmp = ptr::read(left!());
                ptr::copy_nonoverlapping(right!(), left!(), 1);

                for _ in 1..count {
                    start_l = start_l.offset(1);
                    ptr::copy_nonoverlapping(left!(), right!(), 1);
                    start_r = start_r.offset(1);
                    ptr::copy_nonoverlapping(right!(), left!(), 1);
                }

                ptr::copy_nonoverlapping(&tmp, right!(), 1);
                mem::forget(tmp);
                start_l = start_l.offset(1);
                start_r = start_r.offset(1);
            }
        }

        if start_l == end_l {
            // All out-of-order elements in the left block were moved. Move to the next block.

            // block-width-guarantee
            // SAFETY: if `!is_done` then the slice width is guaranteed to be at least `2*BLOCK` wide. There
            // are at most `BLOCK` elements in `offsets_l` because of its size, so the `offset` operation is
            // safe. Otherwise, the debug assertions in the `is_done` case guarantee that
            // `width(l, r) == block_l + block_r`, namely, that the block sizes have been adjusted to account
            // for the smaller number of remaining elements.
            l = unsafe { l.offset(block_l as isize) };
        }

        if start_r == end_r {
            // All out-of-order elements in the right block were moved. Move to the previous block.

            // SAFETY: Same argument as [block-width-guarantee]. Either this is a full block `2*BLOCK`-wide,
            // or `block_r` has been adjusted for the last handful of elements.
            r = unsafe { r.offset(-(block_r as isize)) };
        }

        if is_done {
            break;
        }
    }

    // All that remains now is at most one block (either the left or the right) with out-of-order
    // elements that need to be moved. Such remaining elements can be simply shifted to the end
    // within their block.

    if start_l < end_l {
        // The left block remains.
        // Move its remaining out-of-order elements to the far right.
        debug_assert_eq!(width(l, r), block_l);
        while start_l < end_l {
            // remaining-elements-safety
            // SAFETY: while the loop condition holds there are still elements in `offsets_l`, so it
            // is safe to point `end_l` to the previous element.
            //
            // The `ptr::swap` is safe if both its arguments are valid for reads and writes:
            //  - Per the debug assert above, the distance between `l` and `r` is `block_l`
            //    elements, so there can be at most `block_l` remaining offsets between `start_l`
            //    and `end_l`. This means `r` will be moved at most `block_l` steps back, which
            //    makes the `r.offset` calls valid (at that point `l == r`).
            //  - `offsets_l` contains valid offsets into `v` collected during the partitioning of
            //    the last block, so the `l.offset` calls are valid.
            unsafe {
                end_l = end_l.offset(-1);
                ptr::swap(l.offset(*end_l as isize), r.offset(-1));
                r = r.offset(-1);
            }
        }
        width(v.as_mut_ptr(), r)
    } else if start_r < end_r {
        // The right block remains.
        // Move its remaining out-of-order elements to the far left.
        debug_assert_eq!(width(l, r), block_r);
        while start_r < end_r {
            // SAFETY: See the reasoning in [remaining-elements-safety].
            unsafe {
                end_r = end_r.offset(-1);
                ptr::swap(l, r.offset(-(*end_r as isize) - 1));
                l = l.offset(1);
            }
        }
        width(v.as_mut_ptr(), l)
    } else {
        // Nothing else to do, we're done.
        width(v.as_mut_ptr(), l)
    }
}

/// Partitions `v` into elements smaller than `v[pivot]`, followed by elements greater than or
/// equal to `v[pivot]`.
///
/// Returns a tuple of:
///
/// 1. Number of elements smaller than `v[pivot]`.
/// 2. True if `v` was already partitioned.
fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> (usize, bool)
where
    F: FnMut(&T, &T) -> bool,
{
    let (mid, was_partitioned) = {
        // Place the pivot at the beginning of slice.
        v.swap(0, pivot);
        let (pivot, v) = v.split_at_mut(1);
        let pivot = &mut pivot[0];

        // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
        // operation panics, the pivot will be automatically written back into the slice.

        // SAFETY: `pivot` is a reference to the first element of `v`, so `ptr::read` is safe.
        let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
        let _pivot_guard = CopyOnDrop { src: &*tmp, dest: pivot };
        let pivot = &*tmp;

        // Find the first pair of out-of-order elements.
        let mut l = 0;
        let mut r = v.len();

        // SAFETY: The unsafety below involves indexing an array.
        // For the first loop: we do the bounds checking here with `l < r`.
        // For the second loop: we start with `l == 0` and `r == v.len()`, check `l < r` before
        // every indexing operation, and `r` only ever decreases, so `r - 1` always stays in bounds.
        unsafe {
            // Find the first element greater than or equal to the pivot.
            while l < r && is_less(v.get_unchecked(l), pivot) {
                l += 1;
            }

            // Find the last element smaller than the pivot.
            while l < r && !is_less(v.get_unchecked(r - 1), pivot) {
                r -= 1;
            }
        }

        (l + partition_in_blocks(&mut v[l..r], pivot, is_less), l >= r)

        // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated
        // variable) back into the slice where it originally was. This step is critical in ensuring
        // safety!
    };

    // Place the pivot between the two partitions.
    v.swap(0, mid);

    (mid, was_partitioned)
}
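
// Note (an illustrative addition, not from the original source): partitioning `[3, 7, 1, 9, 5]`
// around `v[4] = 5` returns `(2, false)`: two elements are smaller than the pivot, and the slice
// was not already partitioned. One possible resulting layout is `[1, 3, 5, 9, 7]`, with the
// pivot at index 2; the order within each side is not specified.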

/// Partitions `v` into elements equal to `v[pivot]` followed by elements greater than `v[pivot]`.
///
/// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain
/// elements smaller than the pivot.
fn partition_equal<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
where
    F: FnMut(&T, &T) -> bool,
{
    // Place the pivot at the beginning of slice.
    v.swap(0, pivot);
    let (pivot, v) = v.split_at_mut(1);
    let pivot = &mut pivot[0];

    // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
    // operation panics, the pivot will be automatically written back into the slice.
    // SAFETY: The pointer here is valid because it is obtained from a reference to a slice.
    let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
    let _pivot_guard = CopyOnDrop { src: &*tmp, dest: pivot };
    let pivot = &*tmp;

    // Now partition the slice.
    let mut l = 0;
    let mut r = v.len();
    loop {
        // SAFETY: The unsafety below involves indexing an array.
        // For the first loop: we do the bounds checking here with `l < r`.
        // For the second loop: we start with `l == 0` and `r == v.len()`, check `l < r` before
        // every indexing operation, and `r` only ever decreases, so `r - 1` always stays in bounds.
        unsafe {
            // Find the first element greater than the pivot.
            while l < r && !is_less(pivot, v.get_unchecked(l)) {
                l += 1;
            }

            // Find the last element equal to the pivot.
            while l < r && is_less(pivot, v.get_unchecked(r - 1)) {
                r -= 1;
            }

            // Are we done?
            if l >= r {
                break;
            }

            // Swap the found pair of out-of-order elements.
            r -= 1;
            let ptr = v.as_mut_ptr();
            ptr::swap(ptr.add(l), ptr.add(r));
            l += 1;
        }
    }

    // We found `l` elements equal to the pivot. Add 1 to account for the pivot itself.
    l + 1

    // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated variable)
    // back into the slice where it originally was. This step is critical in ensuring safety!
}
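
// Note (an illustrative addition, not from the original source): given `[2, 2, 5, 2, 9]` and a
// pivot index pointing at a `2` (the slice contains nothing smaller than the pivot), the equal
// elements are grouped first, e.g. `[2, 2, 2, 5, 9]`, and `3` is returned.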

/// Scatters some elements around in an attempt to break patterns that might cause imbalanced
/// partitions in quicksort.
#[cold]
fn break_patterns<T>(v: &mut [T]) {
    let len = v.len();
    if len >= 8 {
        // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
        let mut random = len as u32;
        let mut gen_u32 = || {
            random ^= random << 13;
            random ^= random >> 17;
            random ^= random << 5;
            random
        };
        let mut gen_usize = || {
            if usize::BITS <= 32 {
                gen_u32() as usize
            } else {
                (((gen_u32() as u64) << 32) | (gen_u32() as u64)) as usize
            }
        };

        // Take random numbers modulo this number.
        // The number fits into `usize` because `len` is not greater than `isize::MAX`.
        let modulus = len.next_power_of_two();

        // Some pivot candidates will be in the vicinity of this index. Let's randomize them.
        let pos = len / 4 * 2;

        for i in 0..3 {
            // Generate a random number modulo `len`. However, in order to avoid costly operations
            // we first take it modulo a power of two, and then decrease by `len` until it fits
            // into the range `[0, len - 1]`.
            let mut other = gen_usize() & (modulus - 1);

            // `other` is guaranteed to be less than `2 * len`.
            if other >= len {
                other -= len;
            }
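            // For example (an illustrative addition): with `len = 10`, `modulus` is 16, so a
            // draw of 13 is reduced to 3; `other` always ends up in `0..len`.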

            v.swap(pos - 1 + i, other);
        }
    }
}

/// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted.
///
/// Elements in `v` might be reordered in the process.
fn choose_pivot<T, F>(v: &mut [T], is_less: &mut F) -> (usize, bool)
where
    F: FnMut(&T, &T) -> bool,
{
    // Minimum length to choose the median-of-medians method.
    // Shorter slices use the simple median-of-three method.
    const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50;
    // Maximum number of swaps that can be performed in this function.
    const MAX_SWAPS: usize = 4 * 3;

    let len = v.len();

    // Three indices near which we are going to choose a pivot.
    let mut a = len / 4 * 1;
    let mut b = len / 4 * 2;
    let mut c = len / 4 * 3;

    // Counts the total number of swaps we are about to perform while sorting indices.
    let mut swaps = 0;

    if len >= 8 {
        // Swaps indices so that `v[a] <= v[b]`.
        // SAFETY: `len >= 8` so there are at least two elements in the neighborhoods of
        // `a`, `b` and `c`. This means the three calls to `sort_adjacent` result in
        // corresponding calls to `sort3` with valid 3-item neighborhoods around each
        // pointer, which in turn means the calls to `sort2` are done with valid
        // references. Thus the `v.get_unchecked` calls are safe, as is the `ptr::swap`
        // call.
        let mut sort2 = |a: &mut usize, b: &mut usize| unsafe {
            if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) {
                ptr::swap(a, b);
                swaps += 1;
            }
        };

        // Swaps indices so that `v[a] <= v[b] <= v[c]`.
        let mut sort3 = |a: &mut usize, b: &mut usize, c: &mut usize| {
            sort2(a, b);
            sort2(b, c);
            sort2(a, b);
        };

        if len >= SHORTEST_MEDIAN_OF_MEDIANS {
            // Finds the median of `v[a - 1], v[a], v[a + 1]` and stores the index into `a`.
            let mut sort_adjacent = |a: &mut usize| {
                let tmp = *a;
                sort3(&mut (tmp - 1), a, &mut (tmp + 1));
            };

            // Find medians in the neighborhoods of `a`, `b`, and `c`.
            sort_adjacent(&mut a);
            sort_adjacent(&mut b);
            sort_adjacent(&mut c);
        }

        // Find the median among `a`, `b`, and `c`.
        sort3(&mut a, &mut b, &mut c);
    }

    if swaps < MAX_SWAPS {
        (b, swaps == 0)
    } else {
        // The maximum number of swaps was performed. Chances are the slice is descending or mostly
        // descending, so reversing will probably help sort it faster.
        v.reverse();
        (len - 1 - b, true)
    }
}
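
// Note (an illustrative addition, not from the original comments): for
// `len >= SHORTEST_MEDIAN_OF_MEDIANS` the pivot chosen above is the median of three neighborhood
// medians, often called Tukey's "ninther" or the pseudomedian of nine. For `len = 100`, the
// candidate indices start at 25, 50 and 75.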

/// Sorts `v` recursively.
///
/// If the slice had a predecessor in the original array, it is specified as `pred`.
///
/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
/// this function will immediately switch to heapsort.
fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &mut F, mut pred: Option<&'a T>, mut limit: u32)
where
    F: FnMut(&T, &T) -> bool,
{
    // Slices of up to this length get sorted using insertion sort.
    const MAX_INSERTION: usize = 20;

    // True if the last partitioning was reasonably balanced.
    let mut was_balanced = true;
    // True if the last partitioning didn't shuffle elements (the slice was already partitioned).
    let mut was_partitioned = true;

    loop {
        let len = v.len();

        // Very short slices get sorted using insertion sort.
        if len <= MAX_INSERTION {
            insertion_sort(v, is_less);
            return;
        }

        // If too many bad pivot choices were made, simply fall back to heapsort in order to
        // guarantee `O(n * log(n))` worst-case.
        if limit == 0 {
            heapsort(v, is_less);
            return;
        }

        // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
        // some elements around. Hopefully we'll choose a better pivot this time.
        if !was_balanced {
            break_patterns(v);
            limit -= 1;
        }

        // Choose a pivot and try guessing whether the slice is already sorted.
        let (pivot, likely_sorted) = choose_pivot(v, is_less);

        // If the last partitioning was decently balanced and didn't shuffle elements, and if pivot
        // selection predicts the slice is likely already sorted...
        if was_balanced && was_partitioned && likely_sorted {
            // Try identifying several out-of-order elements and shifting them to correct
            // positions. If the slice ends up being completely sorted, we're done.
            if partial_insertion_sort(v, is_less) {
                return;
            }
        }

        // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
        // slice. Partition the slice into elements equal to and elements greater than the pivot.
        // This case is usually hit when the slice contains many duplicate elements.
        if let Some(p) = pred {
            if !is_less(p, &v[pivot]) {
                let mid = partition_equal(v, pivot, is_less);

                // Continue sorting elements greater than the pivot.
                v = &mut v[mid..];
                continue;
            }
        }

        // Partition the slice.
        let (mid, was_p) = partition(v, pivot, is_less);
        was_balanced = cmp::min(mid, len - mid) >= len / 8;
        was_partitioned = was_p;

        // Split the slice into `left`, `pivot`, and `right`.
        let (left, right) = v.split_at_mut(mid);
        let (pivot, right) = right.split_at_mut(1);
        let pivot = &pivot[0];

        // Recurse into the shorter side only in order to minimize the total number of recursive
        // calls and consume less stack space. Then just continue with the longer side (this is
        // akin to tail recursion).
        if left.len() < right.len() {
            recurse(left, is_less, pred, limit);
            v = right;
            pred = Some(pivot);
        } else {
            recurse(right, is_less, Some(pivot), limit);
            v = left;
        }
    }
}

/// Sorts `v` using pattern-defeating quicksort, which is *O*(*n* \* log(*n*)) worst-case.
pub fn quicksort<T, F>(v: &mut [T], mut is_less: F)
where
    F: FnMut(&T, &T) -> bool,
{
    // Sorting has no meaningful behavior on zero-sized types.
    if mem::size_of::<T>() == 0 {
        return;
    }

    // Limit the number of imbalanced partitions to `floor(log2(len)) + 1`.
    let limit = usize::BITS - v.len().leading_zeros();

    recurse(v, &mut is_less, None, limit);
}
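
// A minimal usage sketch (an illustrative addition; this module is internal to core, so the
// comparison is passed as a plain closure):
//
//     let mut v = [5, 2, 4, 1, 3];
//     quicksort(&mut v, |a: &i32, b: &i32| a < b);
//     assert_eq!(v, [1, 2, 3, 4, 5]);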

fn partition_at_index_loop<'a, T, F>(
    mut v: &'a mut [T],
    mut index: usize,
    is_less: &mut F,
    mut pred: Option<&'a T>,
) where
    F: FnMut(&T, &T) -> bool,
{
    loop {
        // For slices of up to this length it's probably faster to simply sort them.
        const MAX_INSERTION: usize = 10;
        if v.len() <= MAX_INSERTION {
            insertion_sort(v, is_less);
            return;
        }

        // Choose a pivot.
        let (pivot, _) = choose_pivot(v, is_less);

        // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
        // slice. Partition the slice into elements equal to and elements greater than the pivot.
        // This case is usually hit when the slice contains many duplicate elements.
        if let Some(p) = pred {
            if !is_less(p, &v[pivot]) {
                let mid = partition_equal(v, pivot, is_less);

                // If we've passed our index, then we're good.
                if mid > index {
                    return;
                }

                // Otherwise, continue sorting elements greater than the pivot.
                v = &mut v[mid..];
                index = index - mid;
                pred = None;
                continue;
            }
        }

        let (mid, _) = partition(v, pivot, is_less);

        // Split the slice into `left`, `pivot`, and `right`.
        let (left, right) = v.split_at_mut(mid);
        let (pivot, right) = right.split_at_mut(1);
        let pivot = &pivot[0];

        if mid < index {
            v = right;
            index = index - mid - 1;
            pred = Some(pivot);
        } else if mid > index {
            v = left;
        } else {
            // If `mid == index`, then we're done, since `partition()` guaranteed that all elements
            // after `mid` are greater than or equal to `v[mid]`.
            return;
        }
    }
}

pub fn partition_at_index<T, F>(
    v: &mut [T],
    index: usize,
    mut is_less: F,
) -> (&mut [T], &mut T, &mut [T])
where
    F: FnMut(&T, &T) -> bool,
{
    use cmp::Ordering::Greater;
    use cmp::Ordering::Less;

    if index >= v.len() {
        panic!("partition_at_index index {} greater than length of slice {}", index, v.len());
    }

    if mem::size_of::<T>() == 0 {
        // Sorting has no meaningful behavior on zero-sized types. Do nothing.
    } else if index == v.len() - 1 {
        // Find the max element and place it in the last position of the array. We're free to use
        // `unwrap()` here because we know `v` must not be empty.
        let (max_index, _) = v
            .iter()
            .enumerate()
            .max_by(|&(_, x), &(_, y)| if is_less(x, y) { Less } else { Greater })
            .unwrap();
        v.swap(max_index, index);
    } else if index == 0 {
        // Find the min element and place it in the first position of the array. We're free to use
        // `unwrap()` here because we know `v` must not be empty.
        let (min_index, _) = v
            .iter()
            .enumerate()
            .min_by(|&(_, x), &(_, y)| if is_less(x, y) { Less } else { Greater })
            .unwrap();
        v.swap(min_index, index);
    } else {
        partition_at_index_loop(v, index, &mut is_less, None);
    }

    let (left, right) = v.split_at_mut(index);
    let (pivot, right) = right.split_at_mut(1);
    let pivot = &mut pivot[0];
    (left, pivot, right)
}
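
// A minimal usage sketch (an illustrative addition; this module is internal to core):
//
//     let mut v = [9, 4, 7, 1, 3];
//     let (left, median, right) = partition_at_index(&mut v, 2, |a: &i32, b: &i32| a < b);
//     // `*median` is 4; everything in `left` is <= 4 and everything in `right` is >= 4.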