//! Parallel merge sort.
//!
//! This implementation is copied verbatim from `std::slice::sort` and then parallelized.
//! The only difference from the original is that the sequential `mergesort` returns
//! `MergesortResult` and leaves descending arrays intact.

use crate::iter::*;
use crate::slice::ParallelSliceMut;
use std::mem;
use std::mem::size_of;
use std::ptr;
use std::slice;

unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
    let old = *ptr;
    *ptr = ptr.offset(1);
    old
}

unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
    *ptr = ptr.offset(-1);
    *ptr
}

/// When dropped, copies from `src` into `dest` a sequence of length `len`.
struct CopyOnDrop<T> {
    src: *mut T,
    dest: *mut T,
    len: usize,
}

impl<T> Drop for CopyOnDrop<T> {
    fn drop(&mut self) {
        unsafe {
            ptr::copy_nonoverlapping(self.src, self.dest, self.len);
        }
    }
}

/// Inserts `v[0]` into the pre-sorted sequence `v[1..]` so that the whole `v[..]` becomes sorted.
///
/// This is the integral subroutine of insertion sort.
fn insert_head<T, F>(v: &mut [T], is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    if v.len() >= 2 && is_less(&v[1], &v[0]) {
        unsafe {
            // There are three ways to implement insertion here:
            //
            // 1. Swap adjacent elements until the first one gets to its final destination.
            //    However, this way we copy data around more than is necessary. If elements are big
            //    structures (costly to copy), this method will be slow.
            //
            // 2. Iterate until the right place for the first element is found. Then shift the
            //    elements succeeding it to make room for it and finally place it into the
            //    remaining hole. This is a good method.
            //
            // 3. Copy the first element into a temporary variable. Iterate until the right place
            //    for it is found. As we go along, copy every traversed element into the slot
            //    preceding it. Finally, copy data from the temporary variable into the remaining
            //    hole. This method is very good. Benchmarks demonstrated slightly better
            //    performance than with the 2nd method.
            //
            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
            let mut tmp = NoDrop {
                value: Some(ptr::read(&v[0])),
            };

            // Intermediate state of the insertion process is always tracked by `hole`, which
            // serves two purposes:
            // 1. Protects integrity of `v` from panics in `is_less`.
            // 2. Fills the remaining hole in `v` in the end.
            //
            // Panic safety:
            //
            // If `is_less` panics at any point during the process, `hole` will get dropped and
            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
            // initially held exactly once.
            let mut hole = InsertionHole {
                src: tmp.value.as_mut().unwrap(),
                dest: &mut v[1],
            };
            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);

            for i in 2..v.len() {
                if !is_less(&v[i], tmp.value.as_ref().unwrap()) {
                    break;
                }
                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
                hole.dest = &mut v[i];
            }
            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
        }
    }

    // Holds a value, but never drops it.
    struct NoDrop<T> {
        value: Option<T>,
    }

    impl<T> Drop for NoDrop<T> {
        fn drop(&mut self) {
            mem::forget(self.value.take());
        }
    }

    // When dropped, copies from `src` into `dest`.
    struct InsertionHole<T> {
        src: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for InsertionHole<T> {
        fn drop(&mut self) {
            unsafe {
                ptr::copy_nonoverlapping(self.src, self.dest, 1);
            }
        }
    }
}
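
// A minimal illustrative check of `insert_head`'s contract (not exhaustive): the tail
// `v[1..]` is assumed to be sorted already, and the call moves `v[0]` into place.
#[cfg(test)]
#[test]
fn insert_head_places_first_element_into_sorted_tail() {
    let mut v = [5, 1, 2, 4, 8];
    insert_head(&mut v, &|a, b| a < b);
    assert_eq!(v, [1, 2, 4, 5, 8]);
}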

/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
/// stores the result into `v[..]`.
///
/// # Safety
///
/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    let len = v.len();
    let v = v.as_mut_ptr();
    let v_mid = v.add(mid);
    let v_end = v.add(len);

    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
    // copying the lesser (or greater) one into `v`.
    //
    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
    // consumed first, then we must copy whatever is left of the shorter run into the remaining
    // hole in `v`.
    //
    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
    // 1. Protects integrity of `v` from panics in `is_less`.
    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
    // object it initially held exactly once.
    let mut hole;

    if mid <= len - mid {
        // The left run is shorter.
        ptr::copy_nonoverlapping(v, buf, mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(mid),
            dest: v,
        };

        // Initially, these pointers point to the beginnings of their arrays.
        let left = &mut hole.start;
        let mut right = v_mid;
        let out = &mut hole.dest;

        while *left < hole.end && right < v_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*right, &**left) {
                get_and_increment(&mut right)
            } else {
                get_and_increment(left)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
        }
    } else {
        // The right run is shorter.
        ptr::copy_nonoverlapping(v_mid, buf, len - mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(len - mid),
            dest: v_mid,
        };

        // Initially, these pointers point past the ends of their arrays.
        let left = &mut hole.dest;
        let right = &mut hole.end;
        let mut out = v_end;

        while v < *left && buf < *right {
            // Consume the greater side.
            // If equal, prefer the right run to maintain stability.
            let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
                decrement_and_get(left)
            } else {
                decrement_and_get(right)
            };
            ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
        }
    }
    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
    // it will now be copied into the hole in `v`.

    // When dropped, copies the range `start..end` into `dest..`.
    struct MergeHole<T> {
        start: *mut T,
        end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for MergeHole<T> {
        fn drop(&mut self) {
            // `T` is not a zero-sized type, so it's okay to divide by its size.
            let len = (self.end as usize - self.start as usize) / size_of::<T>();
            unsafe {
                ptr::copy_nonoverlapping(self.start, self.dest, len);
            }
        }
    }
}
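
// A small illustrative use of `merge` under its stated contract: both halves are sorted,
// `mid` is in bounds, and `buf` has room for the shorter half (three elements here).
#[cfg(test)]
#[test]
fn merge_combines_two_sorted_halves() {
    let mut v = [1, 3, 5, 2, 4, 6];
    let mut buf = Vec::<i32>::with_capacity(3);
    unsafe { merge(&mut v, 3, buf.as_mut_ptr(), &|a, b| a < b) };
    assert_eq!(v, [1, 2, 3, 4, 5, 6]);
}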

/// The result of merge sort.
#[must_use]
#[derive(Clone, Copy, PartialEq, Eq)]
enum MergesortResult {
    /// The slice was already sorted, so it was left intact.
    NonDescending,
    /// The slice was descending, so it was left intact.
    Descending,
    /// The slice was sorted.
    Sorted,
}

/// A sorted run that starts at index `start` and is of length `len`.
#[derive(Clone, Copy)]
struct Run {
    start: usize,
    len: usize,
}

/// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
/// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
/// algorithm should continue building a new run instead, `None` is returned.
///
/// TimSort is infamous for its buggy implementations, as described here:
/// http://envisage-project.eu/timsort-specification-and-verification/
///
/// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
/// Enforcing them on just the top three is not sufficient to ensure that the invariants will still
/// hold for *all* runs in the stack.
///
/// This function correctly checks invariants for the top four runs. Additionally, if the top
/// run starts at index 0, it will always demand a merge operation until the stack is fully
/// collapsed, in order to complete the sort.
#[inline]
fn collapse(runs: &[Run]) -> Option<usize> {
    let n = runs.len();

    if n >= 2
        && (runs[n - 1].start == 0
            || runs[n - 2].len <= runs[n - 1].len
            || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
            || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
    {
        if n >= 3 && runs[n - 3].len < runs[n - 1].len {
            Some(n - 3)
        } else {
            Some(n - 2)
        }
    } else {
        None
    }
}
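
// A minimal illustrative check of `collapse` (far from exhaustive): when the invariants
// hold it asks for no merge; a top run that is not shorter than the one below it, or one
// starting at index 0, demands one.
#[cfg(test)]
#[test]
fn collapse_demands_merge_only_when_invariants_break() {
    // Invariants hold: each run is strictly longer than the one pushed after it.
    let ok = [Run { start: 20, len: 20 }, Run { start: 10, len: 10 }];
    assert_eq!(collapse(&ok), None);

    // The top run is not shorter than the one below it: merge `runs[0]` and `runs[1]`.
    let unbalanced = [Run { start: 10, len: 5 }, Run { start: 2, len: 8 }];
    assert_eq!(collapse(&unbalanced), Some(0));

    // The top run starts at index 0, so the stack must be collapsed to finish the sort.
    let finishing = [Run { start: 5, len: 20 }, Run { start: 0, len: 5 }];
    assert_eq!(collapse(&finishing), Some(0));
}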

/// Sorts a slice using merge sort, unless it is already in descending order.
///
/// This function doesn't modify the slice if it is already non-descending or descending.
/// Otherwise, it sorts the slice into non-descending order.
///
/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
/// [here](https://svn.python.org/projects/python/trunk/Objects/listsort.txt).
///
/// The algorithm identifies strictly descending and non-descending subsequences, which are called
/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
/// satisfied:
///
/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
///
/// The invariants ensure that the total running time is `O(n log n)` worst-case.
///
/// # Safety
///
/// The argument `buf` is used as a temporary buffer and must be at least as long as `v`.
unsafe fn mergesort<T, F>(v: &mut [T], buf: *mut T, is_less: &F) -> MergesortResult
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Very short runs are extended using insertion sort to span at least this many elements.
    const MIN_RUN: usize = 10;

    let len = v.len();

    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
    // strange decision, but consider the fact that merges more often go in the opposite direction
    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
    // backwards. To conclude, identifying runs by traversing backwards improves performance.
    let mut runs = vec![];
    let mut end = len;
    while end > 0 {
        // Find the next natural run, and reverse it if it's strictly descending.
        let mut start = end - 1;

        if start > 0 {
            start -= 1;

            if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
                while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this descending run covers the whole slice, return immediately.
                if start == 0 && end == len {
                    return MergesortResult::Descending;
                } else {
                    v[start..end].reverse();
                }
            } else {
                while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this non-descending run covers the whole slice, return immediately.
                if end - start == len {
                    return MergesortResult::NonDescending;
                }
            }
        }

        // Insert some more elements into the run if it's too short. Insertion sort is faster than
        // merge sort on short sequences, so this significantly improves performance.
        while start > 0 && end - start < MIN_RUN {
            start -= 1;
            insert_head(&mut v[start..end], &is_less);
        }

        // Push this run onto the stack.
        runs.push(Run {
            start,
            len: end - start,
        });
        end = start;

        // Merge some pairs of adjacent runs to satisfy the invariants.
        while let Some(r) = collapse(&runs) {
            let left = runs[r + 1];
            let right = runs[r];
            merge(
                &mut v[left.start..right.start + right.len],
                left.len,
                buf,
                &is_less,
            );

            runs[r] = Run {
                start: left.start,
                len: left.len + right.len,
            };
            runs.remove(r + 1);
        }
    }

    // Finally, exactly one run must remain in the stack.
    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);

    // The original order of the slice was neither non-descending nor descending.
    MergesortResult::Sorted
}
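
// An illustrative sketch of the sequential entry point: a strictly descending slice is
// reported and left intact, anything else is sorted. The scratch buffer only needs at
// least `v.len()` capacity.
#[cfg(test)]
#[test]
fn sequential_mergesort_reports_descending_input() {
    let is_less = |a: &i32, b: &i32| a < b;
    let mut buf = Vec::<i32>::with_capacity(8);

    let mut v = [5, 4, 3, 2, 1];
    let res = unsafe { mergesort(&mut v, buf.as_mut_ptr(), &is_less) };
    assert!(res == MergesortResult::Descending);
    assert_eq!(v, [5, 4, 3, 2, 1]);

    let mut v = [3, 1, 4, 1, 5];
    let res = unsafe { mergesort(&mut v, buf.as_mut_ptr(), &is_less) };
    assert!(res == MergesortResult::Sorted);
    assert_eq!(v, [1, 1, 3, 4, 5]);
}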

////////////////////////////////////////////////////////////////////////////
// Everything above this line is copied from `std::slice::sort` (with very minor tweaks).
// Everything below this line is parallelization.
////////////////////////////////////////////////////////////////////////////

/// Splits two sorted slices so that they can be merged in parallel.
///
/// Returns two indices `(a, b)` so that slices `left[..a]` and `right[..b]` come before
/// `left[a..]` and `right[b..]`.
fn split_for_merge<T, F>(left: &[T], right: &[T], is_less: &F) -> (usize, usize)
where
    F: Fn(&T, &T) -> bool,
{
    let left_len = left.len();
    let right_len = right.len();

    if left_len >= right_len {
        let left_mid = left_len / 2;

        // Find the first element in `right` that is greater than or equal to `left[left_mid]`.
        let mut a = 0;
        let mut b = right_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[m], &left[left_mid]) {
                a = m + 1;
            } else {
                b = m;
            }
        }

        (left_mid, a)
    } else {
        let right_mid = right_len / 2;

        // Find the first element in `left` that is greater than `right[right_mid]`.
        let mut a = 0;
        let mut b = left_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[right_mid], &left[m]) {
                b = m;
            } else {
                a = m + 1;
            }
        }

        (a, right_mid)
    }
}
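
// A concrete worked example of the split: with `left = [1, 3, 5, 7]` and
// `right = [2, 4, 6, 8]`, the longer (here, equal-length) run is bisected at its midpoint
// and the other is binary-searched, giving `(a, b) == (2, 2)`; the two resulting halves
// can then be merged independently.
#[cfg(test)]
#[test]
fn split_for_merge_worked_example() {
    let (a, b) = split_for_merge(&[1, 3, 5, 7], &[2, 4, 6, 8], &|x, y| x < y);
    assert_eq!((a, b), (2, 2));
}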

/// Merges slices `left` and `right` in parallel and stores the result into `dest`.
///
/// # Safety
///
/// The `dest` pointer must have enough space to store the result.
///
/// Even if `is_less` panics at any point during the merge process, this function will fully copy
/// all elements from `left` and `right` into `dest` (not necessarily in sorted order).
unsafe fn par_merge<T, F>(left: &mut [T], right: &mut [T], dest: *mut T, is_less: &F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices whose lengths sum up to this value are merged sequentially. This number is slightly
    // larger than `CHUNK_LENGTH`, and the reason is that merging is faster than merge sorting, so
    // merging needs a bit coarser granularity in order to hide the overhead of Rayon's task
    // scheduling.
    const MAX_SEQUENTIAL: usize = 5000;

    let left_len = left.len();
    let right_len = right.len();

    // Intermediate state of the merge process, which serves two purposes:
    // 1. Protects integrity of `dest` from panics in `is_less`.
    // 2. Copies the remaining elements as soon as one of the two sides is exhausted.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the merge process, `s` will get dropped and copy the
    // remaining parts of `left` and `right` into `dest`.
    let mut s = State {
        left_start: left.as_mut_ptr(),
        left_end: left.as_mut_ptr().add(left_len),
        right_start: right.as_mut_ptr(),
        right_end: right.as_mut_ptr().add(right_len),
        dest,
    };

    if left_len == 0 || right_len == 0 || left_len + right_len < MAX_SEQUENTIAL {
        while s.left_start < s.left_end && s.right_start < s.right_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*s.right_start, &*s.left_start) {
                get_and_increment(&mut s.right_start)
            } else {
                get_and_increment(&mut s.left_start)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(&mut s.dest), 1);
        }
    } else {
        // Function `split_for_merge` might panic. If that happens, `s` will get dropped and copy
        // the whole `left` and `right` into `dest`.
        let (left_mid, right_mid) = split_for_merge(left, right, is_less);
        let (left_l, left_r) = left.split_at_mut(left_mid);
        let (right_l, right_r) = right.split_at_mut(right_mid);

        // Prevent the destructor of `s` from running. Rayon will ensure that both calls to
        // `par_merge` happen. If one of the two calls panics, they will ensure that elements still
        // get copied into `dest_l` and `dest_r`.
        mem::forget(s);

        // Convert the pointers to `usize` because `*mut T` is not `Send`.
        let dest_l = dest as usize;
        let dest_r = dest.add(left_l.len() + right_l.len()) as usize;
        rayon_core::join(
            || par_merge(left_l, right_l, dest_l as *mut T, is_less),
            || par_merge(left_r, right_r, dest_r as *mut T, is_less),
        );
    }
    // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
    // all at once.

    // When dropped, copies arrays `left_start..left_end` and `right_start..right_end` into `dest`,
    // in that order.
    struct State<T> {
        left_start: *mut T,
        left_end: *mut T,
        right_start: *mut T,
        right_end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for State<T> {
        fn drop(&mut self) {
            let size = size_of::<T>();
            let left_len = (self.left_end as usize - self.left_start as usize) / size;
            let right_len = (self.right_end as usize - self.right_start as usize) / size;

            // Copy array `left`, followed by `right`.
            unsafe {
                ptr::copy_nonoverlapping(self.left_start, self.dest, left_len);
                self.dest = self.dest.add(left_len);
                ptr::copy_nonoverlapping(self.right_start, self.dest, right_len);
            }
        }
    }
}
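
// A minimal illustrative use of `par_merge`: the two inputs are sorted, `dest` has
// capacity for all six elements, and the combined length is far below `MAX_SEQUENTIAL`,
// so the sequential path is taken.
#[cfg(test)]
#[test]
fn par_merge_writes_sorted_result_into_dest() {
    let mut left = [1, 4, 9];
    let mut right = [2, 3, 10];
    let mut dest = Vec::<i32>::with_capacity(6);
    unsafe {
        par_merge(&mut left, &mut right, dest.as_mut_ptr(), &|a, b| a < b);
        // All six elements have been written, so the length can be set.
        dest.set_len(6);
    }
    assert_eq!(dest, [1, 2, 3, 4, 9, 10]);
}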

/// Recursively merges pre-sorted chunks inside `v`.
///
/// Chunks of `v` are stored in `chunks` as intervals (inclusive left and exclusive right bound).
/// Argument `buf` is an auxiliary buffer that will be used during the procedure.
/// If `into_buf` is true, the result will be stored into `buf`, otherwise it will be in `v`.
///
/// # Safety
///
/// The number of chunks must be positive and they must be adjacent: the right bound of each chunk
/// must equal the left bound of the following chunk.
///
/// The buffer must be at least as long as `v`.
unsafe fn recurse<T, F>(
    v: *mut T,
    buf: *mut T,
    chunks: &[(usize, usize)],
    into_buf: bool,
    is_less: &F,
) where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    let len = chunks.len();
    debug_assert!(len > 0);

    // Base case of the algorithm.
    // If only one chunk is remaining, there's no more work to split and merge.
    if len == 1 {
        if into_buf {
            // Copy the chunk from `v` into `buf`.
            let (start, end) = chunks[0];
            let src = v.add(start);
            let dest = buf.add(start);
            ptr::copy_nonoverlapping(src, dest, end - start);
        }
        return;
    }

    // Split the chunks into two halves.
    let (start, _) = chunks[0];
    let (mid, _) = chunks[len / 2];
    let (_, end) = chunks[len - 1];
    let (left, right) = chunks.split_at(len / 2);

    // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from
    // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
    // merge chunks from `v` into `buf`, and vice versa.
    //
    // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
    // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
    // level etc.
    let (src, dest) = if into_buf { (v, buf) } else { (buf, v) };

    // Panic safety:
    //
    // If `is_less` panics at any point during the recursive calls, the destructor of `guard` will
    // be executed, thus copying everything from `src` into `dest`. This way we ensure that all
    // chunks are in fact copied into `dest`, even if the merge process doesn't finish.
    let guard = CopyOnDrop {
        src: src.add(start),
        dest: dest.add(start),
        len: end - start,
    };

    // Convert the pointers to `usize` because `*mut T` is not `Send`.
    let v = v as usize;
    let buf = buf as usize;
    rayon_core::join(
        || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less),
        || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less),
    );

    // Everything went all right - recursive calls didn't panic.
    // Forget the guard in order to prevent its destructor from running.
    mem::forget(guard);

    // Merge chunks `(start, mid)` and `(mid, end)` from `src` into `dest`.
    let src_left = slice::from_raw_parts_mut(src.add(start), mid - start);
    let src_right = slice::from_raw_parts_mut(src.add(mid), end - mid);
    par_merge(src_left, src_right, dest.add(start), is_less);
}
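
// An illustrative use of `recurse` under its stated contract: three adjacent, individually
// sorted chunks, a scratch buffer as long as `v`, and `into_buf == false` so the merged
// result ends up back in `v`.
#[cfg(test)]
#[test]
fn recurse_merges_adjacent_presorted_chunks() {
    let mut v = [3, 6, 9, 1, 5, 8, 2, 4, 7];
    let mut buf = Vec::<i32>::with_capacity(v.len());
    let chunks = [(0, 3), (3, 6), (6, 9)];
    unsafe { recurse(v.as_mut_ptr(), buf.as_mut_ptr(), &chunks, false, &|a, b| a < b) };
    assert_eq!(v, [1, 2, 3, 4, 5, 6, 7, 8, 9]);
}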

/// Sorts `v` using merge sort in parallel.
///
/// The algorithm is stable, allocates memory, and `O(n log n)` worst-case.
/// The allocated temporary buffer is of the same length as `v`.
pub(super) fn par_mergesort<T, F>(v: &mut [T], is_less: F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices of up to this length get sorted using insertion sort in order to avoid the cost of
    // buffer allocation.
    const MAX_INSERTION: usize = 20;
    // The length of initial chunks. This number is as small as possible but so that the overhead
    // of Rayon's task scheduling is still negligible.
    const CHUNK_LENGTH: usize = 2000;

    // Sorting has no meaningful behavior on zero-sized types.
    if size_of::<T>() == 0 {
        return;
    }

    let len = v.len();

    // Short slices get sorted in-place via insertion sort to avoid allocations.
    if len <= MAX_INSERTION {
        if len >= 2 {
            for i in (0..len - 1).rev() {
                insert_head(&mut v[i..], &is_less);
            }
        }
        return;
    }

    // Allocate a buffer to use as scratch memory. Its length stays 0 so that it can hold shallow
    // copies of the contents of `v` without risking destructors running on those copies if
    // `is_less` panics.
    let mut buf = Vec::<T>::with_capacity(len);
    let buf = buf.as_mut_ptr();

    // If the slice is not longer than one chunk would be, do sequential merge sort and return.
    if len <= CHUNK_LENGTH {
        let res = unsafe { mergesort(v, buf, &is_less) };
        if res == MergesortResult::Descending {
            v.reverse();
        }
        return;
    }

    // Split the slice into chunks and merge sort them in parallel.
    // However, descending chunks will not be sorted - they will be simply left intact.
    let mut iter = {
        // Convert the pointer to `usize` because `*mut T` is not `Send`.
        let buf = buf as usize;

        v.par_chunks_mut(CHUNK_LENGTH)
            .with_max_len(1)
            .enumerate()
            .map(|(i, chunk)| {
                let l = CHUNK_LENGTH * i;
                let r = l + chunk.len();
                unsafe {
                    let buf = (buf as *mut T).add(l);
                    (l, r, mergesort(chunk, buf, &is_less))
                }
            })
            .collect::<Vec<_>>()
            .into_iter()
            .peekable()
    };

    // Now attempt to concatenate adjacent chunks that were left intact.
    let mut chunks = Vec::with_capacity(iter.len());

    while let Some((a, mut b, res)) = iter.next() {
        // If this chunk was not modified by the sort procedure...
        if res != MergesortResult::Sorted {
            while let Some(&(x, y, r)) = iter.peek() {
                // If the following chunk is of the same type and can be concatenated...
                if r == res && (r == MergesortResult::Descending) == is_less(&v[x], &v[x - 1]) {
                    // Concatenate them.
                    b = y;
                    iter.next();
                } else {
                    break;
                }
            }
        }

        // Descending chunks must be reversed.
        if res == MergesortResult::Descending {
            v[a..b].reverse();
        }

        chunks.push((a, b));
    }

    // All chunks are properly sorted.
    // Now we just have to merge them together.
    unsafe {
        recurse(v.as_mut_ptr(), buf, &chunks, false, &is_less);
    }
}
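
// An illustrative end-to-end use of `par_mergesort` on a slice long enough to take the
// parallel chunked path (longer than `CHUNK_LENGTH`), using a plain strict less-than
// comparison; only sortedness of the result is checked here.
#[cfg(test)]
#[test]
fn par_mergesort_sorts_a_scrambled_slice() {
    let mut v: Vec<i32> = (0..10_000).map(|i| (i * 7919) % 1000).collect();
    par_mergesort(&mut v, |a, b| a < b);
    assert!(v.windows(2).all(|w| w[0] <= w[1]));
}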

#[cfg(test)]
mod tests {
    use super::split_for_merge;
    use rand::distributions::Uniform;
    use rand::{thread_rng, Rng};

    #[test]
    fn test_split_for_merge() {
        fn check(left: &[u32], right: &[u32]) {
            let (l, r) = split_for_merge(left, right, &|&a, &b| a < b);
            assert!(left[..l]
                .iter()
                .all(|&x| right[r..].iter().all(|&y| x <= y)));
            assert!(right[..r].iter().all(|&x| left[l..].iter().all(|&y| x < y)));
        }

        check(&[1, 2, 2, 2, 2, 3], &[1, 2, 2, 2, 2, 3]);
        check(&[1, 2, 2, 2, 2, 3], &[]);
        check(&[], &[1, 2, 2, 2, 2, 3]);

        let ref mut rng = thread_rng();

        for _ in 0..100 {
            let limit: u32 = rng.gen_range(1..21);
            let left_len: usize = rng.gen_range(0..20);
            let right_len: usize = rng.gen_range(0..20);

            let mut left = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(left_len)
                .collect::<Vec<_>>();
            let mut right = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(right_len)
                .collect::<Vec<_>>();

            left.sort();
            right.sort();
            check(&left, &right);
        }
    }
}