]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/sort/spreadsort/detail/integer_sort.hpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / boost / sort / spreadsort / detail / integer_sort.hpp
CommitLineData
11fdf7f2
TL
1// Details for templated Spreadsort-based integer_sort.
2
3// Copyright Steven J. Ross 2001 - 2014.
4// Distributed under the Boost Software License, Version 1.0.
5// (See accompanying file LICENSE_1_0.txt or copy at
6// http://www.boost.org/LICENSE_1_0.txt)
7
8// See http://www.boost.org/libs/sort for library home page.
9
10/*
11Some improvements suggested by:
12Phil Endecott and Frank Gennari
13*/
14
15#ifndef BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
16#define BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
17#include <algorithm>
18#include <vector>
19#include <limits>
20#include <functional>
21#include <boost/static_assert.hpp>
11fdf7f2
TL
22#include <boost/utility/enable_if.hpp>
23#include <boost/sort/spreadsort/detail/constants.hpp>
24#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
25#include <boost/cstdint.hpp>
26
27namespace boost {
28namespace sort {
29namespace spreadsort {
30 namespace detail {
31 // Return true if the list is sorted. Otherwise, find the minimum and
32 // maximum using <.
33 template <class RandomAccessIter>
34 inline bool
35 is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
36 RandomAccessIter & max, RandomAccessIter & min)
37 {
38 min = max = current;
39 //This assumes we have more than 1 element based on prior checks.
40 while (!(*(current + 1) < *current)) {
41 //If everything is in sorted order, return
42 if (++current == last - 1)
43 return true;
44 }
45
46 //The maximum is the last sorted element
47 max = current;
48 //Start from the first unsorted element
49 while (++current < last) {
50 if (*max < *current)
51 max = current;
52 else if (*current < *min)
53 min = current;
54 }
55 return false;
56 }
57
58 // Return true if the list is sorted. Otherwise, find the minimum and
59 // maximum.
60 // Use a user-defined comparison operator
61 template <class RandomAccessIter, class Compare>
62 inline bool
63 is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
64 RandomAccessIter & max, RandomAccessIter & min, Compare comp)
65 {
66 min = max = current;
67 while (!comp(*(current + 1), *current)) {
68 //If everything is in sorted order, return
69 if (++current == last - 1)
70 return true;
71 }
72
73 //The maximum is the last sorted element
74 max = current;
75 while (++current < last) {
76 if (comp(*max, *current))
77 max = current;
78 else if (comp(*current, *min))
79 min = current;
80 }
81 return false;
82 }
83
84 //Gets a non-negative right bit shift to operate as a logarithmic divisor
85 template<unsigned log_mean_bin_size>
86 inline int
87 get_log_divisor(size_t count, int log_range)
88 {
89 int log_divisor;
90 //If we can finish in one iteration without exceeding either
91 //(2 to the max_finishing_splits) or n bins, do so
92 if ((log_divisor = log_range - rough_log_2_size(count)) <= 0 &&
93 log_range <= max_finishing_splits)
94 log_divisor = 0;
95 else {
96 //otherwise divide the data into an optimized number of pieces
97 log_divisor += log_mean_bin_size;
98 //Cannot exceed max_splits or cache misses slow down bin lookups
99 if ((log_range - log_divisor) > max_splits)
100 log_divisor = log_range - max_splits;
101 }
102 return log_divisor;
103 }
104
105 //Implementation for recursive integer sorting
106 template <class RandomAccessIter, class Div_type, class Size_type>
107 inline void
108 spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
109 std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
110 , size_t *bin_sizes)
111 {
112 //This step is roughly 10% of runtime, but it helps avoid worst-case
113 //behavior and improve behavior with real data
114 //If you know the maximum and minimum ahead of time, you can pass those
115 //values in and skip this step for the first iteration
116 RandomAccessIter max, min;
117 if (is_sorted_or_find_extremes(first, last, max, min))
118 return;
119 RandomAccessIter * target_bin;
120 unsigned log_divisor = get_log_divisor<int_log_mean_bin_size>(
121 last - first, rough_log_2_size(Size_type((*max >> 0) - (*min >> 0))));
122 Div_type div_min = *min >> log_divisor;
123 Div_type div_max = *max >> log_divisor;
124 unsigned bin_count = unsigned(div_max - div_min) + 1;
125 unsigned cache_end;
126 RandomAccessIter * bins =
127 size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
128
129 //Calculating the size of each bin; this takes roughly 10% of runtime
130 for (RandomAccessIter current = first; current != last;)
131 bin_sizes[size_t((*(current++) >> log_divisor) - div_min)]++;
132 //Assign the bin positions
133 bins[0] = first;
134 for (unsigned u = 0; u < bin_count - 1; u++)
135 bins[u + 1] = bins[u] + bin_sizes[u];
136
137 RandomAccessIter nextbinstart = first;
138 //Swap into place
139 //This dominates runtime, mostly in the swap and bin lookups
140 for (unsigned u = 0; u < bin_count - 1; ++u) {
141 RandomAccessIter * local_bin = bins + u;
142 nextbinstart += bin_sizes[u];
143 //Iterating over each element in this bin
144 for (RandomAccessIter current = *local_bin; current < nextbinstart;
145 ++current) {
146 //Swapping elements in current into place until the correct
147 //element has been swapped in
148 for (target_bin = (bins + ((*current >> log_divisor) - div_min));
149 target_bin != local_bin;
150 target_bin = bins + ((*current >> log_divisor) - div_min)) {
151 //3-way swap; this is about 1% faster than a 2-way swap
152 //The main advantage is less copies are involved per item
153 //put in the correct place
154 typename std::iterator_traits<RandomAccessIter>::value_type tmp;
155 RandomAccessIter b = (*target_bin)++;
156 RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
157 if (b_bin != local_bin) {
158 RandomAccessIter c = (*b_bin)++;
159 tmp = *c;
160 *c = *b;
161 }
162 else
163 tmp = *b;
164 *b = *current;
165 *current = tmp;
166 }
167 }
168 *local_bin = nextbinstart;
169 }
170 bins[bin_count - 1] = last;
171
172 //If we've bucketsorted, the array is sorted and we should skip recursion
173 if (!log_divisor)
174 return;
175 //log_divisor is the remaining range; calculating the comparison threshold
176 size_t max_count =
177 get_min_count<int_log_mean_bin_size, int_log_min_split_count,
178 int_log_finishing_count>(log_divisor);
179
180 //Recursing
181 RandomAccessIter lastPos = first;
182 for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
183 ++u) {
184 Size_type count = bin_cache[u] - lastPos;
185 //don't sort unless there are at least two items to Compare
186 if (count < 2)
187 continue;
92f5a8d4 188 //using boost::sort::pdqsort if its worst-case is better
11fdf7f2 189 if (count < max_count)
92f5a8d4 190 boost::sort::pdqsort(lastPos, bin_cache[u]);
11fdf7f2
TL
191 else
192 spreadsort_rec<RandomAccessIter, Div_type, Size_type>(lastPos,
193 bin_cache[u],
194 bin_cache,
195 cache_end,
196 bin_sizes);
197 }
198 }
199
200 //Generic bitshift-based 3-way swapping code
201 template <class RandomAccessIter, class Div_type, class Right_shift>
202 inline void inner_swap_loop(RandomAccessIter * bins,
203 const RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
204 , const unsigned log_divisor, const Div_type div_min)
205 {
206 RandomAccessIter * local_bin = bins + ii;
207 for (RandomAccessIter current = *local_bin; current < next_bin_start;
208 ++current) {
209 for (RandomAccessIter * target_bin =
210 (bins + (rshift(*current, log_divisor) - div_min));
211 target_bin != local_bin;
212 target_bin = bins + (rshift(*current, log_divisor) - div_min)) {
213 typename std::iterator_traits<RandomAccessIter>::value_type tmp;
214 RandomAccessIter b = (*target_bin)++;
215 RandomAccessIter * b_bin =
216 bins + (rshift(*b, log_divisor) - div_min);
217 //Three-way swap; if the item to be swapped doesn't belong
218 //in the current bin, swap it to where it belongs
219 if (b_bin != local_bin) {
220 RandomAccessIter c = (*b_bin)++;
221 tmp = *c;
222 *c = *b;
223 }
224 //Note: we could increment current once the swap is done in this case
225 //but that seems to impair performance
226 else
227 tmp = *b;
228 *b = *current;
229 *current = tmp;
230 }
231 }
232 *local_bin = next_bin_start;
233 }
234
235 //Standard swapping wrapper for ascending values
236 template <class RandomAccessIter, class Div_type, class Right_shift>
237 inline void swap_loop(RandomAccessIter * bins,
238 RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
239 , const size_t *bin_sizes
240 , const unsigned log_divisor, const Div_type div_min)
241 {
242 next_bin_start += bin_sizes[ii];
243 inner_swap_loop<RandomAccessIter, Div_type, Right_shift>(bins,
244 next_bin_start, ii, rshift, log_divisor, div_min);
245 }
246
247 //Functor implementation for recursive sorting
248 template <class RandomAccessIter, class Div_type, class Right_shift,
249 class Compare, class Size_type, unsigned log_mean_bin_size,
250 unsigned log_min_split_count, unsigned log_finishing_count>
251 inline void
252 spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
253 std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
254 , size_t *bin_sizes, Right_shift rshift, Compare comp)
255 {
256 RandomAccessIter max, min;
257 if (is_sorted_or_find_extremes(first, last, max, min, comp))
258 return;
259 unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
260 rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
261 Div_type div_min = rshift(*min, log_divisor);
262 Div_type div_max = rshift(*max, log_divisor);
263 unsigned bin_count = unsigned(div_max - div_min) + 1;
264 unsigned cache_end;
265 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
266 cache_end, bin_count);
267
268 //Calculating the size of each bin
269 for (RandomAccessIter current = first; current != last;)
270 bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
271 bins[0] = first;
272 for (unsigned u = 0; u < bin_count - 1; u++)
273 bins[u + 1] = bins[u] + bin_sizes[u];
274
275 //Swap into place
276 RandomAccessIter next_bin_start = first;
277 for (unsigned u = 0; u < bin_count - 1; ++u)
278 swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, next_bin_start,
279 u, rshift, bin_sizes, log_divisor, div_min);
280 bins[bin_count - 1] = last;
281
282 //If we've bucketsorted, the array is sorted
283 if (!log_divisor)
284 return;
285
286 //Recursing
287 size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
288 log_finishing_count>(log_divisor);
289 RandomAccessIter lastPos = first;
290 for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
291 ++u) {
292 size_t count = bin_cache[u] - lastPos;
293 if (count < 2)
294 continue;
295 if (count < max_count)
92f5a8d4 296 boost::sort::pdqsort(lastPos, bin_cache[u], comp);
11fdf7f2
TL
297 else
298 spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
299 Size_type, log_mean_bin_size, log_min_split_count, log_finishing_count>
300 (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift, comp);
301 }
302 }
303
304 //Functor implementation for recursive sorting with only Shift overridden
305 template <class RandomAccessIter, class Div_type, class Right_shift,
306 class Size_type, unsigned log_mean_bin_size,
307 unsigned log_min_split_count, unsigned log_finishing_count>
308 inline void
309 spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
310 std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
311 , size_t *bin_sizes, Right_shift rshift)
312 {
313 RandomAccessIter max, min;
314 if (is_sorted_or_find_extremes(first, last, max, min))
315 return;
316 unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
317 rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
318 Div_type div_min = rshift(*min, log_divisor);
319 Div_type div_max = rshift(*max, log_divisor);
320 unsigned bin_count = unsigned(div_max - div_min) + 1;
321 unsigned cache_end;
322 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
323 cache_end, bin_count);
324
325 //Calculating the size of each bin
326 for (RandomAccessIter current = first; current != last;)
327 bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
328 bins[0] = first;
329 for (unsigned u = 0; u < bin_count - 1; u++)
330 bins[u + 1] = bins[u] + bin_sizes[u];
331
332 //Swap into place
333 RandomAccessIter nextbinstart = first;
334 for (unsigned ii = 0; ii < bin_count - 1; ++ii)
335 swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, nextbinstart,
336 ii, rshift, bin_sizes, log_divisor, div_min);
337 bins[bin_count - 1] = last;
338
339 //If we've bucketsorted, the array is sorted
340 if (!log_divisor)
341 return;
342
343 //Recursing
344 size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
345 log_finishing_count>(log_divisor);
346 RandomAccessIter lastPos = first;
347 for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
348 ++u) {
349 size_t count = bin_cache[u] - lastPos;
350 if (count < 2)
351 continue;
352 if (count < max_count)
92f5a8d4 353 boost::sort::pdqsort(lastPos, bin_cache[u]);
11fdf7f2
TL
354 else
355 spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Size_type,
356 log_mean_bin_size, log_min_split_count, log_finishing_count>(lastPos,
357 bin_cache[u], bin_cache, cache_end, bin_sizes, rshift);
358 }
359 }
360
361 //Holds the bin vector and makes the initial recursive call
362 template <class RandomAccessIter, class Div_type>
363 //Only use spreadsort if the integer can fit in a size_t
364 inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
365 void >::type
366 integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
367 {
368 size_t bin_sizes[1 << max_finishing_splits];
369 std::vector<RandomAccessIter> bin_cache;
370 spreadsort_rec<RandomAccessIter, Div_type, size_t>(first, last,
371 bin_cache, 0, bin_sizes);
372 }
373
374 //Holds the bin vector and makes the initial recursive call
375 template <class RandomAccessIter, class Div_type>
376 //Only use spreadsort if the integer can fit in a uintmax_t
377 inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
378 && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
379 integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
380 {
381 size_t bin_sizes[1 << max_finishing_splits];
382 std::vector<RandomAccessIter> bin_cache;
383 spreadsort_rec<RandomAccessIter, Div_type, boost::uintmax_t>(first,
384 last, bin_cache, 0, bin_sizes);
385 }
386
387 template <class RandomAccessIter, class Div_type>
388 inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
389 || sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
92f5a8d4 390 //defaulting to boost::sort::pdqsort when integer_sort won't work
11fdf7f2
TL
391 integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
392 {
92f5a8d4 393 //Warning that we're using boost::sort::pdqsort, even though integer_sort was called
20effc67 394 BOOST_STATIC_ASSERT( sizeof(Div_type) <= sizeof(size_t) );
92f5a8d4 395 boost::sort::pdqsort(first, last);
11fdf7f2
TL
396 }
397
398
399 //Same for the full functor version
400 template <class RandomAccessIter, class Div_type, class Right_shift,
401 class Compare>
402 //Only use spreadsort if the integer can fit in a size_t
403 inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
404 void >::type
405 integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
406 Right_shift shift, Compare comp)
407 {
408 size_t bin_sizes[1 << max_finishing_splits];
409 std::vector<RandomAccessIter> bin_cache;
410 spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
411 size_t, int_log_mean_bin_size, int_log_min_split_count,
412 int_log_finishing_count>
413 (first, last, bin_cache, 0, bin_sizes, shift, comp);
414 }
415
416 template <class RandomAccessIter, class Div_type, class Right_shift,
417 class Compare>
418 //Only use spreadsort if the integer can fit in a uintmax_t
419 inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
420 && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
421 integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
422 Right_shift shift, Compare comp)
423 {
424 size_t bin_sizes[1 << max_finishing_splits];
425 std::vector<RandomAccessIter> bin_cache;
426 spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
427 boost::uintmax_t, int_log_mean_bin_size,
428 int_log_min_split_count, int_log_finishing_count>
429 (first, last, bin_cache, 0, bin_sizes, shift, comp);
430 }
431
432 template <class RandomAccessIter, class Div_type, class Right_shift,
433 class Compare>
434 inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
435 || sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
92f5a8d4 436 //defaulting to boost::sort::pdqsort when integer_sort won't work
11fdf7f2
TL
437 integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
438 Right_shift shift, Compare comp)
439 {
92f5a8d4 440 //Warning that we're using boost::sort::pdqsort, even though integer_sort was called
20effc67 441 BOOST_STATIC_ASSERT( sizeof(Div_type) <= sizeof(size_t) );
92f5a8d4 442 boost::sort::pdqsort(first, last, comp);
11fdf7f2
TL
443 }
444
445
446 //Same for the right shift version
447 template <class RandomAccessIter, class Div_type, class Right_shift>
448 //Only use spreadsort if the integer can fit in a size_t
449 inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
450 void >::type
451 integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
452 Right_shift shift)
453 {
454 size_t bin_sizes[1 << max_finishing_splits];
455 std::vector<RandomAccessIter> bin_cache;
456 spreadsort_rec<RandomAccessIter, Div_type, Right_shift, size_t,
457 int_log_mean_bin_size, int_log_min_split_count,
458 int_log_finishing_count>
459 (first, last, bin_cache, 0, bin_sizes, shift);
460 }
461
462 template <class RandomAccessIter, class Div_type, class Right_shift>
463 //Only use spreadsort if the integer can fit in a uintmax_t
464 inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
465 && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
466 integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
467 Right_shift shift)
468 {
469 size_t bin_sizes[1 << max_finishing_splits];
470 std::vector<RandomAccessIter> bin_cache;
471 spreadsort_rec<RandomAccessIter, Div_type, Right_shift,
472 boost::uintmax_t, int_log_mean_bin_size,
473 int_log_min_split_count, int_log_finishing_count>
474 (first, last, bin_cache, 0, bin_sizes, shift);
475 }
476
477 template <class RandomAccessIter, class Div_type, class Right_shift>
478 inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
479 || sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
92f5a8d4 480 //defaulting to boost::sort::pdqsort when integer_sort won't work
11fdf7f2
TL
481 integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
482 Right_shift shift)
483 {
92f5a8d4 484 //Warning that we're using boost::sort::pdqsort, even though integer_sort was called
20effc67 485 BOOST_STATIC_ASSERT( sizeof(Div_type) <= sizeof(size_t) );
92f5a8d4 486 boost::sort::pdqsort(first, last);
11fdf7f2
TL
487 }
488 }
489}
490}
491}
492
493#endif