]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/sort/spreadsort/detail/string_sort.hpp
bump version to 18.2.4-pve3
[ceph.git] / ceph / src / boost / boost / sort / spreadsort / detail / string_sort.hpp
1 // Details for a templated general-case hybrid-radix string_sort.
2
3 // Copyright Steven J. Ross 2001 - 2014.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7
8 // See http://www.boost.org/libs/sort for library home page.
9
10 /*
11 Some improvements suggested by:
12 Phil Endecott and Frank Gennari
13 */
14
15 #ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
16 #define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
17 #include <algorithm>
18 #include <vector>
19 #include <cstring>
20 #include <limits>
21 #include <functional>
22 #include <boost/static_assert.hpp>
23 #include <boost/utility/enable_if.hpp>
24 #include <boost/sort/spreadsort/detail/constants.hpp>
25 #include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
26 #include <boost/cstdint.hpp>
27
28 namespace boost {
29 namespace sort {
30 namespace spreadsort {
31 namespace detail {
32 static const int max_step_size = 64;
33
34 //Offsetting on identical characters. This function works a chunk of
35 //characters at a time for cache efficiency and optimal worst-case
36 //performance.
37 template<class RandomAccessIter, class Unsigned_char_type>
38 inline void
39 update_offset(RandomAccessIter first, RandomAccessIter finish,
40 size_t &char_offset)
41 {
42 const int char_size = sizeof(Unsigned_char_type);
43 size_t nextOffset = char_offset;
44 int step_size = max_step_size / char_size;
45 while (true) {
46 RandomAccessIter curr = first;
47 do {
48 //Ignore empties, but if the nextOffset would exceed the length or
49 //not match, exit; we've found the last matching character
50 //This will reduce the step_size if the current step doesn't match.
51 if ((*curr).size() > char_offset) {
52 if((*curr).size() <= (nextOffset + step_size)) {
53 step_size = (*curr).size() - nextOffset - 1;
54 if (step_size < 1) {
55 char_offset = nextOffset;
56 return;
57 }
58 }
59 const int step_byte_size = step_size * char_size;
60 if (memcmp(curr->data() + nextOffset, first->data() + nextOffset,
61 step_byte_size) != 0) {
62 if (step_size == 1) {
63 char_offset = nextOffset;
64 return;
65 }
66 step_size = (step_size > 4) ? 4 : 1;
67 continue;
68 }
69 }
70 ++curr;
71 } while (curr != finish);
72 nextOffset += step_size;
73 }
74 }
75
76 //Offsetting on identical characters. This function works a character
77 //at a time for optimal worst-case performance.
78 template<class RandomAccessIter, class Get_char, class Get_length>
79 inline void
80 update_offset(RandomAccessIter first, RandomAccessIter finish,
81 size_t &char_offset, Get_char get_character, Get_length length)
82 {
83 size_t nextOffset = char_offset;
84 while (true) {
85 RandomAccessIter curr = first;
86 do {
87 //ignore empties, but if the nextOffset would exceed the length or
88 //not match, exit; we've found the last matching character
89 if (length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1)
90 || get_character((*curr), nextOffset) != get_character((*first), nextOffset))) {
91 char_offset = nextOffset;
92 return;
93 }
94 } while (++curr != finish);
95 ++nextOffset;
96 }
97 }
98
99 //This comparison functor assumes strings are identical up to char_offset
100 template<class Data_type, class Unsigned_char_type>
101 struct offset_less_than {
102 offset_less_than(size_t char_offset) : fchar_offset(char_offset){}
103 inline bool operator()(const Data_type &x, const Data_type &y) const
104 {
105 size_t minSize = (std::min)(x.size(), y.size());
106 for (size_t u = fchar_offset; u < minSize; ++u) {
107 BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
108 if (static_cast<Unsigned_char_type>(x[u]) !=
109 static_cast<Unsigned_char_type>(y[u])) {
110 return static_cast<Unsigned_char_type>(x[u]) <
111 static_cast<Unsigned_char_type>(y[u]);
112 }
113 }
114 return x.size() < y.size();
115 }
116 size_t fchar_offset;
117 };
118
119 //Compares strings assuming they are identical up to char_offset
120 template<class Data_type, class Unsigned_char_type>
121 struct offset_greater_than {
122 offset_greater_than(size_t char_offset) : fchar_offset(char_offset){}
123 inline bool operator()(const Data_type &x, const Data_type &y) const
124 {
125 size_t minSize = (std::min)(x.size(), y.size());
126 for (size_t u = fchar_offset; u < minSize; ++u) {
127 BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
128 if (static_cast<Unsigned_char_type>(x[u]) !=
129 static_cast<Unsigned_char_type>(y[u])) {
130 return static_cast<Unsigned_char_type>(x[u]) >
131 static_cast<Unsigned_char_type>(y[u]);
132 }
133 }
134 return x.size() > y.size();
135 }
136 size_t fchar_offset;
137 };
138
139 //This comparison functor assumes strings are identical up to char_offset
140 template<class Data_type, class Get_char, class Get_length>
141 struct offset_char_less_than {
142 offset_char_less_than(size_t char_offset) : fchar_offset(char_offset){}
143 inline bool operator()(const Data_type &x, const Data_type &y) const
144 {
145 size_t minSize = (std::min)(length(x), length(y));
146 for (size_t u = fchar_offset; u < minSize; ++u) {
147 if (get_character(x, u) != get_character(y, u)) {
148 return get_character(x, u) < get_character(y, u);
149 }
150 }
151 return length(x) < length(y);
152 }
153 size_t fchar_offset;
154 Get_char get_character;
155 Get_length length;
156 };
157
158 //String sorting recursive implementation
159 template <class RandomAccessIter, class Unsigned_char_type>
160 inline void
161 string_sort_rec(RandomAccessIter first, RandomAccessIter last,
162 size_t char_offset,
163 std::vector<RandomAccessIter> &bin_cache,
164 unsigned cache_offset, size_t *bin_sizes)
165 {
166 typedef typename std::iterator_traits<RandomAccessIter>::value_type
167 Data_type;
168 //This section makes handling of long identical substrings much faster
169 //with a mild average performance impact.
170 //Iterate to the end of the empties. If all empty, return
171 while ((*first).size() <= char_offset) {
172 if (++first == last)
173 return;
174 }
175 RandomAccessIter finish = last - 1;
176 //Getting the last non-empty
177 for (;(*finish).size() <= char_offset; --finish);
178 ++finish;
179 //Offsetting on identical characters. This section works
180 //a few characters at a time for optimal worst-case performance.
181 update_offset<RandomAccessIter, Unsigned_char_type>(first, finish,
182 char_offset);
183
184 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
185 //Equal worst-case of radix and comparison is when bin_count = n*log(n).
186 const unsigned max_size = bin_count;
187 const unsigned membin_count = bin_count + 1;
188 unsigned cache_end;
189 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
190 cache_end, membin_count) + 1;
191
192 //Calculating the size of each bin; this takes roughly 10% of runtime
193 for (RandomAccessIter current = first; current != last; ++current) {
194 if ((*current).size() <= char_offset) {
195 bin_sizes[0]++;
196 }
197 else
198 bin_sizes[static_cast<Unsigned_char_type>((*current)[char_offset])
199 + 1]++;
200 }
201 //Assign the bin positions
202 bin_cache[cache_offset] = first;
203 for (unsigned u = 0; u < membin_count - 1; u++)
204 bin_cache[cache_offset + u + 1] =
205 bin_cache[cache_offset + u] + bin_sizes[u];
206
207 //Swap into place
208 RandomAccessIter next_bin_start = first;
209 //handling empty bins
210 RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
211 next_bin_start += bin_sizes[0];
212 RandomAccessIter * target_bin;
213 //Iterating over each element in the bin of empties
214 for (RandomAccessIter current = *local_bin; current < next_bin_start;
215 ++current) {
216 //empties belong in this bin
217 while ((*current).size() > char_offset) {
218 target_bin =
219 bins + static_cast<Unsigned_char_type>((*current)[char_offset]);
220 iter_swap(current, (*target_bin)++);
221 }
222 }
223 *local_bin = next_bin_start;
224 //iterate backwards to find the last bin with elements in it
225 //this saves iterations in multiple loops
226 unsigned last_bin = bin_count - 1;
227 for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
228 //This dominates runtime, mostly in the swap and bin lookups
229 for (unsigned u = 0; u < last_bin; ++u) {
230 local_bin = bins + u;
231 next_bin_start += bin_sizes[u + 1];
232 //Iterating over each element in this bin
233 for (RandomAccessIter current = *local_bin; current < next_bin_start;
234 ++current) {
235 //Swapping into place until the correct element has been swapped in
236 for (target_bin = bins + static_cast<Unsigned_char_type>
237 ((*current)[char_offset]); target_bin != local_bin;
238 target_bin = bins + static_cast<Unsigned_char_type>
239 ((*current)[char_offset])) iter_swap(current, (*target_bin)++);
240 }
241 *local_bin = next_bin_start;
242 }
243 bins[last_bin] = last;
244 //Recursing
245 RandomAccessIter lastPos = bin_cache[cache_offset];
246 //Skip this loop for empties
247 for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
248 lastPos = bin_cache[u], ++u) {
249 size_t count = bin_cache[u] - lastPos;
250 //don't sort unless there are at least two items to Compare
251 if (count < 2)
252 continue;
253 //using boost::sort::pdqsort if its worst-case is better
254 if (count < max_size)
255 boost::sort::pdqsort(lastPos, bin_cache[u],
256 offset_less_than<Data_type, Unsigned_char_type>(char_offset + 1));
257 else
258 string_sort_rec<RandomAccessIter, Unsigned_char_type>(lastPos,
259 bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
260 }
261 }
262
263 //Sorts strings in reverse order, with empties at the end
264 template <class RandomAccessIter, class Unsigned_char_type>
265 inline void
266 reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
267 size_t char_offset,
268 std::vector<RandomAccessIter> &bin_cache,
269 unsigned cache_offset,
270 size_t *bin_sizes)
271 {
272 typedef typename std::iterator_traits<RandomAccessIter>::value_type
273 Data_type;
274 //This section makes handling of long identical substrings much faster
275 //with a mild average performance impact.
276 RandomAccessIter curr = first;
277 //Iterate to the end of the empties. If all empty, return
278 while ((*curr).size() <= char_offset) {
279 if (++curr == last)
280 return;
281 }
282 //Getting the last non-empty
283 while ((*(--last)).size() <= char_offset);
284 ++last;
285 //Offsetting on identical characters. This section works
286 //a few characters at a time for optimal worst-case performance.
287 update_offset<RandomAccessIter, Unsigned_char_type>(curr, last,
288 char_offset);
289 RandomAccessIter * target_bin;
290
291 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
292 //Equal worst-case of radix and comparison when bin_count = n*log(n).
293 const unsigned max_size = bin_count;
294 const unsigned membin_count = bin_count + 1;
295 const unsigned max_bin = bin_count - 1;
296 unsigned cache_end;
297 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
298 cache_end, membin_count);
299 RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
300
301 //Calculating the size of each bin; this takes roughly 10% of runtime
302 for (RandomAccessIter current = first; current != last; ++current) {
303 if ((*current).size() <= char_offset) {
304 bin_sizes[bin_count]++;
305 }
306 else
307 bin_sizes[max_bin - static_cast<Unsigned_char_type>
308 ((*current)[char_offset])]++;
309 }
310 //Assign the bin positions
311 bin_cache[cache_offset] = first;
312 for (unsigned u = 0; u < membin_count - 1; u++)
313 bin_cache[cache_offset + u + 1] =
314 bin_cache[cache_offset + u] + bin_sizes[u];
315
316 //Swap into place
317 RandomAccessIter next_bin_start = last;
318 //handling empty bins
319 RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
320 RandomAccessIter lastFull = *local_bin;
321 //Iterating over each element in the bin of empties
322 for (RandomAccessIter current = *local_bin; current < next_bin_start;
323 ++current) {
324 //empties belong in this bin
325 while ((*current).size() > char_offset) {
326 target_bin =
327 end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
328 iter_swap(current, (*target_bin)++);
329 }
330 }
331 *local_bin = next_bin_start;
332 next_bin_start = first;
333 //iterate backwards to find the last non-empty bin
334 //this saves iterations in multiple loops
335 unsigned last_bin = max_bin;
336 for (; last_bin && !bin_sizes[last_bin]; --last_bin);
337 //This dominates runtime, mostly in the swap and bin lookups
338 for (unsigned u = 0; u < last_bin; ++u) {
339 local_bin = bins + u;
340 next_bin_start += bin_sizes[u];
341 //Iterating over each element in this bin
342 for (RandomAccessIter current = *local_bin; current < next_bin_start;
343 ++current) {
344 //Swapping into place until the correct element has been swapped in
345 for (target_bin =
346 end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
347 target_bin != local_bin;
348 target_bin =
349 end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]))
350 iter_swap(current, (*target_bin)++);
351 }
352 *local_bin = next_bin_start;
353 }
354 bins[last_bin] = lastFull;
355 //Recursing
356 RandomAccessIter lastPos = first;
357 //Skip this loop for empties
358 for (unsigned u = cache_offset; u <= cache_offset + last_bin;
359 lastPos = bin_cache[u], ++u) {
360 size_t count = bin_cache[u] - lastPos;
361 //don't sort unless there are at least two items to Compare
362 if (count < 2)
363 continue;
364 //using boost::sort::pdqsort if its worst-case is better
365 if (count < max_size)
366 boost::sort::pdqsort(lastPos, bin_cache[u], offset_greater_than<Data_type,
367 Unsigned_char_type>(char_offset + 1));
368 else
369 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
370 (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
371 }
372 }
373
374 //String sorting recursive implementation
375 template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
376 class Get_length>
377 inline void
378 string_sort_rec(RandomAccessIter first, RandomAccessIter last,
379 size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
380 unsigned cache_offset, size_t *bin_sizes,
381 Get_char get_character, Get_length length)
382 {
383 typedef typename std::iterator_traits<RandomAccessIter>::value_type
384 Data_type;
385 //This section makes handling of long identical substrings much faster
386 //with a mild average performance impact.
387 //Iterate to the end of the empties. If all empty, return
388 while (length(*first) <= char_offset) {
389 if (++first == last)
390 return;
391 }
392 RandomAccessIter finish = last - 1;
393 //Getting the last non-empty
394 for (;length(*finish) <= char_offset; --finish);
395 ++finish;
396 update_offset(first, finish, char_offset, get_character, length);
397
398 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
399 //Equal worst-case of radix and comparison is when bin_count = n*log(n).
400 const unsigned max_size = bin_count;
401 const unsigned membin_count = bin_count + 1;
402 unsigned cache_end;
403 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
404 cache_end, membin_count) + 1;
405
406 //Calculating the size of each bin; this takes roughly 10% of runtime
407 for (RandomAccessIter current = first; current != last; ++current) {
408 if (length(*current) <= char_offset) {
409 bin_sizes[0]++;
410 }
411 else
412 bin_sizes[get_character((*current), char_offset) + 1]++;
413 }
414 //Assign the bin positions
415 bin_cache[cache_offset] = first;
416 for (unsigned u = 0; u < membin_count - 1; u++)
417 bin_cache[cache_offset + u + 1] =
418 bin_cache[cache_offset + u] + bin_sizes[u];
419
420 //Swap into place
421 RandomAccessIter next_bin_start = first;
422 //handling empty bins
423 RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
424 next_bin_start += bin_sizes[0];
425 RandomAccessIter * target_bin;
426 //Iterating over each element in the bin of empties
427 for (RandomAccessIter current = *local_bin; current < next_bin_start;
428 ++current) {
429 //empties belong in this bin
430 while (length(*current) > char_offset) {
431 target_bin = bins + get_character((*current), char_offset);
432 iter_swap(current, (*target_bin)++);
433 }
434 }
435 *local_bin = next_bin_start;
436 //iterate backwards to find the last bin with elements in it
437 //this saves iterations in multiple loops
438 unsigned last_bin = bin_count - 1;
439 for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
440 //This dominates runtime, mostly in the swap and bin lookups
441 for (unsigned ii = 0; ii < last_bin; ++ii) {
442 local_bin = bins + ii;
443 next_bin_start += bin_sizes[ii + 1];
444 //Iterating over each element in this bin
445 for (RandomAccessIter current = *local_bin; current < next_bin_start;
446 ++current) {
447 //Swapping into place until the correct element has been swapped in
448 for (target_bin = bins + get_character((*current), char_offset);
449 target_bin != local_bin;
450 target_bin = bins + get_character((*current), char_offset))
451 iter_swap(current, (*target_bin)++);
452 }
453 *local_bin = next_bin_start;
454 }
455 bins[last_bin] = last;
456
457 //Recursing
458 RandomAccessIter lastPos = bin_cache[cache_offset];
459 //Skip this loop for empties
460 for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
461 lastPos = bin_cache[u], ++u) {
462 size_t count = bin_cache[u] - lastPos;
463 //don't sort unless there are at least two items to Compare
464 if (count < 2)
465 continue;
466 //using boost::sort::pdqsort if its worst-case is better
467 if (count < max_size)
468 boost::sort::pdqsort(lastPos, bin_cache[u], offset_char_less_than<Data_type,
469 Get_char, Get_length>(char_offset + 1));
470 else
471 string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
472 Get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache,
473 cache_end, bin_sizes, get_character, length);
474 }
475 }
476
477 //String sorting recursive implementation
478 template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
479 class Get_length, class Compare>
480 inline void
481 string_sort_rec(RandomAccessIter first, RandomAccessIter last,
482 size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
483 unsigned cache_offset, size_t *bin_sizes,
484 Get_char get_character, Get_length length, Compare comp)
485 {
486 //This section makes handling of long identical substrings much faster
487 //with a mild average performance impact.
488 //Iterate to the end of the empties. If all empty, return
489 while (length(*first) <= char_offset) {
490 if (++first == last)
491 return;
492 }
493 RandomAccessIter finish = last - 1;
494 //Getting the last non-empty
495 for (;length(*finish) <= char_offset; --finish);
496 ++finish;
497 update_offset(first, finish, char_offset, get_character, length);
498
499 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
500 //Equal worst-case of radix and comparison is when bin_count = n*log(n).
501 const unsigned max_size = bin_count;
502 const unsigned membin_count = bin_count + 1;
503 unsigned cache_end;
504 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
505 cache_end, membin_count) + 1;
506
507 //Calculating the size of each bin; this takes roughly 10% of runtime
508 for (RandomAccessIter current = first; current != last; ++current) {
509 if (length(*current) <= char_offset) {
510 bin_sizes[0]++;
511 }
512 else
513 bin_sizes[get_character((*current), char_offset) + 1]++;
514 }
515 //Assign the bin positions
516 bin_cache[cache_offset] = first;
517 for (unsigned u = 0; u < membin_count - 1; u++)
518 bin_cache[cache_offset + u + 1] =
519 bin_cache[cache_offset + u] + bin_sizes[u];
520
521 //Swap into place
522 RandomAccessIter next_bin_start = first;
523 //handling empty bins
524 RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
525 next_bin_start += bin_sizes[0];
526 RandomAccessIter * target_bin;
527 //Iterating over each element in the bin of empties
528 for (RandomAccessIter current = *local_bin; current < next_bin_start;
529 ++current) {
530 //empties belong in this bin
531 while (length(*current) > char_offset) {
532 target_bin = bins + get_character((*current), char_offset);
533 iter_swap(current, (*target_bin)++);
534 }
535 }
536 *local_bin = next_bin_start;
537 //iterate backwards to find the last bin with elements in it
538 //this saves iterations in multiple loops
539 unsigned last_bin = bin_count - 1;
540 for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
541 //This dominates runtime, mostly in the swap and bin lookups
542 for (unsigned u = 0; u < last_bin; ++u) {
543 local_bin = bins + u;
544 next_bin_start += bin_sizes[u + 1];
545 //Iterating over each element in this bin
546 for (RandomAccessIter current = *local_bin; current < next_bin_start;
547 ++current) {
548 //Swapping into place until the correct element has been swapped in
549 for (target_bin = bins + get_character((*current), char_offset);
550 target_bin != local_bin;
551 target_bin = bins + get_character((*current), char_offset))
552 iter_swap(current, (*target_bin)++);
553 }
554 *local_bin = next_bin_start;
555 }
556 bins[last_bin] = last;
557
558 //Recursing
559 RandomAccessIter lastPos = bin_cache[cache_offset];
560 //Skip this loop for empties
561 for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
562 lastPos = bin_cache[u], ++u) {
563 size_t count = bin_cache[u] - lastPos;
564 //don't sort unless there are at least two items to Compare
565 if (count < 2)
566 continue;
567 //using boost::sort::pdqsort if its worst-case is better
568 if (count < max_size)
569 boost::sort::pdqsort(lastPos, bin_cache[u], comp);
570 else
571 string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
572 Get_length, Compare>
573 (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
574 bin_sizes, get_character, length, comp);
575 }
576 }
577
578 //Sorts strings in reverse order, with empties at the end
579 template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
580 class Get_length, class Compare>
581 inline void
582 reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
583 size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
584 unsigned cache_offset, size_t *bin_sizes,
585 Get_char get_character, Get_length length, Compare comp)
586 {
587 //This section makes handling of long identical substrings much faster
588 //with a mild average performance impact.
589 RandomAccessIter curr = first;
590 //Iterate to the end of the empties. If all empty, return
591 while (length(*curr) <= char_offset) {
592 if (++curr == last)
593 return;
594 }
595 //Getting the last non-empty
596 while (length(*(--last)) <= char_offset);
597 ++last;
598 //Offsetting on identical characters. This section works
599 //a character at a time for optimal worst-case performance.
600 update_offset(curr, last, char_offset, get_character, length);
601
602 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
603 //Equal worst-case of radix and comparison is when bin_count = n*log(n).
604 const unsigned max_size = bin_count;
605 const unsigned membin_count = bin_count + 1;
606 const unsigned max_bin = bin_count - 1;
607 unsigned cache_end;
608 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
609 cache_end, membin_count);
610 RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
611
612 //Calculating the size of each bin; this takes roughly 10% of runtime
613 for (RandomAccessIter current = first; current != last; ++current) {
614 if (length(*current) <= char_offset) {
615 bin_sizes[bin_count]++;
616 }
617 else
618 bin_sizes[max_bin - get_character((*current), char_offset)]++;
619 }
620 //Assign the bin positions
621 bin_cache[cache_offset] = first;
622 for (unsigned u = 0; u < membin_count - 1; u++)
623 bin_cache[cache_offset + u + 1] =
624 bin_cache[cache_offset + u] + bin_sizes[u];
625
626 //Swap into place
627 RandomAccessIter next_bin_start = last;
628 //handling empty bins
629 RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
630 RandomAccessIter lastFull = *local_bin;
631 RandomAccessIter * target_bin;
632 //Iterating over each element in the bin of empties
633 for (RandomAccessIter current = *local_bin; current < next_bin_start;
634 ++current) {
635 //empties belong in this bin
636 while (length(*current) > char_offset) {
637 target_bin = end_bin - get_character((*current), char_offset);
638 iter_swap(current, (*target_bin)++);
639 }
640 }
641 *local_bin = next_bin_start;
642 next_bin_start = first;
643 //iterate backwards to find the last bin with elements in it
644 //this saves iterations in multiple loops
645 unsigned last_bin = max_bin;
646 for (; last_bin && !bin_sizes[last_bin]; --last_bin);
647 //This dominates runtime, mostly in the swap and bin lookups
648 for (unsigned u = 0; u < last_bin; ++u) {
649 local_bin = bins + u;
650 next_bin_start += bin_sizes[u];
651 //Iterating over each element in this bin
652 for (RandomAccessIter current = *local_bin; current < next_bin_start;
653 ++current) {
654 //Swapping into place until the correct element has been swapped in
655 for (target_bin = end_bin - get_character((*current), char_offset);
656 target_bin != local_bin;
657 target_bin = end_bin - get_character((*current), char_offset))
658 iter_swap(current, (*target_bin)++);
659 }
660 *local_bin = next_bin_start;
661 }
662 bins[last_bin] = lastFull;
663 //Recursing
664 RandomAccessIter lastPos = first;
665 //Skip this loop for empties
666 for (unsigned u = cache_offset; u <= cache_offset + last_bin;
667 lastPos = bin_cache[u], ++u) {
668 size_t count = bin_cache[u] - lastPos;
669 //don't sort unless there are at least two items to Compare
670 if (count < 2)
671 continue;
672 //using boost::sort::pdqsort if its worst-case is better
673 if (count < max_size)
674 boost::sort::pdqsort(lastPos, bin_cache[u], comp);
675 else
676 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type,
677 Get_char, Get_length, Compare>
678 (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
679 bin_sizes, get_character, length, comp);
680 }
681 }
682
683 //Holds the bin vector and makes the initial recursive call
684 template <class RandomAccessIter, class Unsigned_char_type>
685 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
686 >::type
687 string_sort(RandomAccessIter first, RandomAccessIter last,
688 Unsigned_char_type)
689 {
690 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
691 std::vector<RandomAccessIter> bin_cache;
692 string_sort_rec<RandomAccessIter, Unsigned_char_type>
693 (first, last, 0, bin_cache, 0, bin_sizes);
694 }
695
696 template <class RandomAccessIter, class Unsigned_char_type>
697 inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
698 >::type
699 string_sort(RandomAccessIter first, RandomAccessIter last,
700 Unsigned_char_type)
701 {
702 //Warning that we're using boost::sort::pdqsort, even though string_sort was called
703 BOOST_STATIC_ASSERT( sizeof(Unsigned_char_type) <= 2 );
704 boost::sort::pdqsort(first, last);
705 }
706
707 //Holds the bin vector and makes the initial recursive call
708 template <class RandomAccessIter, class Unsigned_char_type>
709 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
710 >::type
711 reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
712 Unsigned_char_type)
713 {
714 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
715 std::vector<RandomAccessIter> bin_cache;
716 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
717 (first, last, 0, bin_cache, 0, bin_sizes);
718 }
719
720 template <class RandomAccessIter, class Unsigned_char_type>
721 inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
722 >::type
723 reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
724 Unsigned_char_type)
725 {
726 typedef typename std::iterator_traits<RandomAccessIter>::value_type
727 Data_type;
728 //Warning that we're using boost::sort::pdqsort, even though string_sort was called
729 BOOST_STATIC_ASSERT( sizeof(Unsigned_char_type) <= 2 );
730 boost::sort::pdqsort(first, last, std::greater<Data_type>());
731 }
732
733 //Holds the bin vector and makes the initial recursive call
734 template <class RandomAccessIter, class Get_char, class Get_length,
735 class Unsigned_char_type>
736 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
737 >::type
738 string_sort(RandomAccessIter first, RandomAccessIter last,
739 Get_char get_character, Get_length length, Unsigned_char_type)
740 {
741 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
742 std::vector<RandomAccessIter> bin_cache;
743 string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
744 Get_length>(first, last, 0, bin_cache, 0, bin_sizes, get_character, length);
745 }
746
747 template <class RandomAccessIter, class Get_char, class Get_length,
748 class Unsigned_char_type>
749 inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
750 >::type
751 string_sort(RandomAccessIter first, RandomAccessIter last,
752 Get_char get_character, Get_length length, Unsigned_char_type)
753 {
754 //Warning that we're using boost::sort::pdqsort, even though string_sort was called
755 BOOST_STATIC_ASSERT( sizeof(Unsigned_char_type) <= 2 );
756 boost::sort::pdqsort(first, last);
757 }
758
759 //Holds the bin vector and makes the initial recursive call
760 template <class RandomAccessIter, class Get_char, class Get_length,
761 class Compare, class Unsigned_char_type>
762 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
763 >::type
764 string_sort(RandomAccessIter first, RandomAccessIter last,
765 Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
766 {
767 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
768 std::vector<RandomAccessIter> bin_cache;
769 string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char
770 , Get_length, Compare>
771 (first, last, 0, bin_cache, 0, bin_sizes, get_character, length, comp);
772 }
773
774 //disable_if_c was refusing to compile, so rewrote to use enable_if_c
775 template <class RandomAccessIter, class Get_char, class Get_length,
776 class Compare, class Unsigned_char_type>
777 inline typename boost::enable_if_c< (sizeof(Unsigned_char_type) > 2), void
778 >::type
779 string_sort(RandomAccessIter first, RandomAccessIter last,
780 Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
781 {
782 //Warning that we're using boost::sort::pdqsort, even though string_sort was called
783 BOOST_STATIC_ASSERT( sizeof(Unsigned_char_type) <= 2 );
784 boost::sort::pdqsort(first, last, comp);
785 }
786
787 //Holds the bin vector and makes the initial recursive call
788 template <class RandomAccessIter, class Get_char, class Get_length,
789 class Compare, class Unsigned_char_type>
790 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
791 >::type
792 reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
793 Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
794 {
795 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
796 std::vector<RandomAccessIter> bin_cache;
797 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
798 Get_length, Compare>
799 (first, last, 0, bin_cache, 0, bin_sizes, get_character, length, comp);
800 }
801
802 template <class RandomAccessIter, class Get_char, class Get_length,
803 class Compare, class Unsigned_char_type>
804 inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
805 >::type
806 reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
807 Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
808 {
809 //Warning that we're using boost::sort::pdqsort, even though string_sort was called
810 BOOST_STATIC_ASSERT( sizeof(Unsigned_char_type) <= 2 );
811 boost::sort::pdqsort(first, last, comp);
812 }
813 }
814 }
815 }
816 }
817
818 #endif