]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/sort/spreadsort/detail/string_sort.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / sort / spreadsort / detail / string_sort.hpp
1 // Details for a templated general-case hybrid-radix string_sort.
2
3 // Copyright Steven J. Ross 2001 - 2014.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7
8 // See http://www.boost.org/libs/sort for library home page.
9
10 /*
11 Some improvements suggested by:
12 Phil Endecott and Frank Gennari
13 */
14
15 #ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
16 #define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
17 #include <algorithm>
18 #include <vector>
19 #include <cstring>
20 #include <limits>
21 #include <functional>
22 #include <boost/static_assert.hpp>
23 #include <boost/serialization/static_warning.hpp>
24 #include <boost/utility/enable_if.hpp>
25 #include <boost/sort/spreadsort/detail/constants.hpp>
26 #include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
27 #include <boost/cstdint.hpp>
28
29 namespace boost {
30 namespace sort {
31 namespace spreadsort {
32 namespace detail {
33 static const int max_step_size = 64;
34
35 //Offsetting on identical characters. This function works a chunk of
36 //characters at a time for cache efficiency and optimal worst-case
37 //performance.
38 template<class RandomAccessIter, class Unsigned_char_type>
39 inline void
40 update_offset(RandomAccessIter first, RandomAccessIter finish,
41 size_t &char_offset)
42 {
43 const int char_size = sizeof(Unsigned_char_type);
44 size_t nextOffset = char_offset;
45 int step_size = max_step_size / char_size;
46 while (true) {
47 RandomAccessIter curr = first;
48 do {
49 //Ignore empties, but if the nextOffset would exceed the length or
50 //not match, exit; we've found the last matching character
51 //This will reduce the step_size if the current step doesn't match.
52 if ((*curr).size() > char_offset) {
53 if((*curr).size() <= (nextOffset + step_size)) {
54 step_size = (*curr).size() - nextOffset - 1;
55 if (step_size < 1) {
56 char_offset = nextOffset;
57 return;
58 }
59 }
60 const int step_byte_size = step_size * char_size;
61 if (memcmp(curr->data() + nextOffset, first->data() + nextOffset,
62 step_byte_size) != 0) {
63 if (step_size == 1) {
64 char_offset = nextOffset;
65 return;
66 }
67 step_size = (step_size > 4) ? 4 : 1;
68 continue;
69 }
70 }
71 ++curr;
72 } while (curr != finish);
73 nextOffset += step_size;
74 }
75 }
76
77 //Offsetting on identical characters. This function works a character
78 //at a time for optimal worst-case performance.
79 template<class RandomAccessIter, class Get_char, class Get_length>
80 inline void
81 update_offset(RandomAccessIter first, RandomAccessIter finish,
82 size_t &char_offset, Get_char getchar, Get_length length)
83 {
84 size_t nextOffset = char_offset;
85 while (true) {
86 RandomAccessIter curr = first;
87 do {
88 //ignore empties, but if the nextOffset would exceed the length or
89 //not match, exit; we've found the last matching character
90 if (length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1)
91 || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {
92 char_offset = nextOffset;
93 return;
94 }
95 } while (++curr != finish);
96 ++nextOffset;
97 }
98 }
99
100 //This comparison functor assumes strings are identical up to char_offset
101 template<class Data_type, class Unsigned_char_type>
102 struct offset_less_than {
103 offset_less_than(size_t char_offset) : fchar_offset(char_offset){}
104 inline bool operator()(const Data_type &x, const Data_type &y) const
105 {
106 size_t minSize = (std::min)(x.size(), y.size());
107 for (size_t u = fchar_offset; u < minSize; ++u) {
108 BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
109 if (static_cast<Unsigned_char_type>(x[u]) !=
110 static_cast<Unsigned_char_type>(y[u])) {
111 return static_cast<Unsigned_char_type>(x[u]) <
112 static_cast<Unsigned_char_type>(y[u]);
113 }
114 }
115 return x.size() < y.size();
116 }
117 size_t fchar_offset;
118 };
119
120 //Compares strings assuming they are identical up to char_offset
121 template<class Data_type, class Unsigned_char_type>
122 struct offset_greater_than {
123 offset_greater_than(size_t char_offset) : fchar_offset(char_offset){}
124 inline bool operator()(const Data_type &x, const Data_type &y) const
125 {
126 size_t minSize = (std::min)(x.size(), y.size());
127 for (size_t u = fchar_offset; u < minSize; ++u) {
128 BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
129 if (static_cast<Unsigned_char_type>(x[u]) !=
130 static_cast<Unsigned_char_type>(y[u])) {
131 return static_cast<Unsigned_char_type>(x[u]) >
132 static_cast<Unsigned_char_type>(y[u]);
133 }
134 }
135 return x.size() > y.size();
136 }
137 size_t fchar_offset;
138 };
139
140 //This comparison functor assumes strings are identical up to char_offset
141 template<class Data_type, class Get_char, class Get_length>
142 struct offset_char_less_than {
143 offset_char_less_than(size_t char_offset) : fchar_offset(char_offset){}
144 inline bool operator()(const Data_type &x, const Data_type &y) const
145 {
146 size_t minSize = (std::min)(length(x), length(y));
147 for (size_t u = fchar_offset; u < minSize; ++u) {
148 if (getchar(x, u) != getchar(y, u)) {
149 return getchar(x, u) < getchar(y, u);
150 }
151 }
152 return length(x) < length(y);
153 }
154 size_t fchar_offset;
155 Get_char getchar;
156 Get_length length;
157 };
158
159 //String sorting recursive implementation
160 template <class RandomAccessIter, class Unsigned_char_type>
161 inline void
162 string_sort_rec(RandomAccessIter first, RandomAccessIter last,
163 size_t char_offset,
164 std::vector<RandomAccessIter> &bin_cache,
165 unsigned cache_offset, size_t *bin_sizes)
166 {
167 typedef typename std::iterator_traits<RandomAccessIter>::value_type
168 Data_type;
169 //This section makes handling of long identical substrings much faster
170 //with a mild average performance impact.
171 //Iterate to the end of the empties. If all empty, return
172 while ((*first).size() <= char_offset) {
173 if (++first == last)
174 return;
175 }
176 RandomAccessIter finish = last - 1;
177 //Getting the last non-empty
178 for (;(*finish).size() <= char_offset; --finish);
179 ++finish;
180 //Offsetting on identical characters. This section works
181 //a few characters at a time for optimal worst-case performance.
182 update_offset<RandomAccessIter, Unsigned_char_type>(first, finish,
183 char_offset);
184
185 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
186 //Equal worst-case of radix and comparison is when bin_count = n*log(n).
187 const unsigned max_size = bin_count;
188 const unsigned membin_count = bin_count + 1;
189 unsigned cache_end;
190 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
191 cache_end, membin_count) + 1;
192
193 //Calculating the size of each bin; this takes roughly 10% of runtime
194 for (RandomAccessIter current = first; current != last; ++current) {
195 if ((*current).size() <= char_offset) {
196 bin_sizes[0]++;
197 }
198 else
199 bin_sizes[static_cast<Unsigned_char_type>((*current)[char_offset])
200 + 1]++;
201 }
202 //Assign the bin positions
203 bin_cache[cache_offset] = first;
204 for (unsigned u = 0; u < membin_count - 1; u++)
205 bin_cache[cache_offset + u + 1] =
206 bin_cache[cache_offset + u] + bin_sizes[u];
207
208 //Swap into place
209 RandomAccessIter next_bin_start = first;
210 //handling empty bins
211 RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
212 next_bin_start += bin_sizes[0];
213 RandomAccessIter * target_bin;
214 //Iterating over each element in the bin of empties
215 for (RandomAccessIter current = *local_bin; current < next_bin_start;
216 ++current) {
217 //empties belong in this bin
218 while ((*current).size() > char_offset) {
219 target_bin =
220 bins + static_cast<Unsigned_char_type>((*current)[char_offset]);
221 iter_swap(current, (*target_bin)++);
222 }
223 }
224 *local_bin = next_bin_start;
225 //iterate backwards to find the last bin with elements in it
226 //this saves iterations in multiple loops
227 unsigned last_bin = bin_count - 1;
228 for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
229 //This dominates runtime, mostly in the swap and bin lookups
230 for (unsigned u = 0; u < last_bin; ++u) {
231 local_bin = bins + u;
232 next_bin_start += bin_sizes[u + 1];
233 //Iterating over each element in this bin
234 for (RandomAccessIter current = *local_bin; current < next_bin_start;
235 ++current) {
236 //Swapping into place until the correct element has been swapped in
237 for (target_bin = bins + static_cast<Unsigned_char_type>
238 ((*current)[char_offset]); target_bin != local_bin;
239 target_bin = bins + static_cast<Unsigned_char_type>
240 ((*current)[char_offset])) iter_swap(current, (*target_bin)++);
241 }
242 *local_bin = next_bin_start;
243 }
244 bins[last_bin] = last;
245 //Recursing
246 RandomAccessIter lastPos = bin_cache[cache_offset];
247 //Skip this loop for empties
248 for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
249 lastPos = bin_cache[u], ++u) {
250 size_t count = bin_cache[u] - lastPos;
251 //don't sort unless there are at least two items to Compare
252 if (count < 2)
253 continue;
254 //using std::sort if its worst-case is better
255 if (count < max_size)
256 std::sort(lastPos, bin_cache[u],
257 offset_less_than<Data_type, Unsigned_char_type>(char_offset + 1));
258 else
259 string_sort_rec<RandomAccessIter, Unsigned_char_type>(lastPos,
260 bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
261 }
262 }
263
264 //Sorts strings in reverse order, with empties at the end
265 template <class RandomAccessIter, class Unsigned_char_type>
266 inline void
267 reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
268 size_t char_offset,
269 std::vector<RandomAccessIter> &bin_cache,
270 unsigned cache_offset,
271 size_t *bin_sizes)
272 {
273 typedef typename std::iterator_traits<RandomAccessIter>::value_type
274 Data_type;
275 //This section makes handling of long identical substrings much faster
276 //with a mild average performance impact.
277 RandomAccessIter curr = first;
278 //Iterate to the end of the empties. If all empty, return
279 while ((*curr).size() <= char_offset) {
280 if (++curr == last)
281 return;
282 }
283 //Getting the last non-empty
284 while ((*(--last)).size() <= char_offset);
285 ++last;
286 //Offsetting on identical characters. This section works
287 //a few characters at a time for optimal worst-case performance.
288 update_offset<RandomAccessIter, Unsigned_char_type>(curr, last,
289 char_offset);
290 RandomAccessIter * target_bin;
291
292 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
293 //Equal worst-case of radix and comparison when bin_count = n*log(n).
294 const unsigned max_size = bin_count;
295 const unsigned membin_count = bin_count + 1;
296 const unsigned max_bin = bin_count - 1;
297 unsigned cache_end;
298 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
299 cache_end, membin_count);
300 RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
301
302 //Calculating the size of each bin; this takes roughly 10% of runtime
303 for (RandomAccessIter current = first; current != last; ++current) {
304 if ((*current).size() <= char_offset) {
305 bin_sizes[bin_count]++;
306 }
307 else
308 bin_sizes[max_bin - static_cast<Unsigned_char_type>
309 ((*current)[char_offset])]++;
310 }
311 //Assign the bin positions
312 bin_cache[cache_offset] = first;
313 for (unsigned u = 0; u < membin_count - 1; u++)
314 bin_cache[cache_offset + u + 1] =
315 bin_cache[cache_offset + u] + bin_sizes[u];
316
317 //Swap into place
318 RandomAccessIter next_bin_start = last;
319 //handling empty bins
320 RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
321 RandomAccessIter lastFull = *local_bin;
322 //Iterating over each element in the bin of empties
323 for (RandomAccessIter current = *local_bin; current < next_bin_start;
324 ++current) {
325 //empties belong in this bin
326 while ((*current).size() > char_offset) {
327 target_bin =
328 end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
329 iter_swap(current, (*target_bin)++);
330 }
331 }
332 *local_bin = next_bin_start;
333 next_bin_start = first;
334 //iterate backwards to find the last non-empty bin
335 //this saves iterations in multiple loops
336 unsigned last_bin = max_bin;
337 for (; last_bin && !bin_sizes[last_bin]; --last_bin);
338 //This dominates runtime, mostly in the swap and bin lookups
339 for (unsigned u = 0; u < last_bin; ++u) {
340 local_bin = bins + u;
341 next_bin_start += bin_sizes[u];
342 //Iterating over each element in this bin
343 for (RandomAccessIter current = *local_bin; current < next_bin_start;
344 ++current) {
345 //Swapping into place until the correct element has been swapped in
346 for (target_bin =
347 end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
348 target_bin != local_bin;
349 target_bin =
350 end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]))
351 iter_swap(current, (*target_bin)++);
352 }
353 *local_bin = next_bin_start;
354 }
355 bins[last_bin] = lastFull;
356 //Recursing
357 RandomAccessIter lastPos = first;
358 //Skip this loop for empties
359 for (unsigned u = cache_offset; u <= cache_offset + last_bin;
360 lastPos = bin_cache[u], ++u) {
361 size_t count = bin_cache[u] - lastPos;
362 //don't sort unless there are at least two items to Compare
363 if (count < 2)
364 continue;
365 //using std::sort if its worst-case is better
366 if (count < max_size)
367 std::sort(lastPos, bin_cache[u], offset_greater_than<Data_type,
368 Unsigned_char_type>(char_offset + 1));
369 else
370 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
371 (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
372 }
373 }
374
375 //String sorting recursive implementation
376 template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
377 class Get_length>
378 inline void
379 string_sort_rec(RandomAccessIter first, RandomAccessIter last,
380 size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
381 unsigned cache_offset, size_t *bin_sizes,
382 Get_char getchar, Get_length length)
383 {
384 typedef typename std::iterator_traits<RandomAccessIter>::value_type
385 Data_type;
386 //This section makes handling of long identical substrings much faster
387 //with a mild average performance impact.
388 //Iterate to the end of the empties. If all empty, return
389 while (length(*first) <= char_offset) {
390 if (++first == last)
391 return;
392 }
393 RandomAccessIter finish = last - 1;
394 //Getting the last non-empty
395 for (;length(*finish) <= char_offset; --finish);
396 ++finish;
397 update_offset(first, finish, char_offset, getchar, length);
398
399 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
400 //Equal worst-case of radix and comparison is when bin_count = n*log(n).
401 const unsigned max_size = bin_count;
402 const unsigned membin_count = bin_count + 1;
403 unsigned cache_end;
404 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
405 cache_end, membin_count) + 1;
406
407 //Calculating the size of each bin; this takes roughly 10% of runtime
408 for (RandomAccessIter current = first; current != last; ++current) {
409 if (length(*current) <= char_offset) {
410 bin_sizes[0]++;
411 }
412 else
413 bin_sizes[getchar((*current), char_offset) + 1]++;
414 }
415 //Assign the bin positions
416 bin_cache[cache_offset] = first;
417 for (unsigned u = 0; u < membin_count - 1; u++)
418 bin_cache[cache_offset + u + 1] =
419 bin_cache[cache_offset + u] + bin_sizes[u];
420
421 //Swap into place
422 RandomAccessIter next_bin_start = first;
423 //handling empty bins
424 RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
425 next_bin_start += bin_sizes[0];
426 RandomAccessIter * target_bin;
427 //Iterating over each element in the bin of empties
428 for (RandomAccessIter current = *local_bin; current < next_bin_start;
429 ++current) {
430 //empties belong in this bin
431 while (length(*current) > char_offset) {
432 target_bin = bins + getchar((*current), char_offset);
433 iter_swap(current, (*target_bin)++);
434 }
435 }
436 *local_bin = next_bin_start;
437 //iterate backwards to find the last bin with elements in it
438 //this saves iterations in multiple loops
439 unsigned last_bin = bin_count - 1;
440 for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
441 //This dominates runtime, mostly in the swap and bin lookups
442 for (unsigned ii = 0; ii < last_bin; ++ii) {
443 local_bin = bins + ii;
444 next_bin_start += bin_sizes[ii + 1];
445 //Iterating over each element in this bin
446 for (RandomAccessIter current = *local_bin; current < next_bin_start;
447 ++current) {
448 //Swapping into place until the correct element has been swapped in
449 for (target_bin = bins + getchar((*current), char_offset);
450 target_bin != local_bin;
451 target_bin = bins + getchar((*current), char_offset))
452 iter_swap(current, (*target_bin)++);
453 }
454 *local_bin = next_bin_start;
455 }
456 bins[last_bin] = last;
457
458 //Recursing
459 RandomAccessIter lastPos = bin_cache[cache_offset];
460 //Skip this loop for empties
461 for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
462 lastPos = bin_cache[u], ++u) {
463 size_t count = bin_cache[u] - lastPos;
464 //don't sort unless there are at least two items to Compare
465 if (count < 2)
466 continue;
467 //using std::sort if its worst-case is better
468 if (count < max_size)
469 std::sort(lastPos, bin_cache[u], offset_char_less_than<Data_type,
470 Get_char, Get_length>(char_offset + 1));
471 else
472 string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
473 Get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache,
474 cache_end, bin_sizes, getchar, length);
475 }
476 }
477
478 //String sorting recursive implementation
479 template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
480 class Get_length, class Compare>
481 inline void
482 string_sort_rec(RandomAccessIter first, RandomAccessIter last,
483 size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
484 unsigned cache_offset, size_t *bin_sizes,
485 Get_char getchar, Get_length length, Compare comp)
486 {
487 //This section makes handling of long identical substrings much faster
488 //with a mild average performance impact.
489 //Iterate to the end of the empties. If all empty, return
490 while (length(*first) <= char_offset) {
491 if (++first == last)
492 return;
493 }
494 RandomAccessIter finish = last - 1;
495 //Getting the last non-empty
496 for (;length(*finish) <= char_offset; --finish);
497 ++finish;
498 update_offset(first, finish, char_offset, getchar, length);
499
500 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
501 //Equal worst-case of radix and comparison is when bin_count = n*log(n).
502 const unsigned max_size = bin_count;
503 const unsigned membin_count = bin_count + 1;
504 unsigned cache_end;
505 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
506 cache_end, membin_count) + 1;
507
508 //Calculating the size of each bin; this takes roughly 10% of runtime
509 for (RandomAccessIter current = first; current != last; ++current) {
510 if (length(*current) <= char_offset) {
511 bin_sizes[0]++;
512 }
513 else
514 bin_sizes[getchar((*current), char_offset) + 1]++;
515 }
516 //Assign the bin positions
517 bin_cache[cache_offset] = first;
518 for (unsigned u = 0; u < membin_count - 1; u++)
519 bin_cache[cache_offset + u + 1] =
520 bin_cache[cache_offset + u] + bin_sizes[u];
521
522 //Swap into place
523 RandomAccessIter next_bin_start = first;
524 //handling empty bins
525 RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
526 next_bin_start += bin_sizes[0];
527 RandomAccessIter * target_bin;
528 //Iterating over each element in the bin of empties
529 for (RandomAccessIter current = *local_bin; current < next_bin_start;
530 ++current) {
531 //empties belong in this bin
532 while (length(*current) > char_offset) {
533 target_bin = bins + getchar((*current), char_offset);
534 iter_swap(current, (*target_bin)++);
535 }
536 }
537 *local_bin = next_bin_start;
538 //iterate backwards to find the last bin with elements in it
539 //this saves iterations in multiple loops
540 unsigned last_bin = bin_count - 1;
541 for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
542 //This dominates runtime, mostly in the swap and bin lookups
543 for (unsigned u = 0; u < last_bin; ++u) {
544 local_bin = bins + u;
545 next_bin_start += bin_sizes[u + 1];
546 //Iterating over each element in this bin
547 for (RandomAccessIter current = *local_bin; current < next_bin_start;
548 ++current) {
549 //Swapping into place until the correct element has been swapped in
550 for (target_bin = bins + getchar((*current), char_offset);
551 target_bin != local_bin;
552 target_bin = bins + getchar((*current), char_offset))
553 iter_swap(current, (*target_bin)++);
554 }
555 *local_bin = next_bin_start;
556 }
557 bins[last_bin] = last;
558
559 //Recursing
560 RandomAccessIter lastPos = bin_cache[cache_offset];
561 //Skip this loop for empties
562 for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
563 lastPos = bin_cache[u], ++u) {
564 size_t count = bin_cache[u] - lastPos;
565 //don't sort unless there are at least two items to Compare
566 if (count < 2)
567 continue;
568 //using std::sort if its worst-case is better
569 if (count < max_size)
570 std::sort(lastPos, bin_cache[u], comp);
571 else
572 string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
573 Get_length, Compare>
574 (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
575 bin_sizes, getchar, length, comp);
576 }
577 }
578
579 //Sorts strings in reverse order, with empties at the end
580 template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
581 class Get_length, class Compare>
582 inline void
583 reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
584 size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
585 unsigned cache_offset, size_t *bin_sizes,
586 Get_char getchar, Get_length length, Compare comp)
587 {
588 //This section makes handling of long identical substrings much faster
589 //with a mild average performance impact.
590 RandomAccessIter curr = first;
591 //Iterate to the end of the empties. If all empty, return
592 while (length(*curr) <= char_offset) {
593 if (++curr == last)
594 return;
595 }
596 //Getting the last non-empty
597 while (length(*(--last)) <= char_offset);
598 ++last;
599 //Offsetting on identical characters. This section works
600 //a character at a time for optimal worst-case performance.
601 update_offset(curr, last, char_offset, getchar, length);
602
603 const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
604 //Equal worst-case of radix and comparison is when bin_count = n*log(n).
605 const unsigned max_size = bin_count;
606 const unsigned membin_count = bin_count + 1;
607 const unsigned max_bin = bin_count - 1;
608 unsigned cache_end;
609 RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
610 cache_end, membin_count);
611 RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
612
613 //Calculating the size of each bin; this takes roughly 10% of runtime
614 for (RandomAccessIter current = first; current != last; ++current) {
615 if (length(*current) <= char_offset) {
616 bin_sizes[bin_count]++;
617 }
618 else
619 bin_sizes[max_bin - getchar((*current), char_offset)]++;
620 }
621 //Assign the bin positions
622 bin_cache[cache_offset] = first;
623 for (unsigned u = 0; u < membin_count - 1; u++)
624 bin_cache[cache_offset + u + 1] =
625 bin_cache[cache_offset + u] + bin_sizes[u];
626
627 //Swap into place
628 RandomAccessIter next_bin_start = last;
629 //handling empty bins
630 RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
631 RandomAccessIter lastFull = *local_bin;
632 RandomAccessIter * target_bin;
633 //Iterating over each element in the bin of empties
634 for (RandomAccessIter current = *local_bin; current < next_bin_start;
635 ++current) {
636 //empties belong in this bin
637 while (length(*current) > char_offset) {
638 target_bin = end_bin - getchar((*current), char_offset);
639 iter_swap(current, (*target_bin)++);
640 }
641 }
642 *local_bin = next_bin_start;
643 next_bin_start = first;
644 //iterate backwards to find the last bin with elements in it
645 //this saves iterations in multiple loops
646 unsigned last_bin = max_bin;
647 for (; last_bin && !bin_sizes[last_bin]; --last_bin);
648 //This dominates runtime, mostly in the swap and bin lookups
649 for (unsigned u = 0; u < last_bin; ++u) {
650 local_bin = bins + u;
651 next_bin_start += bin_sizes[u];
652 //Iterating over each element in this bin
653 for (RandomAccessIter current = *local_bin; current < next_bin_start;
654 ++current) {
655 //Swapping into place until the correct element has been swapped in
656 for (target_bin = end_bin - getchar((*current), char_offset);
657 target_bin != local_bin;
658 target_bin = end_bin - getchar((*current), char_offset))
659 iter_swap(current, (*target_bin)++);
660 }
661 *local_bin = next_bin_start;
662 }
663 bins[last_bin] = lastFull;
664 //Recursing
665 RandomAccessIter lastPos = first;
666 //Skip this loop for empties
667 for (unsigned u = cache_offset; u <= cache_offset + last_bin;
668 lastPos = bin_cache[u], ++u) {
669 size_t count = bin_cache[u] - lastPos;
670 //don't sort unless there are at least two items to Compare
671 if (count < 2)
672 continue;
673 //using std::sort if its worst-case is better
674 if (count < max_size)
675 std::sort(lastPos, bin_cache[u], comp);
676 else
677 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type,
678 Get_char, Get_length, Compare>
679 (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
680 bin_sizes, getchar, length, comp);
681 }
682 }
683
684 //Holds the bin vector and makes the initial recursive call
685 template <class RandomAccessIter, class Unsigned_char_type>
686 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
687 >::type
688 string_sort(RandomAccessIter first, RandomAccessIter last,
689 Unsigned_char_type)
690 {
691 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
692 std::vector<RandomAccessIter> bin_cache;
693 string_sort_rec<RandomAccessIter, Unsigned_char_type>
694 (first, last, 0, bin_cache, 0, bin_sizes);
695 }
696
697 template <class RandomAccessIter, class Unsigned_char_type>
698 inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
699 >::type
700 string_sort(RandomAccessIter first, RandomAccessIter last,
701 Unsigned_char_type)
702 {
703 //Warning that we're using std::sort, even though string_sort was called
704 BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
705 std::sort(first, last);
706 }
707
708 //Holds the bin vector and makes the initial recursive call
709 template <class RandomAccessIter, class Unsigned_char_type>
710 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
711 >::type
712 reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
713 Unsigned_char_type)
714 {
715 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
716 std::vector<RandomAccessIter> bin_cache;
717 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
718 (first, last, 0, bin_cache, 0, bin_sizes);
719 }
720
721 template <class RandomAccessIter, class Unsigned_char_type>
722 inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
723 >::type
724 reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
725 Unsigned_char_type)
726 {
727 typedef typename std::iterator_traits<RandomAccessIter>::value_type
728 Data_type;
729 //Warning that we're using std::sort, even though string_sort was called
730 BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
731 std::sort(first, last, std::greater<Data_type>());
732 }
733
734 //Holds the bin vector and makes the initial recursive call
735 template <class RandomAccessIter, class Get_char, class Get_length,
736 class Unsigned_char_type>
737 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
738 >::type
739 string_sort(RandomAccessIter first, RandomAccessIter last,
740 Get_char getchar, Get_length length, Unsigned_char_type)
741 {
742 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
743 std::vector<RandomAccessIter> bin_cache;
744 string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
745 Get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);
746 }
747
748 template <class RandomAccessIter, class Get_char, class Get_length,
749 class Unsigned_char_type>
750 inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
751 >::type
752 string_sort(RandomAccessIter first, RandomAccessIter last,
753 Get_char getchar, Get_length length, Unsigned_char_type)
754 {
755 //Warning that we're using std::sort, even though string_sort was called
756 BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
757 std::sort(first, last);
758 }
759
760 //Holds the bin vector and makes the initial recursive call
761 template <class RandomAccessIter, class Get_char, class Get_length,
762 class Compare, class Unsigned_char_type>
763 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
764 >::type
765 string_sort(RandomAccessIter first, RandomAccessIter last,
766 Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
767 {
768 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
769 std::vector<RandomAccessIter> bin_cache;
770 string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char
771 , Get_length, Compare>
772 (first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
773 }
774
775 //disable_if_c was refusing to compile, so rewrote to use enable_if_c
776 template <class RandomAccessIter, class Get_char, class Get_length,
777 class Compare, class Unsigned_char_type>
778 inline typename boost::enable_if_c< (sizeof(Unsigned_char_type) > 2), void
779 >::type
780 string_sort(RandomAccessIter first, RandomAccessIter last,
781 Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
782 {
783 //Warning that we're using std::sort, even though string_sort was called
784 BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
785 std::sort(first, last, comp);
786 }
787
788 //Holds the bin vector and makes the initial recursive call
789 template <class RandomAccessIter, class Get_char, class Get_length,
790 class Compare, class Unsigned_char_type>
791 inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
792 >::type
793 reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
794 Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
795 {
796 size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
797 std::vector<RandomAccessIter> bin_cache;
798 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
799 Get_length, Compare>
800 (first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
801 }
802
803 template <class RandomAccessIter, class Get_char, class Get_length,
804 class Compare, class Unsigned_char_type>
805 inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
806 >::type
807 reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
808 Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
809 {
810 //Warning that we're using std::sort, even though string_sort was called
811 BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
812 std::sort(first, last, comp);
813 }
814 }
815 }
816 }
817 }
818
819 #endif