15 #ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
16 #define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
22 #include <boost/static_assert.hpp>
23 #include <boost/serialization/static_warning.hpp>
24 #include <boost/utility/enable_if.hpp>
27 #include <boost/cstdint.hpp>
32 static const int max_step_size = 64;
37 template<
class RandomAccessIter,
class Un
signed_
char_type>
42 const int char_size =
sizeof(Unsigned_char_type);
43 size_t nextOffset = char_offset;
44 int step_size = max_step_size;
46 RandomAccessIter curr = first;
51 if ((*curr).size() > char_offset) {
52 if((*curr).size() <= (nextOffset + step_size)) {
53 step_size = (*curr).size() - nextOffset - 1;
55 char_offset = nextOffset;
59 const int step_byte_size = step_size * char_size;
60 if (memcmp(curr->data() + nextOffset, first->data() + nextOffset,
61 step_byte_size) != 0) {
63 char_offset = nextOffset;
66 step_size = (step_size > 4) ? 4 : 1;
71 }
while (curr != finish);
72 nextOffset += step_size;
78 template<
class RandomAccessIter,
class Get_
char,
class Get_length>
81 size_t &char_offset, Get_char getchar, Get_length length)
83 size_t nextOffset = char_offset;
85 RandomAccessIter curr = first;
89 if (length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1)
90 || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {
91 char_offset = nextOffset;
94 }
while (++curr != finish);
100 template<
class Data_type,
class Un
signed_
char_type>
103 inline bool operator()(
const Data_type &x,
const Data_type &y)
const
105 size_t minSize = (std::min)(x.size(), y.size());
107 BOOST_STATIC_ASSERT(
sizeof(x[u]) ==
sizeof(Unsigned_char_type));
108 if (static_cast<Unsigned_char_type>(x[u]) !=
109 static_cast<Unsigned_char_type>(y[u])) {
110 return static_cast<Unsigned_char_type
>(x[u]) <
111 static_cast<Unsigned_char_type>(y[u]);
114 return x.size() < y.size();
120 template<
class Data_type,
class Un
signed_
char_type>
123 inline bool operator()(
const Data_type &x,
const Data_type &y)
const
125 size_t minSize = (std::min)(x.size(), y.size());
127 BOOST_STATIC_ASSERT(
sizeof(x[u]) ==
sizeof(Unsigned_char_type));
128 if (static_cast<Unsigned_char_type>(x[u]) !=
129 static_cast<Unsigned_char_type>(y[u])) {
130 return static_cast<Unsigned_char_type
>(x[u]) >
131 static_cast<Unsigned_char_type>(y[u]);
134 return x.size() > y.size();
140 template<
class Data_type,
class Get_
char,
class Get_length>
143 inline bool operator()(
const Data_type &x,
const Data_type &y)
const
159 template <
class RandomAccessIter,
class Un
signed_
char_type>
163 std::vector<RandomAccessIter> &bin_cache,
164 unsigned cache_offset,
size_t *bin_sizes)
166 typedef typename std::iterator_traits<RandomAccessIter>::value_type
171 while ((*first).size() <= char_offset) {
175 RandomAccessIter finish = last - 1;
177 for (;(*finish).size() <= char_offset; --finish);
181 update_offset<RandomAccessIter, Unsigned_char_type>(first, finish,
184 const unsigned bin_count = (1 << (
sizeof(Unsigned_char_type)*8));
186 const unsigned max_size = bin_count;
187 const unsigned membin_count = bin_count + 1;
189 RandomAccessIter * bins =
size_bins(bin_sizes, bin_cache, cache_offset,
190 cache_end, membin_count) + 1;
193 for (RandomAccessIter current = first; current != last; ++current) {
194 if ((*current).size() <= char_offset) {
198 bin_sizes[
static_cast<Unsigned_char_type
>((*current)[char_offset])
202 bin_cache[cache_offset] = first;
203 for (
unsigned u = 0; u < membin_count - 1; u++)
204 bin_cache[cache_offset + u + 1] =
205 bin_cache[cache_offset + u] + bin_sizes[u];
208 RandomAccessIter next_bin_start = first;
210 RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
211 next_bin_start += bin_sizes[0];
212 RandomAccessIter * target_bin;
214 for (RandomAccessIter current = *local_bin; current < next_bin_start;
217 while ((*current).size() > char_offset) {
219 bins +
static_cast<Unsigned_char_type
>((*current)[char_offset]);
220 iter_swap(current, (*target_bin)++);
223 *local_bin = next_bin_start;
226 unsigned last_bin = bin_count - 1;
227 for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
229 for (
unsigned u = 0; u < last_bin; ++u) {
230 local_bin = bins + u;
231 next_bin_start += bin_sizes[u + 1];
233 for (RandomAccessIter current = *local_bin; current < next_bin_start;
236 for (target_bin = bins + static_cast<Unsigned_char_type>
237 ((*current)[char_offset]); target_bin != local_bin;
238 target_bin = bins +
static_cast<Unsigned_char_type
>
239 ((*current)[char_offset])) iter_swap(current, (*target_bin)++);
241 *local_bin = next_bin_start;
243 bins[last_bin] = last;
245 RandomAccessIter lastPos = bin_cache[cache_offset];
247 for (
unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
248 lastPos = bin_cache[u], ++u) {
249 size_t count = bin_cache[u] - lastPos;
254 if (count < max_size)
255 std::sort(lastPos, bin_cache[u],
258 string_sort_rec<RandomAccessIter, Unsigned_char_type>(lastPos,
259 bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
264 template <
class RandomAccessIter,
class Un
signed_
char_type>
268 std::vector<RandomAccessIter> &bin_cache,
269 unsigned cache_offset,
272 typedef typename std::iterator_traits<RandomAccessIter>::value_type
276 RandomAccessIter curr = first;
278 while ((*curr).size() <= char_offset) {
283 while ((*(--last)).size() <= char_offset);
287 update_offset<RandomAccessIter, Unsigned_char_type>(first, last,
289 RandomAccessIter * target_bin;
291 const unsigned bin_count = (1 << (
sizeof(Unsigned_char_type)*8));
293 const unsigned max_size = bin_count;
294 const unsigned membin_count = bin_count + 1;
295 const unsigned max_bin = bin_count - 1;
297 RandomAccessIter * bins =
size_bins(bin_sizes, bin_cache, cache_offset,
298 cache_end, membin_count);
299 RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
302 for (RandomAccessIter current = first; current != last; ++current) {
303 if ((*current).size() <= char_offset) {
304 bin_sizes[bin_count]++;
307 bin_sizes[max_bin -
static_cast<Unsigned_char_type
>
308 ((*current)[char_offset])]++;
311 bin_cache[cache_offset] = first;
312 for (
unsigned u = 0; u < membin_count - 1; u++)
313 bin_cache[cache_offset + u + 1] =
314 bin_cache[cache_offset + u] + bin_sizes[u];
317 RandomAccessIter next_bin_start = last;
319 RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
320 RandomAccessIter lastFull = *local_bin;
322 for (RandomAccessIter current = *local_bin; current < next_bin_start;
325 while ((*current).size() > char_offset) {
327 end_bin -
static_cast<Unsigned_char_type
>((*current)[char_offset]);
328 iter_swap(current, (*target_bin)++);
331 *local_bin = next_bin_start;
332 next_bin_start = first;
335 unsigned last_bin = max_bin;
336 for (; last_bin && !bin_sizes[last_bin]; --last_bin);
338 for (
unsigned u = 0; u < last_bin; ++u) {
339 local_bin = bins + u;
340 next_bin_start += bin_sizes[u];
342 for (RandomAccessIter current = *local_bin; current < next_bin_start;
346 end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
347 target_bin != local_bin;
349 end_bin -
static_cast<Unsigned_char_type
>((*current)[char_offset]))
350 iter_swap(current, (*target_bin)++);
352 *local_bin = next_bin_start;
354 bins[last_bin] = lastFull;
356 RandomAccessIter lastPos = first;
358 for (
unsigned u = cache_offset; u <= cache_offset + last_bin;
359 lastPos = bin_cache[u], ++u) {
360 size_t count = bin_cache[u] - lastPos;
365 if (count < max_size)
367 Unsigned_char_type>(char_offset + 1));
369 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
370 (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
375 template <
class RandomAccessIter,
class Unsigned_char_type,
class Get_char,
379 size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
380 unsigned cache_offset,
size_t *bin_sizes,
381 Get_char getchar, Get_length length)
383 typedef typename std::iterator_traits<RandomAccessIter>::value_type
388 while (length(*first) <= char_offset) {
392 RandomAccessIter finish = last - 1;
394 for (;length(*finish) <= char_offset; --finish);
398 const unsigned bin_count = (1 << (
sizeof(Unsigned_char_type)*8));
400 const unsigned max_size = bin_count;
401 const unsigned membin_count = bin_count + 1;
403 RandomAccessIter * bins =
size_bins(bin_sizes, bin_cache, cache_offset,
404 cache_end, membin_count) + 1;
407 for (RandomAccessIter current = first; current != last; ++current) {
408 if (length(*current) <= char_offset) {
412 bin_sizes[getchar((*current), char_offset) + 1]++;
415 bin_cache[cache_offset] = first;
416 for (
unsigned u = 0; u < membin_count - 1; u++)
417 bin_cache[cache_offset + u + 1] =
418 bin_cache[cache_offset + u] + bin_sizes[u];
421 RandomAccessIter next_bin_start = first;
423 RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
424 next_bin_start += bin_sizes[0];
425 RandomAccessIter * target_bin;
427 for (RandomAccessIter current = *local_bin; current < next_bin_start;
430 while (length(*current) > char_offset) {
431 target_bin = bins + getchar((*current), char_offset);
432 iter_swap(current, (*target_bin)++);
435 *local_bin = next_bin_start;
438 unsigned last_bin = bin_count - 1;
439 for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
441 for (
unsigned ii = 0; ii < last_bin; ++ii) {
442 local_bin = bins + ii;
443 next_bin_start += bin_sizes[ii + 1];
445 for (RandomAccessIter current = *local_bin; current < next_bin_start;
448 for (target_bin = bins + getchar((*current), char_offset);
449 target_bin != local_bin;
450 target_bin = bins + getchar((*current), char_offset))
451 iter_swap(current, (*target_bin)++);
453 *local_bin = next_bin_start;
455 bins[last_bin] = last;
458 RandomAccessIter lastPos = bin_cache[cache_offset];
460 for (
unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
461 lastPos = bin_cache[u], ++u) {
462 size_t count = bin_cache[u] - lastPos;
467 if (count < max_size)
469 Get_char, Get_length>(char_offset + 1));
472 Get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache,
473 cache_end, bin_sizes, getchar, length);
478 template <
class RandomAccessIter,
class Unsigned_char_type,
class Get_char,
479 class Get_length,
class Compare>
482 size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
483 unsigned cache_offset,
size_t *bin_sizes,
484 Get_char getchar, Get_length length, Compare comp)
489 while (length(*first) <= char_offset) {
493 RandomAccessIter finish = last - 1;
495 for (;length(*finish) <= char_offset; --finish);
499 const unsigned bin_count = (1 << (
sizeof(Unsigned_char_type)*8));
501 const unsigned max_size = bin_count;
502 const unsigned membin_count = bin_count + 1;
504 RandomAccessIter * bins =
size_bins(bin_sizes, bin_cache, cache_offset,
505 cache_end, membin_count) + 1;
508 for (RandomAccessIter current = first; current != last; ++current) {
509 if (length(*current) <= char_offset) {
513 bin_sizes[getchar((*current), char_offset) + 1]++;
516 bin_cache[cache_offset] = first;
517 for (
unsigned u = 0; u < membin_count - 1; u++)
518 bin_cache[cache_offset + u + 1] =
519 bin_cache[cache_offset + u] + bin_sizes[u];
522 RandomAccessIter next_bin_start = first;
524 RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
525 next_bin_start += bin_sizes[0];
526 RandomAccessIter * target_bin;
528 for (RandomAccessIter current = *local_bin; current < next_bin_start;
531 while (length(*current) > char_offset) {
532 target_bin = bins + getchar((*current), char_offset);
533 iter_swap(current, (*target_bin)++);
536 *local_bin = next_bin_start;
539 unsigned last_bin = bin_count - 1;
540 for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
542 for (
unsigned u = 0; u < last_bin; ++u) {
543 local_bin = bins + u;
544 next_bin_start += bin_sizes[u + 1];
546 for (RandomAccessIter current = *local_bin; current < next_bin_start;
549 for (target_bin = bins + getchar((*current), char_offset);
550 target_bin != local_bin;
551 target_bin = bins + getchar((*current), char_offset))
552 iter_swap(current, (*target_bin)++);
554 *local_bin = next_bin_start;
556 bins[last_bin] = last;
559 RandomAccessIter lastPos = bin_cache[cache_offset];
561 for (
unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
562 lastPos = bin_cache[u], ++u) {
563 size_t count = bin_cache[u] - lastPos;
568 if (count < max_size)
569 std::sort(lastPos, bin_cache[u], comp);
573 (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
574 bin_sizes, getchar, length, comp);
579 template <
class RandomAccessIter,
class Unsigned_char_type,
class Get_char,
580 class Get_length,
class Compare>
583 size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
584 unsigned cache_offset,
size_t *bin_sizes,
585 Get_char getchar, Get_length length, Compare comp)
589 RandomAccessIter curr = first;
591 while (length(*curr) <= char_offset) {
596 while (length(*(--last)) <= char_offset);
602 const unsigned bin_count = (1 << (
sizeof(Unsigned_char_type)*8));
604 const unsigned max_size = bin_count;
605 const unsigned membin_count = bin_count + 1;
606 const unsigned max_bin = bin_count - 1;
608 RandomAccessIter * bins =
size_bins(bin_sizes, bin_cache, cache_offset,
609 cache_end, membin_count);
610 RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
613 for (RandomAccessIter current = first; current != last; ++current) {
614 if (length(*current) <= char_offset) {
615 bin_sizes[bin_count]++;
618 bin_sizes[max_bin - getchar((*current), char_offset)]++;
621 bin_cache[cache_offset] = first;
622 for (
unsigned u = 0; u < membin_count - 1; u++)
623 bin_cache[cache_offset + u + 1] =
624 bin_cache[cache_offset + u] + bin_sizes[u];
627 RandomAccessIter next_bin_start = last;
629 RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
630 RandomAccessIter lastFull = *local_bin;
631 RandomAccessIter * target_bin;
633 for (RandomAccessIter current = *local_bin; current < next_bin_start;
636 while (length(*current) > char_offset) {
637 target_bin = end_bin - getchar((*current), char_offset);
638 iter_swap(current, (*target_bin)++);
641 *local_bin = next_bin_start;
642 next_bin_start = first;
645 unsigned last_bin = max_bin;
646 for (; last_bin && !bin_sizes[last_bin]; --last_bin);
648 for (
unsigned u = 0; u < last_bin; ++u) {
649 local_bin = bins + u;
650 next_bin_start += bin_sizes[u];
652 for (RandomAccessIter current = *local_bin; current < next_bin_start;
655 for (target_bin = end_bin - getchar((*current), char_offset);
656 target_bin != local_bin;
657 target_bin = end_bin - getchar((*current), char_offset))
658 iter_swap(current, (*target_bin)++);
660 *local_bin = next_bin_start;
662 bins[last_bin] = lastFull;
664 RandomAccessIter lastPos = first;
666 for (
unsigned u = cache_offset; u <= cache_offset + last_bin;
667 lastPos = bin_cache[u], ++u) {
668 size_t count = bin_cache[u] - lastPos;
673 if (count < max_size)
674 std::sort(lastPos, bin_cache[u], comp);
677 Get_char, Get_length, Compare>
678 (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
679 bin_sizes, getchar, length, comp);
684 template <
class RandomAccessIter,
class Un
signed_
char_type>
685 inline typename boost::enable_if_c<
sizeof(Unsigned_char_type) <= 2,
void
687 string_sort(RandomAccessIter first, RandomAccessIter last,
690 size_t bin_sizes[(1 << (8 *
sizeof(Unsigned_char_type))) + 1];
691 std::vector<RandomAccessIter> bin_cache;
692 string_sort_rec<RandomAccessIter, Unsigned_char_type>
693 (first, last, 0, bin_cache, 0, bin_sizes);
696 template <
class RandomAccessIter,
class Un
signed_
char_type>
697 inline typename boost::disable_if_c<
sizeof(Unsigned_char_type) <= 2,
void
699 string_sort(RandomAccessIter first, RandomAccessIter last,
703 BOOST_STATIC_WARNING(
sizeof(Unsigned_char_type) <= 2 );
704 std::sort(first, last);
708 template <
class RandomAccessIter,
class Un
signed_
char_type>
709 inline typename boost::enable_if_c<
sizeof(Unsigned_char_type) <= 2,
void
714 size_t bin_sizes[(1 << (8 *
sizeof(Unsigned_char_type))) + 1];
715 std::vector<RandomAccessIter> bin_cache;
716 reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
717 (first, last, 0, bin_cache, 0, bin_sizes);
720 template <
class RandomAccessIter,
class Un
signed_
char_type>
721 inline typename boost::disable_if_c<
sizeof(Unsigned_char_type) <= 2,
void
726 typedef typename std::iterator_traits<RandomAccessIter>::value_type
729 BOOST_STATIC_WARNING(
sizeof(Unsigned_char_type) <= 2 );
730 std::sort(first, last, std::greater<Data_type>());
734 template <
class RandomAccessIter,
class Get_char,
class Get_length,
735 class Unsigned_char_type>
736 inline typename boost::enable_if_c<
sizeof(Unsigned_char_type) <= 2,
void
738 string_sort(RandomAccessIter first, RandomAccessIter last,
739 Get_char getchar, Get_length length, Unsigned_char_type)
741 size_t bin_sizes[(1 << (8 *
sizeof(Unsigned_char_type))) + 1];
742 std::vector<RandomAccessIter> bin_cache;
744 Get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);
747 template <
class RandomAccessIter,
class Get_char,
class Get_length,
748 class Unsigned_char_type>
749 inline typename boost::disable_if_c<
sizeof(Unsigned_char_type) <= 2,
void
751 string_sort(RandomAccessIter first, RandomAccessIter last,
752 Get_char getchar, Get_length length, Unsigned_char_type)
755 BOOST_STATIC_WARNING(
sizeof(Unsigned_char_type) <= 2 );
756 std::sort(first, last);
760 template <
class RandomAccessIter,
class Get_char,
class Get_length,
761 class Compare,
class Unsigned_char_type>
762 inline typename boost::enable_if_c<
sizeof(Unsigned_char_type) <= 2,
void
764 string_sort(RandomAccessIter first, RandomAccessIter last,
765 Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
767 size_t bin_sizes[(1 << (8 *
sizeof(Unsigned_char_type))) + 1];
768 std::vector<RandomAccessIter> bin_cache;
770 , Get_length, Compare>
771 (first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
775 template <
class RandomAccessIter,
class Get_char,
class Get_length,
776 class Compare,
class Unsigned_char_type>
777 inline typename boost::enable_if_c< (sizeof(Unsigned_char_type) > 2),
void
779 string_sort(RandomAccessIter first, RandomAccessIter last,
780 Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
783 BOOST_STATIC_WARNING(
sizeof(Unsigned_char_type) <= 2 );
784 std::sort(first, last, comp);
788 template <
class RandomAccessIter,
class Get_char,
class Get_length,
789 class Compare,
class Unsigned_char_type>
790 inline typename boost::enable_if_c<
sizeof(Unsigned_char_type) <= 2,
void
793 Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
795 size_t bin_sizes[(1 << (8 *
sizeof(Unsigned_char_type))) + 1];
796 std::vector<RandomAccessIter> bin_cache;
799 (first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
802 template <
class RandomAccessIter,
class Get_char,
class Get_length,
803 class Compare,
class Unsigned_char_type>
804 inline typename boost::disable_if_c<
sizeof(Unsigned_char_type) <= 2,
void
807 Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
810 BOOST_STATIC_WARNING(
sizeof(Unsigned_char_type) <= 2 );
811 std::sort(first, last, comp);
void update_offset(RandomAccessIter first, RandomAccessIter finish, size_t &char_offset)
Definition: string_sort.hpp:39
Definition: constants.hpp:11
offset_char_less_than(size_t char_offset)
Definition: string_sort.hpp:142
bool operator()(const Data_type &x, const Data_type &y) const
Definition: string_sort.hpp:123
size_t fchar_offset
Definition: string_sort.hpp:136
Definition: string_sort.hpp:121
void string_sort_rec(RandomAccessIter first, RandomAccessIter last, size_t char_offset, std::vector< RandomAccessIter > &bin_cache, unsigned cache_offset, size_t *bin_sizes)
Definition: string_sort.hpp:161
void reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, size_t char_offset, std::vector< RandomAccessIter > &bin_cache, unsigned cache_offset, size_t *bin_sizes)
Definition: string_sort.hpp:266
offset_less_than(size_t char_offset)
Definition: string_sort.hpp:102
size_t fchar_offset
Definition: string_sort.hpp:153
void string_sort(RandomAccessIter first, RandomAccessIter last, Unsigned_char_type unused)
Definition: string_sort.hpp:29
Definition: string_sort.hpp:141
void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, Compare comp, Unsigned_char_type unused)
Definition: string_sort.hpp:49
Definition: string_sort.hpp:101
offset_greater_than(size_t char_offset)
Definition: string_sort.hpp:122
bool operator()(const Data_type &x, const Data_type &y) const
Definition: string_sort.hpp:103
size_t fchar_offset
Definition: string_sort.hpp:116
Get_length length
Definition: string_sort.hpp:155
RandomAccessIter * size_bins(size_t *bin_sizes, std::vector< RandomAccessIter > &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
Definition: spreadsort_common.hpp:106
Get_char getchar
Definition: string_sort.hpp:154
bool operator()(const Data_type &x, const Data_type &y) const
Definition: string_sort.hpp:143