-// Details for a templated general-case hybrid-radix string_sort.\r
-\r
-// Copyright Steven J. Ross 2001 - 2014.\r
-// Distributed under the Boost Software License, Version 1.0.\r
-// (See accompanying file LICENSE_1_0.txt or copy at\r
-// http://www.boost.org/LICENSE_1_0.txt)\r
-\r
-// See http://www.boost.org/libs/sort for library home page.\r
-\r
-/*\r
-Some improvements suggested by:\r
-Phil Endecott and Frank Gennari\r
-*/\r
-\r
-#ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP\r
-#define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP\r
-#include <algorithm>\r
-#include <vector>\r
-#include <cstring>\r
-#include <limits>\r
-#include <functional>\r
-#include <boost/static_assert.hpp>\r
-#include <boost/serialization/static_warning.hpp>\r
-#include <boost/utility/enable_if.hpp>\r
-#include <boost/sort/spreadsort/detail/constants.hpp>\r
-#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>\r
-#include <boost/cstdint.hpp>\r
-\r
-namespace boost {\r
-namespace sort {\r
-namespace spreadsort {\r
- namespace detail {\r
- static const int max_step_size = 64;\r
-\r
- //Offsetting on identical characters. This function works a chunk of\r
- //characters at a time for cache efficiency and optimal worst-case\r
- //performance.\r
- template<class RandomAccessIter, class Unsigned_char_type>\r
- inline void\r
- update_offset(RandomAccessIter first, RandomAccessIter finish,\r
- size_t &char_offset)\r
- {\r
- const int char_size = sizeof(Unsigned_char_type);\r
- size_t nextOffset = char_offset;\r
- int step_size = max_step_size / char_size;\r
- while (true) {\r
- RandomAccessIter curr = first;\r
- do {\r
- //Ignore empties, but if the nextOffset would exceed the length or\r
- //not match, exit; we've found the last matching character\r
- //This will reduce the step_size if the current step doesn't match.\r
- if ((*curr).size() > char_offset) {\r
- if((*curr).size() <= (nextOffset + step_size)) {\r
- step_size = (*curr).size() - nextOffset - 1;\r
- if (step_size < 1) {\r
- char_offset = nextOffset;\r
- return;\r
- }\r
- }\r
- const int step_byte_size = step_size * char_size;\r
- if (memcmp(curr->data() + nextOffset, first->data() + nextOffset, \r
- step_byte_size) != 0) {\r
- if (step_size == 1) {\r
- char_offset = nextOffset;\r
- return;\r
- }\r
- step_size = (step_size > 4) ? 4 : 1;\r
- continue;\r
- }\r
- }\r
- ++curr;\r
- } while (curr != finish);\r
- nextOffset += step_size;\r
- }\r
- }\r
-\r
- //Offsetting on identical characters. This function works a character\r
- //at a time for optimal worst-case performance.\r
- template<class RandomAccessIter, class Get_char, class Get_length>\r
- inline void\r
- update_offset(RandomAccessIter first, RandomAccessIter finish,\r
- size_t &char_offset, Get_char getchar, Get_length length)\r
- {\r
- size_t nextOffset = char_offset;\r
- while (true) {\r
- RandomAccessIter curr = first;\r
- do {\r
- //ignore empties, but if the nextOffset would exceed the length or\r
- //not match, exit; we've found the last matching character\r
- if (length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1)\r
- || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {\r
- char_offset = nextOffset;\r
- return;\r
- }\r
- } while (++curr != finish);\r
- ++nextOffset;\r
- }\r
- }\r
-\r
- //This comparison functor assumes strings are identical up to char_offset\r
- template<class Data_type, class Unsigned_char_type>\r
- struct offset_less_than {\r
- offset_less_than(size_t char_offset) : fchar_offset(char_offset){}\r
- inline bool operator()(const Data_type &x, const Data_type &y) const\r
- {\r
- size_t minSize = (std::min)(x.size(), y.size());\r
- for (size_t u = fchar_offset; u < minSize; ++u) {\r
- BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));\r
- if (static_cast<Unsigned_char_type>(x[u]) !=\r
- static_cast<Unsigned_char_type>(y[u])) {\r
- return static_cast<Unsigned_char_type>(x[u]) < \r
- static_cast<Unsigned_char_type>(y[u]);\r
- }\r
- }\r
- return x.size() < y.size();\r
- }\r
- size_t fchar_offset;\r
- };\r
-\r
- //Compares strings assuming they are identical up to char_offset\r
- template<class Data_type, class Unsigned_char_type>\r
- struct offset_greater_than {\r
- offset_greater_than(size_t char_offset) : fchar_offset(char_offset){}\r
- inline bool operator()(const Data_type &x, const Data_type &y) const\r
- {\r
- size_t minSize = (std::min)(x.size(), y.size());\r
- for (size_t u = fchar_offset; u < minSize; ++u) {\r
- BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));\r
- if (static_cast<Unsigned_char_type>(x[u]) !=\r
- static_cast<Unsigned_char_type>(y[u])) {\r
- return static_cast<Unsigned_char_type>(x[u]) > \r
- static_cast<Unsigned_char_type>(y[u]);\r
- }\r
- }\r
- return x.size() > y.size();\r
- }\r
- size_t fchar_offset;\r
- };\r
-\r
- //This comparison functor assumes strings are identical up to char_offset\r
- template<class Data_type, class Get_char, class Get_length>\r
- struct offset_char_less_than {\r
- offset_char_less_than(size_t char_offset) : fchar_offset(char_offset){}\r
- inline bool operator()(const Data_type &x, const Data_type &y) const\r
- {\r
- size_t minSize = (std::min)(length(x), length(y));\r
- for (size_t u = fchar_offset; u < minSize; ++u) {\r
- if (getchar(x, u) != getchar(y, u)) {\r
- return getchar(x, u) < getchar(y, u);\r
- }\r
- }\r
- return length(x) < length(y);\r
- }\r
- size_t fchar_offset;\r
- Get_char getchar;\r
- Get_length length;\r
- };\r
-\r
- //String sorting recursive implementation\r
- template <class RandomAccessIter, class Unsigned_char_type>\r
- inline void\r
- string_sort_rec(RandomAccessIter first, RandomAccessIter last,\r
- size_t char_offset,\r
- std::vector<RandomAccessIter> &bin_cache,\r
- unsigned cache_offset, size_t *bin_sizes)\r
- {\r
- typedef typename std::iterator_traits<RandomAccessIter>::value_type\r
- Data_type;\r
- //This section makes handling of long identical substrings much faster\r
- //with a mild average performance impact.\r
- //Iterate to the end of the empties. If all empty, return\r
- while ((*first).size() <= char_offset) {\r
- if (++first == last)\r
- return;\r
- }\r
- RandomAccessIter finish = last - 1;\r
- //Getting the last non-empty\r
- for (;(*finish).size() <= char_offset; --finish);\r
- ++finish;\r
- //Offsetting on identical characters. This section works\r
- //a few characters at a time for optimal worst-case performance.\r
- update_offset<RandomAccessIter, Unsigned_char_type>(first, finish,\r
- char_offset);\r
- \r
- const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));\r
- //Equal worst-case of radix and comparison is when bin_count = n*log(n).\r
- const unsigned max_size = bin_count;\r
- const unsigned membin_count = bin_count + 1;\r
- unsigned cache_end;\r
- RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,\r
- cache_end, membin_count) + 1;\r
-\r
- //Calculating the size of each bin; this takes roughly 10% of runtime\r
- for (RandomAccessIter current = first; current != last; ++current) {\r
- if ((*current).size() <= char_offset) {\r
- bin_sizes[0]++;\r
- }\r
- else\r
- bin_sizes[static_cast<Unsigned_char_type>((*current)[char_offset])\r
- + 1]++;\r
- }\r
- //Assign the bin positions\r
- bin_cache[cache_offset] = first;\r
- for (unsigned u = 0; u < membin_count - 1; u++)\r
- bin_cache[cache_offset + u + 1] =\r
- bin_cache[cache_offset + u] + bin_sizes[u];\r
-\r
- //Swap into place\r
- RandomAccessIter next_bin_start = first;\r
- //handling empty bins\r
- RandomAccessIter * local_bin = &(bin_cache[cache_offset]);\r
- next_bin_start += bin_sizes[0];\r
- RandomAccessIter * target_bin;\r
- //Iterating over each element in the bin of empties\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //empties belong in this bin\r
- while ((*current).size() > char_offset) {\r
- target_bin =\r
- bins + static_cast<Unsigned_char_type>((*current)[char_offset]);\r
- iter_swap(current, (*target_bin)++);\r
- }\r
- }\r
- *local_bin = next_bin_start;\r
- //iterate backwards to find the last bin with elements in it\r
- //this saves iterations in multiple loops\r
- unsigned last_bin = bin_count - 1;\r
- for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);\r
- //This dominates runtime, mostly in the swap and bin lookups\r
- for (unsigned u = 0; u < last_bin; ++u) {\r
- local_bin = bins + u;\r
- next_bin_start += bin_sizes[u + 1];\r
- //Iterating over each element in this bin\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //Swapping into place until the correct element has been swapped in\r
- for (target_bin = bins + static_cast<Unsigned_char_type>\r
- ((*current)[char_offset]); target_bin != local_bin;\r
- target_bin = bins + static_cast<Unsigned_char_type>\r
- ((*current)[char_offset])) iter_swap(current, (*target_bin)++);\r
- }\r
- *local_bin = next_bin_start;\r
- }\r
- bins[last_bin] = last;\r
- //Recursing\r
- RandomAccessIter lastPos = bin_cache[cache_offset];\r
- //Skip this loop for empties\r
- for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;\r
- lastPos = bin_cache[u], ++u) {\r
- size_t count = bin_cache[u] - lastPos;\r
- //don't sort unless there are at least two items to Compare\r
- if (count < 2)\r
- continue;\r
- //using std::sort if its worst-case is better\r
- if (count < max_size)\r
- std::sort(lastPos, bin_cache[u],\r
- offset_less_than<Data_type, Unsigned_char_type>(char_offset + 1));\r
- else\r
- string_sort_rec<RandomAccessIter, Unsigned_char_type>(lastPos,\r
- bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);\r
- }\r
- }\r
-\r
- //Sorts strings in reverse order, with empties at the end\r
- template <class RandomAccessIter, class Unsigned_char_type>\r
- inline void\r
- reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,\r
- size_t char_offset,\r
- std::vector<RandomAccessIter> &bin_cache,\r
- unsigned cache_offset,\r
- size_t *bin_sizes)\r
- {\r
- typedef typename std::iterator_traits<RandomAccessIter>::value_type\r
- Data_type;\r
- //This section makes handling of long identical substrings much faster\r
- //with a mild average performance impact.\r
- RandomAccessIter curr = first;\r
- //Iterate to the end of the empties. If all empty, return\r
- while ((*curr).size() <= char_offset) {\r
- if (++curr == last)\r
- return;\r
- }\r
- //Getting the last non-empty\r
- while ((*(--last)).size() <= char_offset);\r
- ++last;\r
- //Offsetting on identical characters. This section works\r
- //a few characters at a time for optimal worst-case performance.\r
- update_offset<RandomAccessIter, Unsigned_char_type>(curr, last,\r
- char_offset);\r
- RandomAccessIter * target_bin;\r
-\r
- const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));\r
- //Equal worst-case of radix and comparison when bin_count = n*log(n).\r
- const unsigned max_size = bin_count;\r
- const unsigned membin_count = bin_count + 1;\r
- const unsigned max_bin = bin_count - 1;\r
- unsigned cache_end;\r
- RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,\r
- cache_end, membin_count);\r
- RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);\r
-\r
- //Calculating the size of each bin; this takes roughly 10% of runtime\r
- for (RandomAccessIter current = first; current != last; ++current) {\r
- if ((*current).size() <= char_offset) {\r
- bin_sizes[bin_count]++;\r
- }\r
- else\r
- bin_sizes[max_bin - static_cast<Unsigned_char_type>\r
- ((*current)[char_offset])]++;\r
- }\r
- //Assign the bin positions\r
- bin_cache[cache_offset] = first;\r
- for (unsigned u = 0; u < membin_count - 1; u++)\r
- bin_cache[cache_offset + u + 1] =\r
- bin_cache[cache_offset + u] + bin_sizes[u];\r
-\r
- //Swap into place\r
- RandomAccessIter next_bin_start = last;\r
- //handling empty bins\r
- RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);\r
- RandomAccessIter lastFull = *local_bin;\r
- //Iterating over each element in the bin of empties\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //empties belong in this bin\r
- while ((*current).size() > char_offset) {\r
- target_bin =\r
- end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);\r
- iter_swap(current, (*target_bin)++);\r
- }\r
- }\r
- *local_bin = next_bin_start;\r
- next_bin_start = first;\r
- //iterate backwards to find the last non-empty bin\r
- //this saves iterations in multiple loops\r
- unsigned last_bin = max_bin;\r
- for (; last_bin && !bin_sizes[last_bin]; --last_bin);\r
- //This dominates runtime, mostly in the swap and bin lookups\r
- for (unsigned u = 0; u < last_bin; ++u) {\r
- local_bin = bins + u;\r
- next_bin_start += bin_sizes[u];\r
- //Iterating over each element in this bin\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //Swapping into place until the correct element has been swapped in\r
- for (target_bin =\r
- end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);\r
- target_bin != local_bin;\r
- target_bin =\r
- end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]))\r
- iter_swap(current, (*target_bin)++);\r
- }\r
- *local_bin = next_bin_start;\r
- }\r
- bins[last_bin] = lastFull;\r
- //Recursing\r
- RandomAccessIter lastPos = first;\r
- //Skip this loop for empties\r
- for (unsigned u = cache_offset; u <= cache_offset + last_bin;\r
- lastPos = bin_cache[u], ++u) {\r
- size_t count = bin_cache[u] - lastPos;\r
- //don't sort unless there are at least two items to Compare\r
- if (count < 2)\r
- continue;\r
- //using std::sort if its worst-case is better\r
- if (count < max_size)\r
- std::sort(lastPos, bin_cache[u], offset_greater_than<Data_type,\r
- Unsigned_char_type>(char_offset + 1));\r
- else\r
- reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>\r
- (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);\r
- }\r
- }\r
-\r
- //String sorting recursive implementation\r
- template <class RandomAccessIter, class Unsigned_char_type, class Get_char,\r
- class Get_length>\r
- inline void\r
- string_sort_rec(RandomAccessIter first, RandomAccessIter last,\r
- size_t char_offset, std::vector<RandomAccessIter> &bin_cache,\r
- unsigned cache_offset, size_t *bin_sizes,\r
- Get_char getchar, Get_length length)\r
- {\r
- typedef typename std::iterator_traits<RandomAccessIter>::value_type\r
- Data_type;\r
- //This section makes handling of long identical substrings much faster\r
- //with a mild average performance impact.\r
- //Iterate to the end of the empties. If all empty, return\r
- while (length(*first) <= char_offset) {\r
- if (++first == last)\r
- return;\r
- }\r
- RandomAccessIter finish = last - 1;\r
- //Getting the last non-empty\r
- for (;length(*finish) <= char_offset; --finish);\r
- ++finish;\r
- update_offset(first, finish, char_offset, getchar, length);\r
-\r
- const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));\r
- //Equal worst-case of radix and comparison is when bin_count = n*log(n).\r
- const unsigned max_size = bin_count;\r
- const unsigned membin_count = bin_count + 1;\r
- unsigned cache_end;\r
- RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,\r
- cache_end, membin_count) + 1;\r
-\r
- //Calculating the size of each bin; this takes roughly 10% of runtime\r
- for (RandomAccessIter current = first; current != last; ++current) {\r
- if (length(*current) <= char_offset) {\r
- bin_sizes[0]++;\r
- }\r
- else\r
- bin_sizes[getchar((*current), char_offset) + 1]++;\r
- }\r
- //Assign the bin positions\r
- bin_cache[cache_offset] = first;\r
- for (unsigned u = 0; u < membin_count - 1; u++)\r
- bin_cache[cache_offset + u + 1] =\r
- bin_cache[cache_offset + u] + bin_sizes[u];\r
-\r
- //Swap into place\r
- RandomAccessIter next_bin_start = first;\r
- //handling empty bins\r
- RandomAccessIter * local_bin = &(bin_cache[cache_offset]);\r
- next_bin_start += bin_sizes[0];\r
- RandomAccessIter * target_bin;\r
- //Iterating over each element in the bin of empties\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //empties belong in this bin\r
- while (length(*current) > char_offset) {\r
- target_bin = bins + getchar((*current), char_offset);\r
- iter_swap(current, (*target_bin)++);\r
- }\r
- }\r
- *local_bin = next_bin_start;\r
- //iterate backwards to find the last bin with elements in it\r
- //this saves iterations in multiple loops\r
- unsigned last_bin = bin_count - 1;\r
- for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);\r
- //This dominates runtime, mostly in the swap and bin lookups\r
- for (unsigned ii = 0; ii < last_bin; ++ii) {\r
- local_bin = bins + ii;\r
- next_bin_start += bin_sizes[ii + 1];\r
- //Iterating over each element in this bin\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //Swapping into place until the correct element has been swapped in\r
- for (target_bin = bins + getchar((*current), char_offset);\r
- target_bin != local_bin;\r
- target_bin = bins + getchar((*current), char_offset))\r
- iter_swap(current, (*target_bin)++);\r
- }\r
- *local_bin = next_bin_start;\r
- }\r
- bins[last_bin] = last;\r
-\r
- //Recursing\r
- RandomAccessIter lastPos = bin_cache[cache_offset];\r
- //Skip this loop for empties\r
- for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;\r
- lastPos = bin_cache[u], ++u) {\r
- size_t count = bin_cache[u] - lastPos;\r
- //don't sort unless there are at least two items to Compare\r
- if (count < 2)\r
- continue;\r
- //using std::sort if its worst-case is better\r
- if (count < max_size)\r
- std::sort(lastPos, bin_cache[u], offset_char_less_than<Data_type,\r
- Get_char, Get_length>(char_offset + 1));\r
- else\r
- string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,\r
- Get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache,\r
- cache_end, bin_sizes, getchar, length);\r
- }\r
- }\r
-\r
- //String sorting recursive implementation\r
- template <class RandomAccessIter, class Unsigned_char_type, class Get_char,\r
- class Get_length, class Compare>\r
- inline void\r
- string_sort_rec(RandomAccessIter first, RandomAccessIter last,\r
- size_t char_offset, std::vector<RandomAccessIter> &bin_cache,\r
- unsigned cache_offset, size_t *bin_sizes,\r
- Get_char getchar, Get_length length, Compare comp)\r
- {\r
- //This section makes handling of long identical substrings much faster\r
- //with a mild average performance impact.\r
- //Iterate to the end of the empties. If all empty, return\r
- while (length(*first) <= char_offset) {\r
- if (++first == last)\r
- return;\r
- }\r
- RandomAccessIter finish = last - 1;\r
- //Getting the last non-empty\r
- for (;length(*finish) <= char_offset; --finish);\r
- ++finish;\r
- update_offset(first, finish, char_offset, getchar, length);\r
-\r
- const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));\r
- //Equal worst-case of radix and comparison is when bin_count = n*log(n).\r
- const unsigned max_size = bin_count;\r
- const unsigned membin_count = bin_count + 1;\r
- unsigned cache_end;\r
- RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,\r
- cache_end, membin_count) + 1;\r
-\r
- //Calculating the size of each bin; this takes roughly 10% of runtime\r
- for (RandomAccessIter current = first; current != last; ++current) {\r
- if (length(*current) <= char_offset) {\r
- bin_sizes[0]++;\r
- }\r
- else\r
- bin_sizes[getchar((*current), char_offset) + 1]++;\r
- }\r
- //Assign the bin positions\r
- bin_cache[cache_offset] = first;\r
- for (unsigned u = 0; u < membin_count - 1; u++)\r
- bin_cache[cache_offset + u + 1] =\r
- bin_cache[cache_offset + u] + bin_sizes[u];\r
-\r
- //Swap into place\r
- RandomAccessIter next_bin_start = first;\r
- //handling empty bins\r
- RandomAccessIter * local_bin = &(bin_cache[cache_offset]);\r
- next_bin_start += bin_sizes[0];\r
- RandomAccessIter * target_bin;\r
- //Iterating over each element in the bin of empties\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //empties belong in this bin\r
- while (length(*current) > char_offset) {\r
- target_bin = bins + getchar((*current), char_offset);\r
- iter_swap(current, (*target_bin)++);\r
- }\r
- }\r
- *local_bin = next_bin_start;\r
- //iterate backwards to find the last bin with elements in it\r
- //this saves iterations in multiple loops\r
- unsigned last_bin = bin_count - 1;\r
- for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);\r
- //This dominates runtime, mostly in the swap and bin lookups\r
- for (unsigned u = 0; u < last_bin; ++u) {\r
- local_bin = bins + u;\r
- next_bin_start += bin_sizes[u + 1];\r
- //Iterating over each element in this bin\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //Swapping into place until the correct element has been swapped in\r
- for (target_bin = bins + getchar((*current), char_offset);\r
- target_bin != local_bin;\r
- target_bin = bins + getchar((*current), char_offset))\r
- iter_swap(current, (*target_bin)++);\r
- }\r
- *local_bin = next_bin_start;\r
- }\r
- bins[last_bin] = last;\r
-\r
- //Recursing\r
- RandomAccessIter lastPos = bin_cache[cache_offset];\r
- //Skip this loop for empties\r
- for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;\r
- lastPos = bin_cache[u], ++u) {\r
- size_t count = bin_cache[u] - lastPos;\r
- //don't sort unless there are at least two items to Compare\r
- if (count < 2)\r
- continue;\r
- //using std::sort if its worst-case is better\r
- if (count < max_size)\r
- std::sort(lastPos, bin_cache[u], comp);\r
- else\r
- string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,\r
- Get_length, Compare>\r
- (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,\r
- bin_sizes, getchar, length, comp);\r
- }\r
- }\r
-\r
- //Sorts strings in reverse order, with empties at the end\r
- template <class RandomAccessIter, class Unsigned_char_type, class Get_char,\r
- class Get_length, class Compare>\r
- inline void\r
- reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,\r
- size_t char_offset, std::vector<RandomAccessIter> &bin_cache,\r
- unsigned cache_offset, size_t *bin_sizes,\r
- Get_char getchar, Get_length length, Compare comp)\r
- {\r
- //This section makes handling of long identical substrings much faster\r
- //with a mild average performance impact.\r
- RandomAccessIter curr = first;\r
- //Iterate to the end of the empties. If all empty, return\r
- while (length(*curr) <= char_offset) {\r
- if (++curr == last)\r
- return;\r
- }\r
- //Getting the last non-empty\r
- while (length(*(--last)) <= char_offset);\r
- ++last;\r
- //Offsetting on identical characters. This section works\r
- //a character at a time for optimal worst-case performance.\r
- update_offset(curr, last, char_offset, getchar, length);\r
-\r
- const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));\r
- //Equal worst-case of radix and comparison is when bin_count = n*log(n).\r
- const unsigned max_size = bin_count;\r
- const unsigned membin_count = bin_count + 1;\r
- const unsigned max_bin = bin_count - 1;\r
- unsigned cache_end;\r
- RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,\r
- cache_end, membin_count);\r
- RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);\r
-\r
- //Calculating the size of each bin; this takes roughly 10% of runtime\r
- for (RandomAccessIter current = first; current != last; ++current) {\r
- if (length(*current) <= char_offset) {\r
- bin_sizes[bin_count]++;\r
- }\r
- else\r
- bin_sizes[max_bin - getchar((*current), char_offset)]++;\r
- }\r
- //Assign the bin positions\r
- bin_cache[cache_offset] = first;\r
- for (unsigned u = 0; u < membin_count - 1; u++)\r
- bin_cache[cache_offset + u + 1] =\r
- bin_cache[cache_offset + u] + bin_sizes[u];\r
-\r
- //Swap into place\r
- RandomAccessIter next_bin_start = last;\r
- //handling empty bins\r
- RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);\r
- RandomAccessIter lastFull = *local_bin;\r
- RandomAccessIter * target_bin;\r
- //Iterating over each element in the bin of empties\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //empties belong in this bin\r
- while (length(*current) > char_offset) {\r
- target_bin = end_bin - getchar((*current), char_offset);\r
- iter_swap(current, (*target_bin)++);\r
- }\r
- }\r
- *local_bin = next_bin_start;\r
- next_bin_start = first;\r
- //iterate backwards to find the last bin with elements in it\r
- //this saves iterations in multiple loops\r
- unsigned last_bin = max_bin;\r
- for (; last_bin && !bin_sizes[last_bin]; --last_bin);\r
- //This dominates runtime, mostly in the swap and bin lookups\r
- for (unsigned u = 0; u < last_bin; ++u) {\r
- local_bin = bins + u;\r
- next_bin_start += bin_sizes[u];\r
- //Iterating over each element in this bin\r
- for (RandomAccessIter current = *local_bin; current < next_bin_start;\r
- ++current) {\r
- //Swapping into place until the correct element has been swapped in\r
- for (target_bin = end_bin - getchar((*current), char_offset);\r
- target_bin != local_bin;\r
- target_bin = end_bin - getchar((*current), char_offset))\r
- iter_swap(current, (*target_bin)++);\r
- }\r
- *local_bin = next_bin_start;\r
- }\r
- bins[last_bin] = lastFull;\r
- //Recursing\r
- RandomAccessIter lastPos = first;\r
- //Skip this loop for empties\r
- for (unsigned u = cache_offset; u <= cache_offset + last_bin;\r
- lastPos = bin_cache[u], ++u) {\r
- size_t count = bin_cache[u] - lastPos;\r
- //don't sort unless there are at least two items to Compare\r
- if (count < 2)\r
- continue;\r
- //using std::sort if its worst-case is better\r
- if (count < max_size)\r
- std::sort(lastPos, bin_cache[u], comp);\r
- else\r
- reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type,\r
- Get_char, Get_length, Compare>\r
- (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,\r
- bin_sizes, getchar, length, comp);\r
- }\r
- }\r
-\r
- //Holds the bin vector and makes the initial recursive call\r
- template <class RandomAccessIter, class Unsigned_char_type>\r
- inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void\r
- >::type\r
- string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Unsigned_char_type)\r
- {\r
- size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];\r
- std::vector<RandomAccessIter> bin_cache;\r
- string_sort_rec<RandomAccessIter, Unsigned_char_type>\r
- (first, last, 0, bin_cache, 0, bin_sizes);\r
- }\r
-\r
- template <class RandomAccessIter, class Unsigned_char_type>\r
- inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void\r
- >::type\r
- string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Unsigned_char_type)\r
- {\r
- //Warning that we're using std::sort, even though string_sort was called\r
- BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );\r
- std::sort(first, last);\r
- }\r
-\r
- //Holds the bin vector and makes the initial recursive call\r
- template <class RandomAccessIter, class Unsigned_char_type>\r
- inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void\r
- >::type\r
- reverse_string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Unsigned_char_type)\r
- {\r
- size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];\r
- std::vector<RandomAccessIter> bin_cache;\r
- reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>\r
- (first, last, 0, bin_cache, 0, bin_sizes);\r
- }\r
-\r
- template <class RandomAccessIter, class Unsigned_char_type>\r
- inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void\r
- >::type\r
- reverse_string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Unsigned_char_type)\r
- {\r
- typedef typename std::iterator_traits<RandomAccessIter>::value_type\r
- Data_type;\r
- //Warning that we're using std::sort, even though string_sort was called\r
- BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );\r
- std::sort(first, last, std::greater<Data_type>());\r
- }\r
-\r
- //Holds the bin vector and makes the initial recursive call\r
- template <class RandomAccessIter, class Get_char, class Get_length,\r
- class Unsigned_char_type>\r
- inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void\r
- >::type\r
- string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Get_char getchar, Get_length length, Unsigned_char_type)\r
- {\r
- size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];\r
- std::vector<RandomAccessIter> bin_cache;\r
- string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,\r
- Get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);\r
- }\r
-\r
- template <class RandomAccessIter, class Get_char, class Get_length,\r
- class Unsigned_char_type>\r
- inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void\r
- >::type\r
- string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Get_char getchar, Get_length length, Unsigned_char_type)\r
- {\r
- //Warning that we're using std::sort, even though string_sort was called\r
- BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );\r
- std::sort(first, last);\r
- }\r
-\r
- //Holds the bin vector and makes the initial recursive call\r
- template <class RandomAccessIter, class Get_char, class Get_length,\r
- class Compare, class Unsigned_char_type>\r
- inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void\r
- >::type\r
- string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)\r
- {\r
- size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];\r
- std::vector<RandomAccessIter> bin_cache;\r
- string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char\r
- , Get_length, Compare>\r
- (first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);\r
- }\r
-\r
- //disable_if_c was refusing to compile, so rewrote to use enable_if_c\r
- template <class RandomAccessIter, class Get_char, class Get_length,\r
- class Compare, class Unsigned_char_type>\r
- inline typename boost::enable_if_c< (sizeof(Unsigned_char_type) > 2), void\r
- >::type\r
- string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)\r
- {\r
- //Warning that we're using std::sort, even though string_sort was called\r
- BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );\r
- std::sort(first, last, comp);\r
- }\r
-\r
- //Holds the bin vector and makes the initial recursive call\r
- template <class RandomAccessIter, class Get_char, class Get_length,\r
- class Compare, class Unsigned_char_type>\r
- inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void\r
- >::type\r
- reverse_string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)\r
- {\r
- size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];\r
- std::vector<RandomAccessIter> bin_cache;\r
- reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,\r
- Get_length, Compare>\r
- (first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);\r
- }\r
-\r
- template <class RandomAccessIter, class Get_char, class Get_length,\r
- class Compare, class Unsigned_char_type>\r
- inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void\r
- >::type\r
- reverse_string_sort(RandomAccessIter first, RandomAccessIter last,\r
- Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)\r
- {\r
- //Warning that we're using std::sort, even though string_sort was called\r
- BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );\r
- std::sort(first, last, comp);\r
- }\r
- }\r
-}\r
-}\r
-}\r
-\r
-#endif\r
+// Details for a templated general-case hybrid-radix string_sort.
+
+// Copyright Steven J. Ross 2001 - 2014.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org/libs/sort for library home page.
+
+/*
+Some improvements suggested by:
+Phil Endecott and Frank Gennari
+*/
+
+#ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
+#define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
+#include <algorithm>
+#include <vector>
+#include <cstring>
+#include <limits>
+#include <functional>
+#include <boost/static_assert.hpp>
+#include <boost/serialization/static_warning.hpp>
+#include <boost/utility/enable_if.hpp>
+#include <boost/sort/spreadsort/detail/constants.hpp>
+#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
+#include <boost/cstdint.hpp>
+
+namespace boost {
+namespace sort {
+namespace spreadsort {
+ namespace detail {
+ static const int max_step_size = 64;
+
+ //Offsetting on identical characters. This function works a chunk of
+ //characters at a time for cache efficiency and optimal worst-case
+ //performance.
+ template<class RandomAccessIter, class Unsigned_char_type>
+ inline void
+ update_offset(RandomAccessIter first, RandomAccessIter finish,
+ size_t &char_offset)
+ {
+ const int char_size = sizeof(Unsigned_char_type);
+ size_t nextOffset = char_offset;
+ int step_size = max_step_size / char_size;
+ while (true) {
+ RandomAccessIter curr = first;
+ do {
+ //Ignore empties, but if the nextOffset would exceed the length or
+ //not match, exit; we've found the last matching character
+ //This will reduce the step_size if the current step doesn't match.
+ if ((*curr).size() > char_offset) {
+ if((*curr).size() <= (nextOffset + step_size)) {
+ step_size = (*curr).size() - nextOffset - 1;
+ if (step_size < 1) {
+ char_offset = nextOffset;
+ return;
+ }
+ }
+ const int step_byte_size = step_size * char_size;
+ if (memcmp(curr->data() + nextOffset, first->data() + nextOffset,
+ step_byte_size) != 0) {
+ if (step_size == 1) {
+ char_offset = nextOffset;
+ return;
+ }
+ step_size = (step_size > 4) ? 4 : 1;
+ continue;
+ }
+ }
+ ++curr;
+ } while (curr != finish);
+ nextOffset += step_size;
+ }
+ }
+
+ //Offsetting on identical characters. This function works a character
+ //at a time for optimal worst-case performance.
+ template<class RandomAccessIter, class Get_char, class Get_length>
+ inline void
+ update_offset(RandomAccessIter first, RandomAccessIter finish,
+ size_t &char_offset, Get_char get_character, Get_length length)
+ {
+ size_t nextOffset = char_offset;
+ while (true) {
+ RandomAccessIter curr = first;
+ do {
+ //ignore empties, but if the nextOffset would exceed the length or
+ //not match, exit; we've found the last matching character
+ if (length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1)
+ || get_character((*curr), nextOffset) != get_character((*first), nextOffset))) {
+ char_offset = nextOffset;
+ return;
+ }
+ } while (++curr != finish);
+ ++nextOffset;
+ }
+ }
+
+ //This comparison functor assumes strings are identical up to char_offset
+ template<class Data_type, class Unsigned_char_type>
+ struct offset_less_than {
+ offset_less_than(size_t char_offset) : fchar_offset(char_offset){}
+ inline bool operator()(const Data_type &x, const Data_type &y) const
+ {
+ size_t minSize = (std::min)(x.size(), y.size());
+ for (size_t u = fchar_offset; u < minSize; ++u) {
+ BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
+ if (static_cast<Unsigned_char_type>(x[u]) !=
+ static_cast<Unsigned_char_type>(y[u])) {
+ return static_cast<Unsigned_char_type>(x[u]) <
+ static_cast<Unsigned_char_type>(y[u]);
+ }
+ }
+ return x.size() < y.size();
+ }
+ size_t fchar_offset;
+ };
+
+ //Compares strings assuming they are identical up to char_offset
+ template<class Data_type, class Unsigned_char_type>
+ struct offset_greater_than {
+ offset_greater_than(size_t char_offset) : fchar_offset(char_offset){}
+ inline bool operator()(const Data_type &x, const Data_type &y) const
+ {
+ size_t minSize = (std::min)(x.size(), y.size());
+ for (size_t u = fchar_offset; u < minSize; ++u) {
+ BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
+ if (static_cast<Unsigned_char_type>(x[u]) !=
+ static_cast<Unsigned_char_type>(y[u])) {
+ return static_cast<Unsigned_char_type>(x[u]) >
+ static_cast<Unsigned_char_type>(y[u]);
+ }
+ }
+ return x.size() > y.size();
+ }
+ size_t fchar_offset;
+ };
+
+ //This comparison functor assumes strings are identical up to char_offset
+ template<class Data_type, class Get_char, class Get_length>
+ struct offset_char_less_than {
+ offset_char_less_than(size_t char_offset) : fchar_offset(char_offset){}
+ inline bool operator()(const Data_type &x, const Data_type &y) const
+ {
+ size_t minSize = (std::min)(length(x), length(y));
+ for (size_t u = fchar_offset; u < minSize; ++u) {
+ if (get_character(x, u) != get_character(y, u)) {
+ return get_character(x, u) < get_character(y, u);
+ }
+ }
+ return length(x) < length(y);
+ }
+ size_t fchar_offset;
+ Get_char get_character;
+ Get_length length;
+ };
+
+ //String sorting recursive implementation
+ template <class RandomAccessIter, class Unsigned_char_type>
+ inline void
+ string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+ size_t char_offset,
+ std::vector<RandomAccessIter> &bin_cache,
+ unsigned cache_offset, size_t *bin_sizes)
+ {
+ typedef typename std::iterator_traits<RandomAccessIter>::value_type
+ Data_type;
+ //This section makes handling of long identical substrings much faster
+ //with a mild average performance impact.
+ //Iterate to the end of the empties. If all empty, return
+ while ((*first).size() <= char_offset) {
+ if (++first == last)
+ return;
+ }
+ RandomAccessIter finish = last - 1;
+ //Getting the last non-empty
+ for (;(*finish).size() <= char_offset; --finish);
+ ++finish;
+ //Offsetting on identical characters. This section works
+ //a few characters at a time for optimal worst-case performance.
+ update_offset<RandomAccessIter, Unsigned_char_type>(first, finish,
+ char_offset);
+
+ const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
+ //Equal worst-case of radix and comparison is when bin_count = n*log(n).
+ const unsigned max_size = bin_count;
+ const unsigned membin_count = bin_count + 1;
+ unsigned cache_end;
+ RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+ cache_end, membin_count) + 1;
+
+ //Calculating the size of each bin; this takes roughly 10% of runtime
+ for (RandomAccessIter current = first; current != last; ++current) {
+ if ((*current).size() <= char_offset) {
+ bin_sizes[0]++;
+ }
+ else
+ bin_sizes[static_cast<Unsigned_char_type>((*current)[char_offset])
+ + 1]++;
+ }
+ //Assign the bin positions
+ bin_cache[cache_offset] = first;
+ for (unsigned u = 0; u < membin_count - 1; u++)
+ bin_cache[cache_offset + u + 1] =
+ bin_cache[cache_offset + u] + bin_sizes[u];
+
+ //Swap into place
+ RandomAccessIter next_bin_start = first;
+ //handling empty bins
+ RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
+ next_bin_start += bin_sizes[0];
+ RandomAccessIter * target_bin;
+ //Iterating over each element in the bin of empties
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //empties belong in this bin
+ while ((*current).size() > char_offset) {
+ target_bin =
+ bins + static_cast<Unsigned_char_type>((*current)[char_offset]);
+ iter_swap(current, (*target_bin)++);
+ }
+ }
+ *local_bin = next_bin_start;
+ //iterate backwards to find the last bin with elements in it
+ //this saves iterations in multiple loops
+ unsigned last_bin = bin_count - 1;
+ for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
+ //This dominates runtime, mostly in the swap and bin lookups
+ for (unsigned u = 0; u < last_bin; ++u) {
+ local_bin = bins + u;
+ next_bin_start += bin_sizes[u + 1];
+ //Iterating over each element in this bin
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //Swapping into place until the correct element has been swapped in
+ for (target_bin = bins + static_cast<Unsigned_char_type>
+ ((*current)[char_offset]); target_bin != local_bin;
+ target_bin = bins + static_cast<Unsigned_char_type>
+ ((*current)[char_offset])) iter_swap(current, (*target_bin)++);
+ }
+ *local_bin = next_bin_start;
+ }
+ bins[last_bin] = last;
+ //Recursing
+ RandomAccessIter lastPos = bin_cache[cache_offset];
+ //Skip this loop for empties
+ for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
+ lastPos = bin_cache[u], ++u) {
+ size_t count = bin_cache[u] - lastPos;
+ //don't sort unless there are at least two items to Compare
+ if (count < 2)
+ continue;
+ //using std::sort if its worst-case is better
+ if (count < max_size)
+ std::sort(lastPos, bin_cache[u],
+ offset_less_than<Data_type, Unsigned_char_type>(char_offset + 1));
+ else
+ string_sort_rec<RandomAccessIter, Unsigned_char_type>(lastPos,
+ bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
+ }
+ }
+
+ //Sorts strings in reverse order, with empties at the end
+ template <class RandomAccessIter, class Unsigned_char_type>
+ inline void
+ reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+ size_t char_offset,
+ std::vector<RandomAccessIter> &bin_cache,
+ unsigned cache_offset,
+ size_t *bin_sizes)
+ {
+ typedef typename std::iterator_traits<RandomAccessIter>::value_type
+ Data_type;
+ //This section makes handling of long identical substrings much faster
+ //with a mild average performance impact.
+ RandomAccessIter curr = first;
+ //Iterate to the end of the empties. If all empty, return
+ while ((*curr).size() <= char_offset) {
+ if (++curr == last)
+ return;
+ }
+ //Getting the last non-empty
+ while ((*(--last)).size() <= char_offset);
+ ++last;
+ //Offsetting on identical characters. This section works
+ //a few characters at a time for optimal worst-case performance.
+ update_offset<RandomAccessIter, Unsigned_char_type>(curr, last,
+ char_offset);
+ RandomAccessIter * target_bin;
+
+ const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
+ //Equal worst-case of radix and comparison when bin_count = n*log(n).
+ const unsigned max_size = bin_count;
+ const unsigned membin_count = bin_count + 1;
+ const unsigned max_bin = bin_count - 1;
+ unsigned cache_end;
+ RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+ cache_end, membin_count);
+ RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
+
+ //Calculating the size of each bin; this takes roughly 10% of runtime
+ for (RandomAccessIter current = first; current != last; ++current) {
+ if ((*current).size() <= char_offset) {
+ bin_sizes[bin_count]++;
+ }
+ else
+ bin_sizes[max_bin - static_cast<Unsigned_char_type>
+ ((*current)[char_offset])]++;
+ }
+ //Assign the bin positions
+ bin_cache[cache_offset] = first;
+ for (unsigned u = 0; u < membin_count - 1; u++)
+ bin_cache[cache_offset + u + 1] =
+ bin_cache[cache_offset + u] + bin_sizes[u];
+
+ //Swap into place
+ RandomAccessIter next_bin_start = last;
+ //handling empty bins
+ RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
+ RandomAccessIter lastFull = *local_bin;
+ //Iterating over each element in the bin of empties
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //empties belong in this bin
+ while ((*current).size() > char_offset) {
+ target_bin =
+ end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
+ iter_swap(current, (*target_bin)++);
+ }
+ }
+ *local_bin = next_bin_start;
+ next_bin_start = first;
+ //iterate backwards to find the last non-empty bin
+ //this saves iterations in multiple loops
+ unsigned last_bin = max_bin;
+ for (; last_bin && !bin_sizes[last_bin]; --last_bin);
+ //This dominates runtime, mostly in the swap and bin lookups
+ for (unsigned u = 0; u < last_bin; ++u) {
+ local_bin = bins + u;
+ next_bin_start += bin_sizes[u];
+ //Iterating over each element in this bin
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //Swapping into place until the correct element has been swapped in
+ for (target_bin =
+ end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
+ target_bin != local_bin;
+ target_bin =
+ end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]))
+ iter_swap(current, (*target_bin)++);
+ }
+ *local_bin = next_bin_start;
+ }
+ bins[last_bin] = lastFull;
+ //Recursing
+ RandomAccessIter lastPos = first;
+ //Skip this loop for empties
+ for (unsigned u = cache_offset; u <= cache_offset + last_bin;
+ lastPos = bin_cache[u], ++u) {
+ size_t count = bin_cache[u] - lastPos;
+ //don't sort unless there are at least two items to Compare
+ if (count < 2)
+ continue;
+ //using std::sort if its worst-case is better
+ if (count < max_size)
+ std::sort(lastPos, bin_cache[u], offset_greater_than<Data_type,
+ Unsigned_char_type>(char_offset + 1));
+ else
+ reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
+ (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
+ }
+ }
+
+ //String sorting recursive implementation
+ template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
+ class Get_length>
+ inline void
+ string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+ size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
+ unsigned cache_offset, size_t *bin_sizes,
+ Get_char get_character, Get_length length)
+ {
+ typedef typename std::iterator_traits<RandomAccessIter>::value_type
+ Data_type;
+ //This section makes handling of long identical substrings much faster
+ //with a mild average performance impact.
+ //Iterate to the end of the empties. If all empty, return
+ while (length(*first) <= char_offset) {
+ if (++first == last)
+ return;
+ }
+ RandomAccessIter finish = last - 1;
+ //Getting the last non-empty
+ for (;length(*finish) <= char_offset; --finish);
+ ++finish;
+ update_offset(first, finish, char_offset, get_character, length);
+
+ const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
+ //Equal worst-case of radix and comparison is when bin_count = n*log(n).
+ const unsigned max_size = bin_count;
+ const unsigned membin_count = bin_count + 1;
+ unsigned cache_end;
+ RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+ cache_end, membin_count) + 1;
+
+ //Calculating the size of each bin; this takes roughly 10% of runtime
+ for (RandomAccessIter current = first; current != last; ++current) {
+ if (length(*current) <= char_offset) {
+ bin_sizes[0]++;
+ }
+ else
+ bin_sizes[get_character((*current), char_offset) + 1]++;
+ }
+ //Assign the bin positions
+ bin_cache[cache_offset] = first;
+ for (unsigned u = 0; u < membin_count - 1; u++)
+ bin_cache[cache_offset + u + 1] =
+ bin_cache[cache_offset + u] + bin_sizes[u];
+
+ //Swap into place
+ RandomAccessIter next_bin_start = first;
+ //handling empty bins
+ RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
+ next_bin_start += bin_sizes[0];
+ RandomAccessIter * target_bin;
+ //Iterating over each element in the bin of empties
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //empties belong in this bin
+ while (length(*current) > char_offset) {
+ target_bin = bins + get_character((*current), char_offset);
+ iter_swap(current, (*target_bin)++);
+ }
+ }
+ *local_bin = next_bin_start;
+ //iterate backwards to find the last bin with elements in it
+ //this saves iterations in multiple loops
+ unsigned last_bin = bin_count - 1;
+ for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
+ //This dominates runtime, mostly in the swap and bin lookups
+ for (unsigned ii = 0; ii < last_bin; ++ii) {
+ local_bin = bins + ii;
+ next_bin_start += bin_sizes[ii + 1];
+ //Iterating over each element in this bin
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //Swapping into place until the correct element has been swapped in
+ for (target_bin = bins + get_character((*current), char_offset);
+ target_bin != local_bin;
+ target_bin = bins + get_character((*current), char_offset))
+ iter_swap(current, (*target_bin)++);
+ }
+ *local_bin = next_bin_start;
+ }
+ bins[last_bin] = last;
+
+ //Recursing
+ RandomAccessIter lastPos = bin_cache[cache_offset];
+ //Skip this loop for empties
+ for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
+ lastPos = bin_cache[u], ++u) {
+ size_t count = bin_cache[u] - lastPos;
+ //don't sort unless there are at least two items to Compare
+ if (count < 2)
+ continue;
+ //using std::sort if its worst-case is better
+ if (count < max_size)
+ std::sort(lastPos, bin_cache[u], offset_char_less_than<Data_type,
+ Get_char, Get_length>(char_offset + 1));
+ else
+ string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
+ Get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache,
+ cache_end, bin_sizes, get_character, length);
+ }
+ }
+
+ //String sorting recursive implementation
+ template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
+ class Get_length, class Compare>
+ inline void
+ string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+ size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
+ unsigned cache_offset, size_t *bin_sizes,
+ Get_char get_character, Get_length length, Compare comp)
+ {
+ //This section makes handling of long identical substrings much faster
+ //with a mild average performance impact.
+ //Iterate to the end of the empties. If all empty, return
+ while (length(*first) <= char_offset) {
+ if (++first == last)
+ return;
+ }
+ RandomAccessIter finish = last - 1;
+ //Getting the last non-empty
+ for (;length(*finish) <= char_offset; --finish);
+ ++finish;
+ update_offset(first, finish, char_offset, get_character, length);
+
+ const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
+ //Equal worst-case of radix and comparison is when bin_count = n*log(n).
+ const unsigned max_size = bin_count;
+ const unsigned membin_count = bin_count + 1;
+ unsigned cache_end;
+ RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+ cache_end, membin_count) + 1;
+
+ //Calculating the size of each bin; this takes roughly 10% of runtime
+ for (RandomAccessIter current = first; current != last; ++current) {
+ if (length(*current) <= char_offset) {
+ bin_sizes[0]++;
+ }
+ else
+ bin_sizes[get_character((*current), char_offset) + 1]++;
+ }
+ //Assign the bin positions
+ bin_cache[cache_offset] = first;
+ for (unsigned u = 0; u < membin_count - 1; u++)
+ bin_cache[cache_offset + u + 1] =
+ bin_cache[cache_offset + u] + bin_sizes[u];
+
+ //Swap into place
+ RandomAccessIter next_bin_start = first;
+ //handling empty bins
+ RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
+ next_bin_start += bin_sizes[0];
+ RandomAccessIter * target_bin;
+ //Iterating over each element in the bin of empties
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //empties belong in this bin
+ while (length(*current) > char_offset) {
+ target_bin = bins + get_character((*current), char_offset);
+ iter_swap(current, (*target_bin)++);
+ }
+ }
+ *local_bin = next_bin_start;
+ //iterate backwards to find the last bin with elements in it
+ //this saves iterations in multiple loops
+ unsigned last_bin = bin_count - 1;
+ for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
+ //This dominates runtime, mostly in the swap and bin lookups
+ for (unsigned u = 0; u < last_bin; ++u) {
+ local_bin = bins + u;
+ next_bin_start += bin_sizes[u + 1];
+ //Iterating over each element in this bin
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //Swapping into place until the correct element has been swapped in
+ for (target_bin = bins + get_character((*current), char_offset);
+ target_bin != local_bin;
+ target_bin = bins + get_character((*current), char_offset))
+ iter_swap(current, (*target_bin)++);
+ }
+ *local_bin = next_bin_start;
+ }
+ bins[last_bin] = last;
+
+ //Recursing
+ RandomAccessIter lastPos = bin_cache[cache_offset];
+ //Skip this loop for empties
+ for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
+ lastPos = bin_cache[u], ++u) {
+ size_t count = bin_cache[u] - lastPos;
+ //don't sort unless there are at least two items to Compare
+ if (count < 2)
+ continue;
+ //using std::sort if its worst-case is better
+ if (count < max_size)
+ std::sort(lastPos, bin_cache[u], comp);
+ else
+ string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
+ Get_length, Compare>
+ (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
+ bin_sizes, get_character, length, comp);
+ }
+ }
+
+ //Sorts strings in reverse order, with empties at the end
+ template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
+ class Get_length, class Compare>
+ inline void
+ reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
+ size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
+ unsigned cache_offset, size_t *bin_sizes,
+ Get_char get_character, Get_length length, Compare comp)
+ {
+ //This section makes handling of long identical substrings much faster
+ //with a mild average performance impact.
+ RandomAccessIter curr = first;
+ //Iterate to the end of the empties. If all empty, return
+ while (length(*curr) <= char_offset) {
+ if (++curr == last)
+ return;
+ }
+ //Getting the last non-empty
+ while (length(*(--last)) <= char_offset);
+ ++last;
+ //Offsetting on identical characters. This section works
+ //a character at a time for optimal worst-case performance.
+ update_offset(curr, last, char_offset, get_character, length);
+
+ const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
+ //Equal worst-case of radix and comparison is when bin_count = n*log(n).
+ const unsigned max_size = bin_count;
+ const unsigned membin_count = bin_count + 1;
+ const unsigned max_bin = bin_count - 1;
+ unsigned cache_end;
+ RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
+ cache_end, membin_count);
+ RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
+
+ //Calculating the size of each bin; this takes roughly 10% of runtime
+ for (RandomAccessIter current = first; current != last; ++current) {
+ if (length(*current) <= char_offset) {
+ bin_sizes[bin_count]++;
+ }
+ else
+ bin_sizes[max_bin - get_character((*current), char_offset)]++;
+ }
+ //Assign the bin positions
+ bin_cache[cache_offset] = first;
+ for (unsigned u = 0; u < membin_count - 1; u++)
+ bin_cache[cache_offset + u + 1] =
+ bin_cache[cache_offset + u] + bin_sizes[u];
+
+ //Swap into place
+ RandomAccessIter next_bin_start = last;
+ //handling empty bins
+ RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
+ RandomAccessIter lastFull = *local_bin;
+ RandomAccessIter * target_bin;
+ //Iterating over each element in the bin of empties
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //empties belong in this bin
+ while (length(*current) > char_offset) {
+ target_bin = end_bin - get_character((*current), char_offset);
+ iter_swap(current, (*target_bin)++);
+ }
+ }
+ *local_bin = next_bin_start;
+ next_bin_start = first;
+ //iterate backwards to find the last bin with elements in it
+ //this saves iterations in multiple loops
+ unsigned last_bin = max_bin;
+ for (; last_bin && !bin_sizes[last_bin]; --last_bin);
+ //This dominates runtime, mostly in the swap and bin lookups
+ for (unsigned u = 0; u < last_bin; ++u) {
+ local_bin = bins + u;
+ next_bin_start += bin_sizes[u];
+ //Iterating over each element in this bin
+ for (RandomAccessIter current = *local_bin; current < next_bin_start;
+ ++current) {
+ //Swapping into place until the correct element has been swapped in
+ for (target_bin = end_bin - get_character((*current), char_offset);
+ target_bin != local_bin;
+ target_bin = end_bin - get_character((*current), char_offset))
+ iter_swap(current, (*target_bin)++);
+ }
+ *local_bin = next_bin_start;
+ }
+ bins[last_bin] = lastFull;
+ //Recursing
+ RandomAccessIter lastPos = first;
+ //Skip this loop for empties
+ for (unsigned u = cache_offset; u <= cache_offset + last_bin;
+ lastPos = bin_cache[u], ++u) {
+ size_t count = bin_cache[u] - lastPos;
+ //don't sort unless there are at least two items to Compare
+ if (count < 2)
+ continue;
+ //using std::sort if its worst-case is better
+ if (count < max_size)
+ std::sort(lastPos, bin_cache[u], comp);
+ else
+ reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type,
+ Get_char, Get_length, Compare>
+ (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
+ bin_sizes, get_character, length, comp);
+ }
+ }
+
+ //Holds the bin vector and makes the initial recursive call
+ template <class RandomAccessIter, class Unsigned_char_type>
+ inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+ >::type
+ string_sort(RandomAccessIter first, RandomAccessIter last,
+ Unsigned_char_type)
+ {
+ size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
+ std::vector<RandomAccessIter> bin_cache;
+ string_sort_rec<RandomAccessIter, Unsigned_char_type>
+ (first, last, 0, bin_cache, 0, bin_sizes);
+ }
+
+ template <class RandomAccessIter, class Unsigned_char_type>
+ inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
+ >::type
+ string_sort(RandomAccessIter first, RandomAccessIter last,
+ Unsigned_char_type)
+ {
+ //Warning that we're using std::sort, even though string_sort was called
+ BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
+ std::sort(first, last);
+ }
+
+ //Holds the bin vector and makes the initial recursive call
+ template <class RandomAccessIter, class Unsigned_char_type>
+ inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+ >::type
+ reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
+ Unsigned_char_type)
+ {
+ size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
+ std::vector<RandomAccessIter> bin_cache;
+ reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
+ (first, last, 0, bin_cache, 0, bin_sizes);
+ }
+
+ template <class RandomAccessIter, class Unsigned_char_type>
+ inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
+ >::type
+ reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
+ Unsigned_char_type)
+ {
+ typedef typename std::iterator_traits<RandomAccessIter>::value_type
+ Data_type;
+ //Warning that we're using std::sort, even though string_sort was called
+ BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
+ std::sort(first, last, std::greater<Data_type>());
+ }
+
+ //Holds the bin vector and makes the initial recursive call
+ template <class RandomAccessIter, class Get_char, class Get_length,
+ class Unsigned_char_type>
+ inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+ >::type
+ string_sort(RandomAccessIter first, RandomAccessIter last,
+ Get_char get_character, Get_length length, Unsigned_char_type)
+ {
+ size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
+ std::vector<RandomAccessIter> bin_cache;
+ string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
+ Get_length>(first, last, 0, bin_cache, 0, bin_sizes, get_character, length);
+ }
+
+ template <class RandomAccessIter, class Get_char, class Get_length,
+ class Unsigned_char_type>
+ inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
+ >::type
+ string_sort(RandomAccessIter first, RandomAccessIter last,
+ Get_char get_character, Get_length length, Unsigned_char_type)
+ {
+ //Warning that we're using std::sort, even though string_sort was called
+ BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
+ std::sort(first, last);
+ }
+
+ //Holds the bin vector and makes the initial recursive call
+ template <class RandomAccessIter, class Get_char, class Get_length,
+ class Compare, class Unsigned_char_type>
+ inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+ >::type
+ string_sort(RandomAccessIter first, RandomAccessIter last,
+ Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
+ {
+ size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
+ std::vector<RandomAccessIter> bin_cache;
+ string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char
+ , Get_length, Compare>
+ (first, last, 0, bin_cache, 0, bin_sizes, get_character, length, comp);
+ }
+
+ //disable_if_c was refusing to compile, so rewrote to use enable_if_c
+ template <class RandomAccessIter, class Get_char, class Get_length,
+ class Compare, class Unsigned_char_type>
+ inline typename boost::enable_if_c< (sizeof(Unsigned_char_type) > 2), void
+ >::type
+ string_sort(RandomAccessIter first, RandomAccessIter last,
+ Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
+ {
+ //Warning that we're using std::sort, even though string_sort was called
+ BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
+ std::sort(first, last, comp);
+ }
+
+ //Holds the bin vector and makes the initial recursive call
+ template <class RandomAccessIter, class Get_char, class Get_length,
+ class Compare, class Unsigned_char_type>
+ inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
+ >::type
+ reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
+ Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
+ {
+ size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
+ std::vector<RandomAccessIter> bin_cache;
+ reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
+ Get_length, Compare>
+ (first, last, 0, bin_cache, 0, bin_sizes, get_character, length, comp);
+ }
+
+ template <class RandomAccessIter, class Get_char, class Get_length,
+ class Compare, class Unsigned_char_type>
+ inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
+ >::type
+ reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
+ Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
+ {
+ //Warning that we're using std::sort, even though string_sort was called
+ BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
+ std::sort(first, last, comp);
+ }
+ }
+}
+}
+}
+
+#endif