1 //Templated hybrid string_sort
3 // Copyright Steven J. Ross 2001 - 2009.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
8 // See http://www.boost.org/libs/sort/ for library home page.
11 Some improvements suggested by:
12 Phil Endecott and Frank Gennari
15 #ifndef BOOST_STRING_SORT_HPP
16 #define BOOST_STRING_SORT_HPP
21 #include <boost/static_assert.hpp>
22 #include <boost/sort/spreadsort/detail/constants.hpp>
23 #include <boost/sort/spreadsort/detail/string_sort.hpp>
27 namespace spreadsort {
29 /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.\n
30 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
32 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
33 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
35 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
36 so @c integer_sort is asymptotically faster
37 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
38 so its worst-case with default settings for 32-bit integers is
39 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
40 Some performance plots of runtime vs. n and log(range) are provided:\n
41 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
42 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
44 \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
45 \tparam Unsigned_char_type Unsigned character type used for string.
46 \param[in] first Iterator pointer to first element.
47 \param[in] last Iterator pointing to one beyond the end of data.
48 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
50 \pre [@c first, @c last) is a valid range.
51 \pre @c RandomAccessIter @c value_type is mutable.
52 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
53 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
54 which returns an integer-type right-shifted a specified number of bits.
55 \post The elements in the range [@c first, @c last) are sorted in ascending order.
57 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
58 the right shift, subtraction of right-shifted elements, functors,
59 or any operations on iterators throw.
61 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
62 \warning Invalid arguments cause undefined behaviour.
63 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
64 enabling faster generic-programming.
66 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
67 \remark * N is @c last - @c first,
68 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
69 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
73 template <class RandomAccessIter, class Unsigned_char_type>
74 inline void string_sort(RandomAccessIter first, RandomAccessIter last,
75 Unsigned_char_type unused)
77 //Don't sort if it's too small to optimize
78 if (last - first < detail::min_sort_size)
79 std::sort(first, last);
81 detail::string_sort(first, last, unused);
85 /*! \brief String sort algorithm using random access iterators, wraps using default of unsigned char.
86 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
88 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
89 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
90 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
91 so @c integer_sort is asymptotically faster
92 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
93 so its worst-case with default settings for 32-bit integers is
94 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
95 Some performance plots of runtime vs. n and log(range) are provided:\n
96 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
98 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
100 \param[in] first Iterator pointer to first element.
101 \param[in] last Iterator pointing to one beyond the end of data.
103 \pre [@c first, @c last) is a valid range.
104 \pre @c RandomAccessIter @c value_type is mutable.
105 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
106 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
107 which returns an integer-type right-shifted a specified number of bits.
108 \post The elements in the range [@c first, @c last) are sorted in ascending order.
110 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
111 the right shift, subtraction of right-shifted elements, functors,
112 or any operations on iterators throw.
114 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
115 \warning Invalid arguments cause undefined behaviour.
116 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
117 enabling faster generic-programming.
119 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
120 \remark * N is @c last - @c first,
121 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
122 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
125 template <class RandomAccessIter>
126 inline void string_sort(RandomAccessIter first, RandomAccessIter last)
128 unsigned char unused = '\0';
129 string_sort(first, last, unused);
133 /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.
135 (All variants fall back to @c std::sort if the data size is too small, < detail::min_sort_size).
137 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
138 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
139 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
140 so @c integer_sort is asymptotically faster
141 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
142 so its worst-case with default settings for 32-bit integers is
143 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
144 Some performance plots of runtime vs. n and log(range) are provided:\n
145 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
147 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
150 \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
151 \tparam Comp Functor type to use for comparison.
152 \tparam Unsigned_char_type Unsigned character type used for string.
154 \param[in] first Iterator pointer to first element.
155 \param[in] last Iterator pointing to one beyond the end of data.
156 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
157 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
159 \pre [@c first, @c last) is a valid range.
160 \pre @c RandomAccessIter @c value_type is mutable.
161 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
162 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
163 which returns an integer-type right-shifted a specified number of bits.
164 \post The elements in the range [@c first, @c last) are sorted in ascending order.
168 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
169 the right shift, subtraction of right-shifted elements, functors,
170 or any operations on iterators throw.
172 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
173 \warning Invalid arguments cause undefined behaviour.
174 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
175 enabling faster generic-programming.
177 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
178 \remark * N is @c last - @c first,
179 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
180 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
182 template <class RandomAccessIter, class Compare, class Unsigned_char_type>
183 inline void reverse_string_sort(RandomAccessIter first,
184 RandomAccessIter last, Compare comp, Unsigned_char_type unused)
186 //Don't sort if it's too small to optimize.
187 if (last - first < detail::min_sort_size)
188 std::sort(first, last, comp);
190 detail::reverse_string_sort(first, last, unused);
194 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
196 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
198 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
199 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
200 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
201 so @c integer_sort is asymptotically faster
202 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
203 so its worst-case with default settings for 32-bit integers is
204 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
205 Some performance plots of runtime vs. n and log(range) are provided:\n
206 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
208 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
210 \param[in] first Iterator pointer to first element.
211 \param[in] last Iterator pointing to one beyond the end of data.
212 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
214 \pre [@c first, @c last) is a valid range.
215 \pre @c RandomAccessIter @c value_type is mutable.
216 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
217 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
218 which returns an integer-type right-shifted a specified number of bits.
219 \post The elements in the range [@c first, @c last) are sorted in ascending order.
223 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
224 the right shift, subtraction of right-shifted elements, functors,
225 or any operations on iterators throw.
227 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
228 \warning Invalid arguments cause undefined behaviour.
229 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
230 enabling faster generic-programming.
232 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
233 \remark * N is @c last - @c first,
234 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
235 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
237 template <class RandomAccessIter, class Compare>
238 inline void reverse_string_sort(RandomAccessIter first,
239 RandomAccessIter last, Compare comp)
241 unsigned char unused = '\0';
242 reverse_string_sort(first, last, comp, unused);
246 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
248 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
250 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
251 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
252 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
253 so @c integer_sort is asymptotically faster
254 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
255 so its worst-case with default settings for 32-bit integers is
256 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
257 Some performance plots of runtime vs. n and log(range) are provided:\n
258 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
260 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
262 \param[in] first Iterator pointer to first element.
263 \param[in] last Iterator pointing to one beyond the end of data.
264 \param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
265 \param[in] length Functor to get the length of the string in characters.
267 \pre [@c first, @c last) is a valid range.
268 \pre @c RandomAccessIter @c value_type is mutable.
269 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
270 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
271 which returns an integer-type right-shifted a specified number of bits.
272 \post The elements in the range [@c first, @c last) are sorted in ascending order.
276 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
277 the right shift, subtraction of right-shifted elements, functors,
278 or any operations on iterators throw.
280 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
281 \warning Invalid arguments cause undefined behaviour.
282 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
283 enabling faster generic-programming.
285 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
286 \remark * N is @c last - @c first,
287 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
288 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
291 template <class RandomAccessIter, class Get_char, class Get_length>
292 inline void string_sort(RandomAccessIter first, RandomAccessIter last,
293 Get_char getchar, Get_length length)
295 //Don't sort if it's too small to optimize
296 if (last - first < detail::min_sort_size)
297 std::sort(first, last);
299 //skipping past empties, which allows us to get the character type
300 //.empty() is not used so as not to require a user declaration of it
301 while (!length(*first)) {
305 detail::string_sort(first, last, getchar, length, getchar((*first), 0));
311 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
313 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
315 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
316 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
317 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
318 so @c integer_sort is asymptotically faster
319 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
320 so its worst-case with default settings for 32-bit integers is
321 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
322 Some performance plots of runtime vs. n and log(range) are provided:\n
323 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
325 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
328 \param[in] first Iterator pointer to first element.
329 \param[in] last Iterator pointing to one beyond the end of data.
330 \param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
331 \param[in] length Functor to get the length of the string in characters.
332 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
335 \pre [@c first, @c last) is a valid range.
336 \pre @c RandomAccessIter @c value_type is mutable.
337 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
338 \post The elements in the range [@c first, @c last) are sorted in ascending order.
342 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
343 the right shift, subtraction of right-shifted elements, functors,
344 or any operations on iterators throw.
346 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
347 \warning Invalid arguments cause undefined behaviour.
348 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
349 enabling faster generic-programming.
351 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
352 \remark * N is @c last - @c first,
353 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
354 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
357 template <class RandomAccessIter, class Get_char, class Get_length,
359 inline void string_sort(RandomAccessIter first, RandomAccessIter last,
360 Get_char getchar, Get_length length, Compare comp)
362 //Don't sort if it's too small to optimize
363 if (last - first < detail::min_sort_size)
364 std::sort(first, last, comp);
366 //skipping past empties, which allows us to get the character type
367 //.empty() is not used so as not to require a user declaration of it
368 while (!length(*first)) {
372 detail::string_sort(first, last, getchar, length, comp,
373 getchar((*first), 0));
378 /*! \brief Reverse String sort algorithm using random access iterators.
380 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
382 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
383 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
384 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
385 so @c integer_sort is asymptotically faster
386 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
387 so its worst-case with default settings for 32-bit integers is
388 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
389 Some performance plots of runtime vs. n and log(range) are provided:\n
390 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
392 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
395 \param[in] first Iterator pointer to first element.
396 \param[in] last Iterator pointing to one beyond the end of data.
397 \param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
398 \param[in] length Functor to get the length of the string in characters.
399 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
402 \pre [@c first, @c last) is a valid range.
403 \pre @c RandomAccessIter @c value_type is mutable.
404 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
405 \post The elements in the range [@c first, @c last) are sorted in ascending order.
409 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
410 the right shift, subtraction of right-shifted elements, functors,
411 or any operations on iterators throw.
413 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
414 \warning Invalid arguments cause undefined behaviour.
415 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
416 enabling faster generic-programming.
418 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
419 \remark * N is @c last - @c first,
420 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
421 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
424 template <class RandomAccessIter, class Get_char, class Get_length,
426 inline void reverse_string_sort(RandomAccessIter first,
427 RandomAccessIter last, Get_char getchar, Get_length length, Compare comp)
429 //Don't sort if it's too small to optimize
430 if (last - first < detail::min_sort_size)
431 std::sort(first, last, comp);
433 //skipping past empties, which allows us to get the character type
434 //.empty() is not used so as not to require a user declaration of it
435 while (!length(*(--last))) {
436 //If there is just one non-empty at the beginning, this is sorted
440 //making last just after the end of the non-empty part of the array
441 detail::reverse_string_sort(first, last + 1, getchar, length, comp,
442 getchar((*last), 0));