]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/sort/spreadsort/string_sort.hpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / sort / spreadsort / string_sort.hpp
1 //Templated hybrid string_sort
2
3 // Copyright Steven J. Ross 2001 - 2009.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7
8 // See http://www.boost.org/libs/sort/ for library home page.
9
10 /*
11 Some improvements suggested by:
12 Phil Endecott and Frank Gennari
13 */
14
15 #ifndef BOOST_STRING_SORT_HPP
16 #define BOOST_STRING_SORT_HPP
17 #include <algorithm>
18 #include <vector>
19 #include <cstring>
20 #include <limits>
21 #include <boost/static_assert.hpp>
22 #include <boost/sort/spreadsort/detail/constants.hpp>
23 #include <boost/sort/spreadsort/detail/string_sort.hpp>
24 #include <boost/range/begin.hpp>
25 #include <boost/range/end.hpp>
26
27 namespace boost {
28 namespace sort {
29 namespace spreadsort {
30
31 /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.\n
32 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
33
34 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
35 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
36 \par
37 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
38 so @c string_sort is asymptotically faster
39 than pure comparison-based algorithms. \n\n
40 Some performance plots of runtime vs. n and log(range) are provided:\n
41 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
42 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
43
44 \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
45 \tparam Unsigned_char_type Unsigned character type used for string.
46 \param[in] first Iterator pointer to first element.
47 \param[in] last Iterator pointing to one beyond the end of data.
48 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
49
50 \pre [@c first, @c last) is a valid range.
51 \pre @c RandomAccessIter @c value_type is mutable.
52 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
53 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
54 which returns an integer-type right-shifted a specified number of bits.
55 \post The elements in the range [@c first, @c last) are sorted in ascending order.
56
57 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
58 the right shift, subtraction of right-shifted elements, functors,
59 or any operations on iterators throw.
60
61 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
62 \warning Invalid arguments cause undefined behaviour.
63 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
64 enabling faster generic-programming.
65
66 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
67 \remark * N is @c last - @c first,
68 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
69 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
70
71 */
72
73 template <class RandomAccessIter, class Unsigned_char_type>
74 inline void string_sort(RandomAccessIter first, RandomAccessIter last,
75 Unsigned_char_type unused)
76 {
77 //Don't sort if it's too small to optimize
78 if (last - first < detail::min_sort_size)
79 boost::sort::pdqsort(first, last);
80 else
81 detail::string_sort(first, last, unused);
82 }
83
84 /*! \brief String sort algorithm using range, allowing character-type overloads.\n
85 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
86
87 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
88 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
89 \par
90 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
91 so @c string_sort is asymptotically faster
92 than pure comparison-based algorithms. \n\n
93 Some performance plots of runtime vs. n and log(range) are provided:\n
94 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
95 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
96
97 \tparam Unsigned_char_type Unsigned character type used for string.
98 \param[in] range Range [first, last) for sorting.
99 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
100
101 \pre [@c first, @c last) is a valid range.
102 \post The elements in the range [@c first, @c last) are sorted in ascending order.
103
104 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
105 the right shift, subtraction of right-shifted elements, functors,
106 or any operations on iterators throw.
107
108 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
109 \warning Invalid arguments cause undefined behaviour.
110 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
111 enabling faster generic-programming.
112
113 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
114 \remark * N is @c last - @c first,
115 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
116 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
117
118 */
119
120 template <class Range, class Unsigned_char_type>
121 inline void string_sort(Range& range, Unsigned_char_type unused)
122 {
123 string_sort(boost::begin(range), boost::end(range), unused);
124 }
125
126 /*! \brief String sort algorithm using random access iterators, wraps using default of unsigned char.
127 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
128
129 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
130 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
131 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
132 so @c string_sort is asymptotically faster
133 than pure comparison-based algorithms. \n\n
134 Some performance plots of runtime vs. n and log(range) are provided:\n
135 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
136 \n
137 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
138
139 \param[in] first Iterator pointer to first element.
140 \param[in] last Iterator pointing to one beyond the end of data.
141
142 \pre [@c first, @c last) is a valid range.
143 \pre @c RandomAccessIter @c value_type is mutable.
144 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
145 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
146 which returns an integer-type right-shifted a specified number of bits.
147 \post The elements in the range [@c first, @c last) are sorted in ascending order.
148
149 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
150 the right shift, subtraction of right-shifted elements, functors,
151 or any operations on iterators throw.
152
153 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
154 \warning Invalid arguments cause undefined behaviour.
155 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
156 enabling faster generic-programming.
157
158 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
159 \remark * N is @c last - @c first,
160 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
161 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
162
163 */
164 template <class RandomAccessIter>
165 inline void string_sort(RandomAccessIter first, RandomAccessIter last)
166 {
167 unsigned char unused = '\0';
168 string_sort(first, last, unused);
169 }
170
171 /*! \brief String sort algorithm using range, wraps using default of unsigned char.
172 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
173
174 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
175 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
176 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
177 so @c string_sort is asymptotically faster
178 than pure comparison-based algorithms. \n\n
179 Some performance plots of runtime vs. n and log(range) are provided:\n
180 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
181 \n
182 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
183
184 \param[in] range Range [first, last) for sorting.
185
186 \pre [@c first, @c last) is a valid range.
187 \post The elements in the range [@c first, @c last) are sorted in ascending order.
188
189 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
190 the right shift, subtraction of right-shifted elements, functors,
191 or any operations on iterators throw.
192
193 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
194 \warning Invalid arguments cause undefined behaviour.
195 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
196 enabling faster generic-programming.
197
198 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
199 \remark * N is @c last - @c first,
200 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
201 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
202
203 */
204 template <class Range>
205 inline void string_sort(Range& range)
206 {
207 string_sort(boost::begin(range), boost::end(range));
208 }
209
210 /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.
211
212 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < detail::min_sort_size).
213
214 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
215 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
216 \par
217 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
218 so @c string_sort is asymptotically faster
219 than pure comparison-based algorithms. \n\n
220 Some performance plots of runtime vs. n and log(range) are provided:\n
221 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
222 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
223
224
225 \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
226 \tparam Comp Functor type to use for comparison.
227 \tparam Unsigned_char_type Unsigned character type used for string.
228
229 \param[in] first Iterator pointer to first element.
230 \param[in] last Iterator pointing to one beyond the end of data.
231 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
232 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
233
234 \pre [@c first, @c last) is a valid range.
235 \pre @c RandomAccessIter @c value_type is mutable.
236 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
237 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
238 which returns an integer-type right-shifted a specified number of bits.
239 \post The elements in the range [@c first, @c last) are sorted in ascending order.
240
241 \return @c void.
242
243 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
244 the right shift, subtraction of right-shifted elements, functors,
245 or any operations on iterators throw.
246
247 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
248 \warning Invalid arguments cause undefined behaviour.
249 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
250 enabling faster generic-programming.
251
252 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
253 \remark * N is @c last - @c first,
254 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
255 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
256 */
257 template <class RandomAccessIter, class Compare, class Unsigned_char_type>
258 inline void reverse_string_sort(RandomAccessIter first,
259 RandomAccessIter last, Compare comp, Unsigned_char_type unused)
260 {
261 //Don't sort if it's too small to optimize.
262 if (last - first < detail::min_sort_size)
263 boost::sort::pdqsort(first, last, comp);
264 else
265 detail::reverse_string_sort(first, last, unused);
266 }
267
268 /*! \brief String sort algorithm using range, allowing character-type overloads.
269
270 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < detail::min_sort_size).
271
272 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
273 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
274 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
275 so @c string_sort is asymptotically faster
276 than pure comparison-based algorithms. \n\n
277 Some performance plots of runtime vs. n and log(range) are provided:\n
278 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
279 \n
280 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
281
282
283 \tparam Comp Functor type to use for comparison.
284 \tparam Unsigned_char_type Unsigned character type used for string.
285
286 \param[in] range Range [first, last) for sorting.
287 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
288 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
289
290 \pre [@c first, @c last) is a valid range.
291 \post The elements in the range [@c first, @c last) are sorted in ascending order.
292
293 \return @c void.
294
295 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
296 the right shift, subtraction of right-shifted elements, functors,
297 or any operations on iterators throw.
298
299 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
300 \warning Invalid arguments cause undefined behaviour.
301 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
302 enabling faster generic-programming.
303
304 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
305 \remark * N is @c last - @c first,
306 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
307 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
308 */
309 template <class Range, class Compare, class Unsigned_char_type>
310 inline void reverse_string_sort(Range& range, Compare comp, Unsigned_char_type unused)
311 {
312 reverse_string_sort(boost::begin(range), boost::end(range), comp, unused);
313 }
314
315 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
316
317 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
318
319 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
320 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
321 \par
322 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
323 so @c string_sort is asymptotically faster
324 than pure comparison-based algorithms.\n\n
325 Some performance plots of runtime vs. n and log(range) are provided:\n
326 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
327 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
328
329 \param[in] first Iterator pointer to first element.
330 \param[in] last Iterator pointing to one beyond the end of data.
331 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
332
333 \pre [@c first, @c last) is a valid range.
334 \pre @c RandomAccessIter @c value_type is mutable.
335 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
336 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
337 which returns an integer-type right-shifted a specified number of bits.
338 \post The elements in the range [@c first, @c last) are sorted in ascending order.
339
340 \return @c void.
341
342 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
343 the right shift, subtraction of right-shifted elements, functors,
344 or any operations on iterators throw.
345
346 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
347 \warning Invalid arguments cause undefined behaviour.
348 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
349 enabling faster generic-programming.
350
351 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
352 \remark * N is @c last - @c first,
353 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
354 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
355 */
356 template <class RandomAccessIter, class Compare>
357 inline void reverse_string_sort(RandomAccessIter first,
358 RandomAccessIter last, Compare comp)
359 {
360 unsigned char unused = '\0';
361 reverse_string_sort(first, last, comp, unused);
362 }
363
364 /*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
365
366 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
367
368 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
369 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
370 \par
371 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
372 so @c string_sort is asymptotically faster
373 than pure comparison-based algorithms. \n\n
374 Some performance plots of runtime vs. n and log(range) are provided:\n
375 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
376 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
377
378 \param[in] range Range [first, last) for sorting.
379 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
380
381 \pre [@c first, @c last) is a valid range.
382 \post The elements in the range [@c first, @c last) are sorted in ascending order.
383
384 \return @c void.
385
386 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
387 the right shift, subtraction of right-shifted elements, functors,
388 or any operations on iterators throw.
389
390 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
391 \warning Invalid arguments cause undefined behaviour.
392 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
393 enabling faster generic-programming.
394
395 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
396 \remark * N is @c last - @c first,
397 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
398 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
399 */
400 template <class Range, class Compare>
401 inline void reverse_string_sort(Range& range, Compare comp)
402 {
403 reverse_string_sort(boost::begin(range), boost::end(range), comp);
404 }
405
406 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
407
408 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
409
410 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
411 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
412 \par
413 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
414 so @c string_sort is asymptotically faster
415 than pure comparison-based algorithms. \n\n
416 Some performance plots of runtime vs. n and log(range) are provided:\n
417 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
418 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
419
420 \param[in] first Iterator pointer to first element.
421 \param[in] last Iterator pointing to one beyond the end of data.
422 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
423 \param[in] length Functor to get the length of the string in characters.
424
425 \pre [@c first, @c last) is a valid range.
426 \pre @c RandomAccessIter @c value_type is mutable.
427 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
428 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
429 which returns an integer-type right-shifted a specified number of bits.
430 \post The elements in the range [@c first, @c last) are sorted in ascending order.
431
432 \return @c void.
433
434 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
435 the right shift, subtraction of right-shifted elements, functors,
436 or any operations on iterators throw.
437
438 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
439 \warning Invalid arguments cause undefined behaviour.
440 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
441 enabling faster generic-programming.
442
443 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
444 \remark * N is @c last - @c first,
445 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
446 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
447
448 */
449 template <class RandomAccessIter, class Get_char, class Get_length>
450 inline void string_sort(RandomAccessIter first, RandomAccessIter last,
451 Get_char get_character, Get_length length)
452 {
453 //Don't sort if it's too small to optimize
454 if (last - first < detail::min_sort_size)
455 boost::sort::pdqsort(first, last);
456 else {
457 //skipping past empties, which allows us to get the character type
458 //.empty() is not used so as not to require a user declaration of it
459 while (!length(*first)) {
460 if (++first == last)
461 return;
462 }
463 detail::string_sort(first, last, get_character, length, get_character((*first), 0));
464 }
465 }
466
467 /*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
468
469 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
470
471 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
472 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
473 \par
474 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
475 so @c string_sort is asymptotically faster
476 than pure comparison-based algorithms. \n\n
477 Some performance plots of runtime vs. n and log(range) are provided:\n
478 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
479 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
480
481 \param[in] range Range [first, last) for sorting.
482 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
483 \param[in] length Functor to get the length of the string in characters.
484
485 \pre [@c first, @c last) is a valid range.
486 \post The elements in the range [@c first, @c last) are sorted in ascending order.
487
488 \return @c void.
489
490 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
491 the right shift, subtraction of right-shifted elements, functors,
492 or any operations on iterators throw.
493
494 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
495 \warning Invalid arguments cause undefined behaviour.
496 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
497 enabling faster generic-programming.
498
499 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
500 \remark * N is @c last - @c first,
501 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
502 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
503
504 */
505 template <class Range, class Get_char, class Get_length>
506 inline void string_sort(Range& range, Get_char get_character, Get_length length)
507 {
508 string_sort(boost::begin(range), boost::end(range), get_character, length);
509 }
510
511
512 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
513
514 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
515
516 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
517 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
518 \par
519 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
520 so @c string_sort is asymptotically faster
521 than pure comparison-based algorithms. \n\n
522 Some performance plots of runtime vs. n and log(range) are provided:\n
523 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
524 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
525
526
527 \param[in] first Iterator pointer to first element.
528 \param[in] last Iterator pointing to one beyond the end of data.
529 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
530 \param[in] length Functor to get the length of the string in characters.
531 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
532
533
534 \pre [@c first, @c last) is a valid range.
535 \pre @c RandomAccessIter @c value_type is mutable.
536 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
537 \post The elements in the range [@c first, @c last) are sorted in ascending order.
538
539 \return @c void.
540
541 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
542 the right shift, subtraction of right-shifted elements, functors,
543 or any operations on iterators throw.
544
545 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
546 \warning Invalid arguments cause undefined behaviour.
547 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
548 enabling faster generic-programming.
549
550 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
551 \remark * N is @c last - @c first,
552 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
553 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
554
555 */
556 template <class RandomAccessIter, class Get_char, class Get_length,
557 class Compare>
558 inline void string_sort(RandomAccessIter first, RandomAccessIter last,
559 Get_char get_character, Get_length length, Compare comp)
560 {
561 //Don't sort if it's too small to optimize
562 if (last - first < detail::min_sort_size)
563 boost::sort::pdqsort(first, last, comp);
564 else {
565 //skipping past empties, which allows us to get the character type
566 //.empty() is not used so as not to require a user declaration of it
567 while (!length(*first)) {
568 if (++first == last)
569 return;
570 }
571 detail::string_sort(first, last, get_character, length, comp,
572 get_character((*first), 0));
573 }
574 }
575
576 /*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
577
578 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
579
580 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
581 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
582 \par
583 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
584 so @c string_sort is asymptotically faster
585 than pure comparison-based algorithms. \n\n
586 Some performance plots of runtime vs. n and log(range) are provided:\n
587 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
588 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
589
590
591 \param[in] range Range [first, last) for sorting.
592 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
593 \param[in] length Functor to get the length of the string in characters.
594 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
595
596
597 \pre [@c first, @c last) is a valid range.
598 \post The elements in the range [@c first, @c last) are sorted in ascending order.
599
600 \return @c void.
601
602 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
603 the right shift, subtraction of right-shifted elements, functors,
604 or any operations on iterators throw.
605
606 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
607 \warning Invalid arguments cause undefined behaviour.
608 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
609 enabling faster generic-programming.
610
611 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
612 \remark * N is @c last - @c first,
613 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
614 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
615
616 */
617 template <class Range, class Get_char, class Get_length, class Compare>
618 inline void string_sort(Range& range,
619 Get_char get_character, Get_length length, Compare comp)
620 {
621 string_sort(boost::begin(range), boost::end(range), get_character, length, comp);
622 }
623
624 /*! \brief Reverse String sort algorithm using random access iterators.
625
626 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
627
628 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
629 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
630 \par
631 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
632 so @c string_sort is asymptotically faster
633 than pure comparison-based algorithms. \n\n
634 Some performance plots of runtime vs. n and log(range) are provided:\n
635 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
636 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
637
638
639 \param[in] first Iterator pointer to first element.
640 \param[in] last Iterator pointing to one beyond the end of data.
641 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
642 \param[in] length Functor to get the length of the string in characters.
643 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
644
645
646 \pre [@c first, @c last) is a valid range.
647 \pre @c RandomAccessIter @c value_type is mutable.
648 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
649 \post The elements in the range [@c first, @c last) are sorted in ascending order.
650
651 \return @c void.
652
653 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
654 the right shift, subtraction of right-shifted elements, functors,
655 or any operations on iterators throw.
656
657 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
658 \warning Invalid arguments cause undefined behaviour.
659 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
660 enabling faster generic-programming.
661
662 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
663 \remark * N is @c last - @c first,
664 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
665 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
666
667 */
668 template <class RandomAccessIter, class Get_char, class Get_length,
669 class Compare>
670 inline void reverse_string_sort(RandomAccessIter first,
671 RandomAccessIter last, Get_char get_character, Get_length length, Compare comp)
672 {
673 //Don't sort if it's too small to optimize
674 if (last - first < detail::min_sort_size)
675 boost::sort::pdqsort(first, last, comp);
676 else {
677 //skipping past empties, which allows us to get the character type
678 //.empty() is not used so as not to require a user declaration of it
679 while (!length(*(--last))) {
680 //If there is just one non-empty at the beginning, this is sorted
681 if (first == last)
682 return;
683 }
684 //making last just after the end of the non-empty part of the array
685 detail::reverse_string_sort(first, last + 1, get_character, length, comp,
686 get_character((*last), 0));
687 }
688 }
689
690 /*! \brief Reverse String sort algorithm using range.
691
692 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
693
694 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
695 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
696 \par
697 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
698 so @c string_sort is asymptotically faster
699 than pure comparison-based algorithms. \n\n
700 Some performance plots of runtime vs. n and log(range) are provided:\n
701 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
702 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
703
704
705 \param[in] range Range [first, last) for sorting.
706 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
707 \param[in] length Functor to get the length of the string in characters.
708 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
709
710
711 \pre [@c first, @c last) is a valid range.
712 \post The elements in the range [@c first, @c last) are sorted in ascending order.
713
714 \return @c void.
715
716 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
717 the right shift, subtraction of right-shifted elements, functors,
718 or any operations on iterators throw.
719
720 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
721 \warning Invalid arguments cause undefined behaviour.
722 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
723 enabling faster generic-programming.
724
725 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
726 \remark * N is @c last - @c first,
727 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
728 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
729
730 */
731 template <class Range, class Get_char, class Get_length,
732 class Compare>
733 inline void reverse_string_sort(Range& range, Get_char get_character, Get_length length, Compare comp)
734 {
735 reverse_string_sort(boost::begin(range), boost::end(range), get_character, length, comp);
736 }
737 }
738 }
739 }
740
741 #endif