]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/sort/include/boost/sort/spreadsort/string_sort.hpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / sort / include / boost / sort / spreadsort / string_sort.hpp
1 //Templated hybrid string_sort
2
3 // Copyright Steven J. Ross 2001 - 2009.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7
8 // See http://www.boost.org/libs/sort/ for library home page.
9
10 /*
11 Some improvements suggested by:
12 Phil Endecott and Frank Gennari
13 */
14
15 #ifndef BOOST_STRING_SORT_HPP
16 #define BOOST_STRING_SORT_HPP
17 #include <algorithm>
18 #include <vector>
19 #include <cstring>
20 #include <limits>
21 #include <boost/static_assert.hpp>
22 #include <boost/sort/spreadsort/detail/constants.hpp>
23 #include <boost/sort/spreadsort/detail/string_sort.hpp>
24
25 namespace boost {
26 namespace sort {
27 namespace spreadsort {
28
29 /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.\n
30 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
31
32 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
33 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
34 \par
35 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
36 so @c integer_sort is asymptotically faster
37 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
38 so its worst-case with default settings for 32-bit integers is
39 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
40 Some performance plots of runtime vs. n and log(range) are provided:\n
41 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
42 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
43
44 \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
45 \tparam Unsigned_char_type Unsigned character type used for string.
46 \param[in] first Iterator pointer to first element.
47 \param[in] last Iterator pointing to one beyond the end of data.
48 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
49
50 \pre [@c first, @c last) is a valid range.
51 \pre @c RandomAccessIter @c value_type is mutable.
52 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
53 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
54 which returns an integer-type right-shifted a specified number of bits.
55 \post The elements in the range [@c first, @c last) are sorted in ascending order.
56
57 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
58 the right shift, subtraction of right-shifted elements, functors,
59 or any operations on iterators throw.
60
61 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
62 \warning Invalid arguments cause undefined behaviour.
63 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
64 enabling faster generic-programming.
65
66 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
67 \remark * N is @c last - @c first,
68 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
69 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
70
71 */
72
73 template <class RandomAccessIter, class Unsigned_char_type>
74 inline void string_sort(RandomAccessIter first, RandomAccessIter last,
75 Unsigned_char_type unused)
76 {
77 //Don't sort if it's too small to optimize
78 if (last - first < detail::min_sort_size)
79 std::sort(first, last);
80 else
81 detail::string_sort(first, last, unused);
82 }
83
84
85 /*! \brief String sort algorithm using random access iterators, wraps using default of unsigned char.
86 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
87
88 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
89 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
90 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
91 so @c integer_sort is asymptotically faster
92 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
93 so its worst-case with default settings for 32-bit integers is
94 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
95 Some performance plots of runtime vs. n and log(range) are provided:\n
96 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
97 \n
98 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
99
100 \param[in] first Iterator pointer to first element.
101 \param[in] last Iterator pointing to one beyond the end of data.
102
103 \pre [@c first, @c last) is a valid range.
104 \pre @c RandomAccessIter @c value_type is mutable.
105 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
106 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
107 which returns an integer-type right-shifted a specified number of bits.
108 \post The elements in the range [@c first, @c last) are sorted in ascending order.
109
110 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
111 the right shift, subtraction of right-shifted elements, functors,
112 or any operations on iterators throw.
113
114 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
115 \warning Invalid arguments cause undefined behaviour.
116 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
117 enabling faster generic-programming.
118
119 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
120 \remark * N is @c last - @c first,
121 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
122 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
123
124 */
125 template <class RandomAccessIter>
126 inline void string_sort(RandomAccessIter first, RandomAccessIter last)
127 {
128 unsigned char unused = '\0';
129 string_sort(first, last, unused);
130 }
131
132
133 /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.
134
135 (All variants fall back to @c std::sort if the data size is too small, < detail::min_sort_size).
136
137 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
138 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
139 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
140 so @c integer_sort is asymptotically faster
141 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
142 so its worst-case with default settings for 32-bit integers is
143 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
144 Some performance plots of runtime vs. n and log(range) are provided:\n
145 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
146 \n
147 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
148
149
150 \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
151 \tparam Comp Functor type to use for comparison.
152 \tparam Unsigned_char_type Unsigned character type used for string.
153
154 \param[in] first Iterator pointer to first element.
155 \param[in] last Iterator pointing to one beyond the end of data.
156 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
157 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
158
159 \pre [@c first, @c last) is a valid range.
160 \pre @c RandomAccessIter @c value_type is mutable.
161 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
162 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
163 which returns an integer-type right-shifted a specified number of bits.
164 \post The elements in the range [@c first, @c last) are sorted in ascending order.
165
166 \return @c void.
167
168 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
169 the right shift, subtraction of right-shifted elements, functors,
170 or any operations on iterators throw.
171
172 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
173 \warning Invalid arguments cause undefined behaviour.
174 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
175 enabling faster generic-programming.
176
177 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
178 \remark * N is @c last - @c first,
179 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
180 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
181 */
182 template <class RandomAccessIter, class Compare, class Unsigned_char_type>
183 inline void reverse_string_sort(RandomAccessIter first,
184 RandomAccessIter last, Compare comp, Unsigned_char_type unused)
185 {
186 //Don't sort if it's too small to optimize.
187 if (last - first < detail::min_sort_size)
188 std::sort(first, last, comp);
189 else
190 detail::reverse_string_sort(first, last, unused);
191 }
192
193
194 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
195
196 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
197
198 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
199 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
200 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
201 so @c integer_sort is asymptotically faster
202 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
203 so its worst-case with default settings for 32-bit integers is
204 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
205 Some performance plots of runtime vs. n and log(range) are provided:\n
206 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
207 \n
208 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
209
210 \param[in] first Iterator pointer to first element.
211 \param[in] last Iterator pointing to one beyond the end of data.
212 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
213
214 \pre [@c first, @c last) is a valid range.
215 \pre @c RandomAccessIter @c value_type is mutable.
216 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
217 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
218 which returns an integer-type right-shifted a specified number of bits.
219 \post The elements in the range [@c first, @c last) are sorted in ascending order.
220
221 \return @c void.
222
223 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
224 the right shift, subtraction of right-shifted elements, functors,
225 or any operations on iterators throw.
226
227 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
228 \warning Invalid arguments cause undefined behaviour.
229 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
230 enabling faster generic-programming.
231
232 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
233 \remark * N is @c last - @c first,
234 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
235 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
236 */
237 template <class RandomAccessIter, class Compare>
238 inline void reverse_string_sort(RandomAccessIter first,
239 RandomAccessIter last, Compare comp)
240 {
241 unsigned char unused = '\0';
242 reverse_string_sort(first, last, comp, unused);
243 }
244
245
246 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
247
248 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
249
250 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
251 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
252 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
253 so @c integer_sort is asymptotically faster
254 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
255 so its worst-case with default settings for 32-bit integers is
256 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
257 Some performance plots of runtime vs. n and log(range) are provided:\n
258 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
259 \n
260 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
261
262 \param[in] first Iterator pointer to first element.
263 \param[in] last Iterator pointing to one beyond the end of data.
264 \param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
265 \param[in] length Functor to get the length of the string in characters.
266
267 \pre [@c first, @c last) is a valid range.
268 \pre @c RandomAccessIter @c value_type is mutable.
269 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
270 \pre @c RandomAccessIter @c value_type supports the @c operator>>,
271 which returns an integer-type right-shifted a specified number of bits.
272 \post The elements in the range [@c first, @c last) are sorted in ascending order.
273
274 \return @c void.
275
276 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
277 the right shift, subtraction of right-shifted elements, functors,
278 or any operations on iterators throw.
279
280 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
281 \warning Invalid arguments cause undefined behaviour.
282 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
283 enabling faster generic-programming.
284
285 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
286 \remark * N is @c last - @c first,
287 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
288 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
289
290 */
291 template <class RandomAccessIter, class Get_char, class Get_length>
292 inline void string_sort(RandomAccessIter first, RandomAccessIter last,
293 Get_char getchar, Get_length length)
294 {
295 //Don't sort if it's too small to optimize
296 if (last - first < detail::min_sort_size)
297 std::sort(first, last);
298 else {
299 //skipping past empties, which allows us to get the character type
300 //.empty() is not used so as not to require a user declaration of it
301 while (!length(*first)) {
302 if (++first == last)
303 return;
304 }
305 detail::string_sort(first, last, getchar, length, getchar((*first), 0));
306 }
307 }
308
309
310
311 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
312
313 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
314
315 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
316 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
317 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
318 so @c integer_sort is asymptotically faster
319 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
320 so its worst-case with default settings for 32-bit integers is
321 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
322 Some performance plots of runtime vs. n and log(range) are provided:\n
323 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
324 \n
325 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
326
327
328 \param[in] first Iterator pointer to first element.
329 \param[in] last Iterator pointing to one beyond the end of data.
330 \param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
331 \param[in] length Functor to get the length of the string in characters.
332 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
333
334
335 \pre [@c first, @c last) is a valid range.
336 \pre @c RandomAccessIter @c value_type is mutable.
337 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
338 \post The elements in the range [@c first, @c last) are sorted in ascending order.
339
340 \return @c void.
341
342 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
343 the right shift, subtraction of right-shifted elements, functors,
344 or any operations on iterators throw.
345
346 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
347 \warning Invalid arguments cause undefined behaviour.
348 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
349 enabling faster generic-programming.
350
351 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
352 \remark * N is @c last - @c first,
353 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
354 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
355
356 */
357 template <class RandomAccessIter, class Get_char, class Get_length,
358 class Compare>
359 inline void string_sort(RandomAccessIter first, RandomAccessIter last,
360 Get_char getchar, Get_length length, Compare comp)
361 {
362 //Don't sort if it's too small to optimize
363 if (last - first < detail::min_sort_size)
364 std::sort(first, last, comp);
365 else {
366 //skipping past empties, which allows us to get the character type
367 //.empty() is not used so as not to require a user declaration of it
368 while (!length(*first)) {
369 if (++first == last)
370 return;
371 }
372 detail::string_sort(first, last, getchar, length, comp,
373 getchar((*first), 0));
374 }
375 }
376
377
378 /*! \brief Reverse String sort algorithm using random access iterators.
379
380 (All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
381
382 \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
383 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
384 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
385 so @c integer_sort is asymptotically faster
386 than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
387 so its worst-case with default settings for 32-bit integers is
388 <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
389 Some performance plots of runtime vs. n and log(range) are provided:\n
390 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
391 \n
392 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
393
394
395 \param[in] first Iterator pointer to first element.
396 \param[in] last Iterator pointing to one beyond the end of data.
397 \param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
398 \param[in] length Functor to get the length of the string in characters.
399 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
400
401
402 \pre [@c first, @c last) is a valid range.
403 \pre @c RandomAccessIter @c value_type is mutable.
404 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
405 \post The elements in the range [@c first, @c last) are sorted in ascending order.
406
407 \return @c void.
408
409 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
410 the right shift, subtraction of right-shifted elements, functors,
411 or any operations on iterators throw.
412
413 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
414 \warning Invalid arguments cause undefined behaviour.
415 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
416 enabling faster generic-programming.
417
418 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
419 \remark * N is @c last - @c first,
420 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
421 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
422
423 */
424 template <class RandomAccessIter, class Get_char, class Get_length,
425 class Compare>
426 inline void reverse_string_sort(RandomAccessIter first,
427 RandomAccessIter last, Get_char getchar, Get_length length, Compare comp)
428 {
429 //Don't sort if it's too small to optimize
430 if (last - first < detail::min_sort_size)
431 std::sort(first, last, comp);
432 else {
433 //skipping past empties, which allows us to get the character type
434 //.empty() is not used so as not to require a user declaration of it
435 while (!length(*(--last))) {
436 //If there is just one non-empty at the beginning, this is sorted
437 if (first == last)
438 return;
439 }
440 //making last just after the end of the non-empty part of the array
441 detail::reverse_string_sort(first, last + 1, getchar, length, comp,
442 getchar((*last), 0));
443 }
444 }
445 }
446 }
447 }
448
449 #endif