//////////////////////////////////////////////////////////////////////////////
//
// (C) Copyright Ion Gaztanaga 2015-2016.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// See http://www.boost.org/libs/move for documentation.
//
//////////////////////////////////////////////////////////////////////////////

#ifndef BOOST_MOVE_ADAPTIVE_SORT_HPP
#define BOOST_MOVE_ADAPTIVE_SORT_HPP

#include <boost/move/detail/config_begin.hpp>
#include <boost/move/algo/detail/adaptive_sort_merge.hpp>

namespace boost {
namespace movelib {

///@cond
namespace detail_adaptive {

template<class RandIt>
void move_data_backward( RandIt cur_pos
                       , typename iterator_traits<RandIt>::size_type const l_data
                       , RandIt new_pos
                       , bool const xbuf_used)
{
   //Move buffer to the total combination right
   if(xbuf_used){
      boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
   }
   else{
      boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
      //Rotate does fewer moves but it seems slower due to cache issues
      //rotate_gcd(first-l_block, first+len-l_block, first+len);
   }
}

template<class RandIt>
void move_data_forward( RandIt cur_pos
                      , typename iterator_traits<RandIt>::size_type const l_data
                      , RandIt new_pos
                      , bool const xbuf_used)
{
   //Move buffer to the total combination left
   if(xbuf_used){
      boost::move(cur_pos, cur_pos+l_data, new_pos);
   }
   else{
      boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
      //Rotate does fewer moves but it seems slower due to cache issues
      //rotate_gcd(first-l_block, first+len-l_block, first+len);
   }
}

// build blocks of length 2*l_build_buf. l_build_buf is a power of two
// input:  [0, l_build_buf) elements are buffer, rest unsorted elements
// output: [0, l_build_buf) elements are buffer, blocks of 2*l_build_buf elements
//         plus a last subblock are sorted
//
// First elements are merged from right to left until elements start
// at first. All old elements [first, first + l_build_buf) are placed at the end
// [first+len-l_build_buf, first+len). To achieve this:
// - If we have external memory to merge, we save elements from the buffer
//   so that a non-swapping merge is used. Buffer elements are restored
//   at the end of the buffer from the external memory.
//
// - When the external memory is not available or it is insufficient
//   for a merge operation, left swap merging is used.
//
// Once elements are merged to the left in blocks of l_build_buf, a single merge
// to right step is performed to achieve merged blocks of size 2*l_build_buf.
// If external memory is available, the usual merge is used, swap merging otherwise.
//
// As a last step, if auxiliary memory is available, in-place merging is performed
// until everything is merged or the auxiliary memory is not large enough.
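//
// Illustration (sizes hypothetical): with l_base == 4 and l_build_buf == 16,
// the insertion sort step creates sorted runs of 4 elements, the merge to left
// steps double them (4 -> 8 -> 16) and the single merge to right step yields
// sorted blocks of 2*l_build_buf == 32 elements (plus a possibly shorter last
// subblock).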
template<class RandIt, class Compare, class XBuf>
typename iterator_traits<RandIt>::size_type
   adaptive_sort_build_blocks
      ( RandIt const first
      , typename iterator_traits<RandIt>::size_type const len
      , typename iterator_traits<RandIt>::size_type const l_base
      , typename iterator_traits<RandIt>::size_type const l_build_buf
      , XBuf & xbuf
      , Compare comp)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   BOOST_ASSERT(l_build_buf <= len);
   BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));

   //Place the start pointer after the buffer
   RandIt first_block = first + l_build_buf;
   size_type const elements_in_blocks = len - l_build_buf;

   //////////////////////////////////
   // Start of merge to left step
   //////////////////////////////////
   size_type l_merged = 0u;

   BOOST_ASSERT(l_build_buf);
   //If there is not enough buffer for the insertion sort step, just avoid the external buffer
   size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
   kbuf = kbuf < l_base ? 0 : kbuf;

   if(kbuf){
      //Backup internal buffer values in the external buffer so they can be overwritten
      xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
      l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());

      //Now combine them using the buffer. Elements from the buffer can be
      //overwritten since they've been saved to xbuf
      l_merged = op_merge_left_step_multiple
         ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());

      //Restore the internal buffer from the external buffer unless kbuf was l_build_buf;
      //in that case restoration will happen later
      if(kbuf != l_build_buf){
         boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
      }
   }
   else{
      l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
      rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks);
   }

   //Now combine elements using the buffer. Elements from the buffer can't be
   //overwritten since xbuf was not big enough, so merge by swapping elements.
   l_merged = op_merge_left_step_multiple
      (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());

   BOOST_ASSERT(l_merged == l_build_buf);

   //////////////////////////////////
   // Start of merge to right step
   //////////////////////////////////

   //If kbuf is l_build_buf then we can merge right without swapping
   //Saved data is still in xbuf
   if(kbuf && kbuf == l_build_buf){
      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
      //Restore the internal buffer from the external buffer if kbuf was l_build_buf,
      //as this operation was previously delayed.
      boost::move(xbuf.data(), xbuf.data() + kbuf, first);
   }
   else{
      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
   }
   xbuf.clear();
   //Return 2*l_build_buf or the total already merged, whichever is smaller
   return min_value<size_type>(elements_in_blocks, 2*l_build_buf);
}

template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
void adaptive_sort_combine_blocks
   ( RandItKeys const keys
   , KeyCompare key_comp
   , RandIt const first
   , typename iterator_traits<RandIt>::size_type const len
   , typename iterator_traits<RandIt>::size_type const l_prev_merged
   , typename iterator_traits<RandIt>::size_type const l_block
   , bool const use_buf
   , bool const xbuf_used
   , XBuf & xbuf
   , Compare comp
   , bool merge_left)
{
   (void)xbuf;
   typedef typename iterator_traits<RandIt>::size_type size_type;

   size_type const l_reg_combined = 2*l_prev_merged;
   size_type l_irreg_combined = 0;
   size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
   size_type const n_reg_combined = len/l_reg_combined;
   RandIt combined_first = first;

   (void)l_total_combined;
   BOOST_ASSERT(l_total_combined <= len);

   size_type const max_i = n_reg_combined + (l_irreg_combined != 0);

   if(merge_left || !use_buf) {
      for( size_type combined_i = 0; combined_i != max_i; ) {
         //Now merge blocks
         bool const is_last = combined_i==n_reg_combined;
         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;

         range_xbuf<RandIt, size_type, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, key_comp, l_cur_combined
                       , l_prev_merged, l_block, rbuf
                       , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
         if(!use_buf){
            merge_blocks_bufferless
               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
         }
         else{
            merge_blocks_left
               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
         }
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block);
         ++combined_i;
         if(combined_i != max_i)
            combined_first += l_reg_combined;
      }
   }
   else{
      combined_first += l_reg_combined*(max_i-1);
      for( size_type combined_i = max_i; combined_i; ) {
         --combined_i;
         bool const is_last = combined_i==n_reg_combined;
         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;

         RandIt const combined_last(combined_first+l_cur_combined);
         range_xbuf<RandIt, size_type, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, key_comp, l_cur_combined
                       , l_prev_merged, l_block, rbuf
                       , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
         merge_blocks_right
            (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block);
         if(combined_i)
            combined_first -= l_reg_combined;
      }
   }
}

//Returns true if the buffer is placed in
//[buffer+len-l_intbuf, buffer+len). Otherwise, the buffer is
//[buffer, buffer+l_intbuf)
template<class RandIt, class Compare, class XBuf>
bool adaptive_sort_combine_all_blocks
   ( RandIt keys
   , typename iterator_traits<RandIt>::size_type &n_keys
   , RandIt const buffer
   , typename iterator_traits<RandIt>::size_type const l_buf_plus_data
   , typename iterator_traits<RandIt>::size_type l_merged
   , typename iterator_traits<RandIt>::size_type &l_intbuf
   , XBuf & xbuf
   , Compare comp)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   RandIt const first = buffer + l_intbuf;
   size_type const l_data = l_buf_plus_data - l_intbuf;
   size_type const l_unique = l_intbuf+n_keys;
   //Backup data to the external buffer once if possible
   bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
   if(common_xbuf){
      xbuf.move_assign(buffer, l_intbuf);
   }

   bool prev_merge_left = true;
   size_type l_prev_total_combined = l_merged, l_prev_block = 0;
   bool prev_use_internal_buf = true;

   for( size_type n = 0; l_data > l_merged
      ; l_merged*=2
      , ++n){
      //If l_intbuf is non-zero, use that internal buffer.
      //   Implies l_block == l_intbuf && use_internal_buf == true
      //If l_intbuf is zero, see if half of the keys can be reused as a reduced emergency buffer.
      //   Implies l_block == n_keys/2 && use_internal_buf == true
      //Otherwise, just give up and use all keys to merge using rotations (use_internal_buf == false)
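      //Illustration (values hypothetical): with l_intbuf == 0 and n_keys == 64,
      //lblock_for_combine may return l_block == 32, reusing half of the keys as
      //the emergency buffer while the other 32 remain usable as keys.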
      bool use_internal_buf = false;
      size_type const l_block = lblock_for_combine(l_intbuf, n_keys, size_type(2*l_merged), use_internal_buf);
      BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf));
      BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
      BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) );

      bool const is_merge_left = (n&1) == 0;
      size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
      if(n && prev_use_internal_buf && prev_merge_left){
         if(is_merge_left || !use_internal_buf){
            move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
         }
         else{
            //Put the buffer just after l_total_combined
            RandIt const buf_end = first+l_prev_total_combined;
            RandIt const buf_beg = buf_end-l_block;
            if(l_prev_total_combined > l_total_combined){
               size_type const l_diff = l_prev_total_combined - l_total_combined;
               move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
            }
            else if(l_prev_total_combined < l_total_combined){
               size_type const l_diff = l_total_combined - l_prev_total_combined;
               move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
            }
         }
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf);
      }

      //Combine to form l_merged*2 segments
      if(n_keys){
         size_type upper_n_keys_this_iter = 2*l_merged/l_block;
         if(upper_n_keys_this_iter > 256){
            adaptive_sort_combine_blocks
               ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
               , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
         }
         else{
            unsigned char uint_keys[256];
            adaptive_sort_combine_blocks
               ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
               , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
         }
      }
      else{
         size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
         adaptive_sort_combine_blocks
            ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
            , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
      }

      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf);
      prev_merge_left = is_merge_left;
      l_prev_total_combined = l_total_combined;
      l_prev_block = l_block;
      prev_use_internal_buf = use_internal_buf;
   }
   BOOST_ASSERT(l_prev_total_combined == l_data);
   bool const buffer_right = prev_use_internal_buf && prev_merge_left;

   l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
   n_keys = l_unique - l_intbuf;
   //Restore data from the external common buffer if it was used
   if(common_xbuf){
      if(buffer_right){
         boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
      }
      else{
         boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
      }
   }
   return buffer_right;
}

template<class RandIt, class Compare, class XBuf>
void adaptive_sort_final_merge( bool buffer_right
                              , RandIt const first
                              , typename iterator_traits<RandIt>::size_type const l_intbuf
                              , typename iterator_traits<RandIt>::size_type const n_keys
                              , typename iterator_traits<RandIt>::size_type const len
                              , XBuf & xbuf
                              , Compare comp)
{
   //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
   xbuf.clear();

   typedef typename iterator_traits<RandIt>::size_type size_type;
   size_type const n_key_plus_buf = l_intbuf+n_keys;
   if(buffer_right){
      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
      stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
      stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
      unstable_sort(first, first+n_keys, comp, xbuf);
      stable_merge(first, first+n_keys, first+len, comp, xbuf);
   }
   else{
      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
      stable_sort(first, first+n_key_plus_buf, comp, xbuf);
      if(xbuf.capacity() >= n_key_plus_buf){
         buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
      else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
         stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
         stable_merge(first, first+n_keys, first+len, comp, xbuf);
      }
      else{
         stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
   }
   BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len);
}

template<class RandIt, class Compare, class Unsigned, class XBuf>
bool adaptive_sort_build_params
   (RandIt first, Unsigned const len, Compare comp
   , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
   , XBuf & xbuf
   )
{
   typedef Unsigned size_type;

   //Calculate ideal parameters and try to collect needed unique keys
   l_base = 0u;

   //Try to find a value near sqrt(len) that is 2^N*l_base where
   //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
   //as build_blocks merges to the left iteratively doubling the
   //merged size and all the buffer must be used just before the final
   //merge to right step. This guarantees "build_blocks" produces
   //segments of size l_build_buf*2, maximizing the classic merge phase.
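   //Illustration (numbers hypothetical): for len == 10000, sqrt(len) == 100,
   //so a value of the form 2^N*l_base at or slightly above 100 would be chosen,
   //e.g. 2^4*7 == 112 with l_base == 7 (the exact result depends on
   //ceil_sqrt_multiple's rounding).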
   l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));

   //The internal buffer can be expanded if there is enough external memory
   while(xbuf.capacity() >= l_intbuf*2){
      l_intbuf *= 2;
   }

   //This is the minimum number of keys to implement the ideal algorithm
   //
   //l_intbuf is used as buffer plus the key count
   size_type n_min_ideal_keys = l_intbuf-1;
   while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
      --n_min_ideal_keys;
   }
   n_min_ideal_keys += 1;
   BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);

   if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
      n_keys = 0u;
      l_build_buf = l_intbuf;
   }
   else{
      //Try to achieve an l_build_buf of length l_intbuf*2, so that we can merge with that
      //l_intbuf*2 buffer in "build_blocks" and use half of the elements as buffer and the other half
      //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
      //
      //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed
      //(to be used as keys in combine_all_blocks), as the whole l_build_buf
      //will be backed up in the buffer during build_blocks.
      bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
      size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
      size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);

      //If available memory is 2*sqrt(l), then for "build_params"
      //the situation is the same as if 2*l_intbuf were collected.
      if(non_unique_buf && collected == n_min_ideal_keys){
         l_build_buf = l_intbuf;
         n_keys = n_min_ideal_keys;
      }
      else if(collected == 2*l_intbuf){
         //l_intbuf*2 elements found. Use all of them in the build phase
         l_build_buf = l_intbuf*2;
         n_keys = l_intbuf;
      }
      else if(collected == (n_min_ideal_keys+l_intbuf)){
         l_build_buf = l_intbuf;
         n_keys = n_min_ideal_keys;
      }
      //If the collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
      //is possible (due to very few unique keys), then fall back to a slow sort based on rotations.
      else{
         BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
         if(collected < 4){   //No combination possible with fewer than 4 keys
            return false;
         }
         n_keys = l_intbuf;
         while(n_keys&(n_keys-1)){
            n_keys &= n_keys-1;   //make it a power of 2
         }
         while(n_keys > collected){
            n_keys/=2;
         }
         //AdaptiveSortInsertionSortThreshold is always a power of two, so the minimum is a power of two
         l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
         l_intbuf = 0;
         l_build_buf = n_keys;
      }
      BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
   }

   return true;
}

// Main explanation of the sort algorithm.
//
// csqrtlen = ceil(sqrt(len));
//
// * First, 2*csqrtlen unique elements are extracted from the elements to be
//   sorted and placed at the beginning of the range.
//
// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
//   will be used as auxiliary memory, so the trailing len-2*csqrtlen elements are
//   grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
//   2*csqrtlen unique elements are again the leading elements of the whole range.
//
// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
//   ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
//   "build_blocks" step and is repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
//   elements, etc.) until all trailing (len-2*csqrtlen) elements are merged.
//
//   In "combine_blocks" len/csqrtlen elements are used as "keys" (markers) to
//   know if elements belong to the first or second block to be merged and another
//   leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
//
//   Iteratively until all trailing (len-2*csqrtlen) elements are merged:
//      Iteratively for each pair of previously merged blocks:
//         * Blocks are divided into groups of csqrtlen elements and
//           2*merged_block/csqrtlen keys are sorted to be used as markers
//         * Groups are selection-sorted by their first or last element (depending on whether they
//           are going to be merged to the left or right) and keys are reordered accordingly as an
//           imitation buffer.
//         * Elements of each block pair are merged using the csqrtlen buffer, taking into account
//           whether they belong to the first or second half (marked by the key).
//
// * In the final merge step the leading elements (2*csqrtlen) are sorted and merged with
//   rotations with the rest of the elements sorted in the "combine_blocks" step.
//
// Corner cases:
//
// * If no 2*csqrtlen elements can be extracted:
//
//    * If csqrtlen+len/csqrtlen elements are extracted, then only csqrtlen elements are used
//      as buffer in the "build_blocks" step, forming blocks of 2*csqrtlen elements. This
//      means that an additional "combine_blocks" step will be needed to merge all elements.
//
//    * If fewer than csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
//      the number of elements used as buffer and keys in the "build_blocks"
//      and "combine_blocks" steps is reduced. If "combine_blocks" does not have enough keys
//      due to this reduction, a rotation-based smart merge is used.
//
//    * If the minimum number of keys can't be extracted, a rotation-based sort is performed.
//
// * If auxiliary memory is greater than or equal to ceil(len/2), a half-copying mergesort is used.
//
// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
//   then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
//   keys to combine blocks.
//
// * If auxiliary memory is available, "build_blocks" will be extended to build bigger blocks
//   using classic merge and "combine_blocks" will use bigger blocks when merging.
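//
// Worked example (numbers illustrative): for len == 10000, csqrtlen == 100.
// 2*csqrtlen == 200 unique elements are moved to the front, "build_blocks"
// sorts the remaining 9800 elements into blocks of 4*csqrtlen == 400 elements,
// "combine_blocks" then doubles the block size (800, 1600, 3200, ...) until
// the trailing 9800 elements form a single sorted range, and the final merge
// step sorts the leading 200 elements and merges them with the rest.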
template<class RandIt, class Compare, class XBuf>
void adaptive_sort_impl
   ( RandIt first
   , typename iterator_traits<RandIt>::size_type const len
   , Compare comp
   , XBuf & xbuf
   )
{
   typedef typename iterator_traits<RandIt>::size_type size_type;

   //Small sorts go directly to insertion sort
   if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
      insertion_sort(first, first + len, comp);
   }
   else if((len-len/2) <= xbuf.capacity()){
      merge_sort(first, first+len, comp, xbuf.data());
   }
   else{
      //Make sure it is at least four
      BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);

      size_type l_base = 0;
      size_type l_intbuf = 0;
      size_type n_keys = 0;
      size_type l_build_buf = 0;

      //Calculate and extract needed unique elements. If a minimum is not achieved
      //fall back to a slow stable sort
      if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
         stable_sort(first, first+len, comp, xbuf);
      }
      else{
         BOOST_ASSERT(l_build_buf);
         //Otherwise, continue the adaptive_sort
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len);
         size_type const n_key_plus_buf = l_intbuf+n_keys;
         //l_build_buf is always a power of two if l_intbuf is zero
         BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));

         //Classic merge sort until internal buffer and xbuf are exhausted
         size_type const l_merged = adaptive_sort_build_blocks
            (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len);

         //Non-trivial merge
         bool const buffer_right = adaptive_sort_combine_all_blocks
            (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);

         //Sort keys and buffer and merge the whole sequence
         adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
      }
   }
}

}  //namespace detail_adaptive {

///@endcond

//! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according
//!   to the comparison functor "comp". The sort is stable (the order of equal elements
//!   is guaranteed to be preserved). Performance is improved if additional raw storage is
//!   provided.
//!
//! <b>Requires</b>:
//!   - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
//!   - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
//!
//! <b>Parameters</b>:
//!   - first, last: the range of elements to sort
//!   - comp: comparison function object which returns true if the first argument is ordered before the second.
//!   - uninitialized, uninitialized_len: raw storage starting at "uninitialized", able to hold "uninitialized_len"
//!      elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
//!      is ceil(std::distance(first, last)/2).
//!
//! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
//!   of dereferenced RandIt throws.
//!
//! <b>Complexity</b>: Always K*O(N*log(N)) comparisons and move assignments/constructors/swaps.
//!   Comparisons are close to the minimum even with no additional memory. The constant factor for data
//!   movement is minimized when uninitialized_len is ceil(std::distance(first, last)/2). Very good
//!   performance is still achieved when uninitialized_len is ceil(sqrt(std::distance(first, last)))*2.
//!
//! <b>Caution</b>: Experimental implementation, not production-ready.
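//!
//! <b>Example</b> (illustrative sketch, not part of the official documentation;
//!   the container, the "aux" buffer name and a trivially destructible value_type
//!   are assumptions, so a plain heap array can act as raw storage):
//!
//!      #include <boost/move/algo/adaptive_sort.hpp>
//!      #include <boost/move/unique_ptr.hpp>
//!      #include <functional>
//!      #include <vector>
//!
//!      std::vector<int> v;                               //...filled elsewhere...
//!      std::size_t const aux_len = (v.size() + 1u)/2u;   //ceil(N/2) for maximum performance
//!      boost::movelib::unique_ptr<int[]> aux(new int[aux_len]);
//!      boost::movelib::adaptive_sort(v.begin(), v.end(), std::less<int>(), aux.get(), aux_len);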
template<class RandIt, class RandRawIt, class Compare>
void adaptive_sort( RandIt first, RandIt last, Compare comp
                  , RandRawIt uninitialized
                  , typename iterator_traits<RandIt>::size_type uninitialized_len)
{
   typedef typename iterator_traits<RandIt>::size_type  size_type;
   typedef typename iterator_traits<RandIt>::value_type value_type;

   ::boost::movelib::adaptive_xbuf<value_type, RandRawIt, size_type> xbuf(uninitialized, uninitialized_len);
   ::boost::movelib::detail_adaptive::adaptive_sort_impl(first, size_type(last - first), comp, xbuf);
}

template<class RandIt, class Compare>
void adaptive_sort( RandIt first, RandIt last, Compare comp)
{
   typedef typename iterator_traits<RandIt>::value_type value_type;
   adaptive_sort(first, last, comp, (value_type*)0, 0u);
}

}  //namespace movelib {
}  //namespace boost {

#include <boost/move/detail/config_end.hpp>

#endif   //#define BOOST_MOVE_ADAPTIVE_SORT_HPP