]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Contains get_min_count, the core optimization of the spreadsort algorithm. |
2 | // Also has other helper functions commonly useful across variants. | |
3 | ||
4 | // Copyright Steven J. Ross 2001 - 2014. | |
5 | // Distributed under the Boost Software License, Version 1.0. | |
6 | // (See accompanying file LICENSE_1_0.txt or copy at | |
7 | // http://www.boost.org/LICENSE_1_0.txt) | |
8 | ||
9 | // See http://www.boost.org/libs/sort for library home page. | |
10 | ||
11 | /* | |
12 | Some improvements suggested by: | |
13 | Phil Endecott and Frank Gennari | |
14 | */ | |
15 | ||
16 | #ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP | |
17 | #define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP | |
18 | #include <algorithm> | |
19 | #include <vector> | |
20 | #include <cstring> | |
21 | #include <limits> | |
22 | #include <functional> | |
23 | #include <boost/static_assert.hpp> | |
24 | #include <boost/serialization/static_warning.hpp> | |
25 | #include <boost/sort/spreadsort/detail/constants.hpp> | |
26 | #include <boost/cstdint.hpp> | |
27 | ||
28 | namespace boost { | |
29 | namespace sort { | |
30 | namespace spreadsort { | |
31 | namespace detail { | |
//This only works on unsigned data types
//Returns the number of bits needed to represent 'input':
//0 for input == 0, otherwise floor(log2(input)) + 1.
template <typename T>
inline unsigned
rough_log_2_size(const T& input)
{
  unsigned result = 0;
  //The bound must be tested BEFORE the shift: shifting by an amount >= the
  //bit width of T is undefined behavior in C++, so for an all-ones input the
  //original order (shift first, bound second) invoked UB on the final pass.
  //Checking the bound first also prevents the infinite loop seen on
  //compilers where an over-wide shift leaves the value unchanged.
  while ((result < (8*sizeof(T))) && (input >> result)) ++result;
  return result;
}
43 | ||
//Gets the minimum size to call spreadsort on to control worst-case runtime.
//This is called for a set of bins, instead of bin-by-bin, to minimize
//runtime overhead.
//This could be replaced by a lookup table of sizeof(Div_type)*8 but this
//function is more general.
//
//Template parameters are the tuning constants (base-2 logs) chosen by the
//caller; log_range is the number of significant bits in the key range.
//Returns a power-of-two element count: bins at least this large are worth
//another radix pass, smaller ones should fall back to comparison sorting.
//NOTE(review): max_splits and max_finishing_splits are compile-time
//constants from detail/constants.hpp (not visible in this chunk).
template<unsigned log_mean_bin_size,
         unsigned log_min_split_count, unsigned log_finishing_count>
inline size_t
get_min_count(unsigned log_range)
{
  //size_t-typed 1 so the shifts below are performed at size_t width,
  //not at (possibly narrower) int width.
  const size_t typed_one = 1;
  //Smallest log of a bin size worth recursing on.
  const unsigned min_size = log_mean_bin_size + log_min_split_count;
  //Assuring that constants have valid settings
  BOOST_STATIC_ASSERT(log_min_split_count <= max_splits &&
                      log_min_split_count > 0);
  BOOST_STATIC_ASSERT(max_splits > 1 &&
                      max_splits < (8 * sizeof(unsigned)));
  BOOST_STATIC_ASSERT(max_finishing_splits >= max_splits &&
                      max_finishing_splits < (8 * sizeof(unsigned)));
  //These two are trivially true for unsigned parameters; kept as
  //documentation of the intended constraints.
  BOOST_STATIC_ASSERT(log_mean_bin_size >= 0);
  BOOST_STATIC_ASSERT(log_finishing_count >= 0);
  //if we can complete in one iteration, do so
  //This first check allows the compiler to optimize never-executed code out
  if (log_finishing_count < min_size) {
    if (log_range <= min_size && log_range <= max_splits) {
      //Return no smaller than a certain minimum limit
      if (log_range <= log_finishing_count)
        return typed_one << log_finishing_count;
      return typed_one << log_range;
    }
  }
  //Number of radix passes available before every pass uses max_splits bits.
  const unsigned base_iterations = max_splits - log_min_split_count;
  //sum of n to n + x = ((x + 1) * (n + (n + x)))/2 + log_mean_bin_size
  //base_range is the total bits consumable by those ramping-up passes
  //(each pass handles one more bit than the last, an arithmetic series).
  const unsigned base_range =
      ((base_iterations + 1) * (max_splits + log_min_split_count))/2
      + log_mean_bin_size;
  //Calculating the required number of iterations, and returning
  //1 << (iteration_count + min_size)
  if (log_range < base_range) {
    unsigned result = log_min_split_count;
    //Empty-bodied loop: each simulated pass consumes ++result bits, so on
    //exit 'result' is the split count of the pass that covers log_range.
    for (unsigned offset = min_size; offset < log_range;
         offset += ++result);
    //Preventing overflow; this situation shouldn't occur
    if ((result + log_mean_bin_size) >= (8 * sizeof(size_t)))
      return typed_one << ((8 * sizeof(size_t)) - 1);
    return typed_one << (result + log_mean_bin_size);
  }
  //A quick division can calculate the worst-case runtime for larger ranges
  //(beyond base_range every pass consumes exactly max_splits bits).
  unsigned remainder = log_range - base_range;
  //the max_splits - 1 is used to calculate the ceiling of the division
  unsigned bit_length = ((((max_splits - 1) + remainder)/max_splits)
                         + base_iterations + min_size);
  //Preventing overflow; this situation shouldn't occur
  if (bit_length >= (8 * sizeof(size_t)))
    return typed_one << ((8 * sizeof(size_t)) - 1);
  //n(log_range)/max_splits + C, optimizing worst-case performance
  return typed_one << bit_length;
}
102 | ||
// Resizes the bin cache and bin sizes, and initializes each bin size to 0.
// This generates the memory overhead to use in radix sorting.
// bin_sizes must have room for at least bin_count entries; cache_end is
// written with the end offset of this level's bins within bin_cache.
// Returns a pointer to the first bin iterator for this recursion level.
template <class RandomAccessIter>
inline RandomAccessIter *
size_bins(size_t *bin_sizes, std::vector<RandomAccessIter>
          &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
{
  // Zero the counts for every bin at this level.
  std::fill(bin_sizes, bin_sizes + bin_count, static_cast<size_t>(0));
  // Grow the cache if this level's bins extend past its current size.
  cache_end = cache_offset + bin_count;
  if (bin_cache.size() < cache_end)
    bin_cache.resize(cache_end);
  return &(bin_cache[cache_offset]);
}
119 | } | |
120 | } | |
121 | } | |
122 | } | |
123 | ||
124 | #endif |