[ceph.git] / ceph / src / boost / boost / sort / block_indirect_sort / blk_detail / parallel_sort.hpp

//----------------------------------------------------------------------------
/// @file parallel_sort.hpp
/// @brief Contains the parallel_sort class, which is part of the
///        block_indirect_sort algorithm
///
/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
///         Distributed under the Boost Software License, Version 1.0.\n
///         ( See accompanying file LICENSE_1_0.txt or copy at
///           http://www.boost.org/LICENSE_1_0.txt  )
/// @version 0.1
///
/// @remarks
//-----------------------------------------------------------------------------
#ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP
#define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP

#include <boost/sort/block_indirect_sort/blk_detail/backbone.hpp>
#include <boost/sort/pdqsort/pdqsort.hpp>
#include <boost/sort/common/pivot.hpp>

namespace boost
{
namespace sort
{
namespace blk_detail
{

//----------------------------------------------------------------------------
//                          USING SENTENCES
//----------------------------------------------------------------------------
namespace bsc = boost::sort::common;
namespace bscu = bsc::util;
using bscu::nbits64;
using bsc::pivot9;
using boost::sort::pdqsort;
//
///---------------------------------------------------------------------------
/// @struct parallel_sort
/// @brief This class do a parallel sort, using the quicksort filtering,
///        splitting the data until the number of elements is smaller than a
///        predefined value (max_per_thread)
//----------------------------------------------------------------------------
template<uint32_t Block_size, class Iter_t, class Compare>
struct parallel_sort
{
    //-------------------------------------------------------------------------
    //                  D E F I N I T I O N S
    //-------------------------------------------------------------------------
    typedef typename std::iterator_traits<Iter_t>::value_type value_t;
    typedef std::atomic<uint32_t> atomic_t;
    typedef std::function<void(void)> function_t;
    typedef backbone<Block_size, Iter_t, Compare> backbone_t;

    //------------------------------------------------------------------------
    //                V A R I A B L E S
    //------------------------------------------------------------------------
    // reference to a object with all the data to sort
    backbone_t &bk;

    // maximun number of element to sort woth 1 thread
    size_t max_per_thread;

    // atomic counter for to detect the end of the works created inside
    // the object
    atomic_t counter;

    //------------------------------------------------------------------------
    //                F U N C T I O N S
    //------------------------------------------------------------------------
    parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last);

    void divide_sort(Iter_t first, Iter_t last, uint32_t level);
    //
    //------------------------------------------------------------------------
    //  function : function_divide_sort
    /// @brief create a function_t with a call to divide_sort, and inser in
    ///        the stack of the backbone
    //
    /// @param first : iterator to the first element of the range to divide
    /// @param last : iterator to the next element after the last element of
    ///               the range to divide
    /// @param level : level of depth in the division.When zero call to
    ///                pdqsort
    /// @param counter : atomic variable which is decremented when finish
    ///                  the function. This variable is used for to know
    ///                  when are finished all the function_t created
    ///                  inside an object
    /// @param error : global indicator of error.
    //------------------------------------------------------------------------
    void function_divide_sort(Iter_t first, Iter_t last, uint32_t level,
                              atomic_t &counter, bool &error)
    {
        bscu::atomic_add(counter, 1);
        function_t f1 = [this, first, last, level, &counter, &error]( )
        {
            if (not error)
            {
                try
                {
                    this->divide_sort (first, last, level);
                }
                catch (std::bad_alloc &)
                {
                    error = true;
                };
            };
            bscu::atomic_sub (counter, 1);
        };
        bk.works.emplace_back(f1);
    };

//--------------------------------------------------------------------------
};// end struct parallel_sort
//--------------------------------------------------------------------------
//
//############################################################################
//                                                                          ##
//                                                                          ##
//            N O N     I N L I N E      F U N C T I O N S                  ##
//                                                                          ##
//                                                                          ##
//############################################################################
//
//------------------------------------------------------------------------
//  function : parallel_sort
/// @brief constructor of the class
/// @param [in] bkbn : backbone struct with all the information to sort
/// @param [in] first : iterator to the first element to sort
/// @param [in] last : iterator to the next element after the last
//------------------------------------------------------------------------
template<uint32_t Block_size, class Iter_t, class Compare>
parallel_sort<Block_size, Iter_t, Compare>
::parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last)
 : bk(bkbn), counter(0)
{
    assert((last - first) >= 0);
    size_t nelem = size_t(last - first);

    //------------------- check if sort --------------------------------------
    bool sorted = true;
    for (Iter_t it1 = first, it2 = first + 1;
         it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++);
    if (sorted) return;

    //------------------- check if reverse sort ---------------------------
    sorted = true;
    for (Iter_t it1 = first, it2 = first + 1;
         it2 != last and (sorted = not bk.cmp(*it1, *it2)); it1 = it2++);

    if (sorted)
    {
        size_t nelem2 = nelem >> 1;
        Iter_t it1 = first, it2 = last - 1;
        for (size_t i = 0; i < nelem2; ++i)
            std::swap(*(it1++), *(it2--));
        return;
    };

    //-------------------max_per_thread ---------------------------
    uint32_t nbits_size = (nbits64(sizeof(value_t))) >> 1;
    if (nbits_size > 5) nbits_size = 5;
    max_per_thread = 1 << (18 - nbits_size);

    uint32_t level = ((nbits64(nelem / max_per_thread)) * 3) / 2;

    //---------------- check if only single thread -----------------------
    if (nelem < (max_per_thread))
    {
        pdqsort(first, last, bk.cmp);
        return;
    };
    if (not bk.error) divide_sort(first, last, level);

    // wait until all the parts are finished
    bk.exec(counter);
};

//------------------------------------------------------------------------
//  function : divide_sort
/// @brief this function divide the data in two part, for to be sorted in
///        a parallel mode
/// @param first : iterator to the first element to sort
/// @param last : iterator to the next element after the last
/// @param level : level of depth before call to pdqsort
//------------------------------------------------------------------------
template<uint32_t Block_size, class Iter_t, class Compare>
void parallel_sort<Block_size, Iter_t, Compare>
::divide_sort(Iter_t first, Iter_t last, uint32_t level)
{
    //------------------- check if sort -----------------------------------
    bool sorted = true;
    for (Iter_t it1 = first, it2 = first + 1;
         it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++);
    if (sorted) return;

    //---------------- check if finish the subdivision -------------------
    size_t nelem = last - first;
    if (level == 0 or nelem < (max_per_thread))
    {
        return pdqsort(first, last, bk.cmp);
    };

    //-------------------- pivoting  ----------------------------------
    pivot9(first, last, bk.cmp);
    const value_t &val = const_cast<value_t &>(*first);
    Iter_t c_first = first + 1, c_last = last - 1;

    while (bk.cmp(*c_first, val))   ++c_first;
    while (bk.cmp(val, *c_last))    --c_last;

    while (c_first < c_last)
    {
        std::swap(*(c_first++), *(c_last--));
        while (bk.cmp(*c_first, val))
            ++c_first;
        while (bk.cmp(val, *c_last))
            --c_last;
    };

    std::swap(*first, *c_last);

    // insert  the work of the second half in the stack of works
    function_divide_sort(c_first, last, level - 1, counter, bk.error);
    if (bk.error) return;

    // The first half is done by the same thread
    function_divide_sort(first, c_last, level - 1, counter, bk.error);
};
//
//****************************************************************************
};//    End namespace blk_detail
};//    End namespace sort
};//    End namespace boost
//****************************************************************************
//
#endif
Commit	Line	Data
11fdf7f2 TL	1	//----------------------------------------------------------------------------
	2	/// @file parallel_sort.hpp
	3	/// @brief Contains the parallel_sort class, which is part of the
	4	/// block_indirect_sort algorithm
	5	///
	6	/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
	7	/// Distributed under the Boost Software License, Version 1.0.\n
	8	/// ( See accompanying file LICENSE_1_0.txt or copy at
	9	/// http://www.boost.org/LICENSE_1_0.txt )
	10	/// @version 0.1
	11	///
	12	/// @remarks
	13	//-----------------------------------------------------------------------------
	14	#ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP
	15	#define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP
	16
	17	#include <boost/sort/block_indirect_sort/blk_detail/backbone.hpp>
	18	#include <boost/sort/pdqsort/pdqsort.hpp>
	19	#include <boost/sort/common/pivot.hpp>
	20
	21	namespace boost
	22	{
	23	namespace sort
	24	{
	25	namespace blk_detail
	26	{
	27
	28	//----------------------------------------------------------------------------
	29	// USING SENTENCES
	30	//----------------------------------------------------------------------------
	31	namespace bsc = boost::sort::common;
	32	namespace bscu = bsc::util;
	33	using bscu::nbits64;
	34	using bsc::pivot9;
	35	using boost::sort::pdqsort;
	36	//
	37	///---------------------------------------------------------------------------
	38	/// @struct parallel_sort
	39	/// @brief This class do a parallel sort, using the quicksort filtering,
	40	/// splitting the data until the number of elements is smaller than a
	41	/// predefined value (max_per_thread)
	42	//----------------------------------------------------------------------------
	43	template<uint32_t Block_size, class Iter_t, class Compare>
	44	struct parallel_sort
	45	{
	46	//-------------------------------------------------------------------------
	47	// D E F I N I T I O N S
	48	//-------------------------------------------------------------------------
	49	typedef typename std::iterator_traits<Iter_t>::value_type value_t;
	50	typedef std::atomic<uint32_t> atomic_t;
	51	typedef std::function<void(void)> function_t;
	52	typedef backbone<Block_size, Iter_t, Compare> backbone_t;
	53
	54	//------------------------------------------------------------------------
	55	// V A R I A B L E S
	56	//------------------------------------------------------------------------
	57	// reference to a object with all the data to sort
	58	backbone_t &bk;
	59
	60	// maximun number of element to sort woth 1 thread
	61	size_t max_per_thread;
	62
	63	// atomic counter for to detect the end of the works created inside
	64	// the object
65	atomic_t counter;
66
67	//------------------------------------------------------------------------
68	// F U N C T I O N S
69	//------------------------------------------------------------------------
70	parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last);
71
72	void divide_sort(Iter_t first, Iter_t last, uint32_t level);
73	//
74	//------------------------------------------------------------------------
75	// function : function_divide_sort
76	/// @brief create a function_t with a call to divide_sort, and inser in
77	/// the stack of the backbone
78	//
79	/// @param first : iterator to the first element of the range to divide
80	/// @param last : iterator to the next element after the last element of
81	/// the range to divide
82	/// @param level : level of depth in the division.When zero call to
83	/// pdqsort
84	/// @param counter : atomic variable which is decremented when finish
85	/// the function. This variable is used for to know
86	/// when are finished all the function_t created
87	/// inside an object
88	/// @param error : global indicator of error.
89	//------------------------------------------------------------------------
90	void function_divide_sort(Iter_t first, Iter_t last, uint32_t level,
91	atomic_t &counter, bool &error)
92	{
93	bscu::atomic_add(counter, 1);
94	function_t f1 = [this, first, last, level, &counter, &error]( )
95	{
96	if (not error)
97	{
98	try
99	{
100	this->divide_sort (first, last, level);
101	}
102	catch (std::bad_alloc &)
103	{
104	error = true;
105	};
106	};
107	bscu::atomic_sub (counter, 1);
108	};
109	bk.works.emplace_back(f1);
110	};
111
112	//--------------------------------------------------------------------------
113	};// end struct parallel_sort
114	//--------------------------------------------------------------------------
115	//
116	//############################################################################
117	// ##
118	// ##
119	// N O N I N L I N E F U N C T I O N S ##
120	// ##
121	// ##
122	//############################################################################
123	//
124	//------------------------------------------------------------------------
125	// function : parallel_sort
126	/// @brief constructor of the class
127	/// @param [in] bkbn : backbone struct with all the information to sort
128	/// @param [in] first : iterator to the first element to sort
129	/// @param [in] last : iterator to the next element after the last
130	//------------------------------------------------------------------------
131	template<uint32_t Block_size, class Iter_t, class Compare>
132	parallel_sort<Block_size, Iter_t, Compare>
133	::parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last)
134	: bk(bkbn), counter(0)
135	{
136	assert((last - first) >= 0);
137	size_t nelem = size_t(last - first);
138
139	//------------------- check if sort --------------------------------------
140	bool sorted = true;
141	for (Iter_t it1 = first, it2 = first + 1;
142	it2 != last and (sorted = not bk.cmp(it2, it1)); it1 = it2++);
143	if (sorted) return;
144
145	//------------------- check if reverse sort ---------------------------
146	sorted = true;
147	for (Iter_t it1 = first, it2 = first + 1;
148	it2 != last and (sorted = not bk.cmp(it1, it2)); it1 = it2++);
149
150	if (sorted)
151	{
152	size_t nelem2 = nelem >> 1;
153	Iter_t it1 = first, it2 = last - 1;
154	for (size_t i = 0; i < nelem2; ++i)
155	std::swap((it1++), (it2--));
156	return;
157	};
158
159	//-------------------max_per_thread ---------------------------
160	uint32_t nbits_size = (nbits64(sizeof(value_t))) >> 1;
161	if (nbits_size > 5) nbits_size = 5;
162	max_per_thread = 1 << (18 - nbits_size);
163
164	uint32_t level = ((nbits64(nelem / max_per_thread)) * 3) / 2;
165
166	//---------------- check if only single thread -----------------------
167	if (nelem < (max_per_thread))
168	{
169	pdqsort(first, last, bk.cmp);
170	return;
171	};
172	if (not bk.error) divide_sort(first, last, level);
173
174	// wait until all the parts are finished
175	bk.exec(counter);
176	};
177
178	//------------------------------------------------------------------------
179	// function : divide_sort
180	/// @brief this function divide the data in two part, for to be sorted in
181	/// a parallel mode
182	/// @param first : iterator to the first element to sort
183	/// @param last : iterator to the next element after the last
184	/// @param level : level of depth before call to pdqsort
185	//------------------------------------------------------------------------
186	template<uint32_t Block_size, class Iter_t, class Compare>
187	void parallel_sort<Block_size, Iter_t, Compare>
188	::divide_sort(Iter_t first, Iter_t last, uint32_t level)
189	{
190	//------------------- check if sort -----------------------------------
191	bool sorted = true;
192	for (Iter_t it1 = first, it2 = first + 1;
193	it2 != last and (sorted = not bk.cmp(it2, it1)); it1 = it2++);
194	if (sorted) return;
195
196	//---------------- check if finish the subdivision -------------------
197	size_t nelem = last - first;
198	if (level == 0 or nelem < (max_per_thread))
199	{
200	return pdqsort(first, last, bk.cmp);
201	};
202
203	//-------------------- pivoting ----------------------------------
204	pivot9(first, last, bk.cmp);
205	const value_t &val = const_cast<value_t &>(*first);
206	Iter_t c_first = first + 1, c_last = last - 1;
207
208	while (bk.cmp(*c_first, val)) ++c_first;
209	while (bk.cmp(val, *c_last)) --c_last;
210
92f5a8d4	211	while (c_first < c_last)
11fdf7f2 TL	212	{
	213	std::swap((c_first++), (c_last--));
	214	while (bk.cmp(*c_first, val))
	215	++c_first;
	216	while (bk.cmp(val, *c_last))
	217	--c_last;
	218	};
	219
	220	std::swap(first, c_last);
	221
	222	// insert the work of the second half in the stack of works
	223	function_divide_sort(c_first, last, level - 1, counter, bk.error);
	224	if (bk.error) return;
	225
	226	// The first half is done by the same thread
	227	function_divide_sort(first, c_last, level - 1, counter, bk.error);
	228	};
	229	//
	230	//****************************************************************************
	231	};// End namespace blk_detail
	232	};// End namespace sort
	233	};// End namespace boost
	234	//****************************************************************************
	235	//
	236	#endif