]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | //---------------------------------------------------------------------------- |
2 | /// @file parallel_sort.hpp | |
3 | /// @brief Contains the parallel_sort class, which is part of the | |
4 | /// block_indirect_sort algorithm | |
5 | /// | |
6 | /// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n | |
7 | /// Distributed under the Boost Software License, Version 1.0.\n | |
8 | /// ( See accompanying file LICENSE_1_0.txt or copy at | |
9 | /// http://www.boost.org/LICENSE_1_0.txt ) | |
10 | /// @version 0.1 | |
11 | /// | |
12 | /// @remarks | |
13 | //----------------------------------------------------------------------------- | |
14 | #ifndef __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP | |
15 | #define __BOOST_SORT_PARALLEL_DETAIL_PARALLEL_SORT_HPP | |
16 | ||
17 | #include <boost/sort/block_indirect_sort/blk_detail/backbone.hpp> | |
18 | #include <boost/sort/pdqsort/pdqsort.hpp> | |
19 | #include <boost/sort/common/pivot.hpp> | |
20 | ||
21 | namespace boost | |
22 | { | |
23 | namespace sort | |
24 | { | |
25 | namespace blk_detail | |
26 | { | |
27 | ||
28 | //---------------------------------------------------------------------------- | |
29 | // USING SENTENCES | |
30 | //---------------------------------------------------------------------------- | |
31 | namespace bsc = boost::sort::common; | |
32 | namespace bscu = bsc::util; | |
33 | using bscu::nbits64; | |
34 | using bsc::pivot9; | |
35 | using boost::sort::pdqsort; | |
36 | // | |
37 | ///--------------------------------------------------------------------------- | |
38 | /// @struct parallel_sort | |
39 | /// @brief This class do a parallel sort, using the quicksort filtering, | |
40 | /// splitting the data until the number of elements is smaller than a | |
41 | /// predefined value (max_per_thread) | |
42 | //---------------------------------------------------------------------------- | |
43 | template<uint32_t Block_size, class Iter_t, class Compare> | |
44 | struct parallel_sort | |
45 | { | |
46 | //------------------------------------------------------------------------- | |
47 | // D E F I N I T I O N S | |
48 | //------------------------------------------------------------------------- | |
49 | typedef typename std::iterator_traits<Iter_t>::value_type value_t; | |
50 | typedef std::atomic<uint32_t> atomic_t; | |
51 | typedef std::function<void(void)> function_t; | |
52 | typedef backbone<Block_size, Iter_t, Compare> backbone_t; | |
53 | ||
54 | //------------------------------------------------------------------------ | |
55 | // V A R I A B L E S | |
56 | //------------------------------------------------------------------------ | |
57 | // reference to a object with all the data to sort | |
58 | backbone_t &bk; | |
59 | ||
60 | // maximun number of element to sort woth 1 thread | |
61 | size_t max_per_thread; | |
62 | ||
63 | // atomic counter for to detect the end of the works created inside | |
64 | // the object | |
65 | atomic_t counter; | |
66 | ||
67 | //------------------------------------------------------------------------ | |
68 | // F U N C T I O N S | |
69 | //------------------------------------------------------------------------ | |
70 | parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last); | |
71 | ||
72 | void divide_sort(Iter_t first, Iter_t last, uint32_t level); | |
73 | // | |
74 | //------------------------------------------------------------------------ | |
75 | // function : function_divide_sort | |
76 | /// @brief create a function_t with a call to divide_sort, and inser in | |
77 | /// the stack of the backbone | |
78 | // | |
79 | /// @param first : iterator to the first element of the range to divide | |
80 | /// @param last : iterator to the next element after the last element of | |
81 | /// the range to divide | |
82 | /// @param level : level of depth in the division.When zero call to | |
83 | /// pdqsort | |
84 | /// @param counter : atomic variable which is decremented when finish | |
85 | /// the function. This variable is used for to know | |
86 | /// when are finished all the function_t created | |
87 | /// inside an object | |
88 | /// @param error : global indicator of error. | |
89 | //------------------------------------------------------------------------ | |
90 | void function_divide_sort(Iter_t first, Iter_t last, uint32_t level, | |
91 | atomic_t &counter, bool &error) | |
92 | { | |
93 | bscu::atomic_add(counter, 1); | |
94 | function_t f1 = [this, first, last, level, &counter, &error]( ) | |
95 | { | |
96 | if (not error) | |
97 | { | |
98 | try | |
99 | { | |
100 | this->divide_sort (first, last, level); | |
101 | } | |
102 | catch (std::bad_alloc &) | |
103 | { | |
104 | error = true; | |
105 | }; | |
106 | }; | |
107 | bscu::atomic_sub (counter, 1); | |
108 | }; | |
109 | bk.works.emplace_back(f1); | |
110 | }; | |
111 | ||
112 | //-------------------------------------------------------------------------- | |
113 | };// end struct parallel_sort | |
114 | //-------------------------------------------------------------------------- | |
115 | // | |
116 | //############################################################################ | |
117 | // ## | |
118 | // ## | |
119 | // N O N I N L I N E F U N C T I O N S ## | |
120 | // ## | |
121 | // ## | |
122 | //############################################################################ | |
123 | // | |
124 | //------------------------------------------------------------------------ | |
125 | // function : parallel_sort | |
126 | /// @brief constructor of the class | |
127 | /// @param [in] bkbn : backbone struct with all the information to sort | |
128 | /// @param [in] first : iterator to the first element to sort | |
129 | /// @param [in] last : iterator to the next element after the last | |
130 | //------------------------------------------------------------------------ | |
131 | template<uint32_t Block_size, class Iter_t, class Compare> | |
132 | parallel_sort<Block_size, Iter_t, Compare> | |
133 | ::parallel_sort(backbone_t &bkbn, Iter_t first, Iter_t last) | |
134 | : bk(bkbn), counter(0) | |
135 | { | |
136 | assert((last - first) >= 0); | |
137 | size_t nelem = size_t(last - first); | |
138 | ||
139 | //------------------- check if sort -------------------------------------- | |
140 | bool sorted = true; | |
141 | for (Iter_t it1 = first, it2 = first + 1; | |
142 | it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++); | |
143 | if (sorted) return; | |
144 | ||
145 | //------------------- check if reverse sort --------------------------- | |
146 | sorted = true; | |
147 | for (Iter_t it1 = first, it2 = first + 1; | |
148 | it2 != last and (sorted = not bk.cmp(*it1, *it2)); it1 = it2++); | |
149 | ||
150 | if (sorted) | |
151 | { | |
152 | size_t nelem2 = nelem >> 1; | |
153 | Iter_t it1 = first, it2 = last - 1; | |
154 | for (size_t i = 0; i < nelem2; ++i) | |
155 | std::swap(*(it1++), *(it2--)); | |
156 | return; | |
157 | }; | |
158 | ||
159 | //-------------------max_per_thread --------------------------- | |
160 | uint32_t nbits_size = (nbits64(sizeof(value_t))) >> 1; | |
161 | if (nbits_size > 5) nbits_size = 5; | |
162 | max_per_thread = 1 << (18 - nbits_size); | |
163 | ||
164 | uint32_t level = ((nbits64(nelem / max_per_thread)) * 3) / 2; | |
165 | ||
166 | //---------------- check if only single thread ----------------------- | |
167 | if (nelem < (max_per_thread)) | |
168 | { | |
169 | pdqsort(first, last, bk.cmp); | |
170 | return; | |
171 | }; | |
172 | if (not bk.error) divide_sort(first, last, level); | |
173 | ||
174 | // wait until all the parts are finished | |
175 | bk.exec(counter); | |
176 | }; | |
177 | ||
178 | //------------------------------------------------------------------------ | |
179 | // function : divide_sort | |
180 | /// @brief this function divide the data in two part, for to be sorted in | |
181 | /// a parallel mode | |
182 | /// @param first : iterator to the first element to sort | |
183 | /// @param last : iterator to the next element after the last | |
184 | /// @param level : level of depth before call to pdqsort | |
185 | //------------------------------------------------------------------------ | |
186 | template<uint32_t Block_size, class Iter_t, class Compare> | |
187 | void parallel_sort<Block_size, Iter_t, Compare> | |
188 | ::divide_sort(Iter_t first, Iter_t last, uint32_t level) | |
189 | { | |
190 | //------------------- check if sort ----------------------------------- | |
191 | bool sorted = true; | |
192 | for (Iter_t it1 = first, it2 = first + 1; | |
193 | it2 != last and (sorted = not bk.cmp(*it2, *it1)); it1 = it2++); | |
194 | if (sorted) return; | |
195 | ||
196 | //---------------- check if finish the subdivision ------------------- | |
197 | size_t nelem = last - first; | |
198 | if (level == 0 or nelem < (max_per_thread)) | |
199 | { | |
200 | return pdqsort(first, last, bk.cmp); | |
201 | }; | |
202 | ||
203 | //-------------------- pivoting ---------------------------------- | |
204 | pivot9(first, last, bk.cmp); | |
205 | const value_t &val = const_cast<value_t &>(*first); | |
206 | Iter_t c_first = first + 1, c_last = last - 1; | |
207 | ||
208 | while (bk.cmp(*c_first, val)) ++c_first; | |
209 | while (bk.cmp(val, *c_last)) --c_last; | |
210 | ||
92f5a8d4 | 211 | while (c_first < c_last) |
11fdf7f2 TL |
212 | { |
213 | std::swap(*(c_first++), *(c_last--)); | |
214 | while (bk.cmp(*c_first, val)) | |
215 | ++c_first; | |
216 | while (bk.cmp(val, *c_last)) | |
217 | --c_last; | |
218 | }; | |
219 | ||
220 | std::swap(*first, *c_last); | |
221 | ||
222 | // insert the work of the second half in the stack of works | |
223 | function_divide_sort(c_first, last, level - 1, counter, bk.error); | |
224 | if (bk.error) return; | |
225 | ||
226 | // The first half is done by the same thread | |
227 | function_divide_sort(first, c_last, level - 1, counter, bk.error); | |
228 | }; | |
229 | // | |
230 | //**************************************************************************** | |
231 | };// End namespace blk_detail | |
232 | };// End namespace sort | |
233 | };// End namespace boost | |
234 | //**************************************************************************** | |
235 | // | |
236 | #endif |