]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | //---------------------------------------------------------------------------- |
2 | /// @file block_indirect_sort.hpp | |
3 | /// @brief block indirect sort algorithm | |
4 | /// | |
5 | /// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n | |
6 | /// Distributed under the Boost Software License, Version 1.0.\n | |
7 | /// ( See accompanying file LICENSE_1_0.txt or copy at | |
8 | /// http://www.boost.org/LICENSE_1_0.txt ) | |
9 | /// @version 0.1 | |
10 | /// | |
11 | /// @remarks | |
12 | //----------------------------------------------------------------------------- | |
13 | #ifndef __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP | |
14 | #define __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP | |
15 | ||
16 | #include <atomic> | |
17 | #include <boost/sort/block_indirect_sort/blk_detail/merge_blocks.hpp> | |
18 | #include <boost/sort/block_indirect_sort/blk_detail/move_blocks.hpp> | |
19 | #include <boost/sort/block_indirect_sort/blk_detail/parallel_sort.hpp> | |
20 | #include <boost/sort/pdqsort/pdqsort.hpp> | |
21 | #include <boost/sort/common/util/traits.hpp> | |
22 | #include <boost/sort/common/util/algorithm.hpp> | |
23 | #include <future> | |
24 | #include <iterator> | |
25 | ||
26 | // This value is the minimal number of threads for to use the | |
27 | // block_indirect_sort algorithm | |
28 | #define BOOST_NTHREAD_BORDER 6 | |
29 | ||
30 | namespace boost | |
31 | { | |
32 | namespace sort | |
33 | { | |
34 | namespace blk_detail | |
35 | { | |
36 | //--------------------------------------------------------------------------- | |
37 | // USING SENTENCES | |
38 | //--------------------------------------------------------------------------- | |
39 | namespace bs = boost::sort; | |
40 | namespace bsc = bs::common; | |
41 | namespace bscu = bsc::util; | |
42 | using bscu::compare_iter; | |
43 | using bscu::value_iter; | |
44 | using bsc::range; | |
45 | using bsc::destroy; | |
46 | using bsc::initialize; | |
47 | using bscu::nbits64; | |
48 | using bs::pdqsort; | |
49 | using bscu::enable_if_string; | |
50 | using bscu::enable_if_not_string; | |
51 | using bscu::tmsb; | |
52 | // | |
53 | ///--------------------------------------------------------------------------- | |
54 | /// @struct block_indirect_sort | |
55 | /// @brief This class is the entry point of the block indirect sort. The code | |
56 | /// of this algorithm is divided in several classes: | |
57 | /// bis/block.hpp : basic structures used in the algorithm | |
58 | /// bis/backbone.hpp : data used by all the classes | |
59 | /// bis/merge_blocks.hpp : merge the internal blocks | |
60 | /// bis/move_blocks.hpp : move the blocks, and obtain all the elements | |
61 | /// phisicaly sorted | |
62 | /// bis/parallel_sort.hpp : make the parallel sort of each part in the | |
63 | /// initial division of the data | |
64 | /// | |
65 | //---------------------------------------------------------------------------- | |
66 | template<uint32_t Block_size, uint32_t Group_size, class Iter_t, | |
67 | class Compare = compare_iter<Iter_t> > | |
68 | struct block_indirect_sort | |
69 | { | |
70 | //------------------------------------------------------------------------ | |
71 | // D E F I N I T I O N S | |
72 | //------------------------------------------------------------------------ | |
73 | typedef typename std::iterator_traits<Iter_t>::value_type value_t; | |
74 | typedef std::atomic<uint32_t> atomic_t; | |
75 | typedef range<size_t> range_pos; | |
76 | typedef range<Iter_t> range_it; | |
77 | typedef range<value_t *> range_buf; | |
78 | typedef std::function<void(void)> function_t; | |
79 | ||
80 | // classes used in the internal operations of the algorithm | |
81 | typedef block_pos block_pos_t; | |
82 | typedef block<Block_size, Iter_t> block_t; | |
83 | typedef backbone<Block_size, Iter_t, Compare> backbone_t; | |
84 | typedef parallel_sort<Block_size, Iter_t, Compare> parallel_sort_t; | |
85 | ||
86 | typedef merge_blocks<Block_size, Group_size, Iter_t, Compare> merge_blocks_t; | |
87 | typedef move_blocks<Block_size, Group_size, Iter_t, Compare> move_blocks_t; | |
88 | typedef compare_block_pos<Block_size, Iter_t, Compare> compare_block_pos_t; | |
89 | // | |
90 | //------------------------------------------------------------------------ | |
91 | // V A R I A B L E S A N D C O N S T A N T S | |
92 | //------------------------------------------------------------------------ | |
93 | // contains the data and the internal data structures of the algorithm for | |
94 | // to be shared between the classes which are part of the algorithm | |
95 | backbone_t bk; | |
96 | // atomic counter for to detect the end of the works created inside | |
97 | // the object | |
98 | atomic_t counter; | |
99 | // pointer to the uninitialized memory used for the thread buffers | |
100 | value_t *ptr; | |
101 | // indicate if the memory pointed by ptr is initialized | |
102 | bool construct; | |
103 | // range from extract the buffers for the threads | |
104 | range_buf rglobal_buf; | |
105 | // number of threads to use | |
106 | uint32_t nthread; | |
107 | // | |
108 | //------------------------------------------------------------------------ | |
109 | // F U N C T I O N S | |
110 | //------------------------------------------------------------------------ | |
111 | ||
112 | block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr); | |
113 | ||
114 | block_indirect_sort(Iter_t first, Iter_t last) : | |
115 | block_indirect_sort(first, last, Compare(), | |
116 | std::thread::hardware_concurrency()) { } | |
117 | ||
118 | ||
119 | block_indirect_sort(Iter_t first, Iter_t last, Compare cmp) : | |
120 | block_indirect_sort(first, last, cmp, | |
121 | std::thread::hardware_concurrency()) { } | |
122 | ||
123 | ||
124 | block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread) : | |
125 | block_indirect_sort(first, last, Compare(), nthread){} | |
126 | ||
127 | ||
128 | // | |
129 | //------------------------------------------------------------------------ | |
130 | // function :destroy_all | |
131 | /// @brief destructor all the data structures of the class (if the memory | |
132 | /// is constructed, is destroyed) and return the uninitialized | |
133 | /// memory | |
134 | //------------------------------------------------------------------------ | |
135 | void destroy_all(void) | |
136 | { | |
137 | if (ptr != nullptr) | |
138 | { | |
139 | if (construct) | |
140 | { | |
141 | destroy(rglobal_buf); | |
142 | construct = false; | |
143 | }; | |
144 | std::return_temporary_buffer(ptr); | |
145 | ptr = nullptr; | |
146 | }; | |
147 | } | |
148 | // | |
149 | //------------------------------------------------------------------------ | |
150 | // function :~block_indirect_sort | |
151 | /// @brief destructor of the class (if the memory is constructed, is | |
152 | /// destroyed) and return the uninitialized memory | |
153 | //------------------------------------------------------------------------ | |
154 | ~block_indirect_sort(void) | |
155 | { | |
156 | destroy_all(); | |
157 | } | |
158 | ||
159 | void split_range(size_t pos_index1, size_t pos_index2, | |
160 | uint32_t level_thread); | |
161 | ||
162 | void start_function(void); | |
163 | ||
164 | //------------------------------------------------------------------------- | |
165 | }; // End class block_indirect_sort | |
166 | //---------------------------------------------------------------------------- | |
167 | // | |
168 | //############################################################################ | |
169 | // ## | |
170 | // ## | |
171 | // N O N I N L I N E F U N C T I O N S ## | |
172 | // ## | |
173 | // ## | |
174 | //############################################################################ | |
175 | // | |
176 | //------------------------------------------------------------------------- | |
177 | // function : block_indirect_sort | |
178 | /// @brief begin with the execution of the functions stored in works | |
179 | /// @param first : iterator to the first element of the range to sort | |
180 | /// @param last : iterator after the last element to the range to sort | |
181 | /// @param comp : object for to compare two elements pointed by Iter_t | |
182 | /// iterators | |
183 | /// @param nthr : Number of threads to use in the process.When this value | |
184 | /// is lower than 2, the sorting is done with 1 thread | |
185 | //------------------------------------------------------------------------- | |
186 | template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare> | |
187 | block_indirect_sort<Block_size, Group_size, Iter_t, Compare> | |
188 | ::block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr) | |
189 | : bk(first, last, cmp), counter(0), ptr(nullptr), construct(false), | |
190 | nthread(nthr) | |
191 | { | |
192 | try | |
193 | { | |
194 | assert((last - first) >= 0); | |
195 | size_t nelem = size_t(last - first); | |
196 | if (nelem == 0) return; | |
197 | ||
198 | //------------------- check if sort ----------------------------------- | |
199 | bool sorted = true; | |
200 | for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted = | |
201 | not bk.cmp(*it2, *it1)); it1 = it2++); | |
202 | if (sorted) return; | |
203 | ||
204 | //------------------- check if reverse sort --------------------------- | |
205 | sorted = true; | |
206 | for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted = | |
207 | not bk.cmp(*it1, *it2)); it1 = it2++); | |
208 | ||
209 | if (sorted) | |
210 | { | |
211 | size_t nelem2 = nelem >> 1; | |
212 | Iter_t it1 = first, it2 = last - 1; | |
213 | for (size_t i = 0; i < nelem2; ++i) | |
214 | { | |
215 | std::swap(*(it1++), *(it2--)); | |
216 | }; | |
217 | return; | |
218 | }; | |
219 | ||
220 | //---------------- check if only single thread ----------------------- | |
221 | size_t nthreadmax = nelem / (Block_size * Group_size) + 1; | |
222 | if (nthread > nthreadmax) nthread = (uint32_t) nthreadmax; | |
223 | ||
224 | uint32_t nbits_size = (nbits64(sizeof(value_t)) >> 1); | |
225 | if (nbits_size > 5) nbits_size = 5; | |
226 | size_t max_per_thread = 1 << (18 - nbits_size); | |
227 | ||
228 | if (nelem < (max_per_thread) or nthread < 2) | |
229 | { | |
230 | //intro_sort (first, last, bk.cmp); | |
231 | pdqsort(first, last, bk.cmp); | |
232 | return; | |
233 | }; | |
234 | ||
235 | //----------- creation of the temporary buffer -------------------- | |
236 | ptr = std::get_temporary_buffer<value_t>(Block_size * nthread).first; | |
237 | if (ptr == nullptr) | |
238 | { | |
239 | bk.error = true; | |
240 | throw std::bad_alloc(); | |
241 | }; | |
242 | ||
243 | rglobal_buf = range_buf(ptr, ptr + (Block_size * nthread)); | |
244 | initialize(rglobal_buf, *first); | |
245 | construct = true; | |
246 | ||
247 | // creation of the buffers for the threads | |
248 | std::vector<value_t *> vbuf(nthread); | |
249 | for (uint32_t i = 0; i < nthread; ++i) | |
250 | { | |
251 | vbuf[i] = ptr + (i * Block_size); | |
252 | }; | |
253 | ||
254 | // Insert the first work in the stack | |
255 | bscu::atomic_write(counter, 1); | |
256 | function_t f1 = [&]( ) | |
257 | { | |
258 | start_function ( ); | |
259 | bscu::atomic_sub (counter, 1); | |
260 | }; | |
261 | bk.works.emplace_back(f1); | |
262 | ||
263 | //--------------------------------------------------------------------- | |
264 | // PROCESS | |
265 | //--------------------------------------------------------------------- | |
266 | std::vector<std::future<void> > vfuture(nthread); | |
267 | ||
268 | // The function launched with the futures is "execute the functions of | |
269 | // the stack until this->counter is zero | |
270 | // vbuf[i] is the memory from the main thread for to configure the | |
271 | // thread local buffer | |
272 | for (uint32_t i = 0; i < nthread; ++i) | |
273 | { | |
274 | auto f1 = [=, &vbuf]( ) | |
275 | { bk.exec (vbuf[i], this->counter);}; | |
276 | vfuture[i] = std::async(std::launch::async, f1); | |
277 | }; | |
278 | for (uint32_t i = 0; i < nthread; ++i) | |
279 | vfuture[i].get(); | |
280 | if (bk.error) throw std::bad_alloc(); | |
281 | } | |
282 | catch (std::bad_alloc &) | |
283 | { | |
284 | destroy_all(); | |
285 | throw; | |
286 | } | |
287 | }; | |
288 | // | |
289 | //----------------------------------------------------------------------------- | |
290 | // function : split_rage | |
291 | /// @brief this function splits a range of positions in the index, and | |
292 | /// depending of the size, sort directly or make to a recursive call | |
293 | /// to split_range | |
294 | /// @param pos_index1 : first position in the index | |
295 | /// @param pos_index2 : position after the last in the index | |
296 | /// @param level_thread : depth of the call. When 0 sort the blocks | |
297 | //----------------------------------------------------------------------------- | |
298 | template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare> | |
299 | void block_indirect_sort<Block_size, Group_size, Iter_t, Compare> | |
300 | ::split_range(size_t pos_index1, size_t pos_index2, uint32_t level_thread) | |
301 | { | |
302 | size_t nblock = pos_index2 - pos_index1; | |
303 | ||
304 | //------------------------------------------------------------------------- | |
305 | // In the blocks not sorted, the physical position is the logical position | |
306 | //------------------------------------------------------------------------- | |
307 | Iter_t first = bk.get_block(pos_index1).first; | |
308 | Iter_t last = bk.get_range(pos_index2 - 1).last; | |
309 | ||
310 | if (nblock < Group_size) | |
311 | { | |
312 | pdqsort(first, last, bk.cmp); | |
313 | return; | |
314 | }; | |
315 | ||
316 | size_t pos_index_mid = pos_index1 + (nblock >> 1); | |
317 | atomic_t son_counter(1); | |
318 | ||
319 | //------------------------------------------------------------------------- | |
320 | // Insert in the stack the work for the second part, and the actual thread, | |
321 | // execute the first part | |
322 | //------------------------------------------------------------------------- | |
323 | if (level_thread != 0) | |
324 | { | |
325 | auto f1 = [=, &son_counter]( ) | |
326 | { | |
327 | split_range (pos_index_mid, pos_index2, level_thread - 1); | |
328 | bscu::atomic_sub (son_counter, 1); | |
329 | }; | |
330 | bk.works.emplace_back(f1); | |
331 | if (bk.error) return; | |
332 | split_range(pos_index1, pos_index_mid, level_thread - 1); | |
333 | } | |
334 | else | |
335 | { | |
336 | Iter_t mid = first + ((nblock >> 1) * Block_size); | |
337 | auto f1 = [=, &son_counter]( ) | |
338 | { | |
339 | parallel_sort_t (bk, mid, last); | |
340 | bscu::atomic_sub (son_counter, 1); | |
341 | }; | |
342 | bk.works.emplace_back(f1); | |
343 | if (bk.error) return; | |
344 | parallel_sort_t(bk, first, mid); | |
345 | }; | |
346 | bk.exec(son_counter); | |
347 | if (bk.error) return; | |
348 | merge_blocks_t(bk, pos_index1, pos_index_mid, pos_index2); | |
349 | }; | |
350 | ||
351 | // | |
352 | //----------------------------------------------------------------------------- | |
353 | // function : start_function | |
354 | /// @brief this function init the process. When the number of threads is lower | |
355 | /// than a predefined value, sort the elements with a parallel pdqsort. | |
356 | //----------------------------------------------------------------------------- | |
357 | template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare> | |
358 | void block_indirect_sort<Block_size, Group_size, Iter_t, Compare> | |
359 | ::start_function(void) | |
360 | { | |
361 | if (nthread < BOOST_NTHREAD_BORDER) | |
362 | { | |
363 | parallel_sort_t(bk, bk.global_range.first, bk.global_range.last); | |
364 | } | |
365 | else | |
366 | { | |
367 | size_t level_thread = nbits64(nthread - 1) - 1; | |
368 | split_range(0, bk.nblock, level_thread - 1); | |
369 | if (bk.error) return; | |
370 | move_blocks_t k(bk); | |
371 | }; | |
372 | }; | |
373 | ||
374 | ///--------------------------------------------------------------------------- | |
375 | // function block_indirect_sort_call | |
376 | /// @brief This class is select the block size in the block_indirect_sort | |
377 | /// algorithm depending of the type and size of the data to sort | |
378 | /// | |
379 | //---------------------------------------------------------------------------- | |
380 | template <class Iter_t, class Compare, | |
381 | enable_if_string<value_iter<Iter_t>> * = nullptr> | |
382 | inline void block_indirect_sort_call(Iter_t first, Iter_t last, Compare cmp, | |
383 | uint32_t nthr) | |
384 | { | |
385 | block_indirect_sort<128, 128, Iter_t, Compare>(first, last, cmp, nthr); | |
386 | }; | |
387 | ||
388 | template<size_t Size> | |
389 | struct block_size | |
390 | { | |
391 | static constexpr const uint32_t BitsSize = | |
392 | (Size == 0) ? 0 : (Size > 256) ? 9 : tmsb[Size - 1]; | |
393 | static constexpr const uint32_t sz[10] = | |
394 | { 4096, 4096, 4096, 4096, 2048, 1024, 768, 512, 256, 128 }; | |
395 | static constexpr const uint32_t data = sz[BitsSize]; | |
396 | }; | |
397 | // | |
398 | ///--------------------------------------------------------------------------- | |
399 | /// @struct block_indirect_sort_call | |
400 | /// @brief This class is select the block size in the block_indirect_sort | |
401 | /// algorithm depending of the type and size of the data to sort | |
402 | /// | |
403 | //---------------------------------------------------------------------------- | |
404 | template <class Iter_t, class Compare, | |
405 | enable_if_not_string<value_iter<Iter_t>> * = nullptr> | |
406 | inline void block_indirect_sort_call (Iter_t first, Iter_t last, Compare cmp, | |
407 | uint32_t nthr) | |
408 | { | |
409 | block_indirect_sort<block_size<sizeof (value_iter<Iter_t> )>::data, 64, | |
410 | Iter_t, Compare> (first, last, cmp, nthr); | |
411 | }; | |
412 | ||
413 | // | |
414 | //**************************************************************************** | |
415 | }; // End namespace blk_detail | |
416 | //**************************************************************************** | |
417 | // | |
418 | namespace bscu = boost::sort::common::util; | |
419 | // | |
420 | //############################################################################ | |
421 | // ## | |
422 | // ## | |
423 | // B L O C K _ I N D I R E C T _ S O R T ## | |
424 | // ## | |
425 | // ## | |
426 | //############################################################################ | |
427 | // | |
428 | //----------------------------------------------------------------------------- | |
429 | // function : block_indirect_sort | |
92f5a8d4 | 430 | /// @brief invocation of block_indirtect_sort with 2 parameters |
11fdf7f2 TL |
431 | /// |
432 | /// @param first : iterator to the first element of the range to sort | |
433 | /// @param last : iterator after the last element to the range to sort | |
434 | //----------------------------------------------------------------------------- | |
435 | template<class Iter_t> | |
436 | void block_indirect_sort(Iter_t first, Iter_t last) | |
437 | { | |
438 | typedef bscu::compare_iter<Iter_t> Compare; | |
439 | blk_detail::block_indirect_sort_call (first, last, Compare(), | |
440 | std::thread::hardware_concurrency()); | |
441 | } | |
442 | ||
443 | // | |
444 | //----------------------------------------------------------------------------- | |
445 | // function : block_indirect_sort | |
92f5a8d4 TL |
446 | /// @brief invocation of block_indirtect_sort with 3 parameters. The third is |
447 | /// the number of threads | |
11fdf7f2 TL |
448 | /// |
449 | /// @param first : iterator to the first element of the range to sort | |
450 | /// @param last : iterator after the last element to the range to sort | |
451 | /// @param nthread : Number of threads to use in the process. When this value | |
452 | /// is lower than 2, the sorting is done with 1 thread | |
453 | //----------------------------------------------------------------------------- | |
454 | template<class Iter_t> | |
455 | void block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread) | |
456 | { | |
457 | typedef bscu::compare_iter<Iter_t> Compare; | |
458 | blk_detail::block_indirect_sort_call(first, last, Compare(), nthread); | |
459 | } | |
460 | // | |
461 | //----------------------------------------------------------------------------- | |
462 | // function : block_indirect_sort | |
92f5a8d4 TL |
463 | /// @brief invocation of block_indirtect_sort with 3 parameters. The third is |
464 | /// the comparison object | |
11fdf7f2 TL |
465 | /// |
466 | /// @param first : iterator to the first element of the range to sort | |
467 | /// @param last : iterator after the last element to the range to sort | |
468 | /// @param comp : object for to compare two elements pointed by Iter_t | |
469 | /// iterators | |
470 | //----------------------------------------------------------------------------- | |
471 | template <class Iter_t, class Compare, | |
472 | bscu::enable_if_not_integral<Compare> * = nullptr> | |
473 | void block_indirect_sort(Iter_t first, Iter_t last, Compare comp) | |
474 | { | |
475 | blk_detail::block_indirect_sort_call (first, last, comp, | |
476 | std::thread::hardware_concurrency()); | |
477 | } | |
478 | ||
479 | // | |
480 | //----------------------------------------------------------------------------- | |
481 | // function : block_indirect_sort | |
92f5a8d4 | 482 | /// @brief invocation of block_indirtect_sort with 4 parameters. |
11fdf7f2 TL |
483 | /// |
484 | /// @param first : iterator to the first element of the range to sort | |
485 | /// @param last : iterator after the last element to the range to sort | |
486 | /// @param comp : object for to compare two elements pointed by Iter_t | |
487 | /// iterators | |
488 | /// @param nthread : Number of threads to use in the process. When this value | |
489 | /// is lower than 2, the sorting is done with 1 thread | |
490 | //----------------------------------------------------------------------------- | |
491 | template<class Iter_t, class Compare> | |
492 | void block_indirect_sort (Iter_t first, Iter_t last, Compare comp, | |
493 | uint32_t nthread) | |
494 | { | |
495 | blk_detail::block_indirect_sort_call(first, last, comp, nthread); | |
496 | } | |
497 | // | |
498 | //**************************************************************************** | |
499 | }; // End namespace sort | |
500 | }; // End namespace boost | |
501 | //**************************************************************************** | |
502 | // | |
503 | #endif |