]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/sort/block_indirect_sort/block_indirect_sort.hpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / sort / block_indirect_sort / block_indirect_sort.hpp
1 //----------------------------------------------------------------------------
2 /// @file block_indirect_sort.hpp
3 /// @brief block indirect sort algorithm
4 ///
5 /// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
6 /// Distributed under the Boost Software License, Version 1.0.\n
7 /// ( See accompanying file LICENSE_1_0.txt or copy at
8 /// http://www.boost.org/LICENSE_1_0.txt )
9 /// @version 0.1
10 ///
11 /// @remarks
12 //-----------------------------------------------------------------------------
13 #ifndef __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP
14 #define __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP
15
16 #include <atomic>
17 #include <boost/sort/block_indirect_sort/blk_detail/merge_blocks.hpp>
18 #include <boost/sort/block_indirect_sort/blk_detail/move_blocks.hpp>
19 #include <boost/sort/block_indirect_sort/blk_detail/parallel_sort.hpp>
20 #include <boost/sort/pdqsort/pdqsort.hpp>
21 #include <boost/sort/common/util/traits.hpp>
22 #include <boost/sort/common/util/algorithm.hpp>
23 #include <future>
24 #include <iterator>
25
26 // This value is the minimal number of threads for to use the
27 // block_indirect_sort algorithm
28 #define BOOST_NTHREAD_BORDER 6
29
30 namespace boost
31 {
32 namespace sort
33 {
34 namespace blk_detail
35 {
36 //---------------------------------------------------------------------------
37 // USING SENTENCES
38 //---------------------------------------------------------------------------
39 namespace bs = boost::sort;
40 namespace bsc = bs::common;
41 namespace bscu = bsc::util;
42 using bscu::compare_iter;
43 using bscu::value_iter;
44 using bsc::range;
45 using bsc::destroy;
46 using bsc::initialize;
47 using bscu::nbits64;
48 using bs::pdqsort;
49 using bscu::enable_if_string;
50 using bscu::enable_if_not_string;
51 using bscu::tmsb;
52 //
53 ///---------------------------------------------------------------------------
54 /// @struct block_indirect_sort
55 /// @brief This class is the entry point of the block indirect sort. The code
56 /// of this algorithm is divided in several classes:
57 /// bis/block.hpp : basic structures used in the algorithm
58 /// bis/backbone.hpp : data used by all the classes
59 /// bis/merge_blocks.hpp : merge the internal blocks
60 /// bis/move_blocks.hpp : move the blocks, and obtain all the elements
61 /// phisicaly sorted
62 /// bis/parallel_sort.hpp : make the parallel sort of each part in the
63 /// initial division of the data
64 ///
65 //----------------------------------------------------------------------------
66 template<uint32_t Block_size, uint32_t Group_size, class Iter_t,
67 class Compare = compare_iter<Iter_t> >
68 struct block_indirect_sort
69 {
70 //------------------------------------------------------------------------
71 // D E F I N I T I O N S
72 //------------------------------------------------------------------------
73 typedef typename std::iterator_traits<Iter_t>::value_type value_t;
74 typedef std::atomic<uint32_t> atomic_t;
75 typedef range<size_t> range_pos;
76 typedef range<Iter_t> range_it;
77 typedef range<value_t *> range_buf;
78 typedef std::function<void(void)> function_t;
79
80 // classes used in the internal operations of the algorithm
81 typedef block_pos block_pos_t;
82 typedef block<Block_size, Iter_t> block_t;
83 typedef backbone<Block_size, Iter_t, Compare> backbone_t;
84 typedef parallel_sort<Block_size, Iter_t, Compare> parallel_sort_t;
85
86 typedef merge_blocks<Block_size, Group_size, Iter_t, Compare> merge_blocks_t;
87 typedef move_blocks<Block_size, Group_size, Iter_t, Compare> move_blocks_t;
88 typedef compare_block_pos<Block_size, Iter_t, Compare> compare_block_pos_t;
89 //
90 //------------------------------------------------------------------------
91 // V A R I A B L E S A N D C O N S T A N T S
92 //------------------------------------------------------------------------
93 // contains the data and the internal data structures of the algorithm for
94 // to be shared between the classes which are part of the algorithm
95 backbone_t bk;
96 // atomic counter for to detect the end of the works created inside
97 // the object
98 atomic_t counter;
99 // pointer to the uninitialized memory used for the thread buffers
100 value_t *ptr;
101 // indicate if the memory pointed by ptr is initialized
102 bool construct;
103 // range from extract the buffers for the threads
104 range_buf rglobal_buf;
105 // number of threads to use
106 uint32_t nthread;
107 //
108 //------------------------------------------------------------------------
109 // F U N C T I O N S
110 //------------------------------------------------------------------------
111
112 block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr);
113
114 block_indirect_sort(Iter_t first, Iter_t last) :
115 block_indirect_sort(first, last, Compare(),
116 std::thread::hardware_concurrency()) { }
117
118
119 block_indirect_sort(Iter_t first, Iter_t last, Compare cmp) :
120 block_indirect_sort(first, last, cmp,
121 std::thread::hardware_concurrency()) { }
122
123
124 block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread) :
125 block_indirect_sort(first, last, Compare(), nthread){}
126
127
128 //
129 //------------------------------------------------------------------------
130 // function :destroy_all
131 /// @brief destructor all the data structures of the class (if the memory
132 /// is constructed, is destroyed) and return the uninitialized
133 /// memory
134 //------------------------------------------------------------------------
135 void destroy_all(void)
136 {
137 if (ptr != nullptr)
138 {
139 if (construct)
140 {
141 destroy(rglobal_buf);
142 construct = false;
143 };
144 std::return_temporary_buffer(ptr);
145 ptr = nullptr;
146 };
147 }
148 //
149 //------------------------------------------------------------------------
150 // function :~block_indirect_sort
151 /// @brief destructor of the class (if the memory is constructed, is
152 /// destroyed) and return the uninitialized memory
153 //------------------------------------------------------------------------
154 ~block_indirect_sort(void)
155 {
156 destroy_all();
157 }
158
159 void split_range(size_t pos_index1, size_t pos_index2,
160 uint32_t level_thread);
161
162 void start_function(void);
163
164 //-------------------------------------------------------------------------
165 }; // End class block_indirect_sort
166 //----------------------------------------------------------------------------
167 //
168 //############################################################################
169 // ##
170 // ##
171 // N O N I N L I N E F U N C T I O N S ##
172 // ##
173 // ##
174 //############################################################################
175 //
176 //-------------------------------------------------------------------------
177 // function : block_indirect_sort
178 /// @brief begin with the execution of the functions stored in works
179 /// @param first : iterator to the first element of the range to sort
180 /// @param last : iterator after the last element to the range to sort
181 /// @param comp : object for to compare two elements pointed by Iter_t
182 /// iterators
183 /// @param nthr : Number of threads to use in the process.When this value
184 /// is lower than 2, the sorting is done with 1 thread
185 //-------------------------------------------------------------------------
186 template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
187 block_indirect_sort<Block_size, Group_size, Iter_t, Compare>
188 ::block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr)
189 : bk(first, last, cmp), counter(0), ptr(nullptr), construct(false),
190 nthread(nthr)
191 {
192 try
193 {
194 assert((last - first) >= 0);
195 size_t nelem = size_t(last - first);
196 if (nelem == 0) return;
197
198 //------------------- check if sort -----------------------------------
199 bool sorted = true;
200 for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted =
201 not bk.cmp(*it2, *it1)); it1 = it2++);
202 if (sorted) return;
203
204 //------------------- check if reverse sort ---------------------------
205 sorted = true;
206 for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted =
207 not bk.cmp(*it1, *it2)); it1 = it2++);
208
209 if (sorted)
210 {
211 size_t nelem2 = nelem >> 1;
212 Iter_t it1 = first, it2 = last - 1;
213 for (size_t i = 0; i < nelem2; ++i)
214 {
215 std::swap(*(it1++), *(it2--));
216 };
217 return;
218 };
219
220 //---------------- check if only single thread -----------------------
221 size_t nthreadmax = nelem / (Block_size * Group_size) + 1;
222 if (nthread > nthreadmax) nthread = (uint32_t) nthreadmax;
223
224 uint32_t nbits_size = (nbits64(sizeof(value_t)) >> 1);
225 if (nbits_size > 5) nbits_size = 5;
226 size_t max_per_thread = 1 << (18 - nbits_size);
227
228 if (nelem < (max_per_thread) or nthread < 2)
229 {
230 //intro_sort (first, last, bk.cmp);
231 pdqsort(first, last, bk.cmp);
232 return;
233 };
234
235 //----------- creation of the temporary buffer --------------------
236 ptr = std::get_temporary_buffer<value_t>(Block_size * nthread).first;
237 if (ptr == nullptr)
238 {
239 bk.error = true;
240 throw std::bad_alloc();
241 };
242
243 rglobal_buf = range_buf(ptr, ptr + (Block_size * nthread));
244 initialize(rglobal_buf, *first);
245 construct = true;
246
247 // creation of the buffers for the threads
248 std::vector<value_t *> vbuf(nthread);
249 for (uint32_t i = 0; i < nthread; ++i)
250 {
251 vbuf[i] = ptr + (i * Block_size);
252 };
253
254 // Insert the first work in the stack
255 bscu::atomic_write(counter, 1);
256 function_t f1 = [&]( )
257 {
258 start_function ( );
259 bscu::atomic_sub (counter, 1);
260 };
261 bk.works.emplace_back(f1);
262
263 //---------------------------------------------------------------------
264 // PROCESS
265 //---------------------------------------------------------------------
266 std::vector<std::future<void> > vfuture(nthread);
267
268 // The function launched with the futures is "execute the functions of
269 // the stack until this->counter is zero
270 // vbuf[i] is the memory from the main thread for to configure the
271 // thread local buffer
272 for (uint32_t i = 0; i < nthread; ++i)
273 {
274 auto f1 = [=, &vbuf]( )
275 { bk.exec (vbuf[i], this->counter);};
276 vfuture[i] = std::async(std::launch::async, f1);
277 };
278 for (uint32_t i = 0; i < nthread; ++i)
279 vfuture[i].get();
280 if (bk.error) throw std::bad_alloc();
281 }
282 catch (std::bad_alloc &)
283 {
284 destroy_all();
285 throw;
286 }
287 };
288 //
289 //-----------------------------------------------------------------------------
290 // function : split_rage
291 /// @brief this function splits a range of positions in the index, and
292 /// depending of the size, sort directly or make to a recursive call
293 /// to split_range
294 /// @param pos_index1 : first position in the index
295 /// @param pos_index2 : position after the last in the index
296 /// @param level_thread : depth of the call. When 0 sort the blocks
297 //-----------------------------------------------------------------------------
298 template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
299 void block_indirect_sort<Block_size, Group_size, Iter_t, Compare>
300 ::split_range(size_t pos_index1, size_t pos_index2, uint32_t level_thread)
301 {
302 size_t nblock = pos_index2 - pos_index1;
303
304 //-------------------------------------------------------------------------
305 // In the blocks not sorted, the physical position is the logical position
306 //-------------------------------------------------------------------------
307 Iter_t first = bk.get_block(pos_index1).first;
308 Iter_t last = bk.get_range(pos_index2 - 1).last;
309
310 if (nblock < Group_size)
311 {
312 pdqsort(first, last, bk.cmp);
313 return;
314 };
315
316 size_t pos_index_mid = pos_index1 + (nblock >> 1);
317 atomic_t son_counter(1);
318
319 //-------------------------------------------------------------------------
320 // Insert in the stack the work for the second part, and the actual thread,
321 // execute the first part
322 //-------------------------------------------------------------------------
323 if (level_thread != 0)
324 {
325 auto f1 = [=, &son_counter]( )
326 {
327 split_range (pos_index_mid, pos_index2, level_thread - 1);
328 bscu::atomic_sub (son_counter, 1);
329 };
330 bk.works.emplace_back(f1);
331 if (bk.error) return;
332 split_range(pos_index1, pos_index_mid, level_thread - 1);
333 }
334 else
335 {
336 Iter_t mid = first + ((nblock >> 1) * Block_size);
337 auto f1 = [=, &son_counter]( )
338 {
339 parallel_sort_t (bk, mid, last);
340 bscu::atomic_sub (son_counter, 1);
341 };
342 bk.works.emplace_back(f1);
343 if (bk.error) return;
344 parallel_sort_t(bk, first, mid);
345 };
346 bk.exec(son_counter);
347 if (bk.error) return;
348 merge_blocks_t(bk, pos_index1, pos_index_mid, pos_index2);
349 };
350
351 //
352 //-----------------------------------------------------------------------------
353 // function : start_function
354 /// @brief this function init the process. When the number of threads is lower
355 /// than a predefined value, sort the elements with a parallel pdqsort.
356 //-----------------------------------------------------------------------------
357 template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
358 void block_indirect_sort<Block_size, Group_size, Iter_t, Compare>
359 ::start_function(void)
360 {
361 if (nthread < BOOST_NTHREAD_BORDER)
362 {
363 parallel_sort_t(bk, bk.global_range.first, bk.global_range.last);
364 }
365 else
366 {
367 size_t level_thread = nbits64(nthread - 1) - 1;
368 split_range(0, bk.nblock, level_thread - 1);
369 if (bk.error) return;
370 move_blocks_t k(bk);
371 };
372 };
373
374 ///---------------------------------------------------------------------------
375 // function block_indirect_sort_call
376 /// @brief This class is select the block size in the block_indirect_sort
377 /// algorithm depending of the type and size of the data to sort
378 ///
379 //----------------------------------------------------------------------------
380 template <class Iter_t, class Compare,
381 enable_if_string<value_iter<Iter_t>> * = nullptr>
382 inline void block_indirect_sort_call(Iter_t first, Iter_t last, Compare cmp,
383 uint32_t nthr)
384 {
385 block_indirect_sort<128, 128, Iter_t, Compare>(first, last, cmp, nthr);
386 };
387
388 template<size_t Size>
389 struct block_size
390 {
391 static constexpr const uint32_t BitsSize =
392 (Size == 0) ? 0 : (Size > 256) ? 9 : tmsb[Size - 1];
393 static constexpr const uint32_t sz[10] =
394 { 4096, 4096, 4096, 4096, 2048, 1024, 768, 512, 256, 128 };
395 static constexpr const uint32_t data = sz[BitsSize];
396 };
397 //
398 ///---------------------------------------------------------------------------
399 /// @struct block_indirect_sort_call
400 /// @brief This class is select the block size in the block_indirect_sort
401 /// algorithm depending of the type and size of the data to sort
402 ///
403 //----------------------------------------------------------------------------
404 template <class Iter_t, class Compare,
405 enable_if_not_string<value_iter<Iter_t>> * = nullptr>
406 inline void block_indirect_sort_call (Iter_t first, Iter_t last, Compare cmp,
407 uint32_t nthr)
408 {
409 block_indirect_sort<block_size<sizeof (value_iter<Iter_t> )>::data, 64,
410 Iter_t, Compare> (first, last, cmp, nthr);
411 };
412
413 //
414 //****************************************************************************
415 }; // End namespace blk_detail
416 //****************************************************************************
417 //
418 namespace bscu = boost::sort::common::util;
419 //
420 //############################################################################
421 // ##
422 // ##
423 // B L O C K _ I N D I R E C T _ S O R T ##
424 // ##
425 // ##
426 //############################################################################
427 //
428 //-----------------------------------------------------------------------------
429 // function : block_indirect_sort
430 /// @brief invocation of block_indirtect_sort with 2 parameters
431 ///
432 /// @param first : iterator to the first element of the range to sort
433 /// @param last : iterator after the last element to the range to sort
434 //-----------------------------------------------------------------------------
435 template<class Iter_t>
436 void block_indirect_sort(Iter_t first, Iter_t last)
437 {
438 typedef bscu::compare_iter<Iter_t> Compare;
439 blk_detail::block_indirect_sort_call (first, last, Compare(),
440 std::thread::hardware_concurrency());
441 }
442
443 //
444 //-----------------------------------------------------------------------------
445 // function : block_indirect_sort
446 /// @brief invocation of block_indirtect_sort with 3 parameters. The third is
447 /// the number of threads
448 ///
449 /// @param first : iterator to the first element of the range to sort
450 /// @param last : iterator after the last element to the range to sort
451 /// @param nthread : Number of threads to use in the process. When this value
452 /// is lower than 2, the sorting is done with 1 thread
453 //-----------------------------------------------------------------------------
454 template<class Iter_t>
455 void block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread)
456 {
457 typedef bscu::compare_iter<Iter_t> Compare;
458 blk_detail::block_indirect_sort_call(first, last, Compare(), nthread);
459 }
460 //
461 //-----------------------------------------------------------------------------
462 // function : block_indirect_sort
463 /// @brief invocation of block_indirtect_sort with 3 parameters. The third is
464 /// the comparison object
465 ///
466 /// @param first : iterator to the first element of the range to sort
467 /// @param last : iterator after the last element to the range to sort
468 /// @param comp : object for to compare two elements pointed by Iter_t
469 /// iterators
470 //-----------------------------------------------------------------------------
471 template <class Iter_t, class Compare,
472 bscu::enable_if_not_integral<Compare> * = nullptr>
473 void block_indirect_sort(Iter_t first, Iter_t last, Compare comp)
474 {
475 blk_detail::block_indirect_sort_call (first, last, comp,
476 std::thread::hardware_concurrency());
477 }
478
479 //
480 //-----------------------------------------------------------------------------
481 // function : block_indirect_sort
482 /// @brief invocation of block_indirtect_sort with 4 parameters.
483 ///
484 /// @param first : iterator to the first element of the range to sort
485 /// @param last : iterator after the last element to the range to sort
486 /// @param comp : object for to compare two elements pointed by Iter_t
487 /// iterators
488 /// @param nthread : Number of threads to use in the process. When this value
489 /// is lower than 2, the sorting is done with 1 thread
490 //-----------------------------------------------------------------------------
491 template<class Iter_t, class Compare>
492 void block_indirect_sort (Iter_t first, Iter_t last, Compare comp,
493 uint32_t nthread)
494 {
495 blk_detail::block_indirect_sort_call(first, last, comp, nthread);
496 }
497 //
498 //****************************************************************************
499 }; // End namespace sort
500 }; // End namespace boost
501 //****************************************************************************
502 //
503 #endif