]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ////////////////////////////////////////////////////////////////////////////// |
2 | // | |
3 | // (C) Copyright Ion Gaztanaga 2015-2016. | |
4 | // Distributed under the Boost Software License, Version 1.0. | |
5 | // (See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt) | |
7 | // | |
8 | // See http://www.boost.org/libs/move for documentation. | |
9 | // | |
10 | ////////////////////////////////////////////////////////////////////////////// | |
11 | ||
12 | #ifndef BOOST_MOVE_ADAPTIVE_MERGE_HPP | |
13 | #define BOOST_MOVE_ADAPTIVE_MERGE_HPP | |
14 | ||
15 | #include <boost/move/detail/config_begin.hpp> | |
16 | #include <boost/move/algo/detail/adaptive_sort_merge.hpp> | |
17 | ||
18 | namespace boost { | |
19 | namespace movelib { | |
20 | ||
11fdf7f2 TL |
21 | ///@cond |
22 | namespace detail_adaptive { | |
23 | ||
24 | template<class RandIt, class Compare, class XBuf> | |
25 | inline void adaptive_merge_combine_blocks( RandIt first | |
26 | , typename iterator_traits<RandIt>::size_type len1 | |
27 | , typename iterator_traits<RandIt>::size_type len2 | |
28 | , typename iterator_traits<RandIt>::size_type collected | |
29 | , typename iterator_traits<RandIt>::size_type n_keys | |
30 | , typename iterator_traits<RandIt>::size_type l_block | |
31 | , bool use_internal_buf | |
32 | , bool xbuf_used | |
33 | , Compare comp | |
34 | , XBuf & xbuf | |
35 | ) | |
36 | { | |
37 | typedef typename iterator_traits<RandIt>::size_type size_type; | |
38 | size_type const len = len1+len2; | |
39 | size_type const l_combine = len-collected; | |
40 | size_type const l_combine1 = len1-collected; | |
41 | ||
42 | if(n_keys){ | |
43 | RandIt const first_data = first+collected; | |
44 | RandIt const keys = first; | |
45 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); | |
46 | if(xbuf_used){ | |
47 | if(xbuf.size() < l_block){ | |
48 | xbuf.initialize_until(l_block, *first); | |
49 | } | |
50 | BOOST_ASSERT(xbuf.size() >= l_block); | |
51 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; | |
52 | combine_params( keys, comp, l_combine | |
53 | , l_combine1, l_block, xbuf | |
54 | , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs | |
55 | op_merge_blocks_with_buf | |
56 | (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data()); | |
57 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len); | |
58 | } | |
59 | else{ | |
60 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; | |
61 | combine_params( keys, comp, l_combine | |
62 | , l_combine1, l_block, xbuf | |
63 | , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs | |
64 | if(use_internal_buf){ | |
65 | op_merge_blocks_with_buf | |
66 | (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block); | |
67 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len); | |
68 | } | |
69 | else{ | |
70 | merge_blocks_bufferless | |
71 | (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp); | |
72 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len); | |
73 | } | |
74 | } | |
75 | } | |
76 | else{ | |
77 | xbuf.shrink_to_fit(l_block); | |
78 | if(xbuf.size() < l_block){ | |
79 | xbuf.initialize_until(l_block, *first); | |
80 | } | |
81 | size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block); | |
82 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; | |
83 | combine_params( uint_keys, less(), l_combine | |
84 | , l_combine1, l_block, xbuf | |
85 | , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs | |
86 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); | |
87 | BOOST_ASSERT(xbuf.size() >= l_block); | |
88 | op_merge_blocks_with_buf | |
89 | (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data()); | |
90 | xbuf.clear(); | |
91 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len); | |
92 | } | |
93 | } | |
94 | ||
95 | template<class RandIt, class Compare, class XBuf> | |
96 | inline void adaptive_merge_final_merge( RandIt first | |
97 | , typename iterator_traits<RandIt>::size_type len1 | |
98 | , typename iterator_traits<RandIt>::size_type len2 | |
99 | , typename iterator_traits<RandIt>::size_type collected | |
100 | , typename iterator_traits<RandIt>::size_type l_intbuf | |
101 | , typename iterator_traits<RandIt>::size_type l_block | |
102 | , bool use_internal_buf | |
103 | , bool xbuf_used | |
104 | , Compare comp | |
105 | , XBuf & xbuf | |
106 | ) | |
107 | { | |
108 | typedef typename iterator_traits<RandIt>::size_type size_type; | |
109 | (void)l_block; | |
110 | size_type n_keys = collected-l_intbuf; | |
111 | size_type len = len1+len2; | |
112 | if(use_internal_buf){ | |
113 | if(xbuf_used){ | |
114 | xbuf.clear(); | |
115 | //Nothing to do | |
116 | if(n_keys){ | |
117 | unstable_sort(first, first+n_keys, comp, xbuf); | |
118 | stable_merge(first, first+n_keys, first+len, comp, xbuf); | |
119 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len); | |
120 | } | |
121 | } | |
122 | else{ | |
123 | xbuf.clear(); | |
124 | unstable_sort(first, first+collected, comp, xbuf); | |
125 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); | |
126 | stable_merge(first, first+collected, first+len, comp, xbuf); | |
127 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); | |
128 | } | |
129 | } | |
130 | else{ | |
131 | xbuf.clear(); | |
132 | unstable_sort(first, first+collected, comp, xbuf); | |
133 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); | |
134 | stable_merge(first, first+collected, first+len1+len2, comp, xbuf); | |
135 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); | |
136 | } | |
137 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len); | |
138 | } | |
139 | ||
140 | template<class SizeType, class Xbuf> | |
141 | inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout) | |
142 | { | |
143 | typedef SizeType size_type; | |
144 | size_type l_block = rl_block; | |
145 | size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block; | |
146 | ||
147 | while(xbuf.capacity() >= l_block*2){ | |
148 | l_block *= 2; | |
149 | } | |
150 | ||
151 | //This is the minimum number of keys to implement the ideal algorithm | |
152 | size_type n_keys = len1/l_block+len2/l_block; | |
153 | while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){ | |
154 | --n_keys; | |
155 | } | |
156 | ++n_keys; | |
157 | BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)); | |
158 | ||
159 | if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){ | |
160 | n_keys = 0u; | |
161 | } | |
162 | l_intbuf_inout = l_intbuf; | |
163 | rl_block = l_block; | |
164 | return n_keys; | |
165 | } | |
166 | ||
167 | // Main explanation of the merge algorithm. | |
168 | // | |
169 | // csqrtlen = ceil(sqrt(len)); | |
170 | // | |
171 | // * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect | |
172 | // unique elements are extracted from elements to be sorted and placed in the beginning of the range. | |
173 | // | |
174 | // * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements | |
175 | // are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements. | |
176 | // | |
177 | // Explanation of the "combine_blocks" step: | |
178 | // | |
179 | // * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements. | |
180 | // Remaining elements that can't form a group are grouped in front of those elements. | |
181 | // * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements. | |
182 | // Remaining elements that can't form a group are grouped in the back of those elements. | |
183 | // * In parallel the following two steps are performed: | |
184 | // * Groups are selection-sorted by first or last element (depending whether they are going | |
185 | // to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. | |
186 | // * Elements of each block pair are merged using the csqrtlen buffer taking into account | |
187 | // if they belong to the first half or second half (marked by the key). | |
188 | // | |
189 | // * In the final merge step leading "to_collect" elements are merged with rotations | |
190 | // with the rest of merged elements in the "combine_blocks" step. | |
191 | // | |
192 | // Corner cases: | |
193 | // | |
194 | // * If no "to_collect" elements can be extracted: | |
195 | // | |
196 | // * If more than a minimum number of elements is extracted | |
197 | // then reduces the number of elements used as buffer and keys in the | |
198 | // and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction | |
199 | // then uses a rotation based smart merge. | |
200 | // | |
201 | // * If the minimum number of keys can't be extracted, a rotation-based merge is performed. | |
202 | // | |
203 | // * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed. | |
204 | // | |
205 | // * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed. | |
206 | // | |
207 | // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), | |
208 | // then no csqrtlen need to be extracted and "combine_blocks" will use integral | |
209 | // keys to combine blocks. | |
210 | template<class RandIt, class Compare, class XBuf> | |
211 | void adaptive_merge_impl | |
212 | ( RandIt first | |
213 | , typename iterator_traits<RandIt>::size_type len1 | |
214 | , typename iterator_traits<RandIt>::size_type len2 | |
215 | , Compare comp | |
216 | , XBuf & xbuf | |
217 | ) | |
218 | { | |
219 | typedef typename iterator_traits<RandIt>::size_type size_type; | |
220 | ||
221 | if(xbuf.capacity() >= min_value<size_type>(len1, len2)){ | |
222 | buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf); | |
223 | } | |
224 | else{ | |
225 | const size_type len = len1+len2; | |
226 | //Calculate ideal parameters and try to collect needed unique keys | |
227 | size_type l_block = size_type(ceil_sqrt(len)); | |
228 | ||
229 | //One range is not big enough to extract keys and the internal buffer so a | |
230 | //rotation-based based merge will do just fine | |
231 | if(len1 <= l_block*2 || len2 <= l_block*2){ | |
232 | merge_bufferless(first, first+len1, first+len1+len2, comp); | |
233 | return; | |
234 | } | |
235 | ||
236 | //Detail the number of keys and internal buffer. If xbuf has enough memory, no | |
237 | //internal buffer is needed so l_intbuf will remain 0. | |
238 | size_type l_intbuf = 0; | |
239 | size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf); | |
240 | size_type const to_collect = l_intbuf+n_keys; | |
241 | //Try to extract needed unique values from the first range | |
242 | size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); | |
243 | BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len); | |
244 | ||
245 | //Not the minimum number of keys is not available on the first range, so fallback to rotations | |
246 | if(collected != to_collect && collected < 4){ | |
247 | merge_bufferless(first, first+collected, first+len1, comp); | |
248 | merge_bufferless(first, first + len1, first + len1 + len2, comp); | |
249 | return; | |
250 | } | |
251 | ||
252 | //If not enough keys but more than minimum, adjust the internal buffer and key count | |
253 | bool use_internal_buf = collected == to_collect; | |
254 | if (!use_internal_buf){ | |
255 | l_intbuf = 0u; | |
256 | n_keys = collected; | |
257 | l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); | |
258 | //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used | |
259 | l_intbuf = use_internal_buf ? l_block : 0u; | |
260 | } | |
261 | ||
262 | bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; | |
263 | //Merge trailing elements using smart merges | |
264 | adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf); | |
265 | //Merge buffer and keys with the rest of the values | |
266 | adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf); | |
267 | } | |
268 | } | |
269 | ||
270 | } //namespace detail_adaptive { | |
271 | ||
272 | ///@endcond | |
273 | ||
7c673cae FG |
274 | //! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last) |
275 | //! into one sorted range [first, last) according to the given comparison function comp. | |
276 | //! The algorithm is stable (if there are equivalent elements in the original two ranges, | |
277 | //! the elements from the first range (preserving their original order) precede the elements | |
278 | //! from the second range (preserving their original order). | |
279 | //! | |
280 | //! <b>Requires</b>: | |
281 | //! - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator. | |
282 | //! - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible. | |
283 | //! | |
284 | //! <b>Parameters</b>: | |
285 | //! - first: the beginning of the first sorted range. | |
286 | //! - middle: the end of the first sorted range and the beginning of the second | |
287 | //! - last: the end of the second sorted range | |
288 | //! - comp: comparison function object which returns true if the first argument is is ordered before the second. | |
289 | //! - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len" | |
290 | //! elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len | |
291 | //! is min(std::distance(first, middle), std::distance(middle, last)). | |
292 | //! | |
293 | //! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type | |
294 | //! of dereferenced RandIt throws. | |
295 | //! | |
296 | //! <b>Complexity</b>: Always K x O(N) comparisons and move assignments/constructors/swaps. | |
297 | //! Constant factor for comparisons and data movement is minimized when uninitialized_len | |
298 | //! is min(std::distance(first, middle), std::distance(middle, last)). | |
299 | //! Pretty good enough performance is achieved when uninitialized_len is | |
300 | //! ceil(sqrt(std::distance(first, last)))*2. | |
301 | //! | |
302 | //! <b>Caution</b>: Experimental implementation, not production-ready. | |
303 | template<class RandIt, class Compare> | |
304 | void adaptive_merge( RandIt first, RandIt middle, RandIt last, Compare comp | |
305 | , typename iterator_traits<RandIt>::value_type* uninitialized = 0 | |
306 | , std::size_t uninitialized_len = 0) | |
307 | { | |
308 | typedef typename iterator_traits<RandIt>::size_type size_type; | |
309 | typedef typename iterator_traits<RandIt>::value_type value_type; | |
310 | ||
311 | ::boost::movelib::detail_adaptive::adaptive_xbuf<value_type> xbuf(uninitialized, uninitialized_len); | |
312 | ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf); | |
313 | } | |
314 | ||
315 | } //namespace movelib { | |
316 | } //namespace boost { | |
317 | ||
318 | #include <boost/move/detail/config_end.hpp> | |
319 | ||
320 | #endif //#define BOOST_MOVE_ADAPTIVE_MERGE_HPP |