]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ////////////////////////////////////////////////////////////////////////////// |
2 | // | |
3 | // (C) Copyright Ion Gaztanaga 2015-2016. | |
4 | // Distributed under the Boost Software License, Version 1.0. | |
5 | // (See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt) | |
7 | // | |
8 | // See http://www.boost.org/libs/move for documentation. | |
9 | // | |
10 | ////////////////////////////////////////////////////////////////////////////// | |
11 | ||
92f5a8d4 TL |
12 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS |
13 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2 | |
14 | ||
7c673cae FG |
15 | #include <algorithm> //std::inplace_merge |
16 | #include <cstdio> //std::printf | |
17 | #include <iostream> //std::cout | |
92f5a8d4 | 18 | #include <boost/container/vector.hpp> //boost::container::vector |
7c673cae FG |
19 | |
20 | #include <boost/config.hpp> | |
20effc67 | 21 | #include <cstdlib> |
7c673cae FG |
22 | |
23 | #include <boost/move/unique_ptr.hpp> | |
20effc67 | 24 | #include <boost/move/detail/nsec_clock.hpp> |
7c673cae FG |
25 | |
26 | #include "order_type.hpp" | |
b32b8144 | 27 | #include "random_shuffle.hpp" |
7c673cae | 28 | |
20effc67 TL |
29 | using boost::move_detail::cpu_timer; |
30 | using boost::move_detail::nanosecond_type; | |
7c673cae | 31 | |
7c673cae FG |
32 | void print_stats(const char *str, boost::ulong_long_type element_count) |
33 | { | |
b32b8144 | 34 | std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); |
7c673cae FG |
35 | } |
36 | ||
37 | #include <boost/move/algo/adaptive_merge.hpp> | |
38 | #include <boost/move/algo/detail/merge.hpp> | |
39 | #include <boost/move/core.hpp> | |
40 | ||
41 | template<class T, class Compare> | |
92f5a8d4 | 42 | std::size_t generate_elements(boost::container::vector<T> &elements, std::size_t L, std::size_t NK, Compare comp) |
7c673cae | 43 | { |
92f5a8d4 TL |
44 | elements.resize(L); |
45 | boost::movelib::unique_ptr<std::size_t[]> key_reps(new std::size_t[NK ? NK : L]); | |
46 | ||
7c673cae | 47 | std::srand(0); |
92f5a8d4 TL |
48 | for (std::size_t i = 0; i < (NK ? NK : L); ++i) { |
49 | key_reps[i] = 0; | |
7c673cae | 50 | } |
92f5a8d4 TL |
51 | for (std::size_t i = 0; i < L; ++i) { |
52 | std::size_t key = NK ? (i % NK) : i; | |
53 | elements[i].key = key; | |
7c673cae | 54 | } |
92f5a8d4 TL |
55 | ::random_shuffle(elements.data(), elements.data() + L); |
56 | ::random_shuffle(elements.data(), elements.data() + L); | |
57 | ||
58 | for (std::size_t i = 0; i < L; ++i) { | |
7c673cae FG |
59 | elements[i].val = key_reps[elements[i].key]++; |
60 | } | |
92f5a8d4 TL |
61 | std::size_t split_count = L / 2; |
62 | std::stable_sort(elements.data(), elements.data() + split_count, comp); | |
63 | std::stable_sort(elements.data() + split_count, elements.data() + L, comp); | |
7c673cae FG |
64 | return split_count; |
65 | } | |
66 | ||
7c673cae FG |
67 | template<class T, class Compare> |
68 | void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) | |
69 | { | |
70 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); | |
71 | boost::movelib::adaptive_merge(elements, mid, last, comp, reinterpret_cast<T*>(mem.get()), BufLen); | |
72 | } | |
73 | ||
92f5a8d4 TL |
74 | template<class T, class Compare> |
75 | void std_like_adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) | |
76 | { | |
77 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); | |
78 | boost::movelib::merge_adaptive_ONlogN(elements, mid, last, comp, reinterpret_cast<T*>(mem.get()), BufLen); | |
79 | } | |
80 | ||
7c673cae FG |
81 | enum AlgoType |
82 | { | |
83 | StdMerge, | |
92f5a8d4 TL |
84 | AdaptMerge, |
85 | SqrtHAdaptMerge, | |
86 | SqrtAdaptMerge, | |
87 | Sqrt2AdaptMerge, | |
88 | QuartAdaptMerge, | |
7c673cae | 89 | StdInplaceMerge, |
92f5a8d4 TL |
90 | StdSqrtHAdaptMerge, |
91 | StdSqrtAdaptMerge, | |
92 | StdSqrt2AdaptMerge, | |
93 | StdQuartAdaptMerge, | |
7c673cae FG |
94 | MaxMerge |
95 | }; | |
96 | ||
92f5a8d4 TL |
97 | const char *AlgoNames [] = { "StdMerge " |
98 | , "AdaptMerge " | |
99 | , "SqrtHAdaptMerge " | |
100 | , "SqrtAdaptMerge " | |
101 | , "Sqrt2AdaptMerge " | |
102 | , "QuartAdaptMerge " | |
103 | , "StdInplaceMerge " | |
104 | , "StdSqrtHAdaptMerge " | |
105 | , "StdSqrtAdaptMerge " | |
106 | , "StdSqrt2AdaptMerge " | |
107 | , "StdQuartAdaptMerge " | |
7c673cae FG |
108 | }; |
109 | ||
110 | BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge); | |
111 | ||
112 | template<class T> | |
92f5a8d4 | 113 | bool measure_algo(T *elements, std::size_t element_count, std::size_t split_pos, std::size_t alg, nanosecond_type &prev_clock) |
7c673cae | 114 | { |
7c673cae | 115 | std::printf("%s ", AlgoNames[alg]); |
b32b8144 FG |
116 | order_perf_type::num_compare=0; |
117 | order_perf_type::num_copy=0; | |
118 | order_perf_type::num_elements = element_count; | |
7c673cae FG |
119 | cpu_timer timer; |
120 | timer.resume(); | |
121 | switch(alg) | |
122 | { | |
123 | case StdMerge: | |
b32b8144 | 124 | std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
7c673cae | 125 | break; |
92f5a8d4 | 126 | case AdaptMerge: |
b32b8144 | 127 | boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
7c673cae | 128 | break; |
92f5a8d4 | 129 | case SqrtHAdaptMerge: |
b32b8144 | 130 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
131 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); |
132 | break; | |
92f5a8d4 | 133 | case SqrtAdaptMerge: |
b32b8144 | 134 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
135 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); |
136 | break; | |
92f5a8d4 | 137 | case Sqrt2AdaptMerge: |
b32b8144 | 138 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
139 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); |
140 | break; | |
92f5a8d4 | 141 | case QuartAdaptMerge: |
b32b8144 | 142 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
92f5a8d4 | 143 | , (element_count)/4+1); |
7c673cae FG |
144 | break; |
145 | case StdInplaceMerge: | |
b32b8144 | 146 | boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less()); |
7c673cae | 147 | break; |
92f5a8d4 TL |
148 | case StdSqrtHAdaptMerge: |
149 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() | |
150 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); | |
151 | break; | |
152 | case StdSqrtAdaptMerge: | |
153 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() | |
154 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); | |
155 | break; | |
156 | case StdSqrt2AdaptMerge: | |
157 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() | |
158 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); | |
159 | break; | |
160 | case StdQuartAdaptMerge: | |
161 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() | |
162 | , (element_count)/4+1); | |
163 | break; | |
7c673cae FG |
164 | } |
165 | timer.stop(); | |
166 | ||
b32b8144 | 167 | if(order_perf_type::num_elements == element_count){ |
7c673cae FG |
168 | std::printf(" Tmp Ok "); |
169 | } else{ | |
170 | std::printf(" Tmp KO "); | |
171 | } | |
172 | nanosecond_type new_clock = timer.elapsed().wall; | |
173 | ||
b32b8144 FG |
174 | //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument |
175 | std::printf("Cmp:%8.04f Cpy:%9.04f", double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); | |
7c673cae FG |
176 | |
177 | double time = double(new_clock); | |
178 | ||
179 | const char *units = "ns"; | |
180 | if(time >= 1000000000.0){ | |
181 | time /= 1000000000.0; | |
182 | units = " s"; | |
183 | } | |
184 | else if(time >= 1000000.0){ | |
185 | time /= 1000000.0; | |
186 | units = "ms"; | |
187 | } | |
188 | else if(time >= 1000.0){ | |
189 | time /= 1000.0; | |
190 | units = "us"; | |
191 | } | |
192 | ||
193 | std::printf(" %6.02f%s (%6.02f)\n" | |
194 | , time | |
195 | , units | |
196 | , prev_clock ? double(new_clock)/double(prev_clock): 1.0); | |
197 | prev_clock = new_clock; | |
198 | bool res = is_order_type_ordered(elements, element_count, true); | |
199 | return res; | |
200 | } | |
201 | ||
202 | template<class T> | |
203 | bool measure_all(std::size_t L, std::size_t NK) | |
204 | { | |
92f5a8d4 TL |
205 | boost::container::vector<T> original_elements, elements; |
206 | std::size_t split_pos = generate_elements(original_elements, L, NK, order_type_less()); | |
7c673cae FG |
207 | std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK); |
208 | ||
209 | nanosecond_type prev_clock = 0; | |
210 | nanosecond_type back_clock; | |
211 | bool res = true; | |
92f5a8d4 TL |
212 | |
213 | elements = original_elements; | |
214 | res = res && measure_algo(elements.data(), L, split_pos, StdMerge, prev_clock); | |
b32b8144 | 215 | back_clock = prev_clock; |
7c673cae | 216 | // |
92f5a8d4 TL |
217 | |
218 | prev_clock = back_clock; | |
219 | elements = original_elements; | |
220 | res = res && measure_algo(elements.data(), L, split_pos, QuartAdaptMerge, prev_clock); | |
221 | // | |
222 | prev_clock = back_clock; | |
223 | elements = original_elements; | |
224 | res = res && measure_algo(elements.data(), L, split_pos, StdQuartAdaptMerge, prev_clock); | |
225 | // | |
7c673cae | 226 | prev_clock = back_clock; |
92f5a8d4 TL |
227 | elements = original_elements; |
228 | res = res && measure_algo(elements.data(), L, split_pos, Sqrt2AdaptMerge, prev_clock); | |
7c673cae FG |
229 | // |
230 | prev_clock = back_clock; | |
92f5a8d4 TL |
231 | elements = original_elements; |
232 | res = res && measure_algo(elements.data(), L, split_pos, StdSqrt2AdaptMerge, prev_clock); | |
7c673cae FG |
233 | // |
234 | prev_clock = back_clock; | |
92f5a8d4 TL |
235 | elements = original_elements; |
236 | res = res && measure_algo(elements.data(), L, split_pos, SqrtAdaptMerge, prev_clock); | |
7c673cae FG |
237 | // |
238 | prev_clock = back_clock; | |
92f5a8d4 TL |
239 | elements = original_elements; |
240 | res = res && measure_algo(elements.data(), L, split_pos, StdSqrtAdaptMerge, prev_clock); | |
7c673cae FG |
241 | // |
242 | prev_clock = back_clock; | |
92f5a8d4 TL |
243 | elements = original_elements; |
244 | res = res && measure_algo(elements.data(), L, split_pos, SqrtHAdaptMerge, prev_clock); | |
7c673cae FG |
245 | // |
246 | prev_clock = back_clock; | |
92f5a8d4 TL |
247 | elements = original_elements; |
248 | res = res && measure_algo(elements.data(), L, split_pos, StdSqrtHAdaptMerge, prev_clock); | |
7c673cae | 249 | // |
92f5a8d4 TL |
250 | prev_clock = back_clock; |
251 | elements = original_elements; | |
252 | res = res && measure_algo(elements.data(), L, split_pos, AdaptMerge, prev_clock); | |
253 | // | |
254 | prev_clock = back_clock; | |
255 | elements = original_elements; | |
256 | res = res && measure_algo(elements.data(), L, split_pos,StdInplaceMerge, prev_clock); | |
257 | // | |
20effc67 TL |
258 | if (!res) |
259 | std::abort(); | |
7c673cae FG |
260 | return res; |
261 | } | |
262 | ||
263 | //Undef it to run the long test | |
264 | #define BENCH_MERGE_SHORT | |
265 | #define BENCH_SORT_UNIQUE_VALUES | |
266 | ||
267 | int main() | |
268 | { | |
7c673cae | 269 | #ifndef BENCH_SORT_UNIQUE_VALUES |
b32b8144 | 270 | measure_all<order_perf_type>(101,1); |
92f5a8d4 | 271 | measure_all<order_perf_type>(101,5); |
b32b8144 FG |
272 | measure_all<order_perf_type>(101,7); |
273 | measure_all<order_perf_type>(101,31); | |
7c673cae | 274 | #endif |
b32b8144 | 275 | measure_all<order_perf_type>(101,0); |
7c673cae FG |
276 | |
277 | // | |
278 | #ifndef BENCH_SORT_UNIQUE_VALUES | |
b32b8144 FG |
279 | measure_all<order_perf_type>(1101,1); |
280 | measure_all<order_perf_type>(1001,7); | |
281 | measure_all<order_perf_type>(1001,31); | |
282 | measure_all<order_perf_type>(1001,127); | |
283 | measure_all<order_perf_type>(1001,511); | |
7c673cae | 284 | #endif |
b32b8144 | 285 | measure_all<order_perf_type>(1001,0); |
92f5a8d4 | 286 | |
7c673cae | 287 | // |
7c673cae | 288 | #ifndef BENCH_SORT_UNIQUE_VALUES |
b32b8144 FG |
289 | measure_all<order_perf_type>(10001,65); |
290 | measure_all<order_perf_type>(10001,255); | |
291 | measure_all<order_perf_type>(10001,1023); | |
292 | measure_all<order_perf_type>(10001,4095); | |
7c673cae | 293 | #endif |
b32b8144 | 294 | measure_all<order_perf_type>(10001,0); |
7c673cae FG |
295 | |
296 | // | |
92f5a8d4 | 297 | #if defined(NDEBUG) |
7c673cae | 298 | #ifndef BENCH_SORT_UNIQUE_VALUES |
b32b8144 FG |
299 | measure_all<order_perf_type>(100001,511); |
300 | measure_all<order_perf_type>(100001,2047); | |
301 | measure_all<order_perf_type>(100001,8191); | |
302 | measure_all<order_perf_type>(100001,32767); | |
7c673cae | 303 | #endif |
b32b8144 | 304 | measure_all<order_perf_type>(100001,0); |
7c673cae FG |
305 | |
306 | // | |
92f5a8d4 | 307 | #if !defined(BENCH_MERGE_SHORT) |
7c673cae | 308 | #ifndef BENCH_SORT_UNIQUE_VALUES |
92f5a8d4 TL |
309 | measure_all<order_perf_type>(1000001, 8192); |
310 | measure_all<order_perf_type>(1000001, 32768); | |
311 | measure_all<order_perf_type>(1000001, 131072); | |
312 | measure_all<order_perf_type>(1000001, 524288); | |
7c673cae | 313 | #endif |
b32b8144 | 314 | measure_all<order_perf_type>(1000001,0); |
7c673cae | 315 | |
92f5a8d4 TL |
316 | #ifndef BENCH_SORT_UNIQUE_VALUES |
317 | measure_all<order_perf_type>(10000001, 65536); | |
318 | measure_all<order_perf_type>(10000001, 262144); | |
319 | measure_all<order_perf_type>(10000001, 1048576); | |
320 | measure_all<order_perf_type>(10000001, 4194304); | |
321 | #endif | |
322 | measure_all<order_perf_type>(10000001,0); | |
7c673cae | 323 | #endif //#ifndef BENCH_MERGE_SHORT |
92f5a8d4 | 324 | #endif //#ifdef NDEBUG |
7c673cae FG |
325 | |
326 | return 0; | |
327 | } | |
328 |