]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ////////////////////////////////////////////////////////////////////////////// |
2 | // | |
3 | // (C) Copyright Ion Gaztanaga 2015-2016. | |
4 | // Distributed under the Boost Software License, Version 1.0. | |
5 | // (See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt) | |
7 | // | |
8 | // See http://www.boost.org/libs/move for documentation. | |
9 | // | |
10 | ////////////////////////////////////////////////////////////////////////////// | |
11 | ||
92f5a8d4 TL |
12 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS |
13 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2 | |
14 | ||
7c673cae FG |
15 | #include <algorithm> //std::inplace_merge |
16 | #include <cstdio> //std::printf | |
17 | #include <iostream> //std::cout | |
92f5a8d4 | 18 | #include <boost/container/vector.hpp> //boost::container::vector |
7c673cae FG |
19 | |
20 | #include <boost/config.hpp> | |
20effc67 | 21 | #include <cstdlib> |
7c673cae FG |
22 | |
23 | #include <boost/move/unique_ptr.hpp> | |
20effc67 | 24 | #include <boost/move/detail/nsec_clock.hpp> |
1e59de90 | 25 | #include <boost/move/detail/force_ptr.hpp> |
7c673cae FG |
26 | |
27 | #include "order_type.hpp" | |
b32b8144 | 28 | #include "random_shuffle.hpp" |
7c673cae | 29 | |
20effc67 TL |
30 | using boost::move_detail::cpu_timer; |
31 | using boost::move_detail::nanosecond_type; | |
7c673cae | 32 | |
7c673cae FG |
33 | void print_stats(const char *str, boost::ulong_long_type element_count) |
34 | { | |
1e59de90 TL |
35 | std::printf( "%sCmp:%8.04f Cpy:%9.04f\n", str |
36 | , double(order_perf_type::num_compare)/double(element_count) | |
37 | , double(order_perf_type::num_copy)/double(element_count)); | |
7c673cae FG |
38 | } |
39 | ||
40 | #include <boost/move/algo/adaptive_merge.hpp> | |
41 | #include <boost/move/algo/detail/merge.hpp> | |
42 | #include <boost/move/core.hpp> | |
43 | ||
44 | template<class T, class Compare> | |
92f5a8d4 | 45 | std::size_t generate_elements(boost::container::vector<T> &elements, std::size_t L, std::size_t NK, Compare comp) |
7c673cae | 46 | { |
92f5a8d4 TL |
47 | elements.resize(L); |
48 | boost::movelib::unique_ptr<std::size_t[]> key_reps(new std::size_t[NK ? NK : L]); | |
49 | ||
7c673cae | 50 | std::srand(0); |
92f5a8d4 TL |
51 | for (std::size_t i = 0; i < (NK ? NK : L); ++i) { |
52 | key_reps[i] = 0; | |
7c673cae | 53 | } |
92f5a8d4 TL |
54 | for (std::size_t i = 0; i < L; ++i) { |
55 | std::size_t key = NK ? (i % NK) : i; | |
56 | elements[i].key = key; | |
7c673cae | 57 | } |
92f5a8d4 TL |
58 | ::random_shuffle(elements.data(), elements.data() + L); |
59 | ::random_shuffle(elements.data(), elements.data() + L); | |
60 | ||
61 | for (std::size_t i = 0; i < L; ++i) { | |
7c673cae FG |
62 | elements[i].val = key_reps[elements[i].key]++; |
63 | } | |
92f5a8d4 TL |
64 | std::size_t split_count = L / 2; |
65 | std::stable_sort(elements.data(), elements.data() + split_count, comp); | |
66 | std::stable_sort(elements.data() + split_count, elements.data() + L, comp); | |
7c673cae FG |
67 | return split_count; |
68 | } | |
69 | ||
7c673cae FG |
70 | template<class T, class Compare> |
71 | void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) | |
72 | { | |
73 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); | |
1e59de90 | 74 | boost::movelib::adaptive_merge(elements, mid, last, comp, boost::move_detail::force_ptr<T*>(mem.get()), BufLen); |
7c673cae FG |
75 | } |
76 | ||
92f5a8d4 TL |
77 | template<class T, class Compare> |
78 | void std_like_adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) | |
79 | { | |
80 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); | |
1e59de90 | 81 | boost::movelib::merge_adaptive_ONlogN(elements, mid, last, comp, boost::move_detail::force_ptr<T*>(mem.get()), BufLen); |
92f5a8d4 TL |
82 | } |
83 | ||
7c673cae FG |
84 | enum AlgoType |
85 | { | |
86 | StdMerge, | |
92f5a8d4 TL |
87 | AdaptMerge, |
88 | SqrtHAdaptMerge, | |
89 | SqrtAdaptMerge, | |
90 | Sqrt2AdaptMerge, | |
91 | QuartAdaptMerge, | |
7c673cae | 92 | StdInplaceMerge, |
92f5a8d4 TL |
93 | StdSqrtHAdaptMerge, |
94 | StdSqrtAdaptMerge, | |
95 | StdSqrt2AdaptMerge, | |
96 | StdQuartAdaptMerge, | |
7c673cae FG |
97 | MaxMerge |
98 | }; | |
99 | ||
92f5a8d4 TL |
100 | const char *AlgoNames [] = { "StdMerge " |
101 | , "AdaptMerge " | |
102 | , "SqrtHAdaptMerge " | |
103 | , "SqrtAdaptMerge " | |
104 | , "Sqrt2AdaptMerge " | |
105 | , "QuartAdaptMerge " | |
106 | , "StdInplaceMerge " | |
107 | , "StdSqrtHAdaptMerge " | |
108 | , "StdSqrtAdaptMerge " | |
109 | , "StdSqrt2AdaptMerge " | |
110 | , "StdQuartAdaptMerge " | |
7c673cae FG |
111 | }; |
112 | ||
113 | BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge); | |
114 | ||
115 | template<class T> | |
92f5a8d4 | 116 | bool measure_algo(T *elements, std::size_t element_count, std::size_t split_pos, std::size_t alg, nanosecond_type &prev_clock) |
7c673cae | 117 | { |
7c673cae | 118 | std::printf("%s ", AlgoNames[alg]); |
b32b8144 FG |
119 | order_perf_type::num_compare=0; |
120 | order_perf_type::num_copy=0; | |
121 | order_perf_type::num_elements = element_count; | |
7c673cae FG |
122 | cpu_timer timer; |
123 | timer.resume(); | |
124 | switch(alg) | |
125 | { | |
126 | case StdMerge: | |
b32b8144 | 127 | std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
7c673cae | 128 | break; |
92f5a8d4 | 129 | case AdaptMerge: |
b32b8144 | 130 | boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
7c673cae | 131 | break; |
92f5a8d4 | 132 | case SqrtHAdaptMerge: |
b32b8144 | 133 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
134 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); |
135 | break; | |
92f5a8d4 | 136 | case SqrtAdaptMerge: |
b32b8144 | 137 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
138 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); |
139 | break; | |
92f5a8d4 | 140 | case Sqrt2AdaptMerge: |
b32b8144 | 141 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
142 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); |
143 | break; | |
92f5a8d4 | 144 | case QuartAdaptMerge: |
b32b8144 | 145 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
92f5a8d4 | 146 | , (element_count)/4+1); |
7c673cae FG |
147 | break; |
148 | case StdInplaceMerge: | |
b32b8144 | 149 | boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less()); |
7c673cae | 150 | break; |
92f5a8d4 TL |
151 | case StdSqrtHAdaptMerge: |
152 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() | |
153 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); | |
154 | break; | |
155 | case StdSqrtAdaptMerge: | |
156 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() | |
157 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); | |
158 | break; | |
159 | case StdSqrt2AdaptMerge: | |
160 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() | |
161 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); | |
162 | break; | |
163 | case StdQuartAdaptMerge: | |
164 | std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() | |
165 | , (element_count)/4+1); | |
166 | break; | |
7c673cae FG |
167 | } |
168 | timer.stop(); | |
169 | ||
b32b8144 | 170 | if(order_perf_type::num_elements == element_count){ |
7c673cae FG |
171 | std::printf(" Tmp Ok "); |
172 | } else{ | |
173 | std::printf(" Tmp KO "); | |
174 | } | |
175 | nanosecond_type new_clock = timer.elapsed().wall; | |
176 | ||
b32b8144 | 177 | //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument |
1e59de90 | 178 | std::printf("Cmp:%8.04f Cpy:%9.04f", double(order_perf_type::num_compare)/double(element_count), double(order_perf_type::num_copy)/double(element_count) ); |
7c673cae FG |
179 | |
180 | double time = double(new_clock); | |
181 | ||
182 | const char *units = "ns"; | |
183 | if(time >= 1000000000.0){ | |
184 | time /= 1000000000.0; | |
185 | units = " s"; | |
186 | } | |
187 | else if(time >= 1000000.0){ | |
188 | time /= 1000000.0; | |
189 | units = "ms"; | |
190 | } | |
191 | else if(time >= 1000.0){ | |
192 | time /= 1000.0; | |
193 | units = "us"; | |
194 | } | |
195 | ||
196 | std::printf(" %6.02f%s (%6.02f)\n" | |
197 | , time | |
198 | , units | |
199 | , prev_clock ? double(new_clock)/double(prev_clock): 1.0); | |
200 | prev_clock = new_clock; | |
201 | bool res = is_order_type_ordered(elements, element_count, true); | |
202 | return res; | |
203 | } | |
204 | ||
205 | template<class T> | |
206 | bool measure_all(std::size_t L, std::size_t NK) | |
207 | { | |
92f5a8d4 TL |
208 | boost::container::vector<T> original_elements, elements; |
209 | std::size_t split_pos = generate_elements(original_elements, L, NK, order_type_less()); | |
7c673cae FG |
210 | std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK); |
211 | ||
212 | nanosecond_type prev_clock = 0; | |
213 | nanosecond_type back_clock; | |
214 | bool res = true; | |
92f5a8d4 TL |
215 | |
216 | elements = original_elements; | |
217 | res = res && measure_algo(elements.data(), L, split_pos, StdMerge, prev_clock); | |
b32b8144 | 218 | back_clock = prev_clock; |
7c673cae | 219 | // |
92f5a8d4 TL |
220 | |
221 | prev_clock = back_clock; | |
222 | elements = original_elements; | |
223 | res = res && measure_algo(elements.data(), L, split_pos, QuartAdaptMerge, prev_clock); | |
224 | // | |
225 | prev_clock = back_clock; | |
226 | elements = original_elements; | |
227 | res = res && measure_algo(elements.data(), L, split_pos, StdQuartAdaptMerge, prev_clock); | |
228 | // | |
7c673cae | 229 | prev_clock = back_clock; |
92f5a8d4 TL |
230 | elements = original_elements; |
231 | res = res && measure_algo(elements.data(), L, split_pos, Sqrt2AdaptMerge, prev_clock); | |
7c673cae FG |
232 | // |
233 | prev_clock = back_clock; | |
92f5a8d4 TL |
234 | elements = original_elements; |
235 | res = res && measure_algo(elements.data(), L, split_pos, StdSqrt2AdaptMerge, prev_clock); | |
7c673cae FG |
236 | // |
237 | prev_clock = back_clock; | |
92f5a8d4 TL |
238 | elements = original_elements; |
239 | res = res && measure_algo(elements.data(), L, split_pos, SqrtAdaptMerge, prev_clock); | |
7c673cae FG |
240 | // |
241 | prev_clock = back_clock; | |
92f5a8d4 TL |
242 | elements = original_elements; |
243 | res = res && measure_algo(elements.data(), L, split_pos, StdSqrtAdaptMerge, prev_clock); | |
7c673cae FG |
244 | // |
245 | prev_clock = back_clock; | |
92f5a8d4 TL |
246 | elements = original_elements; |
247 | res = res && measure_algo(elements.data(), L, split_pos, SqrtHAdaptMerge, prev_clock); | |
7c673cae FG |
248 | // |
249 | prev_clock = back_clock; | |
92f5a8d4 TL |
250 | elements = original_elements; |
251 | res = res && measure_algo(elements.data(), L, split_pos, StdSqrtHAdaptMerge, prev_clock); | |
7c673cae | 252 | // |
92f5a8d4 TL |
253 | prev_clock = back_clock; |
254 | elements = original_elements; | |
255 | res = res && measure_algo(elements.data(), L, split_pos, AdaptMerge, prev_clock); | |
256 | // | |
257 | prev_clock = back_clock; | |
258 | elements = original_elements; | |
259 | res = res && measure_algo(elements.data(), L, split_pos,StdInplaceMerge, prev_clock); | |
260 | // | |
20effc67 TL |
261 | if (!res) |
262 | std::abort(); | |
7c673cae FG |
263 | return res; |
264 | } | |
265 | ||
266 | //Undef it to run the long test | |
267 | #define BENCH_MERGE_SHORT | |
268 | #define BENCH_SORT_UNIQUE_VALUES | |
269 | ||
270 | int main() | |
271 | { | |
7c673cae | 272 | #ifndef BENCH_SORT_UNIQUE_VALUES |
b32b8144 | 273 | measure_all<order_perf_type>(101,1); |
92f5a8d4 | 274 | measure_all<order_perf_type>(101,5); |
b32b8144 FG |
275 | measure_all<order_perf_type>(101,7); |
276 | measure_all<order_perf_type>(101,31); | |
7c673cae | 277 | #endif |
b32b8144 | 278 | measure_all<order_perf_type>(101,0); |
7c673cae FG |
279 | |
280 | // | |
281 | #ifndef BENCH_SORT_UNIQUE_VALUES | |
b32b8144 FG |
282 | measure_all<order_perf_type>(1101,1); |
283 | measure_all<order_perf_type>(1001,7); | |
284 | measure_all<order_perf_type>(1001,31); | |
285 | measure_all<order_perf_type>(1001,127); | |
286 | measure_all<order_perf_type>(1001,511); | |
7c673cae | 287 | #endif |
b32b8144 | 288 | measure_all<order_perf_type>(1001,0); |
92f5a8d4 | 289 | |
7c673cae | 290 | // |
7c673cae | 291 | #ifndef BENCH_SORT_UNIQUE_VALUES |
b32b8144 FG |
292 | measure_all<order_perf_type>(10001,65); |
293 | measure_all<order_perf_type>(10001,255); | |
294 | measure_all<order_perf_type>(10001,1023); | |
295 | measure_all<order_perf_type>(10001,4095); | |
7c673cae | 296 | #endif |
b32b8144 | 297 | measure_all<order_perf_type>(10001,0); |
7c673cae FG |
298 | |
299 | // | |
92f5a8d4 | 300 | #if defined(NDEBUG) |
7c673cae | 301 | #ifndef BENCH_SORT_UNIQUE_VALUES |
b32b8144 FG |
302 | measure_all<order_perf_type>(100001,511); |
303 | measure_all<order_perf_type>(100001,2047); | |
304 | measure_all<order_perf_type>(100001,8191); | |
305 | measure_all<order_perf_type>(100001,32767); | |
7c673cae | 306 | #endif |
b32b8144 | 307 | measure_all<order_perf_type>(100001,0); |
7c673cae FG |
308 | |
309 | // | |
92f5a8d4 | 310 | #if !defined(BENCH_MERGE_SHORT) |
7c673cae | 311 | #ifndef BENCH_SORT_UNIQUE_VALUES |
92f5a8d4 TL |
312 | measure_all<order_perf_type>(1000001, 8192); |
313 | measure_all<order_perf_type>(1000001, 32768); | |
314 | measure_all<order_perf_type>(1000001, 131072); | |
315 | measure_all<order_perf_type>(1000001, 524288); | |
7c673cae | 316 | #endif |
b32b8144 | 317 | measure_all<order_perf_type>(1000001,0); |
7c673cae | 318 | |
92f5a8d4 TL |
319 | #ifndef BENCH_SORT_UNIQUE_VALUES |
320 | measure_all<order_perf_type>(10000001, 65536); | |
321 | measure_all<order_perf_type>(10000001, 262144); | |
322 | measure_all<order_perf_type>(10000001, 1048576); | |
323 | measure_all<order_perf_type>(10000001, 4194304); | |
324 | #endif | |
325 | measure_all<order_perf_type>(10000001,0); | |
7c673cae | 326 | #endif //#ifndef BENCH_MERGE_SHORT |
92f5a8d4 | 327 | #endif //#ifdef NDEBUG |
7c673cae FG |
328 | |
329 | return 0; | |
330 | } | |
331 |