]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ////////////////////////////////////////////////////////////////////////////// |
2 | // | |
3 | // (C) Copyright Ion Gaztanaga 2015-2016. | |
4 | // Distributed under the Boost Software License, Version 1.0. | |
5 | // (See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt) | |
7 | // | |
8 | // See http://www.boost.org/libs/move for documentation. | |
9 | // | |
10 | ////////////////////////////////////////////////////////////////////////////// | |
11 | ||
12 | #include <algorithm> //std::inplace_merge | |
13 | #include <cstdio> //std::printf | |
14 | #include <iostream> //std::cout | |
15 | ||
16 | #include <boost/config.hpp> | |
17 | ||
18 | #include <boost/move/unique_ptr.hpp> | |
19 | #include <boost/timer/timer.hpp> | |
20 | ||
21 | #include "order_type.hpp" | |
b32b8144 | 22 | #include "random_shuffle.hpp" |
7c673cae FG |
23 | |
24 | using boost::timer::cpu_timer; | |
25 | using boost::timer::cpu_times; | |
26 | using boost::timer::nanosecond_type; | |
27 | ||
28 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS | |
11fdf7f2 | 29 | //#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2 |
7c673cae FG |
30 | void print_stats(const char *str, boost::ulong_long_type element_count) |
31 | { | |
b32b8144 | 32 | std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); |
7c673cae FG |
33 | } |
34 | ||
35 | #include <boost/move/algo/adaptive_merge.hpp> | |
36 | #include <boost/move/algo/detail/merge.hpp> | |
37 | #include <boost/move/core.hpp> | |
38 | ||
39 | template<class T, class Compare> | |
40 | std::size_t generate_elements(T elements[], std::size_t element_count, std::size_t key_reps[], std::size_t key_len, Compare comp) | |
41 | { | |
42 | std::srand(0); | |
43 | for(std::size_t i = 0; i < (key_len ? key_len : element_count); ++i){ | |
44 | key_reps[i]=0; | |
45 | } | |
46 | for(std::size_t i=0; i < element_count; ++i){ | |
47 | std::size_t key = key_len ? (i % key_len) : i; | |
48 | elements[i].key=key; | |
49 | } | |
b32b8144 FG |
50 | ::random_shuffle(elements, elements + element_count); |
51 | ::random_shuffle(elements, elements + element_count); | |
52 | ::random_shuffle(elements, elements + element_count); | |
7c673cae FG |
53 | for(std::size_t i = 0; i < element_count; ++i){ |
54 | elements[i].val = key_reps[elements[i].key]++; | |
55 | } | |
56 | std::size_t split_count = element_count/2; | |
57 | std::stable_sort(elements, elements+split_count, comp); | |
58 | std::stable_sort(elements+split_count, elements+element_count, comp); | |
59 | return split_count; | |
60 | } | |
61 | ||
62 | ||
63 | ||
64 | template<class T, class Compare> | |
65 | void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen) | |
66 | { | |
67 | boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]); | |
68 | boost::movelib::adaptive_merge(elements, mid, last, comp, reinterpret_cast<T*>(mem.get()), BufLen); | |
69 | } | |
70 | ||
// Merge variants exercised by measure_algo. The enumerators are also used
// as indices into AlgoNames, and MaxMerge is the number of variants.
enum AlgoType
{
   StdMerge             = 0,  //std::inplace_merge
   AdaptiveMerge        = 1,  //adaptive_merge without an external buffer
   SqrtHAdaptiveMerge   = 2,  //adaptive_merge, buffer of ceil_sqrt_multiple(N)/2+1 elements
   SqrtAdaptiveMerge    = 3,  //adaptive_merge, buffer of ceil_sqrt_multiple(N) elements
   Sqrt2AdaptiveMerge   = 4,  //adaptive_merge, buffer of 2*ceil_sqrt_multiple(N) elements
   QuartAdaptiveMerge   = 5,  //adaptive_merge, buffer of (N-1)/4+1 elements
   StdInplaceMerge      = 6,  //boost::movelib::merge_bufferless_ONlogN
   MaxMerge             = 7   //Count of the variants above, not a variant itself
};
82 | ||
// Labels printed at the start of each report line, indexed by AlgoType.
// One entry per enumerator below MaxMerge, in declaration order.
const char *AlgoNames [] = {
   "StdMerge ",
   "AdaptMerge ",
   "SqrtHAdaptMerge ",
   "SqrtAdaptMerge ",
   "Sqrt2AdaptMerge ",
   "QuartAdaptMerge ",
   "StdInplaceMerge "
};
91 | ||
92 | BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge); | |
93 | ||
94 | template<class T> | |
95 | bool measure_algo(T *elements, std::size_t key_reps[], std::size_t element_count, std::size_t key_len, unsigned alg, nanosecond_type &prev_clock) | |
96 | { | |
b32b8144 | 97 | std::size_t const split_pos = generate_elements(elements, element_count, key_reps, key_len, order_type_less()); |
7c673cae FG |
98 | |
99 | std::printf("%s ", AlgoNames[alg]); | |
b32b8144 FG |
100 | order_perf_type::num_compare=0; |
101 | order_perf_type::num_copy=0; | |
102 | order_perf_type::num_elements = element_count; | |
7c673cae FG |
103 | cpu_timer timer; |
104 | timer.resume(); | |
105 | switch(alg) | |
106 | { | |
107 | case StdMerge: | |
b32b8144 | 108 | std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
7c673cae FG |
109 | break; |
110 | case AdaptiveMerge: | |
b32b8144 | 111 | boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less()); |
7c673cae FG |
112 | break; |
113 | case SqrtHAdaptiveMerge: | |
b32b8144 | 114 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
115 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1); |
116 | break; | |
117 | case SqrtAdaptiveMerge: | |
b32b8144 | 118 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
119 | , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); |
120 | break; | |
121 | case Sqrt2AdaptiveMerge: | |
b32b8144 | 122 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
123 | , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)); |
124 | break; | |
125 | case QuartAdaptiveMerge: | |
b32b8144 | 126 | adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less() |
7c673cae FG |
127 | , (element_count-1)/4+1); |
128 | break; | |
129 | case StdInplaceMerge: | |
b32b8144 | 130 | boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less()); |
7c673cae FG |
131 | break; |
132 | } | |
133 | timer.stop(); | |
134 | ||
b32b8144 | 135 | if(order_perf_type::num_elements == element_count){ |
7c673cae FG |
136 | std::printf(" Tmp Ok "); |
137 | } else{ | |
138 | std::printf(" Tmp KO "); | |
139 | } | |
140 | nanosecond_type new_clock = timer.elapsed().wall; | |
141 | ||
b32b8144 FG |
142 | //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument |
143 | std::printf("Cmp:%8.04f Cpy:%9.04f", double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count ); | |
7c673cae FG |
144 | |
145 | double time = double(new_clock); | |
146 | ||
147 | const char *units = "ns"; | |
148 | if(time >= 1000000000.0){ | |
149 | time /= 1000000000.0; | |
150 | units = " s"; | |
151 | } | |
152 | else if(time >= 1000000.0){ | |
153 | time /= 1000000.0; | |
154 | units = "ms"; | |
155 | } | |
156 | else if(time >= 1000.0){ | |
157 | time /= 1000.0; | |
158 | units = "us"; | |
159 | } | |
160 | ||
161 | std::printf(" %6.02f%s (%6.02f)\n" | |
162 | , time | |
163 | , units | |
164 | , prev_clock ? double(new_clock)/double(prev_clock): 1.0); | |
165 | prev_clock = new_clock; | |
166 | bool res = is_order_type_ordered(elements, element_count, true); | |
167 | return res; | |
168 | } | |
169 | ||
170 | template<class T> | |
171 | bool measure_all(std::size_t L, std::size_t NK) | |
172 | { | |
173 | boost::movelib::unique_ptr<T[]> pdata(new T[L]); | |
174 | boost::movelib::unique_ptr<std::size_t[]> pkeys(new std::size_t[NK ? NK : L]); | |
175 | T *A = pdata.get(); | |
176 | std::size_t *Keys = pkeys.get(); | |
177 | std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK); | |
178 | ||
179 | nanosecond_type prev_clock = 0; | |
180 | nanosecond_type back_clock; | |
181 | bool res = true; | |
182 | res = res && measure_algo(A,Keys,L,NK,StdMerge, prev_clock); | |
b32b8144 | 183 | back_clock = prev_clock; |
7c673cae FG |
184 | // |
185 | prev_clock = back_clock; | |
b32b8144 | 186 | res = res && measure_algo(A,Keys,L,NK,QuartAdaptiveMerge, prev_clock); |
7c673cae FG |
187 | // |
188 | prev_clock = back_clock; | |
189 | res = res && measure_algo(A,Keys,L,NK,Sqrt2AdaptiveMerge, prev_clock); | |
190 | // | |
191 | prev_clock = back_clock; | |
192 | res = res && measure_algo(A,Keys,L,NK,SqrtAdaptiveMerge, prev_clock); | |
193 | // | |
194 | prev_clock = back_clock; | |
195 | res = res && measure_algo(A,Keys,L,NK,SqrtHAdaptiveMerge, prev_clock); | |
196 | // | |
197 | prev_clock = back_clock; | |
198 | res = res && measure_algo(A,Keys,L,NK,AdaptiveMerge, prev_clock); | |
199 | // | |
200 | prev_clock = back_clock; | |
201 | res = res && measure_algo(A,Keys,L,NK,StdInplaceMerge, prev_clock); | |
202 | // | |
203 | if(!res) | |
204 | throw int(0); | |
205 | return res; | |
206 | } | |
207 | ||
208 | //Undef it to run the long test | |
209 | #define BENCH_MERGE_SHORT | |
210 | #define BENCH_SORT_UNIQUE_VALUES | |
211 | ||
212 | int main() | |
213 | { | |
214 | try{ | |
215 | #ifndef BENCH_SORT_UNIQUE_VALUES | |
b32b8144 FG |
216 | measure_all<order_perf_type>(101,1); |
217 | measure_all<order_perf_type>(101,7); | |
218 | measure_all<order_perf_type>(101,31); | |
7c673cae | 219 | #endif |
b32b8144 | 220 | measure_all<order_perf_type>(101,0); |
7c673cae FG |
221 | |
222 | // | |
223 | #ifndef BENCH_SORT_UNIQUE_VALUES | |
b32b8144 FG |
224 | measure_all<order_perf_type>(1101,1); |
225 | measure_all<order_perf_type>(1001,7); | |
226 | measure_all<order_perf_type>(1001,31); | |
227 | measure_all<order_perf_type>(1001,127); | |
228 | measure_all<order_perf_type>(1001,511); | |
7c673cae | 229 | #endif |
b32b8144 | 230 | measure_all<order_perf_type>(1001,0); |
7c673cae FG |
231 | // |
232 | #ifndef BENCH_MERGE_SHORT | |
233 | #ifndef BENCH_SORT_UNIQUE_VALUES | |
b32b8144 FG |
234 | measure_all<order_perf_type>(10001,65); |
235 | measure_all<order_perf_type>(10001,255); | |
236 | measure_all<order_perf_type>(10001,1023); | |
237 | measure_all<order_perf_type>(10001,4095); | |
7c673cae | 238 | #endif |
b32b8144 | 239 | measure_all<order_perf_type>(10001,0); |
7c673cae FG |
240 | |
241 | // | |
242 | #ifndef BENCH_SORT_UNIQUE_VALUES | |
b32b8144 FG |
243 | measure_all<order_perf_type>(100001,511); |
244 | measure_all<order_perf_type>(100001,2047); | |
245 | measure_all<order_perf_type>(100001,8191); | |
246 | measure_all<order_perf_type>(100001,32767); | |
7c673cae | 247 | #endif |
b32b8144 | 248 | measure_all<order_perf_type>(100001,0); |
7c673cae FG |
249 | |
250 | // | |
251 | #ifdef NDEBUG | |
252 | #ifndef BENCH_SORT_UNIQUE_VALUES | |
b32b8144 FG |
253 | measure_all<order_perf_type>(1000001,1); |
254 | measure_all<order_perf_type>(1000001,1024); | |
255 | measure_all<order_perf_type>(1000001,32768); | |
256 | measure_all<order_perf_type>(1000001,524287); | |
7c673cae | 257 | #endif |
b32b8144 FG |
258 | measure_all<order_perf_type>(1000001,0); |
259 | measure_all<order_perf_type>(3000001,0); | |
11fdf7f2 | 260 | measure_all<order_perf_type>(5000001,0); |
7c673cae FG |
261 | #endif //NDEBUG |
262 | ||
263 | #endif //#ifndef BENCH_MERGE_SHORT | |
264 | ||
b32b8144 | 265 | //measure_all<order_perf_type>(100000001,0); |
7c673cae FG |
266 | } |
267 | catch(...) | |
268 | { | |
269 | return 1; | |
270 | } | |
271 | ||
272 | return 0; | |
273 | } | |
274 |