]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /********************************************************************** |
2 | Copyright(c) 2011-2015 Intel Corporation All rights reserved. | |
3 | ||
4 | Redistribution and use in source and binary forms, with or without | |
f91f0fd5 | 5 | modification, are permitted provided that the following conditions |
7c673cae FG |
6 | are met: |
7 | * Redistributions of source code must retain the above copyright | |
8 | notice, this list of conditions and the following disclaimer. | |
9 | * Redistributions in binary form must reproduce the above copyright | |
10 | notice, this list of conditions and the following disclaimer in | |
11 | the documentation and/or other materials provided with the | |
12 | distribution. | |
13 | * Neither the name of Intel Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | **********************************************************************/ | |
29 | ||
7c673cae FG |
30 | #ifndef _TEST_H |
31 | #define _TEST_H | |
32 | ||
33 | #ifdef __cplusplus | |
34 | extern "C" { | |
35 | #endif | |
36 | ||
f91f0fd5 TL |
37 | #include <stdio.h> |
38 | #include <stdint.h> | |
39 | ||
40 | #ifdef _MSC_VER | |
41 | # define inline __inline | |
42 | #endif | |
43 | ||
44 | /* Decide whether to use benchmark time as an approximation or a minimum. Fewer | |
45 | * calls to the timer are required for the approximation case.*/ | |
46 | #define BENCHMARK_MIN_TIME 0 | |
47 | #define BENCHMARK_APPROX_TIME 1 | |
48 | #ifndef BENCHMARK_TYPE | |
49 | #define BENCHMARK_TYPE BENCHMARK_MIN_TIME | |
50 | #endif | |
51 | ||
52 | #ifdef USE_RDTSC | |
53 | /* The use of rdtsc is nuanced. On many processors it corresponds to a | |
54 | * standardized clock source. To obtain a meaningful result it may be | |
55 | * necessary to fix the CPU clock to match the rdtsc tick rate. | |
56 | */ | |
57 | # include <inttypes.h> | |
58 | # include <x86intrin.h> | |
59 | # define USE_CYCLES | |
60 | #else | |
61 | # include <time.h> | |
62 | #define USE_SECONDS | |
63 | #endif | |
7c673cae | 64 | |
f91f0fd5 TL |
65 | #ifdef USE_RDTSC |
66 | #ifndef BENCHMARK_TIME | |
67 | # define BENCHMARK_TIME 6 | |
68 | #endif | |
69 | # define GHZ 1000000000 | |
70 | # define UNIT_SCALE (GHZ) | |
71 | # define CALLIBRATE_TIME (UNIT_SCALE / 2) | |
/*
 * Sample the timer used by the USE_RDTSC configuration.
 *
 * Returns the value produced by the __rdtscp() intrinsic, converted to
 * long long.
 */
static inline long long get_time(void)
{
        unsigned int aux; /* output argument required by __rdtscp(); ignored */
        long long ticks = __rdtscp(&aux);

        return ticks;
}
76 | ||
/*
 * Resolution of the timer in the USE_RDTSC configuration.
 *
 * Always reports 1, i.e. a single tick of the value returned by get_time().
 */
static inline long long get_res(void)
{
        const long long tick_resolution = 1;

        return tick_resolution;
}
80 | #else | |
81 | #ifndef BENCHMARK_TIME | |
82 | # define BENCHMARK_TIME 3 | |
83 | #endif | |
84 | #ifdef _MSC_VER | |
85 | #define UNIT_SCALE get_res() | |
86 | #define CALLIBRATE_TIME (UNIT_SCALE / 4) | |
87 | static inline long long get_time(void) { | |
88 | long long ret = 0; | |
89 | QueryPerformanceCounter(&ret); | |
90 | return ret; | |
91 | } | |
7c673cae | 92 | |
f91f0fd5 TL |
93 | static inline long long get_res(void) { |
94 | long long ret = 0; | |
95 | QueryPerformanceFrequency(&ret); | |
96 | return ret; | |
97 | } | |
98 | #else | |
99 | # define NANO_SCALE 1000000000 | |
100 | # define UNIT_SCALE NANO_SCALE | |
101 | # define CALLIBRATE_TIME (UNIT_SCALE / 4) | |
102 | #ifdef __FreeBSD__ | |
103 | # define CLOCK_ID CLOCK_MONOTONIC_PRECISE | |
104 | #else | |
105 | # define CLOCK_ID CLOCK_MONOTONIC | |
106 | #endif | |
107 | ||
108 | static inline long long get_time(void) { | |
109 | struct timespec time; | |
110 | long long nano_total; | |
111 | clock_gettime(CLOCK_ID, &time); | |
112 | nano_total = time.tv_sec; | |
113 | nano_total *= NANO_SCALE; | |
114 | nano_total += time.tv_nsec; | |
115 | return nano_total; | |
116 | } | |
117 | ||
118 | static inline long long get_res(void) { | |
119 | struct timespec time; | |
120 | long long nano_total; | |
121 | clock_getres(CLOCK_ID, &time); | |
122 | nano_total = time.tv_sec; | |
123 | nano_total *= NANO_SCALE; | |
124 | nano_total += time.tv_nsec; | |
125 | return nano_total; | |
126 | } | |
127 | #endif | |
128 | #endif | |
/*
 * Timing state shared by the perf_* helpers and the benchmark macros.
 * All time values are in the units returned by get_time().
 */
struct perf {
        long long start;      /* get_time() sample taken by perf_continue(); also
                               * advanced to 'stop' by perf_pause() */
        long long stop;       /* get_time() sample taken by the last perf_pause() */
        long long run_total;  /* elapsed time accumulated by perf_pause() */
        long long iterations; /* call count maintained by the benchmark macros;
                               * not touched by perf_init() */
};
135 | ||
f91f0fd5 TL |
136 | static inline void perf_init(struct perf *p) { |
137 | p->start = 0; | |
138 | p->stop = 0; | |
139 | p->run_total = 0; | |
140 | } | |
7c673cae | 141 | |
f91f0fd5 TL |
142 | static inline void perf_continue(struct perf *p) { |
143 | p->start = get_time(); | |
7c673cae | 144 | } |
f91f0fd5 TL |
145 | |
146 | static inline void perf_pause(struct perf *p) { | |
147 | p->stop = get_time(); | |
148 | p->run_total = p->run_total + p->stop - p->start; | |
149 | p->start = p->stop; | |
7c673cae FG |
150 | } |
151 | ||
f91f0fd5 TL |
/*
 * Start a fresh measurement: clear the timing fields with perf_init() and
 * then open a timed interval with perf_continue().
 */
static inline void perf_start(struct perf *p)
{
        perf_init(p);     /* zero start/stop/run_total */
        perf_continue(p); /* stamp the interval start */
}
7c673cae | 156 | |
f91f0fd5 TL |
/*
 * Finish a measurement. This is simply perf_pause(): the elapsed time since
 * the interval start is folded into p->run_total.
 */
static inline void perf_stop(struct perf *p)
{
        perf_pause(p);
}
160 | ||
161 | static inline double get_time_elapsed(struct perf *p) { | |
162 | return 1.0 * p->run_total / UNIT_SCALE; | |
163 | } | |
164 | ||
165 | static inline long long get_base_elapsed(struct perf *p) { | |
166 | return p->run_total; | |
167 | } | |
168 | ||
/*
 * Estimate how many iterations are needed to fill a target amount of time.
 *
 * p     - measurement whose elapsed time (get_base_elapsed()) covered 'runs'
 *         iterations
 * runs  - number of iterations that measurement contained
 * total - target duration, in the same raw timer units
 *
 * Returns ceil(total * runs / elapsed). When the elapsed time is not
 * positive, the timer resolution from get_res() is used as the divisor
 * instead.
 */
static inline unsigned long long estimate_perf_iterations(struct perf *p,
                                                          unsigned long long runs,
                                                          unsigned long long total)
{
        const unsigned long long scaled = total * runs;
        long long divisor = get_base_elapsed(p);

        if (divisor <= 0)
                divisor = get_res();

        /* round up so the estimate never undershoots the target */
        return (scaled + divisor - 1) / divisor;
}
178 | ||
/*
 * CALLIBRATE(PERF, FUNC_CALL)
 *
 * Find an iteration count for FUNC_CALL whose timed run lasts at least
 * CALLIBRATE_TIME.
 *
 * FUNC_CALL is first run and timed once. While the last measurement is
 * shorter than CALLIBRATE_TIME, a new iteration count is estimated from it
 * (aiming at 2 * CALLIBRATE_TIME) and that many calls are timed from
 * scratch. The final count is stored in (PERF)->iterations and the matching
 * elapsed time is left in PERF.
 *
 * PERF is a struct perf pointer. FUNC_CALL is pasted in as a statement and
 * is executed many times. The macro expands to a plain brace block that
 * declares the locals _i and _iter.
 */
#define CALLIBRATE(PERF, FUNC_CALL) { \
        unsigned long long _i, _iter = 1; \
        perf_start(PERF); \
        FUNC_CALL; \
        perf_pause(PERF); \
        \
        while (get_base_elapsed(PERF) < CALLIBRATE_TIME) { \
                _iter = estimate_perf_iterations(PERF, _iter, \
                                                2 * CALLIBRATE_TIME); \
                perf_start(PERF); \
                for (_i = 0; _i < _iter; _i++) { \
                        FUNC_CALL; \
                } \
                perf_stop(PERF); \
        } \
        (PERF)->iterations=_iter; \
}
196 | ||
/*
 * PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL)
 *
 * Time FUNC_CALL for roughly RUN_TIME, using the iteration count and elapsed
 * time already stored in PERF (as left by CALLIBRATE) to size the run.
 *
 * RUN_TIME is multiplied by UNIT_SCALE to turn it into raw timer units. The
 * number of calls expected to fill that time is estimated and run as one
 * timed batch. If BENCHMARK_TYPE is BENCHMARK_MIN_TIME and the batch finished
 * early, a second batch is estimated to cover the remaining time plus a
 * margin of UNIT_SCALE / 16; it is timed with perf_continue(), so its time
 * is added to the first batch rather than replacing it.
 *
 * On exit (PERF)->iterations holds the total number of calls made and PERF
 * holds the total elapsed time. FUNC_CALL is pasted in as a statement and is
 * executed many times. The macro expands to a plain brace block that
 * declares the locals _i, _iter and _run_total.
 */
#define PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL) { \
        unsigned long long _i, _iter = (PERF)->iterations; \
        unsigned long long _run_total = RUN_TIME; \
        _run_total *= UNIT_SCALE; \
        _iter = estimate_perf_iterations(PERF, _iter, _run_total);\
        (PERF)->iterations = 0; \
        perf_start(PERF); \
        for (_i = 0; _i < _iter; _i++) { \
                FUNC_CALL; \
        } \
        perf_pause(PERF); \
        (PERF)->iterations += _iter; \
        \
        if(get_base_elapsed(PERF) < _run_total && \
           BENCHMARK_TYPE == BENCHMARK_MIN_TIME) { \
                _iter = estimate_perf_iterations(PERF, _iter, \
                        _run_total - get_base_elapsed(PERF) + \
                        (UNIT_SCALE / 16)); \
                perf_continue(PERF); \
                for (_i = 0; _i < _iter; _i++) { \
                        FUNC_CALL; \
                } \
                perf_pause(PERF); \
                (PERF)->iterations += _iter; \
        } \
}
223 | ||
/*
 * BENCHMARK(PERF, RUN_TIME, FUNC_CALL)
 *
 * Top-level benchmark entry point.
 *
 * When RUN_TIME is greater than zero, FUNC_CALL is calibrated with
 * CALLIBRATE and then measured with PERFORMANCE_TEST. Otherwise FUNC_CALL is
 * timed exactly once and (PERF)->iterations is set to 1.
 *
 * PERF is a struct perf pointer that receives the iteration count and the
 * elapsed time. RUN_TIME is expanded more than once. FUNC_CALL is pasted in
 * as a statement. The macro expands to a plain brace block.
 */
#define BENCHMARK(PERF, RUN_TIME, FUNC_CALL) { \
        if((RUN_TIME) > 0) { \
                CALLIBRATE(PERF, FUNC_CALL); \
                PERFORMANCE_TEST(PERF, RUN_TIME, FUNC_CALL); \
                \
        } else { \
                (PERF)->iterations = 1; \
                perf_start(PERF); \
                FUNC_CALL; \
                perf_stop(PERF); \
        } \
}
236 | ||
237 | #ifdef USE_CYCLES | |
238 | static inline void perf_print(struct perf p, long long unit_count) { | |
239 | long long total_units = p.iterations * unit_count; | |
240 | ||
241 | printf("runtime = %10lld ticks", get_base_elapsed(&p)); | |
242 | if (total_units != 0) { | |
243 | printf(", bandwidth %lld MB in %.4f GC = %.2f ticks/byte", | |
244 | total_units / (1000000), get_time_elapsed(&p), | |
245 | get_base_elapsed(&p) / (double)total_units); | |
246 | } | |
247 | printf("\n"); | |
248 | } | |
7c673cae | 249 | #else |
f91f0fd5 TL |
250 | static inline void perf_print(struct perf p, double unit_count) { |
251 | long long total_units = p.iterations * unit_count; | |
252 | long long usecs = (long long)(get_time_elapsed(&p) * 1000000); | |
253 | ||
254 | printf("runtime = %10lld usecs", usecs); | |
255 | if (total_units != 0) { | |
256 | printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s", | |
257 | total_units / (1000000), get_time_elapsed(&p), | |
258 | ((double)total_units) / (1000000 * get_time_elapsed(&p))); | |
7c673cae | 259 | } |
f91f0fd5 | 260 | printf("\n"); |
7c673cae | 261 | } |
f91f0fd5 | 262 | #endif |
7c673cae | 263 | |
f91f0fd5 TL |
/*
 * Return the size in bytes of the file behind fp, leaving the stream's
 * current position where it was.
 *
 * The size is found by seeking to the end of the stream and asking for the
 * offset with the 64-bit tell function (ftello, or _ftelli64 on Windows).
 * This avoids reading the offset out of an fpos_t, which is an opaque type
 * whose layout is not specified.
 *
 * Returns 0 if the current position cannot be read, the seek to the end
 * fails, or the end offset cannot be read.
 */
static inline uint64_t get_filesize(FILE *fp)
{
        long long saved_pos; /* position to restore before returning */
        long long end_pos;   /* offset of end-of-file, i.e. the size */

#if defined(_WIN32) || defined(_WIN64)
        saved_pos = _ftelli64(fp);
        if (saved_pos < 0)
                return 0;
        if (_fseeki64(fp, 0, SEEK_END) != 0)
                return 0;
        end_pos = _ftelli64(fp);
        _fseeki64(fp, saved_pos, SEEK_SET); /* restore position */
#else
        saved_pos = ftello(fp);
        if (saved_pos < 0)
                return 0;
        if (fseeko(fp, 0, SEEK_END) != 0)
                return 0;
        end_pos = ftello(fp);
        fseeko(fp, saved_pos, SEEK_SET); /* restore position */
#endif

        return end_pos < 0 ? 0 : (uint64_t)end_pos;
}
7c673cae FG |
280 | |
281 | #ifdef __cplusplus | |
282 | } | |
283 | #endif | |
284 | ||
285 | #endif // _TEST_H |