]>
Commit | Line | Data |
---|---|---|
bf3afd5f EC |
1 | /* |
2 | * qdist.c - QEMU helpers for handling frequency distributions of data. | |
3 | * | |
4 | * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> | |
5 | * | |
6 | * License: GNU GPL, version 2 or later. | |
7 | * See the COPYING file in the top-level directory. | |
8 | */ | |
9 | #include "qemu/qdist.h" | |
10 | ||
11 | #include <math.h> | |
12 | #ifndef NAN | |
13 | #define NAN (0.0 / 0.0) | |
14 | #endif | |
15 | ||
16 | void qdist_init(struct qdist *dist) | |
17 | { | |
18 | dist->entries = g_malloc(sizeof(*dist->entries)); | |
19 | dist->size = 1; | |
20 | dist->n = 0; | |
21 | } | |
22 | ||
23 | void qdist_destroy(struct qdist *dist) | |
24 | { | |
25 | g_free(dist->entries); | |
26 | } | |
27 | ||
28 | static inline int qdist_cmp_double(double a, double b) | |
29 | { | |
30 | if (a > b) { | |
31 | return 1; | |
32 | } else if (a < b) { | |
33 | return -1; | |
34 | } | |
35 | return 0; | |
36 | } | |
37 | ||
38 | static int qdist_cmp(const void *ap, const void *bp) | |
39 | { | |
40 | const struct qdist_entry *a = ap; | |
41 | const struct qdist_entry *b = bp; | |
42 | ||
43 | return qdist_cmp_double(a->x, b->x); | |
44 | } | |
45 | ||
46 | void qdist_add(struct qdist *dist, double x, long count) | |
47 | { | |
48 | struct qdist_entry *entry = NULL; | |
49 | ||
50 | if (dist->n) { | |
51 | struct qdist_entry e; | |
52 | ||
53 | e.x = x; | |
54 | entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp); | |
55 | } | |
56 | ||
57 | if (entry) { | |
58 | entry->count += count; | |
59 | return; | |
60 | } | |
61 | ||
62 | if (unlikely(dist->n == dist->size)) { | |
63 | dist->size *= 2; | |
64 | dist->entries = g_realloc(dist->entries, | |
65 | sizeof(*dist->entries) * (dist->size)); | |
66 | } | |
67 | dist->n++; | |
68 | entry = &dist->entries[dist->n - 1]; | |
69 | entry->x = x; | |
70 | entry->count = count; | |
71 | qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp); | |
72 | } | |
73 | ||
74 | void qdist_inc(struct qdist *dist, double x) | |
75 | { | |
76 | qdist_add(dist, x, 1); | |
77 | } | |
78 | ||
79 | /* | |
80 | * Unicode for block elements. See: | |
81 | * https://en.wikipedia.org/wiki/Block_Elements | |
82 | */ | |
83 | static const gunichar qdist_blocks[] = { | |
84 | 0x2581, | |
85 | 0x2582, | |
86 | 0x2583, | |
87 | 0x2584, | |
88 | 0x2585, | |
89 | 0x2586, | |
90 | 0x2587, | |
91 | 0x2588 | |
92 | }; | |
93 | ||
94 | #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks) | |
95 | ||
96 | /* | |
97 | * Print a distribution into a string. | |
98 | * | |
99 | * This function assumes that appropriate binning has been done on the input; | |
100 | * see qdist_bin__internal() and qdist_pr_plain(). | |
101 | * | |
102 | * Callers must free the returned string with g_free(). | |
103 | */ | |
104 | static char *qdist_pr_internal(const struct qdist *dist) | |
105 | { | |
106 | double min, max; | |
107 | GString *s = g_string_new(""); | |
108 | size_t i; | |
109 | ||
110 | /* if only one entry, its printout will be either full or empty */ | |
111 | if (dist->n == 1) { | |
112 | if (dist->entries[0].count) { | |
113 | g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]); | |
114 | } else { | |
115 | g_string_append_c(s, ' '); | |
116 | } | |
117 | goto out; | |
118 | } | |
119 | ||
120 | /* get min and max counts */ | |
121 | min = dist->entries[0].count; | |
122 | max = min; | |
123 | for (i = 0; i < dist->n; i++) { | |
124 | struct qdist_entry *e = &dist->entries[i]; | |
125 | ||
126 | if (e->count < min) { | |
127 | min = e->count; | |
128 | } | |
129 | if (e->count > max) { | |
130 | max = e->count; | |
131 | } | |
132 | } | |
133 | ||
134 | for (i = 0; i < dist->n; i++) { | |
135 | struct qdist_entry *e = &dist->entries[i]; | |
136 | int index; | |
137 | ||
138 | /* make an exception with 0; instead of using block[0], print a space */ | |
139 | if (e->count) { | |
140 | /* divide first to avoid loss of precision when e->count == max */ | |
141 | index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1); | |
142 | g_string_append_unichar(s, qdist_blocks[index]); | |
143 | } else { | |
144 | g_string_append_c(s, ' '); | |
145 | } | |
146 | } | |
147 | out: | |
148 | return g_string_free(s, FALSE); | |
149 | } | |
150 | ||
151 | /* | |
152 | * Bin the distribution in @from into @n bins of consecutive, non-overlapping | |
153 | * intervals, copying the result to @to. | |
154 | * | |
155 | * This function is internal to qdist: only this file and test code should | |
156 | * ever call it. | |
157 | * | |
158 | * Note: calling this function on an already-binned qdist is a bug. | |
159 | * | |
160 | * If @n == 0 or @from->n == 1, use @from->n. | |
161 | */ | |
162 | void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n) | |
163 | { | |
164 | double xmin, xmax; | |
165 | double step; | |
166 | size_t i, j; | |
167 | ||
168 | qdist_init(to); | |
169 | ||
170 | if (from->n == 0) { | |
171 | return; | |
172 | } | |
173 | if (n == 0 || from->n == 1) { | |
174 | n = from->n; | |
175 | } | |
176 | ||
177 | /* set equally-sized bins between @from's left and right */ | |
178 | xmin = qdist_xmin(from); | |
179 | xmax = qdist_xmax(from); | |
180 | step = (xmax - xmin) / n; | |
181 | ||
182 | if (n == from->n) { | |
183 | /* if @from's entries are equally spaced, no need to re-bin */ | |
184 | for (i = 0; i < from->n; i++) { | |
185 | if (from->entries[i].x != xmin + i * step) { | |
186 | goto rebin; | |
187 | } | |
188 | } | |
189 | /* they're equally spaced, so copy the dist and bail out */ | |
190 | to->entries = g_new(struct qdist_entry, from->n); | |
191 | to->n = from->n; | |
192 | memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n); | |
193 | return; | |
194 | } | |
195 | ||
196 | rebin: | |
197 | j = 0; | |
198 | for (i = 0; i < n; i++) { | |
199 | double x; | |
200 | double left, right; | |
201 | ||
202 | left = xmin + i * step; | |
203 | right = xmin + (i + 1) * step; | |
204 | ||
205 | /* Add x, even if it might not get any counts later */ | |
206 | x = left; | |
207 | qdist_add(to, x, 0); | |
208 | ||
209 | /* | |
210 | * To avoid double-counting we capture [left, right) ranges, except for | |
211 | * the righmost bin, which captures a [left, right] range. | |
212 | */ | |
213 | while (j < from->n && (from->entries[j].x < right || i == n - 1)) { | |
214 | struct qdist_entry *o = &from->entries[j]; | |
215 | ||
216 | qdist_add(to, x, o->count); | |
217 | j++; | |
218 | } | |
219 | } | |
220 | } | |
221 | ||
222 | /* | |
223 | * Print @dist into a string, after re-binning it into @n bins of consecutive, | |
224 | * non-overlapping intervals. | |
225 | * | |
226 | * If @n == 0, use @orig->n. | |
227 | * | |
228 | * Callers must free the returned string with g_free(). | |
229 | */ | |
230 | char *qdist_pr_plain(const struct qdist *dist, size_t n) | |
231 | { | |
232 | struct qdist binned; | |
233 | char *ret; | |
234 | ||
235 | if (dist->n == 0) { | |
236 | return NULL; | |
237 | } | |
238 | qdist_bin__internal(&binned, dist, n); | |
239 | ret = qdist_pr_internal(&binned); | |
240 | qdist_destroy(&binned); | |
241 | return ret; | |
242 | } | |
243 | ||
244 | static char *qdist_pr_label(const struct qdist *dist, size_t n_bins, | |
245 | uint32_t opt, bool is_left) | |
246 | { | |
247 | const char *percent; | |
248 | const char *lparen; | |
249 | const char *rparen; | |
250 | GString *s; | |
251 | double x1, x2, step; | |
252 | double x; | |
253 | double n; | |
254 | int dec; | |
255 | ||
256 | s = g_string_new(""); | |
257 | if (!(opt & QDIST_PR_LABELS)) { | |
258 | goto out; | |
259 | } | |
260 | ||
261 | dec = opt & QDIST_PR_NODECIMAL ? 0 : 1; | |
262 | percent = opt & QDIST_PR_PERCENT ? "%" : ""; | |
263 | ||
264 | n = n_bins ? n_bins : dist->n; | |
265 | x = is_left ? qdist_xmin(dist) : qdist_xmax(dist); | |
266 | step = (qdist_xmax(dist) - qdist_xmin(dist)) / n; | |
267 | ||
268 | if (opt & QDIST_PR_100X) { | |
269 | x *= 100.0; | |
270 | step *= 100.0; | |
271 | } | |
272 | if (opt & QDIST_PR_NOBINRANGE) { | |
273 | lparen = rparen = ""; | |
274 | x1 = x; | |
275 | x2 = x; /* unnecessary, but a dumb compiler might not figure it out */ | |
276 | } else { | |
277 | lparen = "["; | |
278 | rparen = is_left ? ")" : "]"; | |
279 | if (is_left) { | |
280 | x1 = x; | |
281 | x2 = x + step; | |
282 | } else { | |
283 | x1 = x - step; | |
284 | x2 = x; | |
285 | } | |
286 | } | |
287 | g_string_append_printf(s, "%s%.*f", lparen, dec, x1); | |
288 | if (!(opt & QDIST_PR_NOBINRANGE)) { | |
289 | g_string_append_printf(s, ",%.*f%s", dec, x2, rparen); | |
290 | } | |
291 | g_string_append(s, percent); | |
292 | out: | |
293 | return g_string_free(s, FALSE); | |
294 | } | |
295 | ||
296 | /* | |
297 | * Print the distribution's histogram into a string. | |
298 | * | |
299 | * See also: qdist_pr_plain(). | |
300 | * | |
301 | * Callers must free the returned string with g_free(). | |
302 | */ | |
303 | char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt) | |
304 | { | |
305 | const char *border = opt & QDIST_PR_BORDER ? "|" : ""; | |
306 | char *llabel, *rlabel; | |
307 | char *hgram; | |
308 | GString *s; | |
309 | ||
310 | if (dist->n == 0) { | |
311 | return NULL; | |
312 | } | |
313 | ||
314 | s = g_string_new(""); | |
315 | ||
316 | llabel = qdist_pr_label(dist, n_bins, opt, true); | |
317 | rlabel = qdist_pr_label(dist, n_bins, opt, false); | |
318 | hgram = qdist_pr_plain(dist, n_bins); | |
319 | g_string_append_printf(s, "%s%s%s%s%s", | |
320 | llabel, border, hgram, border, rlabel); | |
321 | g_free(llabel); | |
322 | g_free(rlabel); | |
323 | g_free(hgram); | |
324 | ||
325 | return g_string_free(s, FALSE); | |
326 | } | |
327 | ||
328 | static inline double qdist_x(const struct qdist *dist, int index) | |
329 | { | |
330 | if (dist->n == 0) { | |
331 | return NAN; | |
332 | } | |
333 | return dist->entries[index].x; | |
334 | } | |
335 | ||
336 | double qdist_xmin(const struct qdist *dist) | |
337 | { | |
338 | return qdist_x(dist, 0); | |
339 | } | |
340 | ||
341 | double qdist_xmax(const struct qdist *dist) | |
342 | { | |
343 | return qdist_x(dist, dist->n - 1); | |
344 | } | |
345 | ||
346 | size_t qdist_unique_entries(const struct qdist *dist) | |
347 | { | |
348 | return dist->n; | |
349 | } | |
350 | ||
351 | unsigned long qdist_sample_count(const struct qdist *dist) | |
352 | { | |
353 | unsigned long count = 0; | |
354 | size_t i; | |
355 | ||
356 | for (i = 0; i < dist->n; i++) { | |
357 | struct qdist_entry *e = &dist->entries[i]; | |
358 | ||
359 | count += e->count; | |
360 | } | |
361 | return count; | |
362 | } | |
363 | ||
364 | static double qdist_pairwise_avg(const struct qdist *dist, size_t index, | |
365 | size_t n, unsigned long count) | |
366 | { | |
367 | /* amortize the recursion by using a base case > 2 */ | |
368 | if (n <= 8) { | |
369 | size_t i; | |
370 | double ret = 0; | |
371 | ||
372 | for (i = 0; i < n; i++) { | |
373 | struct qdist_entry *e = &dist->entries[index + i]; | |
374 | ||
375 | ret += e->x * e->count / count; | |
376 | } | |
377 | return ret; | |
378 | } else { | |
379 | size_t n2 = n / 2; | |
380 | ||
381 | return qdist_pairwise_avg(dist, index, n2, count) + | |
382 | qdist_pairwise_avg(dist, index + n2, n - n2, count); | |
383 | } | |
384 | } | |
385 | ||
386 | double qdist_avg(const struct qdist *dist) | |
387 | { | |
388 | unsigned long count; | |
389 | ||
390 | count = qdist_sample_count(dist); | |
391 | if (!count) { | |
392 | return NAN; | |
393 | } | |
394 | return qdist_pairwise_avg(dist, 0, dist->n, count); | |
395 | } |