]> git.proxmox.com Git - mirror_qemu.git/blob - util/qdist.c
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
[mirror_qemu.git] / util / qdist.c
1 /*
2 * qdist.c - QEMU helpers for handling frequency distributions of data.
3 *
4 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
5 *
6 * License: GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 */
9 #include "qemu/osdep.h"
10 #include "qemu/qdist.h"
11
12 #include <math.h>
13 #ifndef NAN
14 #define NAN (0.0 / 0.0)
15 #endif
16
17 void qdist_init(struct qdist *dist)
18 {
19 dist->entries = g_malloc(sizeof(*dist->entries));
20 dist->size = 1;
21 dist->n = 0;
22 }
23
24 void qdist_destroy(struct qdist *dist)
25 {
26 g_free(dist->entries);
27 }
28
29 static inline int qdist_cmp_double(double a, double b)
30 {
31 if (a > b) {
32 return 1;
33 } else if (a < b) {
34 return -1;
35 }
36 return 0;
37 }
38
39 static int qdist_cmp(const void *ap, const void *bp)
40 {
41 const struct qdist_entry *a = ap;
42 const struct qdist_entry *b = bp;
43
44 return qdist_cmp_double(a->x, b->x);
45 }
46
47 void qdist_add(struct qdist *dist, double x, long count)
48 {
49 struct qdist_entry *entry = NULL;
50
51 if (dist->n) {
52 struct qdist_entry e;
53
54 e.x = x;
55 entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
56 }
57
58 if (entry) {
59 entry->count += count;
60 return;
61 }
62
63 if (unlikely(dist->n == dist->size)) {
64 dist->size *= 2;
65 dist->entries = g_realloc(dist->entries,
66 sizeof(*dist->entries) * (dist->size));
67 }
68 dist->n++;
69 entry = &dist->entries[dist->n - 1];
70 entry->x = x;
71 entry->count = count;
72 qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
73 }
74
75 void qdist_inc(struct qdist *dist, double x)
76 {
77 qdist_add(dist, x, 1);
78 }
79
80 /*
81 * Unicode for block elements. See:
82 * https://en.wikipedia.org/wiki/Block_Elements
83 */
84 static const gunichar qdist_blocks[] = {
85 0x2581,
86 0x2582,
87 0x2583,
88 0x2584,
89 0x2585,
90 0x2586,
91 0x2587,
92 0x2588
93 };
94
95 #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
96
97 /*
98 * Print a distribution into a string.
99 *
100 * This function assumes that appropriate binning has been done on the input;
101 * see qdist_bin__internal() and qdist_pr_plain().
102 *
103 * Callers must free the returned string with g_free().
104 */
105 static char *qdist_pr_internal(const struct qdist *dist)
106 {
107 double min, max;
108 GString *s = g_string_new("");
109 size_t i;
110
111 /* if only one entry, its printout will be either full or empty */
112 if (dist->n == 1) {
113 if (dist->entries[0].count) {
114 g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
115 } else {
116 g_string_append_c(s, ' ');
117 }
118 goto out;
119 }
120
121 /* get min and max counts */
122 min = dist->entries[0].count;
123 max = min;
124 for (i = 0; i < dist->n; i++) {
125 struct qdist_entry *e = &dist->entries[i];
126
127 if (e->count < min) {
128 min = e->count;
129 }
130 if (e->count > max) {
131 max = e->count;
132 }
133 }
134
135 for (i = 0; i < dist->n; i++) {
136 struct qdist_entry *e = &dist->entries[i];
137 int index;
138
139 /* make an exception with 0; instead of using block[0], print a space */
140 if (e->count) {
141 /* divide first to avoid loss of precision when e->count == max */
142 index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
143 g_string_append_unichar(s, qdist_blocks[index]);
144 } else {
145 g_string_append_c(s, ' ');
146 }
147 }
148 out:
149 return g_string_free(s, FALSE);
150 }
151
152 /*
153 * Bin the distribution in @from into @n bins of consecutive, non-overlapping
154 * intervals, copying the result to @to.
155 *
156 * This function is internal to qdist: only this file and test code should
157 * ever call it.
158 *
159 * Note: calling this function on an already-binned qdist is a bug.
160 *
161 * If @n == 0 or @from->n == 1, use @from->n.
162 */
163 void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
164 {
165 double xmin, xmax;
166 double step;
167 size_t i, j;
168
169 qdist_init(to);
170
171 if (from->n == 0) {
172 return;
173 }
174 if (n == 0 || from->n == 1) {
175 n = from->n;
176 }
177
178 /* set equally-sized bins between @from's left and right */
179 xmin = qdist_xmin(from);
180 xmax = qdist_xmax(from);
181 step = (xmax - xmin) / n;
182
183 if (n == from->n) {
184 /* if @from's entries are equally spaced, no need to re-bin */
185 for (i = 0; i < from->n; i++) {
186 if (from->entries[i].x != xmin + i * step) {
187 goto rebin;
188 }
189 }
190 /* they're equally spaced, so copy the dist and bail out */
191 to->entries = g_new(struct qdist_entry, from->n);
192 to->n = from->n;
193 memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
194 return;
195 }
196
197 rebin:
198 j = 0;
199 for (i = 0; i < n; i++) {
200 double x;
201 double left, right;
202
203 left = xmin + i * step;
204 right = xmin + (i + 1) * step;
205
206 /* Add x, even if it might not get any counts later */
207 x = left;
208 qdist_add(to, x, 0);
209
210 /*
211 * To avoid double-counting we capture [left, right) ranges, except for
212 * the righmost bin, which captures a [left, right] range.
213 */
214 while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
215 struct qdist_entry *o = &from->entries[j];
216
217 qdist_add(to, x, o->count);
218 j++;
219 }
220 }
221 }
222
223 /*
224 * Print @dist into a string, after re-binning it into @n bins of consecutive,
225 * non-overlapping intervals.
226 *
227 * If @n == 0, use @orig->n.
228 *
229 * Callers must free the returned string with g_free().
230 */
231 char *qdist_pr_plain(const struct qdist *dist, size_t n)
232 {
233 struct qdist binned;
234 char *ret;
235
236 if (dist->n == 0) {
237 return NULL;
238 }
239 qdist_bin__internal(&binned, dist, n);
240 ret = qdist_pr_internal(&binned);
241 qdist_destroy(&binned);
242 return ret;
243 }
244
245 static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
246 uint32_t opt, bool is_left)
247 {
248 const char *percent;
249 const char *lparen;
250 const char *rparen;
251 GString *s;
252 double x1, x2, step;
253 double x;
254 double n;
255 int dec;
256
257 s = g_string_new("");
258 if (!(opt & QDIST_PR_LABELS)) {
259 goto out;
260 }
261
262 dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
263 percent = opt & QDIST_PR_PERCENT ? "%" : "";
264
265 n = n_bins ? n_bins : dist->n;
266 x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
267 step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
268
269 if (opt & QDIST_PR_100X) {
270 x *= 100.0;
271 step *= 100.0;
272 }
273 if (opt & QDIST_PR_NOBINRANGE) {
274 lparen = rparen = "";
275 x1 = x;
276 x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
277 } else {
278 lparen = "[";
279 rparen = is_left ? ")" : "]";
280 if (is_left) {
281 x1 = x;
282 x2 = x + step;
283 } else {
284 x1 = x - step;
285 x2 = x;
286 }
287 }
288 g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
289 if (!(opt & QDIST_PR_NOBINRANGE)) {
290 g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
291 }
292 g_string_append(s, percent);
293 out:
294 return g_string_free(s, FALSE);
295 }
296
297 /*
298 * Print the distribution's histogram into a string.
299 *
300 * See also: qdist_pr_plain().
301 *
302 * Callers must free the returned string with g_free().
303 */
304 char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
305 {
306 const char *border = opt & QDIST_PR_BORDER ? "|" : "";
307 char *llabel, *rlabel;
308 char *hgram;
309 GString *s;
310
311 if (dist->n == 0) {
312 return NULL;
313 }
314
315 s = g_string_new("");
316
317 llabel = qdist_pr_label(dist, n_bins, opt, true);
318 rlabel = qdist_pr_label(dist, n_bins, opt, false);
319 hgram = qdist_pr_plain(dist, n_bins);
320 g_string_append_printf(s, "%s%s%s%s%s",
321 llabel, border, hgram, border, rlabel);
322 g_free(llabel);
323 g_free(rlabel);
324 g_free(hgram);
325
326 return g_string_free(s, FALSE);
327 }
328
329 static inline double qdist_x(const struct qdist *dist, int index)
330 {
331 if (dist->n == 0) {
332 return NAN;
333 }
334 return dist->entries[index].x;
335 }
336
337 double qdist_xmin(const struct qdist *dist)
338 {
339 return qdist_x(dist, 0);
340 }
341
342 double qdist_xmax(const struct qdist *dist)
343 {
344 return qdist_x(dist, dist->n - 1);
345 }
346
347 size_t qdist_unique_entries(const struct qdist *dist)
348 {
349 return dist->n;
350 }
351
352 unsigned long qdist_sample_count(const struct qdist *dist)
353 {
354 unsigned long count = 0;
355 size_t i;
356
357 for (i = 0; i < dist->n; i++) {
358 struct qdist_entry *e = &dist->entries[i];
359
360 count += e->count;
361 }
362 return count;
363 }
364
365 static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
366 size_t n, unsigned long count)
367 {
368 /* amortize the recursion by using a base case > 2 */
369 if (n <= 8) {
370 size_t i;
371 double ret = 0;
372
373 for (i = 0; i < n; i++) {
374 struct qdist_entry *e = &dist->entries[index + i];
375
376 ret += e->x * e->count / count;
377 }
378 return ret;
379 } else {
380 size_t n2 = n / 2;
381
382 return qdist_pairwise_avg(dist, index, n2, count) +
383 qdist_pairwise_avg(dist, index + n2, n - n2, count);
384 }
385 }
386
387 double qdist_avg(const struct qdist *dist)
388 {
389 unsigned long count;
390
391 count = qdist_sample_count(dist);
392 if (!count) {
393 return NAN;
394 }
395 return qdist_pairwise_avg(dist, 0, dist->n, count);
396 }