]> git.proxmox.com Git - mirror_ovs.git/blame - lib/stopwatch.c
stopwatch: Add API for retrieving calculated statistics
[mirror_ovs.git] / lib / stopwatch.c
CommitLineData
aed45bef
MM
1/* Copyright (c) 2017 Red Hat, Inc.
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include <config.h>
17
18#include "stopwatch.h"
19#include "openvswitch/shash.h"
20#include "openvswitch/vlog.h"
21#include "unixctl.h"
22#include "openvswitch/dynamic-string.h"
23#include "openvswitch/poll-loop.h"
24#include "ovs-thread.h"
25#include <unistd.h>
26#include "socket-util.h"
27
28VLOG_DEFINE_THIS_MODULE(stopwatch);
29
30struct average {
31 double average; /* Moving average */
32 double alpha; /* Weight given to new samples */
33};
34
35#define MARKERS 5
36
37/* Number of samples to collect before reporting P-square calculated
38 * percentile
39 */
40#define P_SQUARE_MIN 50
41
42/* The naming of these fields is based on the naming used in the
43 * P-square algorithm paper.
44 */
45struct percentile {
46 int n[MARKERS];
47 double n_prime[MARKERS];
48 double q[MARKERS];
49 double dn[MARKERS];
50 unsigned long long samples[P_SQUARE_MIN];
51 double percentile;
52};
53
54struct stopwatch {
55 enum stopwatch_units units;
56 unsigned long long n_samples;
57 unsigned long long max;
58 unsigned long long min;
59 struct percentile pctl;
60 struct average short_term;
61 struct average long_term;
62 unsigned long long sample_start;
63 bool sample_in_progress;
64};
65
66enum stopwatch_op {
67 OP_START_SAMPLE,
68 OP_END_SAMPLE,
69 OP_RESET,
70 OP_SHUTDOWN,
71};
72
73struct stopwatch_packet {
74 enum stopwatch_op op;
75 char name[32];
76 unsigned long long time;
77};
78
79static struct shash stopwatches = SHASH_INITIALIZER(&stopwatches);
80static struct ovs_mutex stopwatches_lock = OVS_MUTEX_INITIALIZER;
81
82static int stopwatch_pipe[2];
83static pthread_t stopwatch_thread_id;
84
85static const char *unit_name[] = {
86 [SW_MS] = "msec",
87 [SW_US] = "usec",
88 [SW_NS] = "nsec",
89};
90
91/* Percentile value we are calculating */
92#define P 0.95
93
94static int
95comp_samples(const void *left, const void *right)
96{
97 const double *left_d = left;
98 const double *right_d = right;
99
100 return (int) *right_d - *left_d;
101}
102
103/* Calculate the percentile using the P-square algorithm. For more
104 * information, see https://www1.cse.wustl.edu/~jain/papers/ftp/psqr.pdf
105 */
106static void
107calc_percentile(unsigned long long n_samples, struct percentile *pctl,
108 unsigned long long new_sample)
109{
110
111 if (n_samples < P_SQUARE_MIN) {
112 pctl->samples[n_samples - 1] = new_sample;
113 }
114
115 /* For the first MARKERS samples, we calculate the percentile
116 * in the traditional way in the pct->q array.
117 */
118 if (n_samples <= MARKERS) {
119 pctl->q[n_samples - 1] = new_sample;
120 qsort(pctl->q, n_samples, sizeof *pctl->q, comp_samples);
121 if (n_samples == MARKERS) {
122 pctl->n[0] = 0;
123 pctl->n[1] = 1;
124 pctl->n[2] = 2;
125 pctl->n[3] = 3;
126 pctl->n[4] = 4;
127
128 pctl->n_prime[0] = 0;
129 pctl->n_prime[1] = 2 * P;
130 pctl->n_prime[2] = 4 * P;
131 pctl->n_prime[3] = 2 + 2 * P;
132 pctl->n_prime[4] = 4;
133
134 pctl->dn[0] = 0;
135 pctl->dn[1] = P / 2;
136 pctl->dn[2] = P;
137 pctl->dn[3] = (1 + P) / 2;
138 pctl->dn[4] = 1;
139 }
140 pctl->percentile = pctl->q[(int) P * n_samples];
141 return;
142 }
143
144 /* From here on, update the markers using quadratic spline calculations */
145 int k;
146 if (new_sample < pctl->q[0]) {
147 k = 0;
148 pctl->q[0] = new_sample;
149 } else if (new_sample < pctl->q[1]) {
150 k = 0;
151 } else if (new_sample < pctl->q[2]) {
152 k = 1;
153 } else if (new_sample < pctl->q[3]) {
154 k = 2;
155 } else if (new_sample <= pctl->q[4]) {
156 k = 3;
157 } else {
158 k = 3;
159 pctl->q[4] = new_sample;
160 }
161
162 for (int i = k + 1; i < MARKERS; i++) {
163 pctl->n[i]++;
164 }
165
166 for (int i = 0; i < MARKERS; i++) {
167 pctl->n_prime[i] += pctl->dn[i];
168 }
169
170 for (int i = 1; i < MARKERS - 1; i++) {
171 double d = pctl->n_prime[i] - pctl->n[i];
172
173 if ((d >= 1 && pctl->n[i + 1] - pctl->n[i] > 1) ||
174 (d <= -1 && pctl->n[i - 1] - pctl->n[i] < -1)) {
175 d = d >= 0 ? 1 : -1;
176
177 double a = d / (pctl->n[i + 1] - pctl->n[i - 1]);
178 double b = (pctl->n[i] - pctl->n[i - 1] + d) *
179 (pctl->q[i + 1] - pctl->q[i]) / (pctl->n[i + 1] - pctl->n[i]);
180 double c = (pctl->n[i + 1] - pctl->n[i] - d) *
181 (pctl->q[i] - pctl->q[i - 1]) / (pctl->n[i] - pctl->n[i - 1]);
182
183 double candidate = pctl->q[i] + a * (b + c);
184 if (pctl->q[i - 1] < candidate && candidate < pctl->q[i + 1]) {
185 pctl->q[i] = candidate;
186 } else {
187 pctl->q[i] = pctl->q[i] +
188 (d * (pctl->q[i + (int)d] - pctl->q[i]) /
189 (pctl->n[i +(int)d] - pctl->n[i]));
190 }
191
192 pctl->n[i] += d;
193 }
194 }
195
196 /* Without enough samples, P-square is not very accurate. Until we reach
197 * P_SQUARE_MIN, use a traditional calculation for the percentile.
198 */
199 if (n_samples < P_SQUARE_MIN) {
200 qsort(pctl->samples, n_samples, sizeof *pctl->samples, comp_samples);
201 pctl->percentile = pctl->samples[(int) (P * n_samples)];
202 } else {
203 pctl->percentile = pctl->q[2];
204 }
205}
206
207static void
208calc_average(struct average *avg, double new_sample)
209{
210 avg->average = new_sample * avg->alpha + (1 - avg->alpha) * avg->average;
211}
212
213static void
214add_sample(struct stopwatch *sw, unsigned long long new_sample)
215{
216 if (new_sample > sw->max) {
217 sw->max = new_sample;
218 }
219
220 if (new_sample < sw->min || sw->n_samples == 0) {
221 sw->min = new_sample;
222 }
223
224 calc_percentile(sw->n_samples, &sw->pctl, new_sample);
225
226 if (sw->n_samples++ == 0) {
227 sw->short_term.average = sw->long_term.average = new_sample;
228 return;
229 }
230
231 calc_average(&sw->short_term, new_sample);
232 calc_average(&sw->long_term, new_sample);
233}
234
89189388
JS
235static bool
236performance_get_stats_protected(const char *name,
237 struct performance_stats *stats)
238{
239 struct performance *perf;
240
241 perf = shash_find_data(&performances, name);
242 if (!perf) {
243 return false;
244 }
245
246 stats->count = perf->samples;
247 stats->unit = perf->units;
248 stats->max = perf->max;
249 stats->min = perf->min;
250 stats->pctl_95 = perf->pctl.percentile;
251 stats->ewma_50 = perf->short_term.average;
252 stats->ewma_1 = perf->long_term.average;
253
254 return true;
255}
256
257bool
258performance_get_stats(const char *name, struct performance_stats *stats)
259{
260 bool found = false;
261
262 ovs_mutex_lock(&performances_lock);
263 found = performance_get_stats_protected(name, stats);
264 ovs_mutex_unlock(&performances_lock);
265
266 return found;
267}
268
aed45bef
MM
269static void
270stopwatch_print(struct stopwatch *sw, const char *name,
271 struct ds *s)
272{
273 ds_put_format(s, "Statistics for '%s'\n", name);
274
275 const char *units = unit_name[sw->units];
276 ds_put_format(s, "\t Total samples: %llu\n", sw->n_samples);
277 ds_put_format(s, "\t Maximum: %llu %s\n", sw->max, units);
278 ds_put_format(s, "\t Minimum: %llu %s\n", sw->min, units);
279 ds_put_format(s, "\t 95th percentile: %f %s\n",
280 sw->pctl.percentile, units);
281 ds_put_format(s, "\t Short term average: %f %s\n",
282 sw->short_term.average, units);
283 ds_put_format(s, "\t Long term average: %f %s\n",
284 sw->long_term.average, units);
285}
286
287static bool
288stopwatch_show_protected(int argc, const char *argv[], struct ds *s)
289{
290 struct stopwatch *sw;
291
292 if (argc > 1) {
293 sw = shash_find_data(&stopwatches, argv[1]);
294 if (!sw) {
295 ds_put_cstr(s, "No such stopwatch");
296 return false;
297 }
298 stopwatch_print(sw, argv[1], s);
299 } else {
300 struct shash_node *node;
301 SHASH_FOR_EACH (node, &stopwatches) {
302 sw = node->data;
303 stopwatch_print(sw, node->name, s);
304 }
305 }
306
307 return true;
308}
309
310static void
311stopwatch_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
312 const char *argv[], void *ignore OVS_UNUSED)
313{
314 struct ds s = DS_EMPTY_INITIALIZER;
315 bool success;
316
317 ovs_mutex_lock(&stopwatches_lock);
318 success = stopwatch_show_protected(argc, argv, &s);
319 ovs_mutex_unlock(&stopwatches_lock);
320
321 if (success) {
322 unixctl_command_reply(conn, ds_cstr(&s));
323 } else {
324 unixctl_command_reply_error(conn, ds_cstr(&s));
325 }
326 ds_destroy(&s);
327}
328
329static void
330stopwatch_reset(struct unixctl_conn *conn, int argc OVS_UNUSED,
331 const char *argv[], void *ignore OVS_UNUSED)
332{
333 struct stopwatch_packet pkt = {
334 .op = OP_RESET,
335 };
336 if (argc > 1) {
337 ovs_strlcpy(pkt.name, argv[1], sizeof(pkt.name));
338 }
339 write(stopwatch_pipe[1], &pkt, sizeof(pkt));
340 unixctl_command_reply(conn, "");
341}
342
343static void
344stopwatch_start_sample_protected(const struct stopwatch_packet *pkt)
345{
346 struct stopwatch *sw = shash_find_data(&stopwatches, pkt->name);
347 if (!sw || sw->sample_in_progress) {
348 return;
349 }
350
351 sw->sample_start = pkt->time;
352 sw->sample_in_progress = true;
353}
354
355static void
356stopwatch_end_sample_protected(const struct stopwatch_packet *pkt)
357{
358 struct stopwatch *sw = shash_find_data(&stopwatches, pkt->name);
359 if (!sw || !sw->sample_in_progress) {
360 return;
361 }
362
363 add_sample(sw, pkt->time - sw->sample_start);
364 sw->sample_in_progress = false;
365}
366
367static void reset_stopwatch(struct stopwatch *sw)
368{
369 sw->short_term.average = 0;
370 sw->long_term.average = 0;
371 sw->pctl.percentile = 0;
372 sw->n_samples = 0;
373 sw->max = 0;
374 sw->min = 0;
375 /* Don't reset sw->sample_start or sw->sample_in_progress.
376 * This way, if a sample was currently in progress, it can be
377 * concluded properly after the reset.
378 */
379}
380
381static void
382stopwatch_reset_protected(const struct stopwatch_packet *pkt)
383{
384 if (pkt->name[0]) {
385 struct stopwatch *sw = shash_find_data(&stopwatches, pkt->name);
386 if (!sw) {
387 return;
388 }
389 reset_stopwatch(sw);
390 return;
391 }
392
393 struct shash_node *node;
394 SHASH_FOR_EACH (node, &stopwatches) {
395 struct stopwatch *sw = node->data;
396 reset_stopwatch(sw);
397 }
398}
399
400static void *
401stopwatch_thread(void *ign OVS_UNUSED)
402{
403 bool should_exit = false;
404
405 while (!should_exit) {
406 struct stopwatch_packet pkt;
407 while (read(stopwatch_pipe[0], &pkt, sizeof(pkt)) > 0) {
408 ovs_mutex_lock(&stopwatches_lock);
409 switch (pkt.op) {
410 case OP_START_SAMPLE:
411 stopwatch_start_sample_protected(&pkt);
412 break;
413 case OP_END_SAMPLE:
414 stopwatch_end_sample_protected(&pkt);
415 break;
416 case OP_RESET:
417 stopwatch_reset_protected(&pkt);
418 break;
419 case OP_SHUTDOWN:
420 should_exit = true;
421 break;
422 }
423 ovs_mutex_unlock(&stopwatches_lock);
424 }
425
426 if (!should_exit) {
427 poll_fd_wait(stopwatch_pipe[0], POLLIN);
428 poll_block();
429 }
430 }
431
432 return NULL;
433}
434
435static void
436stopwatch_exit(void)
437{
438 struct shash_node *node, *node_next;
439 struct stopwatch_packet pkt = {
440 .op = OP_SHUTDOWN,
441 };
442
443 write(stopwatch_pipe[1], &pkt, sizeof pkt);
444 xpthread_join(stopwatch_thread_id, NULL);
445
446 /* Process is exiting and we have joined the only
447 * other competing thread. We are now the sole owners
448 * of all data in the file.
449 */
450 SHASH_FOR_EACH_SAFE (node, node_next, &stopwatches) {
451 struct stopwatch *sw = node->data;
452 shash_delete(&stopwatches, node);
453 free(sw);
454 }
455 shash_destroy(&stopwatches);
456 ovs_mutex_destroy(&stopwatches_lock);
457}
458
459static void
460do_init_stopwatch(void)
461{
462 unixctl_command_register("stopwatch/show", "[NAME]", 0, 1,
463 stopwatch_show, NULL);
464 unixctl_command_register("stopwatch/reset", "[NAME]", 0, 1,
465 stopwatch_reset, NULL);
466 xpipe_nonblocking(stopwatch_pipe);
467 stopwatch_thread_id = ovs_thread_create(
468 "stopwatch", stopwatch_thread, NULL);
469 atexit(stopwatch_exit);
470}
471
472static void
473stopwatch_init(void)
474{
475 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
476 if (ovsthread_once_start(&once)) {
477 do_init_stopwatch();
478 ovsthread_once_done(&once);
479 }
480}
481
482void
483stopwatch_create(const char *name, enum stopwatch_units units)
484{
485 stopwatch_init();
486
487 struct stopwatch *sw = xzalloc(sizeof *sw);
488 sw->units = units;
489 sw->short_term.alpha = 0.50;
490 sw->long_term.alpha = 0.01;
491
492 ovs_mutex_lock(&stopwatches_lock);
493 shash_add(&stopwatches, name, sw);
494 ovs_mutex_unlock(&stopwatches_lock);
495}
496
497void
498stopwatch_start(const char *name, unsigned long long ts)
499{
500 struct stopwatch_packet pkt = {
501 .op = OP_START_SAMPLE,
502 .time = ts,
503 };
504 ovs_strlcpy(pkt.name, name, sizeof(pkt.name));
505 write(stopwatch_pipe[1], &pkt, sizeof(pkt));
506}
507
508void
509stopwatch_stop(const char *name, unsigned long long ts)
510{
511 struct stopwatch_packet pkt = {
512 .op = OP_END_SAMPLE,
513 .time = ts,
514 };
515 ovs_strlcpy(pkt.name, name, sizeof(pkt.name));
516 write(stopwatch_pipe[1], &pkt, sizeof(pkt));
517}