]> git.proxmox.com Git - ovs.git/blame - lib/timeval.c
unixctl: New JSON RPC back-end.
[ovs.git] / lib / timeval.c
CommitLineData
064af421 1/*
4ae90ff9 2 * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira Networks.
064af421 3 *
a14bc59f
BP
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
064af421 7 *
a14bc59f
BP
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
064af421
BP
15 */
16
17#include <config.h>
18#include "timeval.h"
19#include <assert.h>
20#include <errno.h>
21#include <poll.h>
22#include <signal.h>
6197af6e 23#include <stdlib.h>
064af421
BP
24#include <string.h>
25#include <sys/time.h>
26#include <sys/resource.h>
27#include <unistd.h>
28#include "coverage.h"
6197af6e 29#include "dummy.h"
064af421 30#include "fatal-signal.h"
279c9e03 31#include "signals.h"
6197af6e 32#include "unixctl.h"
064af421 33#include "util.h"
064af421 34#include "vlog.h"
5136ce49 35
d98e6007 36VLOG_DEFINE_THIS_MODULE(timeval);
064af421 37
f54e56fc
BP
38/* The clock to use for measuring time intervals. This is CLOCK_MONOTONIC by
39 * preference, but on systems that don't have a monotonic clock we fall back
40 * to CLOCK_REALTIME. */
c73814a3
JG
41static clockid_t monotonic_clock;
42
7bc9188d
BP
43/* Has a timer tick occurred?
44 *
45 * We initialize these to true to force time_init() to get called on the first
46 * call to time_msec() or another function that queries the current time. */
47static volatile sig_atomic_t wall_tick = true;
48static volatile sig_atomic_t monotonic_tick = true;
064af421
BP
49
50/* The current time, as of the last refresh. */
c73814a3
JG
51static struct timespec wall_time;
52static struct timespec monotonic_time;
064af421 53
4ae90ff9
BP
54/* The monotonic time at which the time module was initialized. */
55static long long int boot_time;
56
6197af6e
BP
57/* Fixed monotonic time offset, for use by unit tests. */
58static struct timespec warp_offset;
59
064af421
BP
60/* Time at which to die with SIGALRM (if not TIME_MIN). */
61static time_t deadline = TIME_MIN;
62
d6fbec6d 63static void set_up_timer(void);
58fda1da 64static void set_up_signal(int flags);
064af421 65static void sigalrm_handler(int);
c73814a3
JG
66static void refresh_wall_if_ticked(void);
67static void refresh_monotonic_if_ticked(void);
064af421
BP
68static time_t time_add(time_t, time_t);
69static void block_sigalrm(sigset_t *);
70static void unblock_sigalrm(const sigset_t *);
959ec62e
BP
71static void log_poll_interval(long long int last_wakeup);
72static struct rusage *get_recent_rusage(void);
73static void refresh_rusage(void);
6197af6e
BP
74static void timespec_add(struct timespec *sum,
75 const struct timespec *a, const struct timespec *b);
064af421 76
4ae90ff9 77/* Initializes the timetracking module, if not already initialized. */
ff8bb7e7 78static void
064af421
BP
79time_init(void)
80{
7bc9188d 81 static bool inited;
064af421
BP
82 if (inited) {
83 return;
84 }
7bc9188d 85 inited = true;
064af421 86
f5c6854a
JP
87 coverage_init();
88
f54e56fc 89 if (!clock_gettime(CLOCK_MONOTONIC, &monotonic_time)) {
c73814a3
JG
90 monotonic_clock = CLOCK_MONOTONIC;
91 } else {
92 monotonic_clock = CLOCK_REALTIME;
93 VLOG_DBG("monotonic timer not available");
94 }
95
f54e56fc
BP
96 set_up_signal(SA_RESTART);
97 set_up_timer();
4ae90ff9 98 boot_time = time_msec();
c73814a3
JG
99}
100
dc81071d 101static void
58fda1da 102set_up_signal(int flags)
dc81071d
BP
103{
104 struct sigaction sa;
105
064af421
BP
106 memset(&sa, 0, sizeof sa);
107 sa.sa_handler = sigalrm_handler;
108 sigemptyset(&sa.sa_mask);
dc81071d 109 sa.sa_flags = flags;
279c9e03 110 xsigaction(SIGALRM, &sa, NULL);
dc81071d 111}
064af421 112
dc81071d
BP
/* Clears SA_RESTART on the SIGALRM handler, so that a system call interrupted
 * by the periodic timer signal fails with EINTR rather than being resumed
 * once the handler returns.
 *
 * Pair this with time_enable_restart() around a call that could otherwise
 * block indefinitely unless a signal interrupts it, e.g.:
 *
 *     time_disable_restart();
 *     fcntl(fd, F_SETLKW, &lock);
 *     time_enable_restart();
 */
void
time_disable_restart(void)
{
    time_init();
    set_up_signal(0);
}
131
132/* Add SA_RESTART to the flags for SIGALRM, so that any system call that
133 * is interrupted by the periodic timer interrupt will continue after the
134 * signal handler returns instead of returning EINTR. */
135void
136time_enable_restart(void)
137{
7bc9188d 138 time_init();
58fda1da 139 set_up_signal(SA_RESTART);
03fbffbd
BP
140}
141
142static void
d6fbec6d 143set_up_timer(void)
03fbffbd 144{
4cfffdd8 145 static timer_t timer_id; /* "static" to avoid apparent memory leak. */
c73814a3
JG
146 struct itimerspec itimer;
147
c73814a3 148 if (timer_create(monotonic_clock, NULL, &timer_id)) {
279c9e03 149 VLOG_FATAL("timer_create failed (%s)", strerror(errno));
c73814a3 150 }
03fbffbd 151
064af421 152 itimer.it_interval.tv_sec = 0;
c73814a3 153 itimer.it_interval.tv_nsec = TIME_UPDATE_INTERVAL * 1000 * 1000;
064af421 154 itimer.it_value = itimer.it_interval;
c73814a3
JG
155
156 if (timer_settime(timer_id, 0, &itimer, NULL)) {
279c9e03 157 VLOG_FATAL("timer_settime failed (%s)", strerror(errno));
064af421
BP
158 }
159}
160
03fbffbd
BP
/* Re-arms the interval timer so that time keeps advancing even when nothing
 * calls time_refresh().
 *
 * The parent's interval timer is not inherited across fork(), so a child
 * process must call this function after fork(). */
void
time_postfork(void)
{
    time_init();
    set_up_timer();
}
172
c73814a3
JG
173static void
174refresh_wall(void)
175{
7bc9188d 176 time_init();
c73814a3
JG
177 clock_gettime(CLOCK_REALTIME, &wall_time);
178 wall_tick = false;
179}
180
181static void
182refresh_monotonic(void)
183{
f54e56fc 184 time_init();
c73814a3
JG
185
186 if (monotonic_clock == CLOCK_MONOTONIC) {
187 clock_gettime(monotonic_clock, &monotonic_time);
188 } else {
189 refresh_wall_if_ticked();
190 monotonic_time = wall_time;
191 }
6197af6e 192 timespec_add(&monotonic_time, &monotonic_time, &warp_offset);
c73814a3
JG
193
194 monotonic_tick = false;
195}
196
064af421
BP
197/* Forces a refresh of the current time from the kernel. It is not usually
198 * necessary to call this function, since the time will be refreshed
199 * automatically at least every TIME_UPDATE_INTERVAL milliseconds. */
200void
201time_refresh(void)
202{
c73814a3 203 wall_tick = monotonic_tick = true;
064af421
BP
204}
205
c73814a3 206/* Returns a monotonic timer, in seconds. */
064af421
BP
207time_t
208time_now(void)
209{
c73814a3
JG
210 refresh_monotonic_if_ticked();
211 return monotonic_time.tv_sec;
064af421
BP
212}
213
c73814a3 214/* Same as time_now() except does not write to static variables, for use in
f54e56fc 215 * signal handlers. */
c73814a3
JG
216static time_t
217time_now_sig(void)
218{
219 struct timespec cur_time;
220
221 clock_gettime(monotonic_clock, &cur_time);
222 return cur_time.tv_sec;
223}
224
225/* Returns the current time, in seconds. */
226time_t
227time_wall(void)
228{
229 refresh_wall_if_ticked();
230 return wall_time.tv_sec;
231}
232
233/* Returns a monotonic timer, in ms (within TIME_UPDATE_INTERVAL ms). */
064af421
BP
234long long int
235time_msec(void)
236{
c73814a3
JG
237 refresh_monotonic_if_ticked();
238 return timespec_to_msec(&monotonic_time);
239}
240
241/* Returns the current time, in ms (within TIME_UPDATE_INTERVAL ms). */
242long long int
243time_wall_msec(void)
244{
245 refresh_wall_if_ticked();
246 return timespec_to_msec(&wall_time);
247}
248
249/* Stores a monotonic timer, accurate within TIME_UPDATE_INTERVAL ms, into
250 * '*ts'. */
251void
252time_timespec(struct timespec *ts)
253{
254 refresh_monotonic_if_ticked();
255 *ts = monotonic_time;
064af421
BP
256}
257
258/* Stores the current time, accurate within TIME_UPDATE_INTERVAL ms, into
c73814a3 259 * '*ts'. */
064af421 260void
c73814a3 261time_wall_timespec(struct timespec *ts)
064af421 262{
c73814a3
JG
263 refresh_wall_if_ticked();
264 *ts = wall_time;
064af421
BP
265}
266
267/* Configures the program to die with SIGALRM 'secs' seconds from now, if
268 * 'secs' is nonzero, or disables the feature if 'secs' is zero. */
269void
270time_alarm(unsigned int secs)
271{
272 sigset_t oldsigs;
273
274 time_init();
275 block_sigalrm(&oldsigs);
276 deadline = secs ? time_add(time_now(), secs) : TIME_MIN;
277 unblock_sigalrm(&oldsigs);
278}
279
280/* Like poll(), except:
cee03df4
BP
281 *
282 * - The timeout is specified as an absolute time, as defined by
283 * time_msec(), instead of a duration.
064af421
BP
284 *
285 * - On error, returns a negative error code (instead of setting errno).
286 *
287 * - If interrupted by a signal, retries automatically until the original
cee03df4 288 * timeout is reached. (Because of this property, this function will
064af421
BP
289 * never return -EINTR.)
290 *
291 * - As a side effect, refreshes the current time (like time_refresh()).
cee03df4
BP
292 *
293 * Stores the number of milliseconds elapsed during poll in '*elapsed'. */
064af421 294int
cee03df4
BP
295time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when,
296 int *elapsed)
064af421
BP
297{
298 static long long int last_wakeup;
064af421
BP
299 long long int start;
300 sigset_t oldsigs;
301 bool blocked;
302 int retval;
303
304 time_refresh();
959ec62e 305 log_poll_interval(last_wakeup);
064af421
BP
306 coverage_clear();
307 start = time_msec();
308 blocked = false;
309 for (;;) {
cee03df4 310 long long int now = time_msec();
064af421 311 int time_left;
cee03df4
BP
312
313 if (now >= timeout_when) {
314 time_left = 0;
315 } else if ((unsigned long long int) timeout_when - now > INT_MAX) {
316 time_left = INT_MAX;
064af421 317 } else {
cee03df4 318 time_left = timeout_when - now;
064af421
BP
319 }
320
321 retval = poll(pollfds, n_pollfds, time_left);
322 if (retval < 0) {
323 retval = -errno;
324 }
325 time_refresh();
326 if (retval != -EINTR) {
327 break;
328 }
329
330 if (!blocked && deadline == TIME_MIN) {
331 block_sigalrm(&oldsigs);
332 blocked = true;
333 }
334 }
335 if (blocked) {
336 unblock_sigalrm(&oldsigs);
337 }
338 last_wakeup = time_msec();
959ec62e 339 refresh_rusage();
cee03df4 340 *elapsed = last_wakeup - start;
064af421
BP
341 return retval;
342}
343
344/* Returns the sum of 'a' and 'b', with saturation on overflow or underflow. */
345static time_t
346time_add(time_t a, time_t b)
347{
348 return (a >= 0
349 ? (b > TIME_MAX - a ? TIME_MAX : a + b)
350 : (b < TIME_MIN - a ? TIME_MIN : a + b));
351}
352
353static void
354sigalrm_handler(int sig_nr)
355{
c73814a3
JG
356 wall_tick = true;
357 monotonic_tick = true;
358 if (deadline != TIME_MIN && time_now_sig() > deadline) {
064af421
BP
359 fatal_signal_handler(sig_nr);
360 }
361}
362
363static void
c73814a3 364refresh_wall_if_ticked(void)
064af421 365{
c73814a3
JG
366 if (wall_tick) {
367 refresh_wall();
368 }
369}
370
371static void
372refresh_monotonic_if_ticked(void)
373{
c73814a3
JG
374 if (monotonic_tick) {
375 refresh_monotonic();
064af421
BP
376 }
377}
378
379static void
380block_sigalrm(sigset_t *oldsigs)
381{
382 sigset_t sigalrm;
383 sigemptyset(&sigalrm);
384 sigaddset(&sigalrm, SIGALRM);
279c9e03 385 xsigprocmask(SIG_BLOCK, &sigalrm, oldsigs);
064af421
BP
386}
387
388static void
389unblock_sigalrm(const sigset_t *oldsigs)
390{
279c9e03 391 xsigprocmask(SIG_SETMASK, oldsigs, NULL);
064af421
BP
392}
393
c73814a3
JG
/* Converts '*ts' to milliseconds.  The nanoseconds field is divided by one
 * million using integer division, so any sub-millisecond part is dropped. */
long long int
timespec_to_msec(const struct timespec *ts)
{
    long long int whole_ms = (long long int) ts->tv_sec * 1000;
    long long int frac_ms = ts->tv_nsec / (1000 * 1000);

    return whole_ms + frac_ms;
}
399
/* Converts '*tv' to milliseconds.  The microseconds field is divided by one
 * thousand using integer division, so any sub-millisecond part is dropped. */
long long int
timeval_to_msec(const struct timeval *tv)
{
    long long int whole_ms = (long long int) tv->tv_sec * 1000;
    long long int frac_ms = tv->tv_usec / 1000;

    return whole_ms + frac_ms;
}
405
4ae90ff9
BP
406/* Returns the monotonic time at which the "time" module was initialized, in
407 * milliseconds(). */
408long long int
409time_boot_msec(void)
410{
411 time_init();
412 return boot_time;
413}
414
279c9e03
BP
/* Stores the current time of day into '*tv' using gettimeofday().  A failure
 * is reported through VLOG_FATAL instead of being returned to the caller. */
void
xgettimeofday(struct timeval *tv)
{
    int status = gettimeofday(tv, NULL);

    if (status == -1) {
        VLOG_FATAL("gettimeofday failed (%s)", strerror(errno));
    }
}
422
064af421
BP
/* Returns '*a' minus '*b', in milliseconds.  Each operand is converted to
 * milliseconds before subtracting. */
static long long int
timeval_diff_msec(const struct timeval *a, const struct timeval *b)
{
    long long int a_ms = timeval_to_msec(a);
    long long int b_ms = timeval_to_msec(b);

    return a_ms - b_ms;
}
428
6197af6e
BP
/* Stores '*a' + '*b' into '*sum', carrying one second when the nanoseconds
 * total reaches a full second.  The result is built in a local first, so
 * 'sum' may point to the same object as 'a' or 'b'. */
static void
timespec_add(struct timespec *sum,
             const struct timespec *a,
             const struct timespec *b)
{
    struct timespec result = *a;

    result.tv_sec += b->tv_sec;
    result.tv_nsec += b->tv_nsec;
    if (result.tv_nsec >= 1000 * 1000 * 1000) {
        result.tv_sec++;
        result.tv_nsec -= 1000 * 1000 * 1000;
    }

    *sum = result;
}
445
064af421 446static void
959ec62e 447log_poll_interval(long long int last_wakeup)
064af421
BP
448{
449 static unsigned int mean_interval; /* In 16ths of a millisecond. */
450 static unsigned int n_samples;
451
452 long long int now;
453 unsigned int interval; /* In 16ths of a millisecond. */
454
455 /* Compute interval from last wakeup to now in 16ths of a millisecond,
456 * capped at 10 seconds (16000 in this unit). */
457 now = time_msec();
458 interval = MIN(10000, now - last_wakeup) << 4;
459
14865427
BP
460 /* Warn if we took too much time between polls: at least 50 ms and at least
461 * 8X the mean interval. */
462 if (n_samples > 10 && interval > mean_interval * 8 && interval > 50 * 16) {
959ec62e 463 const struct rusage *last_rusage = get_recent_rusage();
064af421
BP
464 struct rusage rusage;
465
466 getrusage(RUSAGE_SELF, &rusage);
ea8cd10d 467 VLOG_WARN("%lld ms poll interval (%lld ms user, %lld ms system) "
064af421
BP
468 "is over %u times the weighted mean interval %u ms "
469 "(%u samples)",
ea8cd10d 470 now - last_wakeup,
064af421
BP
471 timeval_diff_msec(&rusage.ru_utime, &last_rusage->ru_utime),
472 timeval_diff_msec(&rusage.ru_stime, &last_rusage->ru_stime),
473 interval / mean_interval,
474 (mean_interval + 8) / 16, n_samples);
475 if (rusage.ru_minflt > last_rusage->ru_minflt
476 || rusage.ru_majflt > last_rusage->ru_majflt) {
477 VLOG_WARN("faults: %ld minor, %ld major",
478 rusage.ru_minflt - last_rusage->ru_minflt,
479 rusage.ru_majflt - last_rusage->ru_majflt);
480 }
481 if (rusage.ru_inblock > last_rusage->ru_inblock
482 || rusage.ru_oublock > last_rusage->ru_oublock) {
483 VLOG_WARN("disk: %ld reads, %ld writes",
484 rusage.ru_inblock - last_rusage->ru_inblock,
485 rusage.ru_oublock - last_rusage->ru_oublock);
486 }
487 if (rusage.ru_nvcsw > last_rusage->ru_nvcsw
488 || rusage.ru_nivcsw > last_rusage->ru_nivcsw) {
489 VLOG_WARN("context switches: %ld voluntary, %ld involuntary",
490 rusage.ru_nvcsw - last_rusage->ru_nvcsw,
491 rusage.ru_nivcsw - last_rusage->ru_nivcsw);
492 }
6bc995e4 493
d295e8e9
JP
494 /* Care should be taken in the value chosen for logging. Depending
495 * on the configuration, syslog can write changes synchronously,
496 * which can cause the coverage messages to take longer to log
6bc995e4
JP
497 * than the processing delay that triggered it. */
498 coverage_log(VLL_INFO, true);
064af421
BP
499 }
500
501 /* Update exponentially weighted moving average. With these parameters, a
502 * given value decays to 1% of its value in about 100 time steps. */
503 if (n_samples++) {
504 mean_interval = (mean_interval * 122 + interval * 6 + 64) / 128;
505 } else {
506 mean_interval = interval;
507 }
508}
959ec62e
BP
509\f
/* CPU usage tracking. */

/* One sample of this process's cumulative CPU consumption. */
struct cpu_usage {
    long long int when;         /* Time that this sample was taken. */
    unsigned long long int cpu; /* Total user+system CPU usage when sampled. */
};

/* Resource usage as of the most recent refresh_rusage() call. */
static struct rusage recent_rusage;

/* The two most recent CPU samples.  A 'when' of LLONG_MIN marks a sample that
 * has not been taken yet. */
static struct cpu_usage older = { .when = LLONG_MIN, .cpu = 0 };
static struct cpu_usage newer = { .when = LLONG_MIN, .cpu = 0 };

/* Latest CPU usage estimate, as a percentage, or -1 if none is available. */
static int cpu_usage = -1;
521
522static struct rusage *
523get_recent_rusage(void)
524{
525 return &recent_rusage;
526}
527
528static void
529refresh_rusage(void)
530{
531 long long int now;
532
533 now = time_msec();
534 getrusage(RUSAGE_SELF, &recent_rusage);
535
536 if (now >= newer.when + 3 * 1000) {
537 older = newer;
538 newer.when = now;
539 newer.cpu = (timeval_to_msec(&recent_rusage.ru_utime) +
540 timeval_to_msec(&recent_rusage.ru_stime));
541
542 if (older.when != LLONG_MIN && newer.cpu > older.cpu) {
543 unsigned int dividend = newer.cpu - older.cpu;
544 unsigned int divisor = (newer.when - older.when) / 100;
545 cpu_usage = divisor > 0 ? dividend / divisor : -1;
546 } else {
547 cpu_usage = -1;
548 }
549 }
550}
551
552/* Returns an estimate of this process's CPU usage, as a percentage, over the
553 * past few seconds of wall-clock time. Returns -1 if no estimate is available
554 * (which will happen if the process has not been running long enough to have
555 * an estimate, and can happen for other reasons as well). */
556int
557get_cpu_usage(void)
558{
559 return cpu_usage;
560}
6197af6e
BP
561\f
562/* Unixctl interface. */
563
564static void
565timeval_warp_cb(struct unixctl_conn *conn,
566 int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED)
567{
568 struct timespec ts;
569 int msecs;
570
571 msecs = atoi(argv[1]);
572 if (msecs <= 0) {
bde9f75d 573 unixctl_command_reply_error(conn, "invalid MSECS");
6197af6e
BP
574 return;
575 }
576
577 ts.tv_sec = msecs / 1000;
578 ts.tv_nsec = (msecs % 1000) * 1000 * 1000;
579 timespec_add(&warp_offset, &warp_offset, &ts);
bde9f75d 580 unixctl_command_reply(conn, "warped");
6197af6e
BP
581}
582
583void
584timeval_dummy_register(void)
585{
586 unixctl_command_register("time/warp", "MSECS", 1, 1,
587 timeval_warp_cb, NULL);
588}