]> git.proxmox.com Git - ovs.git/blame - vswitchd/system-stats.c
bridge: Fix remote_opstate bug recently introduced.
[ovs.git] / vswitchd / system-stats.c
CommitLineData
57c8677b 1/* Copyright (c) 2010, 2012 Nicira, Inc.
ce887677
BP
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include <config.h>
17
57c8677b
BP
18#include "system-stats.h"
19
ce887677
BP
20#include <ctype.h>
21#include <dirent.h>
22#include <errno.h>
23#if HAVE_MNTENT_H
24#include <mntent.h>
25#endif
ce887677
BP
26#include <stdint.h>
27#include <stdio.h>
28#include <stdlib.h>
29#if HAVE_SYS_STATVFS_H
30#include <sys/statvfs.h>
31#endif
32#include <unistd.h>
33
34#include "daemon.h"
35#include "dirs.h"
36#include "dynamic-string.h"
35a22d8c
BP
37#include "json.h"
38#include "ofpbuf.h"
39#include "poll-loop.h"
ce887677 40#include "shash.h"
57c8677b 41#include "smap.h"
ce887677
BP
42#include "timeval.h"
43#include "vlog.h"
35a22d8c 44#include "worker.h"
ce887677 45
d98e6007 46VLOG_DEFINE_THIS_MODULE(system_stats);
ce887677
BP
47
48/* #ifdefs make it a pain to maintain code: you have to try to build both ways.
49 * Thus, this file tries to compile as much of the code as possible regardless
361906b1
EJ
50 * of the target, by writing "if (LINUX_DATAPATH)" instead of "#ifdef
51 * __linux__" where this is possible. */
52#ifdef LINUX_DATAPATH
ce887677 53#include <asm/param.h>
ce887677 54#else
361906b1 55#define LINUX_DATAPATH 0
ce887677
BP
56#endif
57
/* Adds a "cpu" entry to 'stats' holding the value that sysconf() reports for
 * _SC_NPROCESSORS_ONLN.  Adds nothing if that value is not positive. */
static void
get_cpu_cores(struct smap *stats)
{
    long int online = sysconf(_SC_NPROCESSORS_ONLN);

    if (online <= 0) {
        return;
    }
    smap_add_format(stats, "cpu", "%ld", online);
}
66
67static void
57c8677b 68get_load_average(struct smap *stats OVS_UNUSED)
ce887677
BP
69{
70#if HAVE_GETLOADAVG
71 double loadavg[3];
72
73 if (getloadavg(loadavg, 3) == 3) {
57c8677b
BP
74 smap_add_format(stats, "load_average", "%.2f,%.2f,%.2f",
75 loadavg[0], loadavg[1], loadavg[2]);
ce887677
BP
76 }
77#endif
78}
79
/* Returns the value of sysconf(_SC_PAGESIZE), remembering it across calls.
 * Returns 0 if sysconf() has not (yet) produced a nonnegative value. */
static unsigned int
get_page_size(void)
{
    static unsigned int page_size;

    if (page_size == 0) {
        long int reported = sysconf(_SC_PAGESIZE);

        if (reported >= 0) {
            page_size = reported;
        }
    }
    return page_size;
}
94
95static void
57c8677b 96get_memory_stats(struct smap *stats)
ce887677 97{
361906b1 98 if (!LINUX_DATAPATH) {
ce887677
BP
99 unsigned int pagesize = get_page_size();
100 long int phys_pages = sysconf(_SC_PHYS_PAGES);
6ca00f6f 101#ifdef _SC_AVPHYS_PAGES
ce887677 102 long int avphys_pages = sysconf(_SC_AVPHYS_PAGES);
6ca00f6f
ETN
103#else
104 long int avphys_pages = 0;
105#endif
ce887677
BP
106 int mem_total, mem_used;
107
108 if (pagesize <= 0 || phys_pages <= 0 || avphys_pages <= 0) {
109 return;
110 }
111
112 mem_total = phys_pages * (pagesize / 1024);
113 mem_used = (phys_pages - avphys_pages) * (pagesize / 1024);
57c8677b 114 smap_add_format(stats, "memory", "%d,%d", mem_total, mem_used);
ce887677
BP
115 } else {
116 static const char file_name[] = "/proc/meminfo";
117 int mem_used, mem_cache, swap_used;
118 int mem_free = 0;
119 int buffers = 0;
120 int cached = 0;
121 int swap_free = 0;
122 int mem_total = 0;
123 int swap_total = 0;
124 struct shash dict;
125 char line[128];
126 FILE *stream;
127
128 stream = fopen(file_name, "r");
129 if (!stream) {
130 VLOG_WARN_ONCE("%s: open failed (%s)", file_name, strerror(errno));
131 return;
132 }
133
134 shash_init(&dict);
135 shash_add(&dict, "MemTotal", &mem_total);
136 shash_add(&dict, "MemFree", &mem_free);
137 shash_add(&dict, "Buffers", &buffers);
138 shash_add(&dict, "Cached", &cached);
139 shash_add(&dict, "SwapTotal", &swap_total);
140 shash_add(&dict, "SwapFree", &swap_free);
141 while (fgets(line, sizeof line, stream)) {
142 char key[16];
143 int value;
144
145 if (sscanf(line, "%15[^:]: %u", key, &value) == 2) {
146 int *valuep = shash_find_data(&dict, key);
147 if (valuep) {
148 *valuep = value;
149 }
150 }
151 }
152 fclose(stream);
153 shash_destroy(&dict);
154
155 mem_used = mem_total - mem_free;
156 mem_cache = buffers + cached;
157 swap_used = swap_total - swap_free;
57c8677b
BP
158 smap_add_format(stats, "memory", "%d,%d,%d,%d,%d",
159 mem_total, mem_used, mem_cache, swap_total, swap_used);
ce887677
BP
160 }
161}
162
163/* Returns the time at which the system booted, as the number of milliseconds
164 * since the epoch, or 0 if the time of boot cannot be determined. */
165static long long int
166get_boot_time(void)
167{
168 static long long int cache_expiration = LLONG_MIN;
169 static long long int boot_time;
170
cb22974d 171 ovs_assert(LINUX_DATAPATH);
ce887677
BP
172
173 if (time_msec() >= cache_expiration) {
174 static const char stat_file[] = "/proc/stat";
175 char line[128];
176 FILE *stream;
177
178 cache_expiration = time_msec() + 5 * 1000;
179
180 stream = fopen(stat_file, "r");
181 if (!stream) {
182 VLOG_ERR_ONCE("%s: open failed (%s)", stat_file, strerror(errno));
183 return boot_time;
184 }
185
186 while (fgets(line, sizeof line, stream)) {
187 long long int btime;
188 if (sscanf(line, "btime %lld", &btime) == 1) {
189 boot_time = btime * 1000;
190 goto done;
191 }
192 }
193 VLOG_ERR_ONCE("%s: btime not found", stat_file);
194 done:
195 fclose(stream);
196 }
197 return boot_time;
198}
199
200static unsigned long long int
201ticks_to_ms(unsigned long long int ticks)
202{
cb22974d 203 ovs_assert(LINUX_DATAPATH);
ce887677
BP
204
205#ifndef USER_HZ
206#define USER_HZ 100
207#endif
208
209#if USER_HZ == 100 /* Common case. */
210 return ticks * (1000 / USER_HZ);
211#else /* Alpha and some other architectures. */
212 double factor = 1000.0 / USER_HZ;
213 return ticks * factor + 0.5;
214#endif
215}
216
/* Per-process figures filled in by get_raw_process_info(). */
struct raw_process_info {
    unsigned long int vsz;      /* Virtual memory size, in kB. */
    unsigned long int rss;      /* Resident set size, in kB. */
    long long int uptime;       /* Time since the process started, in ms. */
    long long int cputime;      /* CPU time consumed over 'uptime', in ms. */
    pid_t ppid;                 /* Parent process ID. */
    char name[18];              /* Process name, parentheses included. */
};
225
226static bool
227get_raw_process_info(pid_t pid, struct raw_process_info *raw)
228{
229 unsigned long long int vsize, rss, start_time, utime, stime;
230 long long int start_msec;
231 unsigned long ppid;
232 char file_name[128];
233 FILE *stream;
234 int n;
235
cb22974d 236 ovs_assert(LINUX_DATAPATH);
ce887677
BP
237
238 sprintf(file_name, "/proc/%lu/stat", (unsigned long int) pid);
239 stream = fopen(file_name, "r");
240 if (!stream) {
241 VLOG_ERR_ONCE("%s: open failed (%s)", file_name, strerror(errno));
242 return false;
243 }
244
245 n = fscanf(stream,
246 "%*d " /* (1. pid) */
247 "%17s " /* 2. process name */
248 "%*c " /* (3. state) */
249 "%lu " /* 4. ppid */
250 "%*d " /* (5. pgid) */
251 "%*d " /* (6. sid) */
252 "%*d " /* (7. tty_nr) */
253 "%*d " /* (8. tty_pgrp) */
254 "%*u " /* (9. flags) */
255 "%*u " /* (10. min_flt) */
256 "%*u " /* (11. cmin_flt) */
257 "%*u " /* (12. maj_flt) */
258 "%*u " /* (13. cmaj_flt) */
259 "%llu " /* 14. utime */
260 "%llu " /* 15. stime */
261 "%*d " /* (16. cutime) */
262 "%*d " /* (17. cstime) */
263 "%*d " /* (18. priority) */
264 "%*d " /* (19. nice) */
265 "%*d " /* (20. num_threads) */
266 "%*d " /* (21. always 0) */
267 "%llu " /* 22. start_time */
268 "%llu " /* 23. vsize */
269 "%llu " /* 24. rss */
270#if 0
271 /* These are here for documentation but #if'd out to save
272 * actually parsing them from the stream for no benefit. */
273 "%*lu " /* (25. rsslim) */
274 "%*lu " /* (26. start_code) */
275 "%*lu " /* (27. end_code) */
276 "%*lu " /* (28. start_stack) */
277 "%*lu " /* (29. esp) */
278 "%*lu " /* (30. eip) */
279 "%*lu " /* (31. pending signals) */
280 "%*lu " /* (32. blocked signals) */
281 "%*lu " /* (33. ignored signals) */
282 "%*lu " /* (34. caught signals) */
283 "%*lu " /* (35. whcan) */
284 "%*lu " /* (36. always 0) */
285 "%*lu " /* (37. always 0) */
286 "%*d " /* (38. exit_signal) */
287 "%*d " /* (39. task_cpu) */
288 "%*u " /* (40. rt_priority) */
289 "%*u " /* (41. policy) */
290 "%*llu " /* (42. blkio_ticks) */
291 "%*lu " /* (43. gtime) */
292 "%*ld" /* (44. cgtime) */
293#endif
294 , raw->name, &ppid, &utime, &stime, &start_time, &vsize, &rss);
295 fclose(stream);
296 if (n != 7) {
297 VLOG_ERR_ONCE("%s: fscanf failed", file_name);
298 return false;
299 }
300
301 start_msec = get_boot_time() + ticks_to_ms(start_time);
302
303 raw->vsz = vsize / 1024;
304 raw->rss = rss * (getpagesize() / 1024);
305 raw->uptime = time_wall_msec() - start_msec;
306 raw->cputime = ticks_to_ms(utime + stime);
307 raw->ppid = ppid;
308
309 return true;
310}
311
312static int
313count_crashes(pid_t pid)
314{
315 char file_name[128];
316 const char *paren;
317 char line[128];
318 int crashes = 0;
319 FILE *stream;
320
cb22974d 321 ovs_assert(LINUX_DATAPATH);
ce887677
BP
322
323 sprintf(file_name, "/proc/%lu/cmdline", (unsigned long int) pid);
324 stream = fopen(file_name, "r");
325 if (!stream) {
326 VLOG_WARN_ONCE("%s: open failed (%s)", file_name, strerror(errno));
327 goto exit;
328 }
329
330 if (!fgets(line, sizeof line, stream)) {
331 VLOG_WARN_ONCE("%s: read failed (%s)", file_name,
332 feof(stream) ? "end of file" : strerror(errno));
333 goto exit_close;
334 }
335
336 paren = strchr(line, '(');
337 if (paren) {
338 int x;
339 if (sscanf(paren + 1, "%d", &x) == 1) {
340 crashes = x;
341 }
342 }
343
344exit_close:
345 fclose(stream);
346exit:
347 return crashes;
348}
349
/* Figures reported for one daemon, as assembled by get_process_info(). */
struct process_info {
    unsigned long int vsz;      /* Virtual memory size, in kB. */
    unsigned long int rss;      /* Resident set size, in kB. */
    long long int booted;       /* Time since the monitor started, in ms. */
    int crashes;                /* Number of crashes (usually 0). */
    long long int uptime;       /* Time since the monitor last (re)started the
                                 * process, in ms. */
    long long int cputime;      /* CPU time consumed over 'uptime', in ms. */
};
358
359static bool
360get_process_info(pid_t pid, struct process_info *pinfo)
361{
362 struct raw_process_info child;
363
cb22974d 364 ovs_assert(LINUX_DATAPATH);
ce887677
BP
365 if (!get_raw_process_info(pid, &child)) {
366 return false;
367 }
368
369 pinfo->vsz = child.vsz;
370 pinfo->rss = child.rss;
371 pinfo->booted = child.uptime;
372 pinfo->crashes = 0;
373 pinfo->uptime = child.uptime;
374 pinfo->cputime = child.cputime;
375
376 if (child.ppid) {
377 struct raw_process_info parent;
378
379 get_raw_process_info(child.ppid, &parent);
380 if (!strcmp(child.name, parent.name)) {
381 pinfo->booted = parent.uptime;
382 pinfo->crashes = count_crashes(child.ppid);
383 }
384 }
385
386 return true;
387}
388
389static void
57c8677b 390get_process_stats(struct smap *stats)
ce887677
BP
391{
392 struct dirent *de;
393 DIR *dir;
394
b43c6fe2 395 dir = opendir(ovs_rundir());
ce887677 396 if (!dir) {
b43c6fe2 397 VLOG_ERR_ONCE("%s: open failed (%s)", ovs_rundir(), strerror(errno));
ce887677
BP
398 return;
399 }
400
401 while ((de = readdir(dir)) != NULL) {
402 struct process_info pinfo;
ce887677
BP
403 char *file_name;
404 char *extension;
57c8677b 405 char *key;
ce887677
BP
406 pid_t pid;
407
408#ifdef _DIRENT_HAVE_D_TYPE
409 if (de->d_type != DT_UNKNOWN && de->d_type != DT_REG) {
410 continue;
411 }
412#endif
413
414 extension = strrchr(de->d_name, '.');
415 if (!extension || strcmp(extension, ".pid")) {
416 continue;
417 }
418
b43c6fe2 419 file_name = xasprintf("%s/%s", ovs_rundir(), de->d_name);
ce887677
BP
420 pid = read_pidfile(file_name);
421 free(file_name);
8da7ca87 422 if (pid < 0) {
ce887677
BP
423 continue;
424 }
425
426 key = xasprintf("process_%.*s",
427 (int) (extension - de->d_name), de->d_name);
57c8677b 428 if (!smap_get(stats, key)) {
361906b1 429 if (LINUX_DATAPATH && get_process_info(pid, &pinfo)) {
57c8677b
BP
430 smap_add_format(stats, key, "%lu,%lu,%lld,%d,%lld,%lld",
431 pinfo.vsz, pinfo.rss, pinfo.cputime,
432 pinfo.crashes, pinfo.booted, pinfo.uptime);
433 } else {
434 smap_add(stats, key, "");
435 }
ce887677 436 }
57c8677b 437 free(key);
ce887677
BP
438 }
439
440 closedir(dir);
441}
442
443static void
57c8677b 444get_filesys_stats(struct smap *stats OVS_UNUSED)
ce887677
BP
445{
446#if HAVE_SETMNTENT && HAVE_STATVFS
447 static const char file_name[] = "/etc/mtab";
448 struct mntent *me;
449 FILE *stream;
450 struct ds s;
451
452 stream = setmntent(file_name, "r");
453 if (!stream) {
454 VLOG_ERR_ONCE("%s: open failed (%s)", file_name, strerror(errno));
455 return;
456 }
457
458 ds_init(&s);
459 while ((me = getmntent(stream)) != NULL) {
460 unsigned long long int total, free;
461 struct statvfs vfs;
462 char *p;
463
464 /* Skip non-local and read-only filesystems. */
465 if (strncmp(me->mnt_fsname, "/dev", 4)
466 || !strstr(me->mnt_opts, "rw")) {
467 continue;
468 }
469
470 /* Given the mount point we can stat the file system. */
471 if (statvfs(me->mnt_dir, &vfs) && vfs.f_flag & ST_RDONLY) {
472 /* That's odd... */
473 continue;
474 }
475
476 /* Now format the data. */
477 if (s.length) {
478 ds_put_char(&s, ' ');
479 }
480 for (p = me->mnt_dir; *p != '\0'; p++) {
481 ds_put_char(&s, *p == ' ' || *p == ',' ? '_' : *p);
482 }
483 total = (unsigned long long int) vfs.f_frsize * vfs.f_blocks / 1024;
484 free = (unsigned long long int) vfs.f_frsize * vfs.f_bfree / 1024;
485 ds_put_format(&s, ",%llu,%llu", total, total - free);
486 }
487 endmntent(stream);
488
489 if (s.length) {
57c8677b 490 smap_add(stats, "file_systems", ds_cstr(&s));
ce887677
BP
491 }
492 ds_destroy(&s);
493#endif /* HAVE_SETMNTENT && HAVE_STATVFS */
494}
35a22d8c
BP
495\f
496#define SYSTEM_STATS_INTERVAL (5 * 1000) /* In milliseconds. */
ce887677 497
35a22d8c
BP
498/* Whether the client wants us to report system stats. */
499static bool enabled;
500
501static enum {
502 S_DISABLED, /* Not enabled, nothing going on. */
503 S_WAITING, /* Sleeping for SYSTEM_STATS_INTERVAL ms. */
504 S_REQUEST_SENT, /* Sent a request to worker. */
505 S_REPLY_RECEIVED /* Received a reply from worker. */
506} state;
507
508/* In S_WAITING state: the next time to wake up.
509 * In other states: not meaningful. */
510static long long int next_refresh;
511
512/* In S_REPLY_RECEIVED: the stats that have just been received.
513 * In other states: not meaningful. */
514static struct smap *received_stats;
515
516static worker_request_func system_stats_request_cb;
517static worker_reply_func system_stats_reply_cb;
518
519/* Enables or disables system stats collection, according to 'new_enable'.
520 *
521 * Even if system stats are disabled, the caller should still periodically call
522 * system_stats_run(). */
ce887677 523void
35a22d8c
BP
524system_stats_enable(bool new_enable)
525{
526 if (new_enable != enabled) {
527 if (new_enable) {
528 if (state == S_DISABLED) {
529 state = S_WAITING;
530 next_refresh = time_msec();
531 }
532 } else {
533 if (state == S_WAITING) {
534 state = S_DISABLED;
535 }
536 }
537 enabled = new_enable;
538 }
539}
540
541/* Tries to obtain a new snapshot of system stats every SYSTEM_STATS_INTERVAL
542 * milliseconds.
543 *
544 * When a new snapshot is available (which only occurs if system stats are
545 * enabled), returns it as an smap owned by the caller. The caller must use
546 * both smap_destroy() and free() to complete free the returned data.
547 *
548 * When no new snapshot is available, returns NULL. */
549struct smap *
550system_stats_run(void)
ce887677 551{
35a22d8c
BP
552 switch (state) {
553 case S_DISABLED:
554 break;
555
556 case S_WAITING:
557 if (time_msec() >= next_refresh) {
558 worker_request(NULL, 0, NULL, 0, system_stats_request_cb,
559 system_stats_reply_cb, NULL);
560 state = S_REQUEST_SENT;
561 }
562 break;
563
564 case S_REQUEST_SENT:
565 break;
566
567 case S_REPLY_RECEIVED:
568 if (enabled) {
569 state = S_WAITING;
570 next_refresh = time_msec() + SYSTEM_STATS_INTERVAL;
571 return received_stats;
572 } else {
573 smap_destroy(received_stats);
574 free(received_stats);
575 state = S_DISABLED;
576 }
577 break;
578 }
579
580 return NULL;
581}
582
583/* Causes poll_block() to wake up when system_stats_run() needs to be
584 * called. */
585void
586system_stats_wait(void)
587{
588 switch (state) {
589 case S_DISABLED:
590 break;
591
592 case S_WAITING:
593 poll_timer_wait_until(next_refresh);
594 break;
595
596 case S_REQUEST_SENT:
597 /* Someone else should be calling worker_wait() to wake up when the
598 * reply arrives, otherwise there's a bug. */
599 break;
600
601 case S_REPLY_RECEIVED:
602 poll_immediate_wake();
603 break;
604 }
605}
606
607static void
608system_stats_request_cb(struct ofpbuf *request OVS_UNUSED,
609 const int fds[] OVS_UNUSED, size_t n_fds OVS_UNUSED)
610{
611 struct smap stats;
612 struct json *json;
613 char *s;
614
615 smap_init(&stats);
616 get_cpu_cores(&stats);
617 get_load_average(&stats);
618 get_memory_stats(&stats);
619 get_process_stats(&stats);
620 get_filesys_stats(&stats);
621
622 json = smap_to_json(&stats);
623 s = json_to_string(json, 0);
624 worker_reply(s, strlen(s) + 1, NULL, 0);
625
626 free(s);
627 json_destroy(json);
628 smap_destroy(&stats);
629}
630
631static void
632system_stats_reply_cb(struct ofpbuf *reply,
633 const int fds[] OVS_UNUSED, size_t n_fds OVS_UNUSED,
634 void *aux OVS_UNUSED)
635{
636 struct json *json = json_from_string(reply->data);
637
638 received_stats = xmalloc(sizeof *received_stats);
639 smap_init(received_stats);
640 smap_from_json(received_stats, json);
641
cb22974d 642 ovs_assert(state == S_REQUEST_SENT);
35a22d8c
BP
643 state = S_REPLY_RECEIVED;
644
645 json_destroy(json);
ce887677 646}