]> git.proxmox.com Git - systemd.git/blob - src/bootchart/store.c
Enable seccomp support on powerpc, ppc64el, and s390x
[systemd.git] / src / bootchart / store.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright (C) 2009-2013 Intel Corporation
7
8 Authors:
9 Auke Kok <auke-jan.h.kok@intel.com>
10
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
15
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 ***/
24
25 #include <dirent.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 #include <unistd.h>
33
34 #include "alloc-util.h"
35 #include "bootchart.h"
36 #include "cgroup-util.h"
37 #include "dirent-util.h"
38 #include "fd-util.h"
39 #include "fileio.h"
40 #include "parse-util.h"
41 #include "store.h"
42 #include "string-util.h"
43 #include "strxcpyx.h"
44 #include "time-util.h"
45 #include "util.h"
46
/*
 * State of the one-time smaps layout probe done in log_sample():
 * 0 means "not probed yet"; the probe then sets it to 1 or 2, where 2
 * makes the PSS parser skip one extra line per mapping.
 */
static int skip;

/*
 * Static 4k buffer handed to setvbuf() for the per-process smaps
 * streams. It enlarges stdio's buffering (the original author cites a
 * 1k default) so that reading PSS data needs fewer read() calls.
 */
static char smaps_buf[4096];
54
55 double gettime_ns(void) {
56 struct timespec n;
57
58 clock_gettime(CLOCK_MONOTONIC, &n);
59
60 return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
61 }
62
/*
 * Advance to the start of the next line inside a NUL-terminated buffer.
 *
 * Returns a pointer to the character right after the first '\n' found
 * in buf, or NULL when buf is NULL or contains no further newline.
 */
static char *bufgetline(char *buf) {
        char *newline;

        if (!buf)
                return NULL;

        newline = strchr(buf, '\n');

        return newline ? newline + 1 : NULL;
}
75
76 static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) {
77 char filename[PATH_MAX];
78 _cleanup_close_ int fd = -1;
79 ssize_t n;
80
81 sprintf(filename, "%d/cmdline", pid);
82 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
83 if (fd < 0)
84 return -errno;
85
86 n = read(fd, buffer, buf_len-1);
87 if (n > 0) {
88 int i;
89 for (i = 0; i < n; i++)
90 if (buffer[i] == '\0')
91 buffer[i] = ' ';
92 buffer[n] = '\0';
93 }
94
95 return 0;
96 }
97
98 int log_sample(DIR *proc,
99 int sample,
100 struct ps_struct *ps_first,
101 struct list_sample_data **ptr,
102 int *pscount,
103 int *cpus) {
104
105 static int vmstat = -1;
106 _cleanup_free_ char *buf_schedstat = NULL;
107 char buf[4096];
108 char key[256];
109 char val[256];
110 char rt[256];
111 char wt[256];
112 char *m;
113 int r;
114 int c;
115 int p;
116 int mod;
117 static int e_fd = -1;
118 ssize_t s;
119 ssize_t n;
120 struct dirent *ent;
121 int fd;
122 struct list_sample_data *sampledata;
123 struct ps_sched_struct *ps_prev = NULL;
124 int procfd;
125 int taskfd = -1;
126
127 sampledata = *ptr;
128
129 procfd = dirfd(proc);
130 if (procfd < 0)
131 return -errno;
132
133 if (vmstat < 0) {
134 /* block stuff */
135 vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC);
136 if (vmstat < 0)
137 return log_error_errno(errno, "Failed to open /proc/vmstat: %m");
138 }
139
140 n = pread(vmstat, buf, sizeof(buf) - 1, 0);
141 if (n <= 0) {
142 vmstat = safe_close(vmstat);
143 if (n < 0)
144 return -errno;
145 return -ENODATA;
146 }
147
148 buf[n] = '\0';
149
150 m = buf;
151 while (m) {
152 if (sscanf(m, "%s %s", key, val) < 2)
153 goto vmstat_next;
154 if (streq(key, "pgpgin"))
155 sampledata->blockstat.bi = atoi(val);
156 if (streq(key, "pgpgout")) {
157 sampledata->blockstat.bo = atoi(val);
158 break;
159 }
160 vmstat_next:
161 m = bufgetline(m);
162 if (!m)
163 break;
164 }
165
166 /* Parse "/proc/schedstat" for overall CPU utilization */
167 r = read_full_file("/proc/schedstat", &buf_schedstat, NULL);
168 if (r < 0)
169 return log_error_errno(r, "Unable to read schedstat: %m");
170
171 m = buf_schedstat;
172 while (m) {
173 if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
174 goto schedstat_next;
175
176 if (strstr(key, "cpu")) {
177 r = safe_atoi((const char*)(key+3), &c);
178 if (r < 0 || c > MAXCPUS -1)
179 /* Oops, we only have room for MAXCPUS data */
180 break;
181 sampledata->runtime[c] = atoll(rt);
182 sampledata->waittime[c] = atoll(wt);
183
184 if (c == *cpus)
185 *cpus = c + 1;
186 }
187 schedstat_next:
188 m = bufgetline(m);
189 if (!m)
190 break;
191 }
192
193 if (arg_entropy) {
194 if (e_fd < 0) {
195 e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC);
196 if (e_fd < 0)
197 return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
198 }
199
200 n = pread(e_fd, buf, sizeof(buf) - 1, 0);
201 if (n <= 0) {
202 e_fd = safe_close(e_fd);
203 } else {
204 buf[n] = '\0';
205 sampledata->entropy_avail = atoi(buf);
206 }
207 }
208
209 while ((ent = readdir(proc)) != NULL) {
210 char filename[PATH_MAX];
211 int pid;
212 struct ps_struct *ps;
213
214 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
215 continue;
216
217 pid = atoi(ent->d_name);
218
219 if (pid >= MAXPIDS)
220 continue;
221
222 ps = ps_first;
223 while (ps->next_ps) {
224 ps = ps->next_ps;
225 if (ps->pid == pid)
226 break;
227 }
228
229 /* end of our LL? then append a new record */
230 if (ps->pid != pid) {
231 _cleanup_fclose_ FILE *st = NULL;
232 char t[32];
233 struct ps_struct *parent;
234
235 ps->next_ps = new0(struct ps_struct, 1);
236 if (!ps->next_ps)
237 return log_oom();
238
239 ps = ps->next_ps;
240 ps->pid = pid;
241 ps->sched = -1;
242 ps->schedstat = -1;
243
244 ps->sample = new0(struct ps_sched_struct, 1);
245 if (!ps->sample)
246 return log_oom();
247
248 ps->sample->sampledata = sampledata;
249
250 (*pscount)++;
251
252 /* mark our first sample */
253 ps->first = ps->last = ps->sample;
254 ps->sample->runtime = atoll(rt);
255 ps->sample->waittime = atoll(wt);
256
257 /* get name, start time */
258 if (ps->sched < 0) {
259 sprintf(filename, "%d/sched", pid);
260 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
261 if (ps->sched < 0)
262 continue;
263 }
264
265 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
266 if (s <= 0) {
267 ps->sched = safe_close(ps->sched);
268 continue;
269 }
270 buf[s] = '\0';
271
272 if (!sscanf(buf, "%s %*s %*s", key))
273 continue;
274
275 strscpy(ps->name, sizeof(ps->name), key);
276
277 /* cmdline */
278 if (arg_show_cmdline)
279 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
280
281 /* discard line 2 */
282 m = bufgetline(buf);
283 if (!m)
284 continue;
285
286 m = bufgetline(m);
287 if (!m)
288 continue;
289
290 if (!sscanf(m, "%*s %*s %s", t))
291 continue;
292
293 r = safe_atod(t, &ps->starttime);
294 if (r < 0)
295 continue;
296
297 ps->starttime /= 1000.0;
298
299 if (arg_show_cgroup)
300 /* if this fails, that's OK */
301 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
302 ps->pid, &ps->cgroup);
303
304 /* ppid */
305 sprintf(filename, "%d/stat", pid);
306 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
307 if (fd < 0)
308 continue;
309
310 st = fdopen(fd, "re");
311 if (!st) {
312 close(fd);
313 continue;
314 }
315
316 if (!fscanf(st, "%*s %*s %*s %i", &p))
317 continue;
318
319 ps->ppid = p;
320
321 /*
322 * setup child pointers
323 *
324 * these are used to paint the tree coherently later
325 * each parent has a LL of children, and a LL of siblings
326 */
327 if (pid == 1)
328 continue; /* nothing to do for init atm */
329
330 /* kthreadd has ppid=0, which breaks our tree ordering */
331 if (ps->ppid == 0)
332 ps->ppid = 1;
333
334 parent = ps_first;
335 while ((parent->next_ps && parent->pid != ps->ppid))
336 parent = parent->next_ps;
337
338 if (parent->pid != ps->ppid) {
339 /* orphan */
340 ps->ppid = 1;
341 parent = ps_first->next_ps;
342 }
343
344 ps->parent = parent;
345
346 if (!parent->children) {
347 /* it's the first child */
348 parent->children = ps;
349 } else {
350 /* walk all children and append */
351 struct ps_struct *children;
352 children = parent->children;
353 while (children->next)
354 children = children->next;
355
356 children->next = ps;
357 }
358 }
359
360 /* else -> found pid, append data in ps */
361
362 /* below here is all continuous logging parts - we get here on every
363 * iteration */
364
365 /* rt, wt */
366 if (ps->schedstat < 0) {
367 sprintf(filename, "%d/schedstat", pid);
368 ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
369 if (ps->schedstat < 0)
370 continue;
371 }
372
373 s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
374 if (s <= 0) {
375 /* clean up our file descriptors - assume that the process exited */
376 close(ps->schedstat);
377 ps->schedstat = -1;
378 ps->sched = safe_close(ps->sched);
379 continue;
380 }
381
382 buf[s] = '\0';
383
384 if (!sscanf(buf, "%s %s %*s", rt, wt))
385 continue;
386
387 ps->sample->next = new0(struct ps_sched_struct, 1);
388 if (!ps->sample->next)
389 return log_oom();
390
391 ps->sample->next->prev = ps->sample;
392 ps->sample = ps->sample->next;
393 ps->last = ps->sample;
394 ps->sample->runtime = atoll(rt);
395 ps->sample->waittime = atoll(wt);
396 ps->sample->sampledata = sampledata;
397 ps->sample->ps_new = ps;
398 if (ps_prev)
399 ps_prev->cross = ps->sample;
400
401 ps_prev = ps->sample;
402 ps->total = (ps->last->runtime - ps->first->runtime)
403 / 1000000000.0;
404
405 /* Take into account CPU runtime/waittime spent in non-main threads of the process
406 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
407 * See https://github.com/systemd/systemd/issues/139
408 */
409
410 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
411 snprintf(filename, sizeof(filename), PID_FMT "/task", pid);
412 taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
413 if (taskfd >= 0) {
414 _cleanup_closedir_ DIR *taskdir = NULL;
415
416 taskdir = fdopendir(taskfd);
417 if (!taskdir) {
418 safe_close(taskfd);
419 return -errno;
420 }
421 FOREACH_DIRENT(ent, taskdir, break) {
422 int tid = -1;
423 _cleanup_close_ int tid_schedstat = -1;
424 long long delta_rt;
425 long long delta_wt;
426
427 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
428 continue;
429
430 /* Skip main thread as it was already accounted */
431 r = safe_atoi(ent->d_name, &tid);
432 if (r < 0 || tid == pid)
433 continue;
434
435 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
436 snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid);
437 tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC);
438
439 if (tid_schedstat == -1)
440 continue;
441
442 s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0);
443 if (s <= 0)
444 continue;
445 buf[s] = '\0';
446
447 if (!sscanf(buf, "%s %s %*s", rt, wt))
448 continue;
449
450 r = safe_atolli(rt, &delta_rt);
451 if (r < 0)
452 continue;
453 r = safe_atolli(rt, &delta_wt);
454 if (r < 0)
455 continue;
456 ps->sample->runtime += delta_rt;
457 ps->sample->waittime += delta_wt;
458 }
459 }
460
461 if (!arg_pss)
462 goto catch_rename;
463
464 /* Pss */
465 if (!ps->smaps) {
466 sprintf(filename, "%d/smaps", pid);
467 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
468 if (fd < 0)
469 continue;
470 ps->smaps = fdopen(fd, "re");
471 if (!ps->smaps) {
472 close(fd);
473 continue;
474 }
475 setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
476 } else {
477 rewind(ps->smaps);
478 }
479
480 /* test to see if we need to skip another field */
481 if (skip == 0) {
482 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
483 continue;
484 }
485 if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
486 continue;
487 }
488 if (buf[392] == 'V') {
489 skip = 2;
490 }
491 else {
492 skip = 1;
493 }
494 rewind(ps->smaps);
495 }
496
497 while (1) {
498 int pss_kb;
499
500 /* skip one line, this contains the object mapped. */
501 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
502 break;
503 }
504 /* then there's a 28 char 14 line block */
505 if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
506 break;
507 }
508 pss_kb = atoi(&buf[61]);
509 ps->sample->pss += pss_kb;
510
511 /* skip one more line if this is a newer kernel */
512 if (skip == 2) {
513 if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
514 break;
515 }
516 }
517
518 if (ps->sample->pss > ps->pss_max)
519 ps->pss_max = ps->sample->pss;
520
521 catch_rename:
522 /* catch process rename, try to randomize time */
523 mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
524 if (((sample - ps->pid) + pid) % (int)(mod) == 0) {
525
526 /* re-fetch name */
527 /* get name, start time */
528 if (ps->sched < 0) {
529 sprintf(filename, "%d/sched", pid);
530 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
531 if (ps->sched < 0)
532 continue;
533 }
534
535 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
536 if (s <= 0) {
537 /* clean up file descriptors */
538 ps->sched = safe_close(ps->sched);
539 ps->schedstat = safe_close(ps->schedstat);
540 continue;
541 }
542
543 buf[s] = '\0';
544
545 if (!sscanf(buf, "%s %*s %*s", key))
546 continue;
547
548 strscpy(ps->name, sizeof(ps->name), key);
549
550 /* cmdline */
551 if (arg_show_cmdline)
552 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
553 }
554 }
555
556 return 0;
557 }