]> git.proxmox.com Git - systemd.git/blame - src/bootchart/store.c
Imported Upstream version 229
[systemd.git] / src / bootchart / store.c
CommitLineData
663996b3
MS
1/***
2 This file is part of systemd.
3
14228c0d 4 Copyright (C) 2009-2013 Intel Corporation
663996b3
MS
5
6 Authors:
7 Auke Kok <auke-jan.h.kok@intel.com>
8
9 systemd is free software; you can redistribute it and/or modify it
10 under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 2.1 of the License, or
12 (at your option) any later version.
13
14 systemd is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18
19 You should have received a copy of the GNU Lesser General Public License
20 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 ***/
22
db2df898
MP
23#include <dirent.h>
24#include <fcntl.h>
663996b3 25#include <limits.h>
663996b3 26#include <stdio.h>
db2df898 27#include <stdlib.h>
663996b3 28#include <string.h>
663996b3 29#include <time.h>
db2df898 30#include <unistd.h>
663996b3 31
db2df898 32#include "alloc-util.h"
663996b3 33#include "bootchart.h"
60f067b4 34#include "cgroup-util.h"
db2df898
MP
35#include "dirent-util.h"
36#include "fd-util.h"
fb183854 37#include "fileio.h"
db2df898
MP
38#include "parse-util.h"
39#include "store.h"
40#include "string-util.h"
41#include "strxcpyx.h"
42#include "time-util.h"
43#include "util.h"
663996b3
MS
44
/*
 * One static 4 KiB buffer handed to setvbuf() for the per-process
 * smaps streams, so that PSS parsing is fully buffered with a larger
 * buffer than stdio would pick by default and issues fewer read() calls.
 */
static char smaps_buf[4096];

/*
 * Layout of the smaps per-mapping block, probed once on first use:
 * 0 = not probed yet, 1 = no extra trailing line, 2 = one extra
 * trailing line to skip after each block.
 */
static int skip;
663996b3
MS
52
53double gettime_ns(void) {
54 struct timespec n;
55
56 clock_gettime(CLOCK_MONOTONIC, &n);
57
5eef597e 58 return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
663996b3
MS
59}
60
663996b3
MS
/*
 * Advance to the start of the next line in a NUL-terminated buffer.
 *
 * Returns a pointer to the character following the first '\n' found in
 * buf, or NULL when buf is NULL or contains no further newline.
 */
static char *bufgetline(char *buf) {
        char *newline;

        if (buf == NULL)
                return NULL;

        newline = strchr(buf, '\n');

        return newline != NULL ? newline + 1 : NULL;
}
73
e3bff60a 74static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) {
663996b3 75 char filename[PATH_MAX];
e3bff60a 76 _cleanup_close_ int fd = -1;
663996b3
MS
77 ssize_t n;
78
79 sprintf(filename, "%d/cmdline", pid);
e3bff60a 80 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
663996b3
MS
81 if (fd < 0)
82 return -errno;
83
84 n = read(fd, buffer, buf_len-1);
85 if (n > 0) {
86 int i;
87 for (i = 0; i < n; i++)
88 if (buffer[i] == '\0')
89 buffer[i] = ' ';
90 buffer[n] = '\0';
91 }
e3bff60a 92
663996b3
MS
93 return 0;
94}
95
e3bff60a
MP
96int log_sample(DIR *proc,
97 int sample,
98 struct ps_struct *ps_first,
99 struct list_sample_data **ptr,
100 int *pscount,
101 int *cpus) {
102
103 static int vmstat = -1;
fb183854 104 _cleanup_free_ char *buf_schedstat = NULL;
663996b3
MS
105 char buf[4096];
106 char key[256];
107 char val[256];
108 char rt[256];
109 char wt[256];
110 char *m;
fb183854 111 int r;
663996b3
MS
112 int c;
113 int p;
114 int mod;
e3bff60a 115 static int e_fd = -1;
663996b3
MS
116 ssize_t s;
117 ssize_t n;
118 struct dirent *ent;
119 int fd;
120 struct list_sample_data *sampledata;
121 struct ps_sched_struct *ps_prev = NULL;
e3bff60a 122 int procfd;
fb183854 123 int taskfd = -1;
663996b3 124
663996b3
MS
125 sampledata = *ptr;
126
e3bff60a
MP
127 procfd = dirfd(proc);
128 if (procfd < 0)
129 return -errno;
663996b3 130
e3bff60a 131 if (vmstat < 0) {
663996b3 132 /* block stuff */
e3bff60a
MP
133 vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC);
134 if (vmstat < 0)
135 return log_error_errno(errno, "Failed to open /proc/vmstat: %m");
663996b3
MS
136 }
137
138 n = pread(vmstat, buf, sizeof(buf) - 1, 0);
139 if (n <= 0) {
e3bff60a
MP
140 vmstat = safe_close(vmstat);
141 if (n < 0)
142 return -errno;
143 return -ENODATA;
663996b3 144 }
e3bff60a 145
663996b3
MS
146 buf[n] = '\0';
147
148 m = buf;
149 while (m) {
150 if (sscanf(m, "%s %s", key, val) < 2)
151 goto vmstat_next;
152 if (streq(key, "pgpgin"))
153 sampledata->blockstat.bi = atoi(val);
154 if (streq(key, "pgpgout")) {
155 sampledata->blockstat.bo = atoi(val);
156 break;
157 }
158vmstat_next:
159 m = bufgetline(m);
160 if (!m)
161 break;
162 }
163
fb183854
MP
164 /* Parse "/proc/schedstat" for overall CPU utilization */
165 r = read_full_file("/proc/schedstat", &buf_schedstat, NULL);
166 if (r < 0)
167 return log_error_errno(r, "Unable to read schedstat: %m");
e3bff60a 168
fb183854 169 m = buf_schedstat;
663996b3
MS
170 while (m) {
171 if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
172 goto schedstat_next;
173
174 if (strstr(key, "cpu")) {
5eef597e
MP
175 r = safe_atoi((const char*)(key+3), &c);
176 if (r < 0 || c > MAXCPUS -1)
663996b3
MS
177 /* Oops, we only have room for MAXCPUS data */
178 break;
179 sampledata->runtime[c] = atoll(rt);
180 sampledata->waittime[c] = atoll(wt);
181
e3bff60a
MP
182 if (c == *cpus)
183 *cpus = c + 1;
663996b3
MS
184 }
185schedstat_next:
186 m = bufgetline(m);
187 if (!m)
188 break;
189 }
190
191 if (arg_entropy) {
e3bff60a
MP
192 if (e_fd < 0) {
193 e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC);
194 if (e_fd < 0)
195 return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
663996b3
MS
196 }
197
e3bff60a
MP
198 n = pread(e_fd, buf, sizeof(buf) - 1, 0);
199 if (n <= 0) {
200 e_fd = safe_close(e_fd);
201 } else {
202 buf[n] = '\0';
203 sampledata->entropy_avail = atoi(buf);
663996b3
MS
204 }
205 }
206
207 while ((ent = readdir(proc)) != NULL) {
208 char filename[PATH_MAX];
209 int pid;
210 struct ps_struct *ps;
211
212 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
213 continue;
214
215 pid = atoi(ent->d_name);
216
217 if (pid >= MAXPIDS)
218 continue;
219
220 ps = ps_first;
221 while (ps->next_ps) {
222 ps = ps->next_ps;
223 if (ps->pid == pid)
224 break;
225 }
226
227 /* end of our LL? then append a new record */
228 if (ps->pid != pid) {
229 _cleanup_fclose_ FILE *st = NULL;
230 char t[32];
231 struct ps_struct *parent;
232
60f067b4 233 ps->next_ps = new0(struct ps_struct, 1);
e3bff60a
MP
234 if (!ps->next_ps)
235 return log_oom();
236
663996b3
MS
237 ps = ps->next_ps;
238 ps->pid = pid;
e3bff60a
MP
239 ps->sched = -1;
240 ps->schedstat = -1;
663996b3 241
60f067b4 242 ps->sample = new0(struct ps_sched_struct, 1);
e3bff60a
MP
243 if (!ps->sample)
244 return log_oom();
245
663996b3
MS
246 ps->sample->sampledata = sampledata;
247
e3bff60a 248 (*pscount)++;
663996b3
MS
249
250 /* mark our first sample */
60f067b4 251 ps->first = ps->last = ps->sample;
663996b3
MS
252 ps->sample->runtime = atoll(rt);
253 ps->sample->waittime = atoll(wt);
254
255 /* get name, start time */
e3bff60a 256 if (ps->sched < 0) {
663996b3 257 sprintf(filename, "%d/sched", pid);
e3bff60a
MP
258 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
259 if (ps->sched < 0)
663996b3
MS
260 continue;
261 }
262
263 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
264 if (s <= 0) {
e3bff60a 265 ps->sched = safe_close(ps->sched);
663996b3
MS
266 continue;
267 }
268 buf[s] = '\0';
269
270 if (!sscanf(buf, "%s %*s %*s", key))
271 continue;
272
273 strscpy(ps->name, sizeof(ps->name), key);
274
275 /* cmdline */
276 if (arg_show_cmdline)
e3bff60a 277 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
663996b3
MS
278
279 /* discard line 2 */
280 m = bufgetline(buf);
281 if (!m)
282 continue;
283
284 m = bufgetline(m);
285 if (!m)
286 continue;
287
288 if (!sscanf(m, "%*s %*s %s", t))
289 continue;
290
5eef597e
MP
291 r = safe_atod(t, &ps->starttime);
292 if (r < 0)
293 continue;
294
295 ps->starttime /= 1000.0;
663996b3 296
60f067b4
JS
297 if (arg_show_cgroup)
298 /* if this fails, that's OK */
299 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
300 ps->pid, &ps->cgroup);
301
663996b3
MS
302 /* ppid */
303 sprintf(filename, "%d/stat", pid);
e3bff60a
MP
304 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
305 if (fd < 0)
663996b3 306 continue;
e3bff60a
MP
307
308 st = fdopen(fd, "re");
309 if (!st) {
310 close(fd);
663996b3
MS
311 continue;
312 }
e3bff60a
MP
313
314 if (!fscanf(st, "%*s %*s %*s %i", &p))
315 continue;
316
663996b3
MS
317 ps->ppid = p;
318
319 /*
320 * setup child pointers
321 *
322 * these are used to paint the tree coherently later
323 * each parent has a LL of children, and a LL of siblings
324 */
325 if (pid == 1)
326 continue; /* nothing to do for init atm */
327
328 /* kthreadd has ppid=0, which breaks our tree ordering */
329 if (ps->ppid == 0)
330 ps->ppid = 1;
331
332 parent = ps_first;
333 while ((parent->next_ps && parent->pid != ps->ppid))
334 parent = parent->next_ps;
335
60f067b4 336 if (parent->pid != ps->ppid) {
663996b3
MS
337 /* orphan */
338 ps->ppid = 1;
339 parent = ps_first->next_ps;
340 }
341
342 ps->parent = parent;
343
344 if (!parent->children) {
345 /* it's the first child */
346 parent->children = ps;
347 } else {
348 /* walk all children and append */
349 struct ps_struct *children;
350 children = parent->children;
351 while (children->next)
352 children = children->next;
e3bff60a 353
663996b3
MS
354 children->next = ps;
355 }
356 }
357
358 /* else -> found pid, append data in ps */
359
360 /* below here is all continuous logging parts - we get here on every
361 * iteration */
362
363 /* rt, wt */
e3bff60a 364 if (ps->schedstat < 0) {
663996b3 365 sprintf(filename, "%d/schedstat", pid);
e3bff60a
MP
366 ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
367 if (ps->schedstat < 0)
663996b3
MS
368 continue;
369 }
e3bff60a 370
663996b3
MS
371 s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
372 if (s <= 0) {
373 /* clean up our file descriptors - assume that the process exited */
374 close(ps->schedstat);
e3bff60a
MP
375 ps->schedstat = -1;
376 ps->sched = safe_close(ps->sched);
663996b3
MS
377 continue;
378 }
e3bff60a 379
663996b3
MS
380 buf[s] = '\0';
381
382 if (!sscanf(buf, "%s %s %*s", rt, wt))
383 continue;
384
60f067b4 385 ps->sample->next = new0(struct ps_sched_struct, 1);
e3bff60a
MP
386 if (!ps->sample->next)
387 return log_oom();
388
663996b3
MS
389 ps->sample->next->prev = ps->sample;
390 ps->sample = ps->sample->next;
391 ps->last = ps->sample;
392 ps->sample->runtime = atoll(rt);
393 ps->sample->waittime = atoll(wt);
394 ps->sample->sampledata = sampledata;
395 ps->sample->ps_new = ps;
e3bff60a 396 if (ps_prev)
663996b3 397 ps_prev->cross = ps->sample;
e3bff60a 398
663996b3
MS
399 ps_prev = ps->sample;
400 ps->total = (ps->last->runtime - ps->first->runtime)
401 / 1000000000.0;
402
fb183854
MP
403 /* Take into account CPU runtime/waittime spent in non-main threads of the process
404 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
405 * See https://github.com/systemd/systemd/issues/139
406 */
407
408 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
409 snprintf(filename, sizeof(filename), PID_FMT "/task", pid);
410 taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
411 if (taskfd >= 0) {
412 _cleanup_closedir_ DIR *taskdir = NULL;
413
414 taskdir = fdopendir(taskfd);
415 if (!taskdir) {
416 safe_close(taskfd);
417 return -errno;
418 }
419 FOREACH_DIRENT(ent, taskdir, break) {
420 int tid = -1;
421 _cleanup_close_ int tid_schedstat = -1;
422 long long delta_rt;
423 long long delta_wt;
424
425 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
426 continue;
427
428 /* Skip main thread as it was already accounted */
429 r = safe_atoi(ent->d_name, &tid);
430 if (r < 0 || tid == pid)
431 continue;
432
433 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
434 snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid);
435 tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC);
436
437 if (tid_schedstat == -1)
438 continue;
439
440 s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0);
441 if (s <= 0)
442 continue;
443 buf[s] = '\0';
444
445 if (!sscanf(buf, "%s %s %*s", rt, wt))
446 continue;
447
448 r = safe_atolli(rt, &delta_rt);
449 if (r < 0)
450 continue;
451 r = safe_atolli(rt, &delta_wt);
452 if (r < 0)
453 continue;
454 ps->sample->runtime += delta_rt;
455 ps->sample->waittime += delta_wt;
456 }
457 }
458
663996b3
MS
459 if (!arg_pss)
460 goto catch_rename;
461
462 /* Pss */
463 if (!ps->smaps) {
464 sprintf(filename, "%d/smaps", pid);
e3bff60a
MP
465 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
466 if (fd < 0)
467 continue;
468 ps->smaps = fdopen(fd, "re");
469 if (!ps->smaps) {
470 close(fd);
663996b3 471 continue;
e3bff60a 472 }
663996b3 473 setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
e3bff60a 474 } else {
663996b3
MS
475 rewind(ps->smaps);
476 }
e3bff60a 477
663996b3
MS
478 /* test to see if we need to skip another field */
479 if (skip == 0) {
480 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
481 continue;
482 }
483 if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
484 continue;
485 }
486 if (buf[392] == 'V') {
487 skip = 2;
488 }
489 else {
490 skip = 1;
491 }
492 rewind(ps->smaps);
493 }
e3bff60a 494
663996b3
MS
495 while (1) {
496 int pss_kb;
497
498 /* skip one line, this contains the object mapped. */
499 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
500 break;
501 }
502 /* then there's a 28 char 14 line block */
503 if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
504 break;
505 }
506 pss_kb = atoi(&buf[61]);
507 ps->sample->pss += pss_kb;
508
509 /* skip one more line if this is a newer kernel */
510 if (skip == 2) {
511 if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
512 break;
513 }
514 }
e3bff60a 515
663996b3
MS
516 if (ps->sample->pss > ps->pss_max)
517 ps->pss_max = ps->sample->pss;
518
519catch_rename:
520 /* catch process rename, try to randomize time */
521 mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
e3bff60a 522 if (((sample - ps->pid) + pid) % (int)(mod) == 0) {
663996b3
MS
523
524 /* re-fetch name */
525 /* get name, start time */
e3bff60a 526 if (ps->sched < 0) {
663996b3 527 sprintf(filename, "%d/sched", pid);
e3bff60a
MP
528 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
529 if (ps->sched < 0)
663996b3
MS
530 continue;
531 }
e3bff60a 532
663996b3
MS
533 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
534 if (s <= 0) {
535 /* clean up file descriptors */
e3bff60a
MP
536 ps->sched = safe_close(ps->sched);
537 ps->schedstat = safe_close(ps->schedstat);
663996b3
MS
538 continue;
539 }
e3bff60a 540
663996b3
MS
541 buf[s] = '\0';
542
543 if (!sscanf(buf, "%s %*s %*s", key))
544 continue;
545
546 strscpy(ps->name, sizeof(ps->name), key);
547
548 /* cmdline */
549 if (arg_show_cmdline)
e3bff60a 550 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
663996b3
MS
551 }
552 }
e3bff60a
MP
553
554 return 0;
663996b3 555}