1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright (C) 2009-2013 Intel Corporation
9 Auke Kok <auke-jan.h.kok@intel.com>
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
34 #include "alloc-util.h"
35 #include "bootchart.h"
36 #include "cgroup-util.h"
37 #include "dirent-util.h"
40 #include "parse-util.h"
42 #include "string-util.h"
44 #include "time-util.h"
48 * Allocate a static 4k buffer for stdio - primarily used to increase
49 * PSS buffering from the default 1k stdio buffer to reduce
52 static char smaps_buf
[4096];
/*
 * Sample CLOCK_MONOTONIC into n and return tv_sec plus tv_nsec divided by
 * NSEC_PER_SEC, as a double.
 *
 * NOTE(review): despite the _ns suffix the result is presumably expressed in
 * seconds (assuming NSEC_PER_SEC is the number of nanoseconds per second) -
 * confirm against its definition.
 * NOTE(review): the return value of clock_gettime() is not checked on the
 * visible line. The declaration of n is not part of this excerpt.
 */
55 double gettime_ns(void) {
58 clock_gettime(CLOCK_MONOTONIC
, &n
);
/* the cast forces floating-point division so the sub-second part is kept */
60 return (n
.tv_sec
+ (n
.tv_nsec
/ (double) NSEC_PER_SEC
));
/*
 * Line-stepping helper for a NUL-terminated text buffer: searches buf for the
 * first '\n' with strchr() and stores the result in c.
 *
 * NOTE(review): what is returned (and how a missing newline or a NULL buf is
 * handled) is decided on lines that are not part of this excerpt - confirm
 * before relying on it.
 */
63 static char *bufgetline(char *buf
) {
69 c
= strchr(buf
, '\n');
/*
 * Read the command line of process pid into buffer.
 *
 * procfd  - directory file descriptor that "<pid>/cmdline" is opened
 *           relative to (log_sample() passes dirfd() of its proc handle)
 * buffer  - destination; at most buf_len-1 bytes are read into it
 * buf_len - size of buffer
 * pid     - process id used to build the "<pid>/cmdline" path
 *
 * After the read, the bytes that were read are scanned for embedded '\0'
 * characters.
 *
 * NOTE(review): what is done with each '\0', NUL termination of buffer, and
 * the return values are on lines not visible in this excerpt - confirm there.
 * NOTE(review): buf_len is a size_t, so buf_len-1 wraps if a caller ever
 * passes 0; the callers visible in this file pass sizeof(ps->name).
 */
76 static int pid_cmdline_strscpy(int procfd
, char *buffer
, size_t buf_len
, int pid
) {
77 char filename
[PATH_MAX
];
/* _cleanup_close_ is presumably the scope-exit close attribute, so fd is closed on every return path - confirm */
78 _cleanup_close_
int fd
= -1;
81 sprintf(filename
, "%d/cmdline", pid
);
82 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
/* one byte of buffer is kept in reserve by reading at most buf_len-1 bytes */
86 n
= read(fd
, buffer
, buf_len
-1);
/* walk the n bytes just read, looking for NUL bytes inside the data */
89 for (i
= 0; i
< n
; i
++)
90 if (buffer
[i
] == '\0')
98 int log_sample(DIR *proc
,
100 struct ps_struct
*ps_first
,
101 struct list_sample_data
**ptr
,
105 static int vmstat
= -1;
106 _cleanup_free_
char *buf_schedstat
= NULL
;
117 static int e_fd
= -1;
122 struct list_sample_data
*sampledata
;
123 struct ps_sched_struct
*ps_prev
= NULL
;
129 procfd
= dirfd(proc
);
135 vmstat
= openat(procfd
, "vmstat", O_RDONLY
|O_CLOEXEC
);
137 return log_error_errno(errno
, "Failed to open /proc/vmstat: %m");
140 n
= pread(vmstat
, buf
, sizeof(buf
) - 1, 0);
142 vmstat
= safe_close(vmstat
);
152 if (sscanf(m
, "%s %s", key
, val
) < 2)
154 if (streq(key
, "pgpgin"))
155 sampledata
->blockstat
.bi
= atoi(val
);
156 if (streq(key
, "pgpgout")) {
157 sampledata
->blockstat
.bo
= atoi(val
);
166 /* Parse "/proc/schedstat" for overall CPU utilization */
167 r
= read_full_file("/proc/schedstat", &buf_schedstat
, NULL
);
169 return log_error_errno(r
, "Unable to read schedstat: %m");
173 if (sscanf(m
, "%s %*s %*s %*s %*s %*s %*s %s %s", key
, rt
, wt
) < 3)
176 if (strstr(key
, "cpu")) {
177 r
= safe_atoi((const char*)(key
+3), &c
);
178 if (r
< 0 || c
> MAXCPUS
-1)
179 /* Oops, we only have room for MAXCPUS data */
181 sampledata
->runtime
[c
] = atoll(rt
);
182 sampledata
->waittime
[c
] = atoll(wt
);
195 e_fd
= openat(procfd
, "sys/kernel/random/entropy_avail", O_RDONLY
|O_CLOEXEC
);
197 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
200 n
= pread(e_fd
, buf
, sizeof(buf
) - 1, 0);
202 e_fd
= safe_close(e_fd
);
205 sampledata
->entropy_avail
= atoi(buf
);
209 while ((ent
= readdir(proc
)) != NULL
) {
210 char filename
[PATH_MAX
];
212 struct ps_struct
*ps
;
214 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
217 pid
= atoi(ent
->d_name
);
223 while (ps
->next_ps
) {
229 /* end of our LL? then append a new record */
230 if (ps
->pid
!= pid
) {
231 _cleanup_fclose_
FILE *st
= NULL
;
233 struct ps_struct
*parent
;
235 ps
->next_ps
= new0(struct ps_struct
, 1);
244 ps
->sample
= new0(struct ps_sched_struct
, 1);
248 ps
->sample
->sampledata
= sampledata
;
252 /* mark our first sample */
253 ps
->first
= ps
->last
= ps
->sample
;
254 ps
->sample
->runtime
= atoll(rt
);
255 ps
->sample
->waittime
= atoll(wt
);
257 /* get name, start time */
259 sprintf(filename
, "%d/sched", pid
);
260 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
265 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
267 ps
->sched
= safe_close(ps
->sched
);
272 if (!sscanf(buf
, "%s %*s %*s", key
))
275 strscpy(ps
->name
, sizeof(ps
->name
), key
);
278 if (arg_show_cmdline
)
279 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);
290 if (!sscanf(m
, "%*s %*s %s", t
))
293 r
= safe_atod(t
, &ps
->starttime
);
297 ps
->starttime
/= 1000.0;
300 /* if this fails, that's OK */
301 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
,
302 ps
->pid
, &ps
->cgroup
);
305 sprintf(filename
, "%d/stat", pid
);
306 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
310 st
= fdopen(fd
, "re");
316 if (!fscanf(st
, "%*s %*s %*s %i", &p
))
322 * setup child pointers
324 * these are used to paint the tree coherently later
325 * each parent has a LL of children, and a LL of siblings
328 continue; /* nothing to do for init atm */
330 /* kthreadd has ppid=0, which breaks our tree ordering */
335 while ((parent
->next_ps
&& parent
->pid
!= ps
->ppid
))
336 parent
= parent
->next_ps
;
338 if (parent
->pid
!= ps
->ppid
) {
341 parent
= ps_first
->next_ps
;
346 if (!parent
->children
) {
347 /* it's the first child */
348 parent
->children
= ps
;
350 /* walk all children and append */
351 struct ps_struct
*children
;
352 children
= parent
->children
;
353 while (children
->next
)
354 children
= children
->next
;
360 /* else -> found pid, append data in ps */
362 /* below here is all continuous logging parts - we get here on every
366 if (ps
->schedstat
< 0) {
367 sprintf(filename
, "%d/schedstat", pid
);
368 ps
->schedstat
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
369 if (ps
->schedstat
< 0)
373 s
= pread(ps
->schedstat
, buf
, sizeof(buf
) - 1, 0);
375 /* clean up our file descriptors - assume that the process exited */
376 close(ps
->schedstat
);
378 ps
->sched
= safe_close(ps
->sched
);
/* Require both the runtime and waittime fields. With a leading "%s" sscanf()
 * never returns 0: it returns EOF when nothing can be converted and 1 when
 * only the first field matched, so testing for a zero return would let unset
 * or stale rt/wt contents through to the atoll() calls below. */
384 if (sscanf(buf, "%s %s %*s", rt, wt) < 2)
387 ps
->sample
->next
= new0(struct ps_sched_struct
, 1);
388 if (!ps
->sample
->next
)
391 ps
->sample
->next
->prev
= ps
->sample
;
392 ps
->sample
= ps
->sample
->next
;
393 ps
->last
= ps
->sample
;
394 ps
->sample
->runtime
= atoll(rt
);
395 ps
->sample
->waittime
= atoll(wt
);
396 ps
->sample
->sampledata
= sampledata
;
397 ps
->sample
->ps_new
= ps
;
399 ps_prev
->cross
= ps
->sample
;
401 ps_prev
= ps
->sample
;
402 ps
->total
= (ps
->last
->runtime
- ps
->first
->runtime
)
405 /* Take into account CPU runtime/waittime spent in non-main threads of the process
406 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
407 * See https://github.com/systemd/systemd/issues/139
410 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
411 snprintf(filename
, sizeof(filename
), PID_FMT
"/task", pid
);
412 taskfd
= openat(procfd
, filename
, O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
);
414 _cleanup_closedir_
DIR *taskdir
= NULL
;
416 taskdir
= fdopendir(taskfd
);
421 FOREACH_DIRENT(ent
, taskdir
, break) {
423 _cleanup_close_
int tid_schedstat
= -1;
427 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
430 /* Skip main thread as it was already accounted */
431 r
= safe_atoi(ent
->d_name
, &tid
);
432 if (r
< 0 || tid
== pid
)
435 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
436 snprintf(filename
, sizeof(filename
), PID_FMT
"/schedstat", tid
);
437 tid_schedstat
= openat(taskfd
, filename
, O_RDONLY
|O_CLOEXEC
);
439 if (tid_schedstat
== -1)
442 s
= pread(tid_schedstat
, buf
, sizeof(buf
) - 1, 0);
/* Per-thread schedstat: both fields must be present. With a leading "%s"
 * sscanf() returns EOF or 1 on short input, never 0, so compare against the
 * number of fields expected. */
447 if (sscanf(buf, "%s %s %*s", rt, wt) < 2)
450 r = safe_atolli(rt, &delta_rt);
/* The wait time comes from the second field (wt); parsing rt here again
 * would add the thread's runtime to the process waittime. */
453 r = safe_atolli(wt, &delta_wt);
456 ps
->sample
->runtime
+= delta_rt
;
457 ps
->sample
->waittime
+= delta_wt
;
466 sprintf(filename
, "%d/smaps", pid
);
467 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
470 ps
->smaps
= fdopen(fd
, "re");
475 setvbuf(ps
->smaps
, smaps_buf
, _IOFBF
, sizeof(smaps_buf
));
480 /* test to see if we need to skip another field */
482 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
485 if (fread(buf
, 1, 28 * 15, ps
->smaps
) != (28 * 15)) {
488 if (buf
[392] == 'V') {
500 /* skip one line, this contains the object mapped. */
501 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
504 /* then there's a 28 char 14 line block */
505 if (fread(buf
, 1, 28 * 14, ps
->smaps
) != 28 * 14) {
508 pss_kb
= atoi(&buf
[61]);
509 ps
->sample
->pss
+= pss_kb
;
511 /* skip one more line if this is a newer kernel */
513 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
)
518 if (ps
->sample
->pss
> ps
->pss_max
)
519 ps
->pss_max
= ps
->sample
->pss
;
522 /* catch process rename, try to randomize time */
523 mod
= (arg_hz
< 4.0) ? 4.0 : (arg_hz
/ 4.0);
524 if (((sample
- ps
->pid
) + pid
) % (int)(mod
) == 0) {
527 /* get name, start time */
529 sprintf(filename
, "%d/sched", pid
);
530 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
535 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
537 /* clean up file descriptors */
538 ps
->sched
= safe_close(ps
->sched
);
539 ps
->schedstat
= safe_close(ps
->schedstat
);
545 if (!sscanf(buf
, "%s %*s %*s", key
))
548 strscpy(ps
->name
, sizeof(ps
->name
), key
);
551 if (arg_show_cmdline
)
552 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);