]>
Commit | Line | Data |
---|---|---|
663996b3 MS |
1 | /*** |
2 | This file is part of systemd. | |
3 | ||
14228c0d | 4 | Copyright (C) 2009-2013 Intel Corporation |
663996b3 MS |
5 | |
6 | Authors: | |
7 | Auke Kok <auke-jan.h.kok@intel.com> | |
8 | ||
9 | systemd is free software; you can redistribute it and/or modify it | |
10 | under the terms of the GNU Lesser General Public License as published by | |
11 | the Free Software Foundation; either version 2.1 of the License, or | |
12 | (at your option) any later version. | |
13 | ||
14 | systemd is distributed in the hope that it will be useful, but | |
15 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | Lesser General Public License for more details. | |
18 | ||
19 | You should have received a copy of the GNU Lesser General Public License | |
20 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
21 | ***/ | |
22 | ||
db2df898 MP |
23 | #include <dirent.h> |
24 | #include <fcntl.h> | |
663996b3 | 25 | #include <limits.h> |
663996b3 | 26 | #include <stdio.h> |
db2df898 | 27 | #include <stdlib.h> |
663996b3 | 28 | #include <string.h> |
663996b3 | 29 | #include <time.h> |
db2df898 | 30 | #include <unistd.h> |
663996b3 | 31 | |
db2df898 | 32 | #include "alloc-util.h" |
663996b3 | 33 | #include "bootchart.h" |
60f067b4 | 34 | #include "cgroup-util.h" |
db2df898 MP |
35 | #include "dirent-util.h" |
36 | #include "fd-util.h" | |
fb183854 | 37 | #include "fileio.h" |
db2df898 MP |
38 | #include "parse-util.h" |
39 | #include "store.h" | |
40 | #include "string-util.h" | |
41 | #include "strxcpyx.h" | |
42 | #include "time-util.h" | |
43 | #include "util.h" | |
663996b3 MS |
44 | |
/*
 * Alloc a static 4k buffer for stdio - primarily used to increase
 * PSS buffering from the default 1k stdin buffer to reduce
 * read() overhead.
 *
 * log_sample() hands this single buffer to setvbuf() for every
 * per-process smaps stream it opens.
 */
static char smaps_buf[4096];
/*
 * State of the smaps layout probe done in log_sample():
 *   0 - not probed yet
 *   1 - no extra line to skip after each mapping's statistics block
 *   2 - one extra line must be skipped after each statistics block
 */
static int skip = 0;
663996b3 MS |
52 | |
53 | double gettime_ns(void) { | |
54 | struct timespec n; | |
55 | ||
56 | clock_gettime(CLOCK_MONOTONIC, &n); | |
57 | ||
5eef597e | 58 | return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC)); |
663996b3 MS |
59 | } |
60 | ||
663996b3 MS |
/*
 * Advance to the start of the next line inside a NUL-terminated buffer.
 *
 * Returns a pointer to the character following the first '\n' found in
 * buf, or NULL when buf is NULL or contains no further newline.
 */
static char *bufgetline(char *buf) {
        char *newline;

        newline = buf ? strchr(buf, '\n') : NULL;

        return newline ? newline + 1 : NULL;
}
73 | ||
e3bff60a | 74 | static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) { |
663996b3 | 75 | char filename[PATH_MAX]; |
e3bff60a | 76 | _cleanup_close_ int fd = -1; |
663996b3 MS |
77 | ssize_t n; |
78 | ||
79 | sprintf(filename, "%d/cmdline", pid); | |
e3bff60a | 80 | fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC); |
663996b3 MS |
81 | if (fd < 0) |
82 | return -errno; | |
83 | ||
84 | n = read(fd, buffer, buf_len-1); | |
85 | if (n > 0) { | |
86 | int i; | |
87 | for (i = 0; i < n; i++) | |
88 | if (buffer[i] == '\0') | |
89 | buffer[i] = ' '; | |
90 | buffer[n] = '\0'; | |
91 | } | |
e3bff60a | 92 | |
663996b3 MS |
93 | return 0; |
94 | } | |
95 | ||
e3bff60a MP |
96 | int log_sample(DIR *proc, |
97 | int sample, | |
98 | struct ps_struct *ps_first, | |
99 | struct list_sample_data **ptr, | |
100 | int *pscount, | |
101 | int *cpus) { | |
102 | ||
103 | static int vmstat = -1; | |
fb183854 | 104 | _cleanup_free_ char *buf_schedstat = NULL; |
663996b3 MS |
105 | char buf[4096]; |
106 | char key[256]; | |
107 | char val[256]; | |
108 | char rt[256]; | |
109 | char wt[256]; | |
110 | char *m; | |
fb183854 | 111 | int r; |
663996b3 MS |
112 | int c; |
113 | int p; | |
114 | int mod; | |
e3bff60a | 115 | static int e_fd = -1; |
663996b3 MS |
116 | ssize_t s; |
117 | ssize_t n; | |
118 | struct dirent *ent; | |
119 | int fd; | |
120 | struct list_sample_data *sampledata; | |
121 | struct ps_sched_struct *ps_prev = NULL; | |
e3bff60a | 122 | int procfd; |
fb183854 | 123 | int taskfd = -1; |
663996b3 | 124 | |
663996b3 MS |
125 | sampledata = *ptr; |
126 | ||
e3bff60a MP |
127 | procfd = dirfd(proc); |
128 | if (procfd < 0) | |
129 | return -errno; | |
663996b3 | 130 | |
e3bff60a | 131 | if (vmstat < 0) { |
663996b3 | 132 | /* block stuff */ |
e3bff60a MP |
133 | vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC); |
134 | if (vmstat < 0) | |
135 | return log_error_errno(errno, "Failed to open /proc/vmstat: %m"); | |
663996b3 MS |
136 | } |
137 | ||
138 | n = pread(vmstat, buf, sizeof(buf) - 1, 0); | |
139 | if (n <= 0) { | |
e3bff60a MP |
140 | vmstat = safe_close(vmstat); |
141 | if (n < 0) | |
142 | return -errno; | |
143 | return -ENODATA; | |
663996b3 | 144 | } |
e3bff60a | 145 | |
663996b3 MS |
146 | buf[n] = '\0'; |
147 | ||
148 | m = buf; | |
149 | while (m) { | |
150 | if (sscanf(m, "%s %s", key, val) < 2) | |
151 | goto vmstat_next; | |
152 | if (streq(key, "pgpgin")) | |
153 | sampledata->blockstat.bi = atoi(val); | |
154 | if (streq(key, "pgpgout")) { | |
155 | sampledata->blockstat.bo = atoi(val); | |
156 | break; | |
157 | } | |
158 | vmstat_next: | |
159 | m = bufgetline(m); | |
160 | if (!m) | |
161 | break; | |
162 | } | |
163 | ||
fb183854 MP |
164 | /* Parse "/proc/schedstat" for overall CPU utilization */ |
165 | r = read_full_file("/proc/schedstat", &buf_schedstat, NULL); | |
166 | if (r < 0) | |
167 | return log_error_errno(r, "Unable to read schedstat: %m"); | |
e3bff60a | 168 | |
fb183854 | 169 | m = buf_schedstat; |
663996b3 MS |
170 | while (m) { |
171 | if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3) | |
172 | goto schedstat_next; | |
173 | ||
174 | if (strstr(key, "cpu")) { | |
5eef597e MP |
175 | r = safe_atoi((const char*)(key+3), &c); |
176 | if (r < 0 || c > MAXCPUS -1) | |
663996b3 MS |
177 | /* Oops, we only have room for MAXCPUS data */ |
178 | break; | |
179 | sampledata->runtime[c] = atoll(rt); | |
180 | sampledata->waittime[c] = atoll(wt); | |
181 | ||
e3bff60a MP |
182 | if (c == *cpus) |
183 | *cpus = c + 1; | |
663996b3 MS |
184 | } |
185 | schedstat_next: | |
186 | m = bufgetline(m); | |
187 | if (!m) | |
188 | break; | |
189 | } | |
190 | ||
191 | if (arg_entropy) { | |
e3bff60a MP |
192 | if (e_fd < 0) { |
193 | e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC); | |
194 | if (e_fd < 0) | |
195 | return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m"); | |
663996b3 MS |
196 | } |
197 | ||
e3bff60a MP |
198 | n = pread(e_fd, buf, sizeof(buf) - 1, 0); |
199 | if (n <= 0) { | |
200 | e_fd = safe_close(e_fd); | |
201 | } else { | |
202 | buf[n] = '\0'; | |
203 | sampledata->entropy_avail = atoi(buf); | |
663996b3 MS |
204 | } |
205 | } | |
206 | ||
207 | while ((ent = readdir(proc)) != NULL) { | |
208 | char filename[PATH_MAX]; | |
209 | int pid; | |
210 | struct ps_struct *ps; | |
211 | ||
212 | if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9')) | |
213 | continue; | |
214 | ||
215 | pid = atoi(ent->d_name); | |
216 | ||
217 | if (pid >= MAXPIDS) | |
218 | continue; | |
219 | ||
220 | ps = ps_first; | |
221 | while (ps->next_ps) { | |
222 | ps = ps->next_ps; | |
223 | if (ps->pid == pid) | |
224 | break; | |
225 | } | |
226 | ||
227 | /* end of our LL? then append a new record */ | |
228 | if (ps->pid != pid) { | |
229 | _cleanup_fclose_ FILE *st = NULL; | |
230 | char t[32]; | |
231 | struct ps_struct *parent; | |
232 | ||
60f067b4 | 233 | ps->next_ps = new0(struct ps_struct, 1); |
e3bff60a MP |
234 | if (!ps->next_ps) |
235 | return log_oom(); | |
236 | ||
663996b3 MS |
237 | ps = ps->next_ps; |
238 | ps->pid = pid; | |
e3bff60a MP |
239 | ps->sched = -1; |
240 | ps->schedstat = -1; | |
663996b3 | 241 | |
60f067b4 | 242 | ps->sample = new0(struct ps_sched_struct, 1); |
e3bff60a MP |
243 | if (!ps->sample) |
244 | return log_oom(); | |
245 | ||
663996b3 MS |
246 | ps->sample->sampledata = sampledata; |
247 | ||
e3bff60a | 248 | (*pscount)++; |
663996b3 MS |
249 | |
250 | /* mark our first sample */ | |
60f067b4 | 251 | ps->first = ps->last = ps->sample; |
663996b3 MS |
252 | ps->sample->runtime = atoll(rt); |
253 | ps->sample->waittime = atoll(wt); | |
254 | ||
255 | /* get name, start time */ | |
e3bff60a | 256 | if (ps->sched < 0) { |
663996b3 | 257 | sprintf(filename, "%d/sched", pid); |
e3bff60a MP |
258 | ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC); |
259 | if (ps->sched < 0) | |
663996b3 MS |
260 | continue; |
261 | } | |
262 | ||
263 | s = pread(ps->sched, buf, sizeof(buf) - 1, 0); | |
264 | if (s <= 0) { | |
e3bff60a | 265 | ps->sched = safe_close(ps->sched); |
663996b3 MS |
266 | continue; |
267 | } | |
268 | buf[s] = '\0'; | |
269 | ||
270 | if (!sscanf(buf, "%s %*s %*s", key)) | |
271 | continue; | |
272 | ||
273 | strscpy(ps->name, sizeof(ps->name), key); | |
274 | ||
275 | /* cmdline */ | |
276 | if (arg_show_cmdline) | |
e3bff60a | 277 | pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid); |
663996b3 MS |
278 | |
279 | /* discard line 2 */ | |
280 | m = bufgetline(buf); | |
281 | if (!m) | |
282 | continue; | |
283 | ||
284 | m = bufgetline(m); | |
285 | if (!m) | |
286 | continue; | |
287 | ||
288 | if (!sscanf(m, "%*s %*s %s", t)) | |
289 | continue; | |
290 | ||
5eef597e MP |
291 | r = safe_atod(t, &ps->starttime); |
292 | if (r < 0) | |
293 | continue; | |
294 | ||
295 | ps->starttime /= 1000.0; | |
663996b3 | 296 | |
60f067b4 JS |
297 | if (arg_show_cgroup) |
298 | /* if this fails, that's OK */ | |
299 | cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, | |
300 | ps->pid, &ps->cgroup); | |
301 | ||
663996b3 MS |
302 | /* ppid */ |
303 | sprintf(filename, "%d/stat", pid); | |
e3bff60a MP |
304 | fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC); |
305 | if (fd < 0) | |
663996b3 | 306 | continue; |
e3bff60a MP |
307 | |
308 | st = fdopen(fd, "re"); | |
309 | if (!st) { | |
310 | close(fd); | |
663996b3 MS |
311 | continue; |
312 | } | |
e3bff60a MP |
313 | |
314 | if (!fscanf(st, "%*s %*s %*s %i", &p)) | |
315 | continue; | |
316 | ||
663996b3 MS |
317 | ps->ppid = p; |
318 | ||
319 | /* | |
320 | * setup child pointers | |
321 | * | |
322 | * these are used to paint the tree coherently later | |
323 | * each parent has a LL of children, and a LL of siblings | |
324 | */ | |
325 | if (pid == 1) | |
326 | continue; /* nothing to do for init atm */ | |
327 | ||
328 | /* kthreadd has ppid=0, which breaks our tree ordering */ | |
329 | if (ps->ppid == 0) | |
330 | ps->ppid = 1; | |
331 | ||
332 | parent = ps_first; | |
333 | while ((parent->next_ps && parent->pid != ps->ppid)) | |
334 | parent = parent->next_ps; | |
335 | ||
60f067b4 | 336 | if (parent->pid != ps->ppid) { |
663996b3 MS |
337 | /* orphan */ |
338 | ps->ppid = 1; | |
339 | parent = ps_first->next_ps; | |
340 | } | |
341 | ||
342 | ps->parent = parent; | |
343 | ||
344 | if (!parent->children) { | |
345 | /* it's the first child */ | |
346 | parent->children = ps; | |
347 | } else { | |
348 | /* walk all children and append */ | |
349 | struct ps_struct *children; | |
350 | children = parent->children; | |
351 | while (children->next) | |
352 | children = children->next; | |
e3bff60a | 353 | |
663996b3 MS |
354 | children->next = ps; |
355 | } | |
356 | } | |
357 | ||
358 | /* else -> found pid, append data in ps */ | |
359 | ||
360 | /* below here is all continuous logging parts - we get here on every | |
361 | * iteration */ | |
362 | ||
363 | /* rt, wt */ | |
e3bff60a | 364 | if (ps->schedstat < 0) { |
663996b3 | 365 | sprintf(filename, "%d/schedstat", pid); |
e3bff60a MP |
366 | ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC); |
367 | if (ps->schedstat < 0) | |
663996b3 MS |
368 | continue; |
369 | } | |
e3bff60a | 370 | |
663996b3 MS |
371 | s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0); |
372 | if (s <= 0) { | |
373 | /* clean up our file descriptors - assume that the process exited */ | |
374 | close(ps->schedstat); | |
e3bff60a MP |
375 | ps->schedstat = -1; |
376 | ps->sched = safe_close(ps->sched); | |
663996b3 MS |
377 | continue; |
378 | } | |
e3bff60a | 379 | |
663996b3 MS |
380 | buf[s] = '\0'; |
381 | ||
382 | if (!sscanf(buf, "%s %s %*s", rt, wt)) | |
383 | continue; | |
384 | ||
60f067b4 | 385 | ps->sample->next = new0(struct ps_sched_struct, 1); |
e3bff60a MP |
386 | if (!ps->sample->next) |
387 | return log_oom(); | |
388 | ||
663996b3 MS |
389 | ps->sample->next->prev = ps->sample; |
390 | ps->sample = ps->sample->next; | |
391 | ps->last = ps->sample; | |
392 | ps->sample->runtime = atoll(rt); | |
393 | ps->sample->waittime = atoll(wt); | |
394 | ps->sample->sampledata = sampledata; | |
395 | ps->sample->ps_new = ps; | |
e3bff60a | 396 | if (ps_prev) |
663996b3 | 397 | ps_prev->cross = ps->sample; |
e3bff60a | 398 | |
663996b3 MS |
399 | ps_prev = ps->sample; |
400 | ps->total = (ps->last->runtime - ps->first->runtime) | |
401 | / 1000000000.0; | |
402 | ||
fb183854 MP |
403 | /* Take into account CPU runtime/waittime spent in non-main threads of the process |
404 | * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid] | |
405 | * See https://github.com/systemd/systemd/issues/139 | |
406 | */ | |
407 | ||
408 | /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */ | |
409 | snprintf(filename, sizeof(filename), PID_FMT "/task", pid); | |
410 | taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC); | |
411 | if (taskfd >= 0) { | |
412 | _cleanup_closedir_ DIR *taskdir = NULL; | |
413 | ||
414 | taskdir = fdopendir(taskfd); | |
415 | if (!taskdir) { | |
416 | safe_close(taskfd); | |
417 | return -errno; | |
418 | } | |
419 | FOREACH_DIRENT(ent, taskdir, break) { | |
420 | int tid = -1; | |
421 | _cleanup_close_ int tid_schedstat = -1; | |
422 | long long delta_rt; | |
423 | long long delta_wt; | |
424 | ||
425 | if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9')) | |
426 | continue; | |
427 | ||
428 | /* Skip main thread as it was already accounted */ | |
429 | r = safe_atoi(ent->d_name, &tid); | |
430 | if (r < 0 || tid == pid) | |
431 | continue; | |
432 | ||
433 | /* Parse "/proc/[pid]/task/[tid]/schedstat" */ | |
434 | snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid); | |
435 | tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC); | |
436 | ||
437 | if (tid_schedstat == -1) | |
438 | continue; | |
439 | ||
440 | s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0); | |
441 | if (s <= 0) | |
442 | continue; | |
443 | buf[s] = '\0'; | |
444 | ||
445 | if (!sscanf(buf, "%s %s %*s", rt, wt)) | |
446 | continue; | |
447 | ||
448 | r = safe_atolli(rt, &delta_rt); | |
449 | if (r < 0) | |
450 | continue; | |
451 | r = safe_atolli(rt, &delta_wt); | |
452 | if (r < 0) | |
453 | continue; | |
454 | ps->sample->runtime += delta_rt; | |
455 | ps->sample->waittime += delta_wt; | |
456 | } | |
457 | } | |
458 | ||
663996b3 MS |
459 | if (!arg_pss) |
460 | goto catch_rename; | |
461 | ||
462 | /* Pss */ | |
463 | if (!ps->smaps) { | |
464 | sprintf(filename, "%d/smaps", pid); | |
e3bff60a MP |
465 | fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC); |
466 | if (fd < 0) | |
467 | continue; | |
468 | ps->smaps = fdopen(fd, "re"); | |
469 | if (!ps->smaps) { | |
470 | close(fd); | |
663996b3 | 471 | continue; |
e3bff60a | 472 | } |
663996b3 | 473 | setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf)); |
e3bff60a | 474 | } else { |
663996b3 MS |
475 | rewind(ps->smaps); |
476 | } | |
e3bff60a | 477 | |
663996b3 MS |
478 | /* test to see if we need to skip another field */ |
479 | if (skip == 0) { | |
480 | if (fgets(buf, sizeof(buf), ps->smaps) == NULL) { | |
481 | continue; | |
482 | } | |
483 | if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) { | |
484 | continue; | |
485 | } | |
486 | if (buf[392] == 'V') { | |
487 | skip = 2; | |
488 | } | |
489 | else { | |
490 | skip = 1; | |
491 | } | |
492 | rewind(ps->smaps); | |
493 | } | |
e3bff60a | 494 | |
663996b3 MS |
495 | while (1) { |
496 | int pss_kb; | |
497 | ||
498 | /* skip one line, this contains the object mapped. */ | |
499 | if (fgets(buf, sizeof(buf), ps->smaps) == NULL) { | |
500 | break; | |
501 | } | |
502 | /* then there's a 28 char 14 line block */ | |
503 | if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) { | |
504 | break; | |
505 | } | |
506 | pss_kb = atoi(&buf[61]); | |
507 | ps->sample->pss += pss_kb; | |
508 | ||
509 | /* skip one more line if this is a newer kernel */ | |
510 | if (skip == 2) { | |
511 | if (fgets(buf, sizeof(buf), ps->smaps) == NULL) | |
512 | break; | |
513 | } | |
514 | } | |
e3bff60a | 515 | |
663996b3 MS |
516 | if (ps->sample->pss > ps->pss_max) |
517 | ps->pss_max = ps->sample->pss; | |
518 | ||
519 | catch_rename: | |
520 | /* catch process rename, try to randomize time */ | |
521 | mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0); | |
e3bff60a | 522 | if (((sample - ps->pid) + pid) % (int)(mod) == 0) { |
663996b3 MS |
523 | |
524 | /* re-fetch name */ | |
525 | /* get name, start time */ | |
e3bff60a | 526 | if (ps->sched < 0) { |
663996b3 | 527 | sprintf(filename, "%d/sched", pid); |
e3bff60a MP |
528 | ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC); |
529 | if (ps->sched < 0) | |
663996b3 MS |
530 | continue; |
531 | } | |
e3bff60a | 532 | |
663996b3 MS |
533 | s = pread(ps->sched, buf, sizeof(buf) - 1, 0); |
534 | if (s <= 0) { | |
535 | /* clean up file descriptors */ | |
e3bff60a MP |
536 | ps->sched = safe_close(ps->sched); |
537 | ps->schedstat = safe_close(ps->schedstat); | |
663996b3 MS |
538 | continue; |
539 | } | |
e3bff60a | 540 | |
663996b3 MS |
541 | buf[s] = '\0'; |
542 | ||
543 | if (!sscanf(buf, "%s %*s %*s", key)) | |
544 | continue; | |
545 | ||
546 | strscpy(ps->name, sizeof(ps->name), key); | |
547 | ||
548 | /* cmdline */ | |
549 | if (arg_show_cmdline) | |
e3bff60a | 550 | pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid); |
663996b3 MS |
551 | } |
552 | } | |
e3bff60a MP |
553 | |
554 | return 0; | |
663996b3 | 555 | } |