]>
git.proxmox.com Git - systemd.git/blob - src/basic/process-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include <linux/oom.h>
29 #include <stdio_ext.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/types.h>
39 #if HAVE_VALGRIND_VALGRIND_H
40 #include <valgrind/valgrind.h>
43 #include "alloc-util.h"
44 #include "architecture.h"
53 #include "process-util.h"
54 #include "raw-clone.h"
55 #include "signal-util.h"
56 #include "stat-util.h"
57 #include "string-table.h"
58 #include "string-util.h"
59 #include "user-util.h"
62 int get_process_state(pid_t pid
) {
66 _cleanup_free_
char *line
= NULL
;
70 p
= procfs_file_alloca(pid
, "stat");
72 r
= read_one_line_file(p
, &line
);
78 p
= strrchr(line
, ')');
84 if (sscanf(p
, " %c", &state
) != 1)
87 return (unsigned char) state
;
90 int get_process_comm(pid_t pid
, char **name
) {
97 p
= procfs_file_alloca(pid
, "comm");
99 r
= read_one_line_file(p
, name
);
106 int get_process_cmdline(pid_t pid
, size_t max_length
, bool comm_fallback
, char **line
) {
107 _cleanup_fclose_
FILE *f
= NULL
;
109 char *k
, *ans
= NULL
;
116 /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
117 * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
118 * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
119 * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
120 * command line that resolves to the empty string will return the "comm" name of the process instead.
122 * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
123 * comm_fallback is false). Returns 0 and sets *line otherwise. */
125 p
= procfs_file_alloca(pid
, "cmdline");
134 (void) __fsetlocking(f
, FSETLOCKING_BYCALLER
);
136 if (max_length
== 1) {
138 /* If there's only room for one byte, return the empty string */
146 } else if (max_length
== 0) {
147 size_t len
= 0, allocated
= 0;
149 while ((c
= getc(f
)) != EOF
) {
151 if (!GREEDY_REALLOC(ans
, allocated
, len
+3)) {
173 bool dotdotdot
= false;
176 ans
= new(char, max_length
);
182 while ((c
= getc(f
)) != EOF
) {
209 if (max_length
<= 4) {
213 k
= ans
+ max_length
- 4;
216 /* Eat up final spaces */
217 while (k
> ans
&& isspace(k
[-1])) {
223 strncpy(k
, "...", left
-1);
229 /* Kernel threads have no argv[] */
231 _cleanup_free_
char *t
= NULL
;
239 h
= get_process_comm(pid
, &t
);
244 ans
= strjoin("[", t
, "]");
250 if (l
+ 3 <= max_length
)
251 ans
= strjoin("[", t
, "]");
252 else if (max_length
<= 6) {
254 ans
= new(char, max_length
);
258 memcpy(ans
, "[...]", max_length
-1);
259 ans
[max_length
-1] = 0;
263 t
[max_length
- 6] = 0;
265 /* Chop off final spaces */
267 while (e
> t
&& isspace(e
[-1]))
271 ans
= strjoin("[", t
, "...]");
282 int rename_process(const char name
[]) {
283 static size_t mm_size
= 0;
284 static char *mm
= NULL
;
285 bool truncated
= false;
288 /* This is like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
289 * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in
290 * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCE it is unbounded;
291 * to the third one 7 (i.e. the length of "systemd"). If you pass a longer string it will likely be
294 * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
297 return -EINVAL
; /* let's not confuse users unnecessarily with an empty name */
301 /* First step, change the comm field. */
302 (void) prctl(PR_SET_NAME
, name
);
303 if (l
> 15) /* Linux process names can be 15 chars at max */
306 /* Second step, change glibc's ID of the process name. */
307 if (program_invocation_name
) {
310 k
= strlen(program_invocation_name
);
311 strncpy(program_invocation_name
, name
, k
);
316 /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
317 * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
318 * the end. This is the best option for changing /proc/self/cmdline. */
320 /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
321 * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
322 * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
323 * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
326 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
327 else if (mm_size
< l
+1) {
331 nn_size
= PAGE_ALIGN(l
+1);
332 nn
= mmap(NULL
, nn_size
, PROT_READ
|PROT_WRITE
, MAP_PRIVATE
|MAP_ANONYMOUS
, -1, 0);
333 if (nn
== MAP_FAILED
) {
334 log_debug_errno(errno
, "mmap() failed: %m");
338 strncpy(nn
, name
, nn_size
);
340 /* Now, let's tell the kernel about this new memory */
341 if (prctl(PR_SET_MM
, PR_SET_MM_ARG_START
, (unsigned long) nn
, 0, 0) < 0) {
342 log_debug_errno(errno
, "PR_SET_MM_ARG_START failed, proceeding without: %m");
343 (void) munmap(nn
, nn_size
);
347 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
348 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
349 if (prctl(PR_SET_MM
, PR_SET_MM_ARG_END
, (unsigned long) nn
+ l
+ 1, 0, 0) < 0)
350 log_debug_errno(errno
, "PR_SET_MM_ARG_END failed, proceeding without: %m");
353 (void) munmap(mm
, mm_size
);
358 strncpy(mm
, name
, mm_size
);
360 /* Update the end pointer, continuing regardless of any failure. */
361 if (prctl(PR_SET_MM
, PR_SET_MM_ARG_END
, (unsigned long) mm
+ l
+ 1, 0, 0) < 0)
362 log_debug_errno(errno
, "PR_SET_MM_ARG_END failed, proceeding without: %m");
366 /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
367 * it still looks here */
369 if (saved_argc
> 0) {
375 k
= strlen(saved_argv
[0]);
376 strncpy(saved_argv
[0], name
, k
);
381 for (i
= 1; i
< saved_argc
; i
++) {
385 memzero(saved_argv
[i
], strlen(saved_argv
[i
]));
392 int is_kernel_thread(pid_t pid
) {
399 if (IN_SET(pid
, 0, 1) || pid
== getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
404 p
= procfs_file_alloca(pid
, "cmdline");
412 (void) __fsetlocking(f
, FSETLOCKING_BYCALLER
);
414 count
= fread(&c
, 1, 1, f
);
418 /* Kernel threads have an empty cmdline */
421 return eof
? 1 : -errno
;
426 int get_process_capeff(pid_t pid
, char **capeff
) {
433 p
= procfs_file_alloca(pid
, "status");
435 r
= get_proc_field(p
, "CapEff", WHITESPACE
, capeff
);
442 static int get_process_link_contents(const char *proc_file
, char **name
) {
448 r
= readlink_malloc(proc_file
, name
);
457 int get_process_exe(pid_t pid
, char **name
) {
464 p
= procfs_file_alloca(pid
, "exe");
465 r
= get_process_link_contents(p
, name
);
469 d
= endswith(*name
, " (deleted)");
476 static int get_process_id(pid_t pid
, const char *field
, uid_t
*uid
) {
477 _cleanup_fclose_
FILE *f
= NULL
;
487 p
= procfs_file_alloca(pid
, "status");
495 (void) __fsetlocking(f
, FSETLOCKING_BYCALLER
);
497 FOREACH_LINE(line
, f
, return -errno
) {
502 if (startswith(l
, field
)) {
504 l
+= strspn(l
, WHITESPACE
);
506 l
[strcspn(l
, WHITESPACE
)] = 0;
508 return parse_uid(l
, uid
);
515 int get_process_uid(pid_t pid
, uid_t
*uid
) {
517 if (pid
== 0 || pid
== getpid_cached()) {
522 return get_process_id(pid
, "Uid:", uid
);
525 int get_process_gid(pid_t pid
, gid_t
*gid
) {
527 if (pid
== 0 || pid
== getpid_cached()) {
532 assert_cc(sizeof(uid_t
) == sizeof(gid_t
));
533 return get_process_id(pid
, "Gid:", gid
);
536 int get_process_cwd(pid_t pid
, char **cwd
) {
541 p
= procfs_file_alloca(pid
, "cwd");
543 return get_process_link_contents(p
, cwd
);
546 int get_process_root(pid_t pid
, char **root
) {
551 p
= procfs_file_alloca(pid
, "root");
553 return get_process_link_contents(p
, root
);
556 int get_process_environ(pid_t pid
, char **env
) {
557 _cleanup_fclose_
FILE *f
= NULL
;
558 _cleanup_free_
char *outcome
= NULL
;
561 size_t allocated
= 0, sz
= 0;
566 p
= procfs_file_alloca(pid
, "environ");
575 (void) __fsetlocking(f
, FSETLOCKING_BYCALLER
);
577 while ((c
= fgetc(f
)) != EOF
) {
578 if (!GREEDY_REALLOC(outcome
, allocated
, sz
+ 5))
582 outcome
[sz
++] = '\n';
584 sz
+= cescape_char(c
, outcome
+ sz
);
588 outcome
= strdup("");
600 int get_process_ppid(pid_t pid
, pid_t
*_ppid
) {
602 _cleanup_free_
char *line
= NULL
;
609 if (pid
== 0 || pid
== getpid_cached()) {
614 p
= procfs_file_alloca(pid
, "stat");
615 r
= read_one_line_file(p
, &line
);
621 /* Let's skip the pid and comm fields. The latter is enclosed
622 * in () but does not escape any () in its value, so let's
623 * skip over it manually */
625 p
= strrchr(line
, ')');
637 if ((long unsigned) (pid_t
) ppid
!= ppid
)
640 *_ppid
= (pid_t
) ppid
;
645 int wait_for_terminate(pid_t pid
, siginfo_t
*status
) {
656 if (waitid(P_PID
, pid
, status
, WEXITED
) < 0) {
661 return negative_errno();
670 * < 0 : wait_for_terminate() failed to get the state of the
671 * process, the process was terminated by a signal, or
672 * failed for an unknown reason.
673 * >=0 : The process terminated normally, and its exit code is
676 * That is, success is indicated by a return value of zero, and an
677 * error is indicated by a non-zero value.
679 * A warning is emitted if the process terminates abnormally,
680 * and also if it returns non-zero and check_exit_code is true (otherwise a non-zero exit code is only logged at debug level).
682 int wait_for_terminate_and_warn(const char *name
, pid_t pid
, bool check_exit_code
) {
689 r
= wait_for_terminate(pid
, &status
);
691 return log_warning_errno(r
, "Failed to wait for %s: %m", name
);
693 if (status
.si_code
== CLD_EXITED
) {
694 if (status
.si_status
!= 0)
695 log_full(check_exit_code
? LOG_WARNING
: LOG_DEBUG
,
696 "%s failed with error code %i.", name
, status
.si_status
);
698 log_debug("%s succeeded.", name
);
700 return status
.si_status
;
701 } else if (IN_SET(status
.si_code
, CLD_KILLED
, CLD_DUMPED
)) {
703 log_warning("%s terminated by signal %s.", name
, signal_to_string(status
.si_status
));
707 log_warning("%s failed due to unknown reason.", name
);
713 * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
714 * process, the process timed out, the process was terminated by a
715 * signal, or failed for an unknown reason.
716 * >=0 : The process terminated normally with no failures.
718 * Success is indicated by a return value of zero, a timeout is indicated
719 * by ETIMEDOUT, and all other child failure states are indicated by a
720 * non-zero error value.
722 int wait_for_terminate_with_timeout(pid_t pid
, usec_t timeout
) {
727 assert_se(sigemptyset(&mask
) == 0);
728 assert_se(sigaddset(&mask
, SIGCHLD
) == 0);
730 /* Drop into a sigtimedwait-based timeout. Waiting for the
732 until
= now(CLOCK_MONOTONIC
) + timeout
;
735 siginfo_t status
= {};
738 n
= now(CLOCK_MONOTONIC
);
742 r
= sigtimedwait(&mask
, NULL
, timespec_store(&ts
, until
- n
)) < 0 ? -errno
: 0;
743 /* Assuming we woke due to the child exiting. */
744 if (waitid(P_PID
, pid
, &status
, WEXITED
|WNOHANG
) == 0) {
745 if (status
.si_pid
== pid
) {
746 /* This is the correct child.*/
747 if (status
.si_code
== CLD_EXITED
)
748 return (status
.si_status
== 0) ? 0 : -EPROTO
;
753 /* Not the child, check for errors and proceed appropriately */
757 /* Timed out, child is likely hung. */
760 /* Received a different signal and should retry */
763 /* Return any unexpected errors */
772 void sigkill_wait(pid_t pid
) {
775 if (kill(pid
, SIGKILL
) > 0)
776 (void) wait_for_terminate(pid
, NULL
);
779 void sigkill_waitp(pid_t
*pid
) {
788 int kill_and_sigcont(pid_t pid
, int sig
) {
791 r
= kill(pid
, sig
) < 0 ? -errno
: 0;
793 /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't
794 * affected by a process being suspended anyway. */
795 if (r
>= 0 && !IN_SET(sig
, SIGCONT
, SIGKILL
))
796 (void) kill(pid
, SIGCONT
);
801 int getenv_for_pid(pid_t pid
, const char *field
, char **_value
) {
802 _cleanup_fclose_
FILE *f
= NULL
;
813 path
= procfs_file_alloca(pid
, "environ");
815 f
= fopen(path
, "re");
822 (void) __fsetlocking(f
, FSETLOCKING_BYCALLER
);
831 for (i
= 0; i
< sizeof(line
)-1; i
++) {
835 if (_unlikely_(c
== EOF
)) {
845 if (strneq(line
, field
, l
) && line
[l
] == '=') {
846 value
= strdup(line
+ l
+ 1);
860 bool pid_is_unwaited(pid_t pid
) {
861 /* Checks whether a PID is still valid at all, including a zombie */
866 if (pid
<= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */
869 if (pid
== getpid_cached())
872 if (kill(pid
, 0) >= 0)
875 return errno
!= ESRCH
;
878 bool pid_is_alive(pid_t pid
) {
881 /* Checks whether a PID is still valid and not a zombie */
886 if (pid
<= 1) /* If we or PID 1 would be a zombie, this code would not be running */
889 if (pid
== getpid_cached())
892 r
= get_process_state(pid
);
893 if (IN_SET(r
, -ESRCH
, 'Z'))
899 int pid_from_same_root_fs(pid_t pid
) {
905 if (pid
== 0 || pid
== getpid_cached())
908 root
= procfs_file_alloca(pid
, "root");
910 return files_same(root
, "/proc/1/root", 0);
913 bool is_main_thread(void) {
914 static thread_local
int cached
= 0;
916 if (_unlikely_(cached
== 0))
917 cached
= getpid_cached() == gettid() ? 1 : -1;
922 noreturn
void freeze(void) {
926 /* Make sure nobody waits for us on a socket anymore */
927 close_all_fds(NULL
, 0);
935 bool oom_score_adjust_is_valid(int oa
) {
936 return oa
>= OOM_SCORE_ADJ_MIN
&& oa
<= OOM_SCORE_ADJ_MAX
;
939 unsigned long personality_from_string(const char *p
) {
943 return PERSONALITY_INVALID
;
945 /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
946 * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
947 * the same register size. */
949 architecture
= architecture_from_string(p
);
950 if (architecture
< 0)
951 return PERSONALITY_INVALID
;
953 if (architecture
== native_architecture())
955 #ifdef SECONDARY_ARCHITECTURE
956 if (architecture
== SECONDARY_ARCHITECTURE
)
960 return PERSONALITY_INVALID
;
963 const char* personality_to_string(unsigned long p
) {
964 int architecture
= _ARCHITECTURE_INVALID
;
967 architecture
= native_architecture();
968 #ifdef SECONDARY_ARCHITECTURE
969 else if (p
== PER_LINUX32
)
970 architecture
= SECONDARY_ARCHITECTURE
;
973 if (architecture
< 0)
976 return architecture_to_string(architecture
);
979 int safe_personality(unsigned long p
) {
982 /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
983 * and in others as negative return value containing an errno-like value. Let's work around this: this is a
984 * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
985 * the return value indicating the same issue, so that we are definitely on the safe side.
987 * See https://github.com/systemd/systemd/issues/6737 */
990 ret
= personality(p
);
1001 int opinionated_personality(unsigned long *ret
) {
1004 /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
1005 * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
1006 * two most relevant personalities: PER_LINUX and PER_LINUX32. */
1008 current
= safe_personality(PERSONALITY_INVALID
);
1012 if (((unsigned long) current
& 0xffff) == PER_LINUX32
)
1020 void valgrind_summary_hack(void) {
1021 #if HAVE_VALGRIND_VALGRIND_H
1022 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND
) {
1024 pid
= raw_clone(SIGCHLD
);
1026 log_emergency_errno(errno
, "Failed to fork off valgrind helper: %m");
1030 log_info("Spawned valgrind helper as PID "PID_FMT
".", pid
);
1031 (void) wait_for_terminate(pid
, NULL
);
1037 int pid_compare_func(const void *a
, const void *b
) {
1038 const pid_t
*p
= a
, *q
= b
;
1040 /* Suitable for usage in qsort() */
1049 int ioprio_parse_priority(const char *s
, int *ret
) {
1055 r
= safe_atoi(s
, &i
);
1059 if (!ioprio_priority_is_valid(i
))
1066 /* The cached PID, possible values:
1068 * == UNSET [0] → cache not initialized yet
1069 * == BUSY [-1] → some thread is initializing it at the moment
1070 * any other → the cached PID
1073 #define CACHED_PID_UNSET ((pid_t) 0)
1074 #define CACHED_PID_BUSY ((pid_t) -1)
1076 static pid_t cached_pid
= CACHED_PID_UNSET
;
1078 static void reset_cached_pid(void) {
1079 /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
1080 cached_pid
= CACHED_PID_UNSET
;
1083 /* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
1084 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
1085 * libpthread, as it is part of glibc anyway. */
1086 extern int __register_atfork(void (*prepare
) (void), void (*parent
) (void), void (*child
) (void), void * __dso_handle
);
1087 extern void* __dso_handle
__attribute__ ((__weak__
));
1089 pid_t
getpid_cached(void) {
1090 pid_t current_value
;
1092 /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1093 * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1094 * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1095 * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1097 * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1098 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1101 current_value
= __sync_val_compare_and_swap(&cached_pid
, CACHED_PID_UNSET
, CACHED_PID_BUSY
);
1103 switch (current_value
) {
1105 case CACHED_PID_UNSET
: { /* Not initialized yet, then do so now */
1110 if (__register_atfork(NULL
, NULL
, reset_cached_pid
, __dso_handle
) != 0) {
1111 /* OOM? Let's try again later */
1112 cached_pid
= CACHED_PID_UNSET
;
1116 cached_pid
= new_pid
;
1120 case CACHED_PID_BUSY
: /* Somebody else is currently initializing */
1123 default: /* Properly initialized */
1124 return current_value
;
1128 int must_be_root(void) {
1133 log_error("Need to be root.");
1137 static const char *const ioprio_class_table
[] = {
1138 [IOPRIO_CLASS_NONE
] = "none",
1139 [IOPRIO_CLASS_RT
] = "realtime",
1140 [IOPRIO_CLASS_BE
] = "best-effort",
1141 [IOPRIO_CLASS_IDLE
] = "idle"
1144 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class
, int, INT_MAX
);
1146 static const char *const sigchld_code_table
[] = {
1147 [CLD_EXITED
] = "exited",
1148 [CLD_KILLED
] = "killed",
1149 [CLD_DUMPED
] = "dumped",
1150 [CLD_TRAPPED
] = "trapped",
1151 [CLD_STOPPED
] = "stopped",
1152 [CLD_CONTINUED
] = "continued",
1155 DEFINE_STRING_TABLE_LOOKUP(sigchld_code
, int);
1157 static const char* const sched_policy_table
[] = {
1158 [SCHED_OTHER
] = "other",
1159 [SCHED_BATCH
] = "batch",
1160 [SCHED_IDLE
] = "idle",
1161 [SCHED_FIFO
] = "fifo",
1165 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy
, int, INT_MAX
);