]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/raw_syscalls.c
raw_syscalls: add lxc_raw_clone{_cb}()
[mirror_lxc.git] / src / lxc / raw_syscalls.c
1 #ifndef _GNU_SOURCE
2 #define _GNU_SOURCE 1
3 #endif
4 #include <errno.h>
5 #include <sched.h>
6 #include <signal.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <sys/syscall.h>
10 #include <unistd.h>
11
12 #include "config.h"
13 #include "macro.h"
14 #include "raw_syscalls.h"
15
/*
 * lxc_raw_execveat - invoke the execveat system call directly via syscall().
 *
 * @dirfd, @pathname, @argv, @envp and @flags are handed to the kernel
 * unchanged.
 *
 * On success execveat replaces the calling program, so this function only
 * ever returns on failure. It then returns -1 with errno set: by syscall()
 * when the kernel rejected the request, or to ENOSYS when the build headers
 * do not define __NR_execveat.
 */
int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
		     char *const envp[], int flags)
{
#ifdef __NR_execveat
	/* Reaching the line after this call means the exec failed. */
	syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
#else
	errno = ENOSYS;
#endif
	/*
	 * Shared failure return for both branches, so the function never falls
	 * off the end without a value.
	 */
	return -1;
}
26
/*
 * lxc_raw_clone - fork()-like wrapper around the raw clone system call.
 *
 * Modelled on raw_clone in systemd and adapted to our needs. No stack is
 * handed to the kernel (a NULL stack pointer is passed), so the child keeps
 * running on a copy-on-write copy of the caller's stack. SIGCHLD is always
 * OR'ed into @flags.
 *
 * CLONE_VM is tricky and of no use to us, and the flags that need extra
 * pointer arguments (CLONE_PARENT_SETTID, CLONE_CHILD_SETTID,
 * CLONE_CHILD_CLEARTID, CLONE_SETTLS) are not supported either. If any of
 * them is set, -EINVAL is returned and errno is EINVAL.
 *
 * Otherwise the result is that of the clone system call: 0 in the child and
 * the child's pid in the parent, just like fork().
 */
pid_t lxc_raw_clone(unsigned long flags)
{
	/* Flags we refuse instead of jumping through hoops to support them. */
	const unsigned long rejected = CLONE_VM | CLONE_PARENT_SETTID |
				       CLONE_CHILD_SETTID |
				       CLONE_CHILD_CLEARTID | CLONE_SETTLS;
	/* What actually gets handed to the kernel. */
	const unsigned long clone_flags = flags | SIGCHLD;

	/* errno is primed before the check, exactly once, on every call. */
	errno = EINVAL;
	if ((flags & rejected) != 0)
		return -EINVAL;

#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
	/*
	 * s390, s390x and cris swap the first two system call arguments:
	 * the stack pointer comes first, the flags second.
	 */
	return (int)syscall(__NR_clone, NULL, clone_flags);
#elif defined(__sparc__) && defined(__arch64__)
	{
		/*
		 * On sparc64 %o0 always carries the pid of the other process
		 * and %o1 tells whether we are the child or the parent. The
		 * generic syscall() wrapper cannot hand back %o1, hence the
		 * inline assembly.
		 */
		int is_child;
		int other_pid;
		asm volatile("mov %2, %%g1\n\t"
			     "mov %3, %%o0\n\t"
			     "mov 0 , %%o1\n\t"
			     "t 0x6d\n\t"
			     "mov %%o1, %0\n\t"
			     "mov %%o0, %1"
			     : "=r"(is_child), "=r"(other_pid)
			     : "i"(__NR_clone), "r"(clone_flags)
			     : "%o1", "%o0", "%g1");

		return is_child ? 0 : other_pid;
	}
#elif defined(__ia64__)
	/* ia64 takes the stack and the stack size as two separate arguments. */
	return (int)syscall(__NR_clone, clone_flags, NULL, prctl_arg(0));
#else
	return (int)syscall(__NR_clone, clone_flags, NULL);
#endif
}
83
/*
 * lxc_raw_clone_cb - clone via lxc_raw_clone() and run a callback in the child.
 *
 * @fn    function executed in the child process
 * @args  opaque pointer passed to @fn
 * @flags clone flags forwarded to lxc_raw_clone()
 *
 * In the parent, returns the child's pid, or -1 if lxc_raw_clone() reported
 * any negative value. The child never returns from this function: it
 * terminates with the return value of @fn as its exit status.
 */
pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags)
{
	pid_t child = lxc_raw_clone(flags);

	if (child == 0) {
		/*
		 * Leave through _exit() rather than exit(): exit() is not
		 * thread-safe and might mess with the parent's signal
		 * handlers and other state when exec() fails.
		 */
		_exit(fn(args));
	}

	/* Every negative result is collapsed into a plain -1 for the caller. */
	return child < 0 ? -1 : child;
}