]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/namespace.c
/*
 * lxc: linux Container library
 *
 * (C) Copyright IBM Corp. 2007, 2009
 *
 * Daniel Lezcano <daniel.lezcano at free.fr>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <alloca.h>
#include <errno.h>
#include <sched.h>
#include <signal.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/syscall.h>
#include <sys/types.h>

#include "namespace.h"
/* Declare the log category used by the ERROR()/SYSERROR() calls in this file.
 * NOTE(review): lxc_log_define() is a project macro defined outside this
 * view; its exact expansion should be confirmed against the logging header.
 */
lxc_log_define(namespace, lxc);
/* Bundles a callback and its argument so both can travel through the single
 * void pointer that clone() hands to the child's entry function.
 * NOTE(review): this definition was not visible in the damaged source; the
 * two members are reconstructed from how do_clone() and lxc_clone() use them.
 */
struct clone_arg {
	int (*fn)(void *);
	void *arg;
};

/* Entry point executed in the cloned child: unpack the struct clone_arg and
 * invoke the caller-supplied function with the caller-supplied argument.
 * The callback's return value is returned unchanged.
 */
static int do_clone(void *arg)
{
	struct clone_arg *clone_arg = arg;

	return clone_arg->fn(clone_arg->arg);
}
53 pid_t
lxc_clone(int (*fn
)(void *), void *arg
, int flags
)
55 struct clone_arg clone_arg
= {
60 size_t stack_size
= lxc_getpagesize();
61 void *stack
= alloca(stack_size
);
65 ret
= __clone2(do_clone
, stack
, stack_size
, flags
| SIGCHLD
, &clone_arg
);
67 ret
= clone(do_clone
, stack
+ stack_size
, flags
| SIGCHLD
, &clone_arg
);
70 SYSERROR("Failed to clone (%#x)", flags
);
76 * This is based on raw_clone in systemd but adapted to our needs. This uses
77 * copy on write semantics and doesn't pass a stack. CLONE_VM is tricky and
78 * doesn't really matter to us so disallow it.
80 * The nice thing about this is that we get fork() behavior. That is
81 * lxc_raw_clone() returns 0 in the child and the child pid in the parent.
83 pid_t
lxc_raw_clone(unsigned long flags
)
86 /* These flags don't interest at all so we don't jump through any hoopes
87 * of retrieving them and passing them to the kernel.
90 if ((flags
& (CLONE_VM
| CLONE_PARENT_SETTID
| CLONE_CHILD_SETTID
|
91 CLONE_CHILD_CLEARTID
| CLONE_SETTLS
)))
94 #if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
95 /* On s390/s390x and cris the order of the first and second arguments
96 * of the system call is reversed.
98 return (int)syscall(__NR_clone
, NULL
, flags
| SIGCHLD
);
99 #elif defined(__sparc__) && defined(__arch64__)
102 * sparc64 always returns the other process id in %o0, and
103 * a boolean flag whether this is the child or the parent in
104 * %o1. Inline assembly is needed to get the flag returned
109 asm volatile("mov %2, %%g1\n\t"
115 : "=r"(in_child
), "=r"(child_pid
)
116 : "i"(__NR_clone
), "r"(flags
| SIGCHLD
)
117 : "%o1", "%o0", "%g1");
124 #elif defined(__ia64__)
125 /* On ia64 the stack and stack size are passed as separate arguments. */
126 return (int)syscall(__NR_clone
, flags
| SIGCHLD
, NULL
, 0);
128 return (int)syscall(__NR_clone
, flags
| SIGCHLD
, NULL
);
/* Clone with lxc_raw_clone() and run fn(args) in the child.
 *
 * @fn:    callback executed in the child
 * @args:  opaque pointer handed to @fn
 * @flags: clone flags forwarded to lxc_raw_clone()
 *
 * In the parent the child's pid is returned. The child never returns from
 * this function: it terminates with the callback's return value as its exit
 * status.
 * NOTE(review): the error branch, the child branch and the final return were
 * missing from the damaged source and are reconstructed; the -1 error value
 * in particular should be confirmed against upstream.
 */
pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags)
{
	pid_t pid;

	pid = lxc_raw_clone(flags);
	if (pid < 0)
		return -1;

	/* exit() is not thread-safe and might mess with the parent's signal
	 * handlers and other stuff when exec() fails.
	 */
	if (pid == 0)
		_exit(fn(args));

	return pid;
}
149 /* Leave the user namespace at the first position in the array of structs so
150 * that we always attach to it first when iterating over the struct and using
151 * setns() to switch namespaces. This especially affects lxc_attach(): Suppose
152 * you cloned a new user namespace and mount namespace as an unprivileged user
153 * on the host and want to setns() to the mount namespace. This requires you to
154 * attach to the user namespace first otherwise the kernel will fail this check:
156 * if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
157 * !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
158 * !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
163 * linux/fs/namespace.c:mntns_install().
165 const struct ns_info ns_info
[LXC_NS_MAX
] = {
166 [LXC_NS_USER
] = { "user", CLONE_NEWUSER
, "CLONE_NEWUSER", "LXC_USER_NS" },
167 [LXC_NS_MNT
] = { "mnt", CLONE_NEWNS
, "CLONE_NEWNS", "LXC_MNT_NS" },
168 [LXC_NS_PID
] = { "pid", CLONE_NEWPID
, "CLONE_NEWPID", "LXC_PID_NS" },
169 [LXC_NS_UTS
] = { "uts", CLONE_NEWUTS
, "CLONE_NEWUTS", "LXC_UTS_NS" },
170 [LXC_NS_IPC
] = { "ipc", CLONE_NEWIPC
, "CLONE_NEWIPC", "LXC_IPC_NS" },
171 [LXC_NS_NET
] = { "net", CLONE_NEWNET
, "CLONE_NEWNET", "LXC_NET_NS" },
172 [LXC_NS_CGROUP
] = { "cgroup", CLONE_NEWCGROUP
, "CLONE_NEWCGROUP", "LXC_CGROUP_NS" }
175 int lxc_namespace_2_cloneflag(const char *namespace)
179 for (i
= 0; i
< LXC_NS_MAX
; i
++)
180 if (!strcasecmp(ns_info
[i
].proc_name
, namespace))
181 return ns_info
[i
].clone_flag
;
183 ERROR("Invalid namespace name \"%s\"", namespace);
187 int lxc_namespace_2_ns_idx(const char *namespace)
191 for (i
= 0; i
< LXC_NS_MAX
; i
++)
192 if (!strcmp(ns_info
[i
].proc_name
, namespace))
195 ERROR("Invalid namespace name \"%s\"", namespace);
/* Rewrite, in place, the long namespace names accepted by lxc-attach and
 * lxc-unshare into their short standard forms.
 *
 * @namespaces: writable, NUL-terminated string such as "IPC|MOUNT|PID".
 *              Every "MOUNT" becomes "MNT", every "NETWORK" becomes "NET"
 *              and every "UTSNAME" becomes "UTS". The string only ever
 *              shrinks, so no extra space is needed.
 *
 * NOTE(review): the NULL guard and the return statements were missing from
 * the damaged source; -1 for a NULL argument and 0 on success are assumed -
 * confirm against upstream.
 */
int lxc_namespace_2_std_identifiers(char *namespaces)
{
	static const char *const long_names[] = { "NETWORK", "UTSNAME" };
	char *del;
	size_t i;

	if (!namespaces)
		return -1;

	/* The identifiers for namespaces used with lxc-attach and lxc-unshare
	 * as given on the manpage do not align with the standard identifiers.
	 * This affects network, mount, and uts namespaces. The standard
	 * identifiers are: "mnt", "uts", and "net" whereas lxc-attach and
	 * lxc-unshare use "MOUNT", "UTSNAME", and "NETWORK". So let's use some
	 * cheap memmove()s to replace them by their standard identifiers.
	 * Let's illustrate this with an example. Given the string
	 *
	 *	"IPC|MOUNT|PID"
	 *
	 * and del pointing at the 'M', we memmove()
	 *
	 *	dest: del + 1 == OUNT|PID
	 *	src:  del + 3 == NT|PID
	 *
	 * which leaves "IPC|MNT|PID". The length strlen(del) - 2 covers the
	 * tail starting at del + 3 including its terminating NUL byte.
	 */
	while ((del = strstr(namespaces, "MOUNT")))
		memmove(del + 1, del + 3, strlen(del) - 2);

	/* Both remaining names are seven characters long and shorten to their
	 * first three, so the tail at del + 7 is pulled forward to del + 3;
	 * strlen(del) - 6 again includes the terminating NUL byte.
	 */
	for (i = 0; i < sizeof(long_names) / sizeof(long_names[0]); i++)
		while ((del = strstr(namespaces, long_names[i])))
			memmove(del + 3, del + 7, strlen(del) - 6);

	return 0;
}
233 int lxc_fill_namespace_flags(char *flaglist
, int *flags
)
239 ERROR("At least one namespace is needed.");
243 lxc_iterate_parts(token
, flaglist
, "|") {
244 aflag
= lxc_namespace_2_cloneflag(token
);