]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - kernel/user_namespace.c
KVM: x86/speculation: Disable Fill buffer clear within guests
[mirror_ubuntu-jammy-kernel.git] / kernel / user_namespace.c
CommitLineData
b886d83c 1// SPDX-License-Identifier: GPL-2.0-only
acce292c 2
9984de1a 3#include <linux/export.h>
acce292c 4#include <linux/nsproxy.h>
1aeb272c 5#include <linux/slab.h>
3f07c014 6#include <linux/sched/signal.h>
acce292c 7#include <linux/user_namespace.h>
0bb80f24 8#include <linux/proc_ns.h>
5c1469de 9#include <linux/highuid.h>
18b6e041 10#include <linux/cred.h>
973c5914 11#include <linux/securebits.h>
22d917d8
EB
12#include <linux/keyctl.h>
13#include <linux/key-type.h>
14#include <keys/user-type.h>
15#include <linux/seq_file.h>
16#include <linux/fs.h>
17#include <linux/uaccess.h>
18#include <linux/ctype.h>
f76d207a 19#include <linux/projid.h>
e66eded8 20#include <linux/fs_struct.h>
6397fac4
CB
21#include <linux/bsearch.h>
22#include <linux/sort.h>
acce292c 23
/*
 * sysctl determining whether unprivileged users may unshare a new
 * userns. Allowed by default
 */
int unprivileged_userns_clone = 1;

/* Slab cache backing all struct user_namespace allocations. */
static struct kmem_cache *user_ns_cachep __read_mostly;
/* Serializes id-map writes and ns->flags updates (see create_user_ns()). */
static DEFINE_MUTEX(userns_state_mutex);

/* Forward declarations for helpers defined later in this file. */
static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *map);
static void free_user_ns(struct work_struct *work);
/*
 * Charge one namespace against @uid's UCOUNT_USER_NAMESPACES count in
 * @ns. Callers treat a NULL result as an over-limit failure.
 */
static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
{
	return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
}
42
43static void dec_user_namespaces(struct ucounts *ucounts)
44{
45 return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
46}
47
/*
 * Rebind @cred to @user_ns with a full capability set that is only
 * meaningful inside the new namespace.
 */
static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
{
	/* Start with the same capabilities as init but useless for doing
	 * anything as the capabilities are bound to the new user namespace.
	 */
	cred->securebits = SECUREBITS_DEFAULT;
	cred->cap_inheritable = CAP_EMPTY_SET;
	cred->cap_permitted = CAP_FULL_SET;
	cred->cap_effective = CAP_FULL_SET;
	cred->cap_ambient = CAP_EMPTY_SET;
	cred->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
	/* Drop any request_key() authorization key inherited from the parent. */
	key_put(cred->request_key_auth);
	cred->request_key_auth = NULL;
#endif
	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
	cred->user_ns = user_ns;
}
66
86b5cafb
EB
67static unsigned long enforced_nproc_rlimit(void)
68{
69 unsigned long limit = RLIM_INFINITY;
70
71 /* Is RLIMIT_NPROC currently enforced? */
72 if (!uid_eq(current_uid(), GLOBAL_ROOT_UID) ||
73 (current_user_ns() != &init_user_ns))
74 limit = rlimit(RLIMIT_NPROC);
75
76 return limit;
77}
78
/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 *
 * Returns 0 on success, or -ENOSPC / -EPERM / -ENOMEM on failure.
 */
int create_user_ns(struct cred *new)
{
	struct user_namespace *ns, *parent_ns = new->user_ns;
	kuid_t owner = new->euid;
	kgid_t group = new->egid;
	struct ucounts *ucounts;
	int ret, i;

	/* Refuse once the parent is already 32 levels deep. */
	ret = -ENOSPC;
	if (parent_ns->level > 32)
		goto fail;

	/* Charge the owner's UCOUNT_USER_NAMESPACES limit. */
	ucounts = inc_user_namespaces(parent_ns, owner);
	if (!ucounts)
		goto fail;

	/*
	 * Verify that we can not violate the policy of which files
	 * may be accessed that is specified by the root directory,
	 * by verifying that the root directory is at the root of the
	 * mount namespace which allows all files to be accessed.
	 */
	ret = -EPERM;
	if (current_chrooted())
		goto fail_dec;

	/* The creator needs a mapping in the parent user namespace
	 * or else we won't be able to reasonably tell userspace who
	 * created a user_namespace.
	 */
	ret = -EPERM;
	if (!kuid_has_mapping(parent_ns, owner) ||
	    !kgid_has_mapping(parent_ns, group))
		goto fail_dec;

	ret = -ENOMEM;
	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
	if (!ns)
		goto fail_dec;

	/* Recorded so verify_root_map() can later check the creator's caps. */
	ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP);
	ret = ns_alloc_inum(&ns->ns);
	if (ret)
		goto fail_free;
	ns->ns.ops = &userns_operations;

	refcount_set(&ns->ns.count, 1);
	/* Leave the new->user_ns reference with the new user namespace. */
	ns->parent = parent_ns;
	ns->level = parent_ns->level + 1;
	ns->owner = owner;
	ns->group = group;
	INIT_WORK(&ns->work, free_user_ns);
	/* Per-namespace ucount ceilings start unlimited... */
	for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) {
		ns->ucount_max[i] = INT_MAX;
	}
	/* ...while the rlimit-backed ones inherit the creator's rlimits. */
	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, enforced_nproc_rlimit());
	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
	ns->ucounts = ucounts;

	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
	mutex_lock(&userns_state_mutex);
	ns->flags = parent_ns->flags;
	mutex_unlock(&userns_state_mutex);

#ifdef CONFIG_KEYS
	INIT_LIST_HEAD(&ns->keyring_name_list);
	init_rwsem(&ns->keyring_sem);
#endif
	ret = -ENOMEM;
	if (!setup_userns_sysctls(ns))
		goto fail_keyring;

	set_cred_user_ns(new, ns);
	return 0;
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
	key_put(ns->persistent_keyring_register);
#endif
	ns_free_inum(&ns->ns);
fail_free:
	kmem_cache_free(user_ns_cachep, ns);
fail_dec:
	dec_user_namespaces(ucounts);
fail:
	return ret;
}
176
b2e0d987
EB
177int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
178{
179 struct cred *cred;
6160968c 180 int err = -ENOMEM;
b2e0d987
EB
181
182 if (!(unshare_flags & CLONE_NEWUSER))
183 return 0;
184
185 cred = prepare_creds();
6160968c
ON
186 if (cred) {
187 err = create_user_ns(cred);
188 if (err)
189 put_cred(cred);
190 else
191 *new_cred = cred;
192 }
b2e0d987 193
6160968c 194 return err;
b2e0d987
EB
195}
196
/*
 * Tear down a user namespace, and iteratively any ancestor whose last
 * reference is dropped as a consequence. Runs from the workqueue
 * (scheduled by __put_user_ns()).
 */
static void free_user_ns(struct work_struct *work)
{
	struct user_namespace *parent, *ns =
		container_of(work, struct user_namespace, work);

	do {
		struct ucounts *ucounts = ns->ucounts;
		parent = ns->parent;
		/* Maps that spilled past the base extents own extra arrays. */
		if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
			kfree(ns->gid_map.forward);
			kfree(ns->gid_map.reverse);
		}
		if (ns->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
			kfree(ns->uid_map.forward);
			kfree(ns->uid_map.reverse);
		}
		if (ns->projid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
			kfree(ns->projid_map.forward);
			kfree(ns->projid_map.reverse);
		}
		retire_userns_sysctls(ns);
		key_free_user_ns(ns);
		ns_free_inum(&ns->ns);
		kmem_cache_free(user_ns_cachep, ns);
		dec_user_namespaces(ucounts);
		ns = parent;
		/* Continue while we just dropped the parent's last reference. */
	} while (refcount_dec_and_test(&parent->ns.count));
}
/*
 * Last reference to @ns dropped: defer the actual teardown to a
 * workqueue (ns->work was initialized to free_user_ns()).
 */
void __put_user_ns(struct user_namespace *ns)
{
	schedule_work(&ns->work);
}
EXPORT_SYMBOL(__put_user_ns);
5c1469de 231
/**
 * idmap_key struct holds the information necessary to find an idmapping in a
 * sorted idmap array. It is passed to cmp_map_id() as first argument.
 */
struct idmap_key {
	bool map_up; /* true -> id from kid; false -> kid from id */
	u32 id; /* id to find */
	u32 count; /* number of consecutive ids to match (1 for point lookups) */
};
241
242/**
243 * cmp_map_id - Function to be passed to bsearch() to find the requested
244 * idmapping. Expects struct idmap_key to be passed via @k.
245 */
246static int cmp_map_id(const void *k, const void *e)
247{
248 u32 first, last, id2;
249 const struct idmap_key *key = k;
250 const struct uid_gid_extent *el = e;
251
11a8b927 252 id2 = key->id + key->count - 1;
6397fac4
CB
253
254 /* handle map_id_{down,up}() */
255 if (key->map_up)
256 first = el->lower_first;
257 else
258 first = el->first;
259
260 last = first + el->count - 1;
261
262 if (key->id >= first && key->id <= last &&
263 (id2 >= first && id2 <= last))
264 return 0;
265
266 if (key->id < first || id2 < first)
267 return -1;
268
269 return 1;
270}
271
272/**
273 * map_id_range_down_max - Find idmap via binary search in ordered idmap array.
274 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
275 */
3edf652f
EB
276static struct uid_gid_extent *
277map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
5c1469de 278{
6397fac4
CB
279 struct idmap_key key;
280
281 key.map_up = false;
282 key.count = count;
283 key.id = id;
284
3edf652f
EB
285 return bsearch(&key, map->forward, extents,
286 sizeof(struct uid_gid_extent), cmp_map_id);
6397fac4
CB
287}
288
289/**
290 * map_id_range_down_base - Find idmap via binary search in static extent array.
291 * Can only be called if number of mappings is equal or less than
292 * UID_GID_MAP_MAX_BASE_EXTENTS.
293 */
3edf652f
EB
294static struct uid_gid_extent *
295map_id_range_down_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
5c1469de 296{
3edf652f 297 unsigned idx;
22d917d8 298 u32 first, last, id2;
5c1469de 299
22d917d8 300 id2 = id + count - 1;
5c1469de 301
22d917d8 302 /* Find the matching extent */
22d917d8
EB
303 for (idx = 0; idx < extents; idx++) {
304 first = map->extent[idx].first;
305 last = first + map->extent[idx].count - 1;
306 if (id >= first && id <= last &&
307 (id2 >= first && id2 <= last))
3edf652f 308 return &map->extent[idx];
22d917d8 309 }
3edf652f 310 return NULL;
22d917d8
EB
311}
312
6397fac4
CB
313static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
314{
3edf652f
EB
315 struct uid_gid_extent *extent;
316 unsigned extents = map->nr_extents;
6397fac4
CB
317 smp_rmb();
318
319 if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
3edf652f
EB
320 extent = map_id_range_down_base(extents, map, id, count);
321 else
322 extent = map_id_range_down_max(extents, map, id, count);
323
22d917d8 324 /* Map the id or note failure */
3edf652f
EB
325 if (extent)
326 id = (id - extent->first) + extent->lower_first;
22d917d8
EB
327 else
328 id = (u32) -1;
329
330 return id;
331}
332
/* Map a single id (count == 1) downward through @map. */
static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
	return map_id_range_down(map, id, 1);
}
337
338/**
339 * map_id_up_base - Find idmap via binary search in static extent array.
340 * Can only be called if number of mappings is equal or less than
341 * UID_GID_MAP_MAX_BASE_EXTENTS.
342 */
3edf652f
EB
343static struct uid_gid_extent *
344map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id)
22d917d8 345{
3edf652f 346 unsigned idx;
22d917d8
EB
347 u32 first, last;
348
349 /* Find the matching extent */
22d917d8 350 for (idx = 0; idx < extents; idx++) {
22d917d8 351 first = map->extent[idx].lower_first;
22d917d8
EB
352 last = first + map->extent[idx].count - 1;
353 if (id >= first && id <= last)
3edf652f 354 return &map->extent[idx];
22d917d8 355 }
3edf652f 356 return NULL;
22d917d8 357}
22d917d8 358
6397fac4
CB
359/**
360 * map_id_up_max - Find idmap via binary search in ordered idmap array.
361 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
362 */
3edf652f
EB
363static struct uid_gid_extent *
364map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
6397fac4 365{
6397fac4
CB
366 struct idmap_key key;
367
368 key.map_up = true;
11a8b927 369 key.count = 1;
6397fac4
CB
370 key.id = id;
371
3edf652f
EB
372 return bsearch(&key, map->reverse, extents,
373 sizeof(struct uid_gid_extent), cmp_map_id);
22d917d8
EB
374}
375
376static u32 map_id_up(struct uid_gid_map *map, u32 id)
377{
3edf652f
EB
378 struct uid_gid_extent *extent;
379 unsigned extents = map->nr_extents;
e79323bd 380 smp_rmb();
6397fac4 381
3edf652f
EB
382 if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
383 extent = map_id_up_base(extents, map, id);
384 else
385 extent = map_id_up_max(extents, map, id);
386
22d917d8 387 /* Map the id or note failure */
6397fac4
CB
388 if (extent)
389 id = (id - extent->lower_first) + extent->first;
22d917d8
EB
390 else
391 id = (u32) -1;
392
393 return id;
394}
395
/**
 * make_kuid - Map a user-namespace uid pair into a kuid.
 * @ns: User namespace that the uid is in
 * @uid: User identifier
 *
 * Maps a user-namespace uid pair into a kernel internal kuid,
 * and returns that kuid.
 *
 * When there is no mapping defined for the user-namespace uid
 * pair INVALID_UID is returned.  Callers are expected to test
 * for and handle INVALID_UID being returned.  INVALID_UID
 * may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
	/* Map the uid to a global kernel uid */
	return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);
415
/**
 * from_kuid - Create a uid from a kuid user-namespace pair.
 * @targ: The user namespace we want a uid in.
 * @kuid: The kernel internal uid to start with.
 *
 * Map @kuid into the user-namespace specified by @targ and
 * return the resulting uid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
	/* Map the uid from a global kernel uid */
	return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);
434
435/**
436 * from_kuid_munged - Create a uid from a kuid user-namespace pair.
437 * @targ: The user namespace we want a uid in.
438 * @kuid: The kernel internal uid to start with.
439 *
440 * Map @kuid into the user-namespace specified by @targ and
441 * return the resulting uid.
442 *
443 * There is always a mapping into the initial user_namespace.
444 *
445 * Unlike from_kuid from_kuid_munged never fails and always
446 * returns a valid uid. This makes from_kuid_munged appropriate
447 * for use in syscalls like stat and getuid where failing the
448 * system call and failing to provide a valid uid are not an
449 * options.
450 *
451 * If @kuid has no mapping in @targ overflowuid is returned.
452 */
453uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
454{
455 uid_t uid;
456 uid = from_kuid(targ, kuid);
457
458 if (uid == (uid_t) -1)
459 uid = overflowuid;
460 return uid;
461}
462EXPORT_SYMBOL(from_kuid_munged);
463
/**
 * make_kgid - Map a user-namespace gid pair into a kgid.
 * @ns: User namespace that the gid is in
 * @gid: group identifier
 *
 * Maps a user-namespace gid pair into a kernel internal kgid,
 * and returns that kgid.
 *
 * When there is no mapping defined for the user-namespace gid
 * pair INVALID_GID is returned.  Callers are expected to test
 * for and handle INVALID_GID being returned.  INVALID_GID may be
 * tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
	/* Map the gid to a global kernel gid */
	return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);
483
484/**
485 * from_kgid - Create a gid from a kgid user-namespace pair.
486 * @targ: The user namespace we want a gid in.
487 * @kgid: The kernel internal gid to start with.
488 *
489 * Map @kgid into the user-namespace specified by @targ and
490 * return the resulting gid.
491 *
492 * There is always a mapping into the initial user_namespace.
493 *
494 * If @kgid has no mapping in @targ (gid_t)-1 is returned.
495 */
496gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
497{
498 /* Map the gid from a global kernel gid */
499 return map_id_up(&targ->gid_map, __kgid_val(kgid));
500}
501EXPORT_SYMBOL(from_kgid);
502
503/**
504 * from_kgid_munged - Create a gid from a kgid user-namespace pair.
505 * @targ: The user namespace we want a gid in.
506 * @kgid: The kernel internal gid to start with.
507 *
508 * Map @kgid into the user-namespace specified by @targ and
509 * return the resulting gid.
510 *
511 * There is always a mapping into the initial user_namespace.
512 *
513 * Unlike from_kgid from_kgid_munged never fails and always
514 * returns a valid gid. This makes from_kgid_munged appropriate
515 * for use in syscalls like stat and getgid where failing the
516 * system call and failing to provide a valid gid are not options.
517 *
518 * If @kgid has no mapping in @targ overflowgid is returned.
519 */
520gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
521{
522 gid_t gid;
523 gid = from_kgid(targ, kgid);
524
525 if (gid == (gid_t) -1)
526 gid = overflowgid;
527 return gid;
528}
529EXPORT_SYMBOL(from_kgid_munged);
530
/**
 * make_kprojid - Map a user-namespace projid pair into a kprojid.
 * @ns: User namespace that the projid is in
 * @projid: Project identifier
 *
 * Maps a user-namespace projid pair into a kernel internal kprojid,
 * and returns that kprojid.
 *
 * When there is no mapping defined for the user-namespace projid
 * pair INVALID_PROJID is returned.  Callers are expected to test
 * for and handle INVALID_PROJID being returned.  INVALID_PROJID
 * may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
	/* Map the projid to a global kernel projid */
	return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);
550
551/**
552 * from_kprojid - Create a projid from a kprojid user-namespace pair.
553 * @targ: The user namespace we want a projid in.
554 * @kprojid: The kernel internal project identifier to start with.
555 *
556 * Map @kprojid into the user-namespace specified by @targ and
557 * return the resulting projid.
558 *
559 * There is always a mapping into the initial user_namespace.
560 *
561 * If @kprojid has no mapping in @targ (projid_t)-1 is returned.
562 */
563projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
564{
565 /* Map the uid from a global kernel uid */
566 return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
567}
568EXPORT_SYMBOL(from_kprojid);
569
/**
 * from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
 * @targ: The user namespace we want a projid in.
 * @kprojid: The kernel internal projid to start with.
 *
 * Map @kprojid into the user-namespace specified by @targ and
 * return the resulting projid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kprojid from_kprojid_munged never fails and always
 * returns a valid projid. This makes from_kprojid_munged
 * appropriate for use in syscalls like stat where failing the
 * system call and failing to provide a valid projid are
 * not options.
 *
 * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 */
projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
{
	projid_t projid;
	projid = from_kprojid(targ, kprojid);

	if (projid == (projid_t) -1)
		projid = OVERFLOW_PROJID;
	return projid;
}
EXPORT_SYMBOL(from_kprojid_munged);
598
599
22d917d8
EB
600static int uid_m_show(struct seq_file *seq, void *v)
601{
602 struct user_namespace *ns = seq->private;
603 struct uid_gid_extent *extent = v;
604 struct user_namespace *lower_ns;
605 uid_t lower;
5c1469de 606
c450f371 607 lower_ns = seq_user_ns(seq);
22d917d8
EB
608 if ((lower_ns == ns) && lower_ns->parent)
609 lower_ns = lower_ns->parent;
610
611 lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
612
613 seq_printf(seq, "%10u %10u %10u\n",
614 extent->first,
615 lower,
616 extent->count);
617
618 return 0;
5c1469de
EB
619}
620
22d917d8 621static int gid_m_show(struct seq_file *seq, void *v)
5c1469de 622{
22d917d8
EB
623 struct user_namespace *ns = seq->private;
624 struct uid_gid_extent *extent = v;
625 struct user_namespace *lower_ns;
626 gid_t lower;
5c1469de 627
c450f371 628 lower_ns = seq_user_ns(seq);
22d917d8
EB
629 if ((lower_ns == ns) && lower_ns->parent)
630 lower_ns = lower_ns->parent;
5c1469de 631
22d917d8
EB
632 lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
633
634 seq_printf(seq, "%10u %10u %10u\n",
635 extent->first,
636 lower,
637 extent->count);
638
639 return 0;
640}
641
f76d207a
EB
642static int projid_m_show(struct seq_file *seq, void *v)
643{
644 struct user_namespace *ns = seq->private;
645 struct uid_gid_extent *extent = v;
646 struct user_namespace *lower_ns;
647 projid_t lower;
648
649 lower_ns = seq_user_ns(seq);
650 if ((lower_ns == ns) && lower_ns->parent)
651 lower_ns = lower_ns->parent;
652
653 lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));
654
655 seq_printf(seq, "%10u %10u %10u\n",
656 extent->first,
657 lower,
658 extent->count);
659
660 return 0;
661}
662
/*
 * Common seq_file ->start helper for the id-map files: return the
 * extent at position *ppos in @map, or NULL past the end.
 */
static void *m_start(struct seq_file *seq, loff_t *ppos,
		     struct uid_gid_map *map)
{
	loff_t pos = *ppos;
	unsigned extents = map->nr_extents;
	/* Read nr_extents before the extent data it guards. */
	smp_rmb();

	if (pos >= extents)
		return NULL;

	/* Small maps live inline; large ones in the ->forward array. */
	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
		return &map->extent[pos];

	return &map->forward[pos];
}
678
/* seq_file ->start for the uid map. */
static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->uid_map);
}

/* seq_file ->start for the gid map. */
static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->gid_map);
}

/* seq_file ->start for the projid map. */
static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->projid_map);
}
699
/* seq_file ->next: advance and re-lookup via the table's ->start. */
static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return seq->op->start(seq, pos);
}

/* seq_file ->stop: nothing to release. */
static void m_stop(struct seq_file *seq, void *v)
{
	return;
}
710
/* seq_file operation tables for the uid/gid/projid map files. */
const struct seq_operations proc_uid_seq_operations = {
	.start = uid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = uid_m_show,
};

const struct seq_operations proc_gid_seq_operations = {
	.start = gid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = gid_m_show,
};

const struct seq_operations proc_projid_seq_operations = {
	.start = projid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = projid_m_show,
};
731
68a9a435
FF
732static bool mappings_overlap(struct uid_gid_map *new_map,
733 struct uid_gid_extent *extent)
0bd14b4f
EB
734{
735 u32 upper_first, lower_first, upper_last, lower_last;
736 unsigned idx;
737
738 upper_first = extent->first;
739 lower_first = extent->lower_first;
740 upper_last = upper_first + extent->count - 1;
741 lower_last = lower_first + extent->count - 1;
742
743 for (idx = 0; idx < new_map->nr_extents; idx++) {
744 u32 prev_upper_first, prev_lower_first;
745 u32 prev_upper_last, prev_lower_last;
746 struct uid_gid_extent *prev;
747
6397fac4
CB
748 if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
749 prev = &new_map->extent[idx];
750 else
751 prev = &new_map->forward[idx];
0bd14b4f
EB
752
753 prev_upper_first = prev->first;
754 prev_lower_first = prev->lower_first;
755 prev_upper_last = prev_upper_first + prev->count - 1;
756 prev_lower_last = prev_lower_first + prev->count - 1;
757
758 /* Does the upper range intersect a previous extent? */
759 if ((prev_upper_first <= upper_last) &&
760 (prev_upper_last >= upper_first))
761 return true;
762
763 /* Does the lower range intersect a previous extent? */
764 if ((prev_lower_first <= lower_last) &&
765 (prev_lower_last >= lower_first))
766 return true;
767 }
768 return false;
769}
770
/**
 * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
 * Takes care to allocate a 4K block of memory if the number of mappings exceeds
 * UID_GID_MAP_MAX_BASE_EXTENTS.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent)
{
	struct uid_gid_extent *dest;

	/* Exactly at the base capacity: spill into a heap array first. */
	if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) {
		struct uid_gid_extent *forward;

		/* Allocate memory for 340 mappings. */
		forward = kmalloc_array(UID_GID_MAP_MAX_EXTENTS,
					sizeof(struct uid_gid_extent),
					GFP_KERNEL);
		if (!forward)
			return -ENOMEM;

		/* Copy over memory. Only set up memory for the forward pointer.
		 * Defer the memory setup for the reverse pointer.
		 */
		memcpy(forward, map->extent,
		       map->nr_extents * sizeof(map->extent[0]));

		map->forward = forward;
		map->reverse = NULL;
	}

	/* '<' deliberately: once spilled above, new entries go to ->forward. */
	if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS)
		dest = &map->extent[map->nr_extents];
	else
		dest = &map->forward[map->nr_extents];

	*dest = *extent;
	map->nr_extents++;
	return 0;
}
809
810/* cmp function to sort() forward mappings */
811static int cmp_extents_forward(const void *a, const void *b)
812{
813 const struct uid_gid_extent *e1 = a;
814 const struct uid_gid_extent *e2 = b;
815
816 if (e1->first < e2->first)
817 return -1;
818
819 if (e1->first > e2->first)
820 return 1;
821
822 return 0;
823}
824
825/* cmp function to sort() reverse mappings */
826static int cmp_extents_reverse(const void *a, const void *b)
827{
828 const struct uid_gid_extent *e1 = a;
829 const struct uid_gid_extent *e2 = b;
830
831 if (e1->lower_first < e2->lower_first)
832 return -1;
833
834 if (e1->lower_first > e2->lower_first)
835 return 1;
836
837 return 0;
838}
839
/**
 * sort_idmaps - Sorts an array of idmap entries.
 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
 *
 * Sorts ->forward by upper first id, then duplicates it into ->reverse
 * sorted by lower first id, so both lookup directions can use bsearch().
 * Returns 0 on success or -ENOMEM.
 */
static int sort_idmaps(struct uid_gid_map *map)
{
	/* Small maps are scanned linearly; nothing to sort. */
	if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
		return 0;

	/* Sort forward array. */
	sort(map->forward, map->nr_extents, sizeof(struct uid_gid_extent),
	     cmp_extents_forward, NULL);

	/* Only copy the memory from forward we actually need. */
	map->reverse = kmemdup(map->forward,
			       map->nr_extents * sizeof(struct uid_gid_extent),
			       GFP_KERNEL);
	if (!map->reverse)
		return -ENOMEM;

	/* Sort reverse array. */
	sort(map->reverse, map->nr_extents, sizeof(struct uid_gid_extent),
	     cmp_extents_reverse, NULL);

	return 0;
}
866
/**
 * verify_root_map() - check the uid 0 mapping
 * @file: idmapping file
 * @map_ns: user namespace of the target process
 * @new_map: requested idmap
 *
 * If a process requests mapping parent uid 0 into the new ns, verify that the
 * process writing the map had the CAP_SETFCAP capability as the target process
 * will be able to write fscaps that are valid in ancestor user namespaces.
 *
 * Return: true if the mapping is allowed, false if not.
 */
static bool verify_root_map(const struct file *file,
			    struct user_namespace *map_ns,
			    struct uid_gid_map *new_map)
{
	int idx;
	const struct user_namespace *file_ns = file->f_cred->user_ns;
	struct uid_gid_extent *extent0 = NULL;

	/* Look for an extent that maps parent uid 0 (lower_first == 0). */
	for (idx = 0; idx < new_map->nr_extents; idx++) {
		if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
			extent0 = &new_map->extent[idx];
		else
			extent0 = &new_map->forward[idx];
		if (extent0->lower_first == 0)
			break;

		extent0 = NULL;
	}

	/* uid 0 is not being mapped; nothing extra to verify. */
	if (!extent0)
		return true;

	if (map_ns == file_ns) {
		/* The process unshared its ns and is writing to its own
		 * /proc/self/uid_map. User already has full capabilities in
		 * the new namespace. Verify that the parent had CAP_SETFCAP
		 * when it unshared.
		 * */
		if (!file_ns->parent_could_setfcap)
			return false;
	} else {
		/* Process p1 is writing to uid_map of p2, who is in a child
		 * user namespace to p1's. Verify that the opener of the map
		 * file has CAP_SETFCAP against the parent of the new map
		 * namespace */
		if (!file_ns_capable(file, map_ns->parent, CAP_SETFCAP))
			return false;
	}

	return true;
}
920
22d917d8
EB
921static ssize_t map_write(struct file *file, const char __user *buf,
922 size_t count, loff_t *ppos,
923 int cap_setid,
924 struct uid_gid_map *map,
925 struct uid_gid_map *parent_map)
926{
927 struct seq_file *seq = file->private_data;
db2e718a 928 struct user_namespace *map_ns = seq->private;
22d917d8
EB
929 struct uid_gid_map new_map;
930 unsigned idx;
6397fac4 931 struct uid_gid_extent extent;
70f6cbb6 932 char *kbuf = NULL, *pos, *next_line;
5820f140
JH
933 ssize_t ret;
934
935 /* Only allow < page size writes at the beginning of the file */
936 if ((*ppos != 0) || (count >= PAGE_SIZE))
937 return -EINVAL;
938
939 /* Slurp in the user data */
940 kbuf = memdup_user_nul(buf, count);
941 if (IS_ERR(kbuf))
942 return PTR_ERR(kbuf);
22d917d8
EB
943
944 /*
f0d62aec 945 * The userns_state_mutex serializes all writes to any given map.
22d917d8
EB
946 *
947 * Any map is only ever written once.
948 *
949 * An id map fits within 1 cache line on most architectures.
950 *
951 * On read nothing needs to be done unless you are on an
952 * architecture with a crazy cache coherency model like alpha.
953 *
954 * There is a one time data dependency between reading the
955 * count of the extents and the values of the extents. The
956 * desired behavior is to see the values of the extents that
957 * were written before the count of the extents.
958 *
959 * To achieve this smp_wmb() is used on guarantee the write
e79323bd
MP
960 * order and smp_rmb() is guaranteed that we don't have crazy
961 * architectures returning stale data.
22d917d8 962 */
f0d62aec 963 mutex_lock(&userns_state_mutex);
22d917d8 964
6397fac4
CB
965 memset(&new_map, 0, sizeof(struct uid_gid_map));
966
22d917d8
EB
967 ret = -EPERM;
968 /* Only allow one successful write to the map */
969 if (map->nr_extents != 0)
970 goto out;
971
41c21e35
AL
972 /*
973 * Adjusting namespace settings requires capabilities on the target.
5c1469de 974 */
db2e718a 975 if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN))
22d917d8
EB
976 goto out;
977
22d917d8
EB
978 /* Parse the user data */
979 ret = -EINVAL;
980 pos = kbuf;
68a9a435 981 for (; pos; pos = next_line) {
22d917d8
EB
982
983 /* Find the end of line and ensure I don't look past it */
984 next_line = strchr(pos, '\n');
985 if (next_line) {
986 *next_line = '\0';
987 next_line++;
988 if (*next_line == '\0')
989 next_line = NULL;
5c1469de 990 }
22d917d8
EB
991
992 pos = skip_spaces(pos);
6397fac4 993 extent.first = simple_strtoul(pos, &pos, 10);
22d917d8
EB
994 if (!isspace(*pos))
995 goto out;
996
997 pos = skip_spaces(pos);
6397fac4 998 extent.lower_first = simple_strtoul(pos, &pos, 10);
22d917d8
EB
999 if (!isspace(*pos))
1000 goto out;
1001
1002 pos = skip_spaces(pos);
6397fac4 1003 extent.count = simple_strtoul(pos, &pos, 10);
22d917d8
EB
1004 if (*pos && !isspace(*pos))
1005 goto out;
1006
1007 /* Verify there is not trailing junk on the line */
1008 pos = skip_spaces(pos);
1009 if (*pos != '\0')
1010 goto out;
1011
1012 /* Verify we have been given valid starting values */
6397fac4
CB
1013 if ((extent.first == (u32) -1) ||
1014 (extent.lower_first == (u32) -1))
22d917d8
EB
1015 goto out;
1016
68a9a435
FF
1017 /* Verify count is not zero and does not cause the
1018 * extent to wrap
1019 */
6397fac4 1020 if ((extent.first + extent.count) <= extent.first)
22d917d8 1021 goto out;
6397fac4
CB
1022 if ((extent.lower_first + extent.count) <=
1023 extent.lower_first)
22d917d8
EB
1024 goto out;
1025
0bd14b4f 1026 /* Do the ranges in extent overlap any previous extents? */
6397fac4 1027 if (mappings_overlap(&new_map, &extent))
22d917d8
EB
1028 goto out;
1029
6397fac4 1030 if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS &&
22d917d8
EB
1031 (next_line != NULL))
1032 goto out;
6397fac4
CB
1033
1034 ret = insert_extent(&new_map, &extent);
1035 if (ret < 0)
1036 goto out;
1037 ret = -EINVAL;
5c1469de 1038 }
a12f4f85 1039 /* Be very certain the new map actually exists */
22d917d8
EB
1040 if (new_map.nr_extents == 0)
1041 goto out;
1042
1043 ret = -EPERM;
1044 /* Validate the user is allowed to use user id's mapped to. */
db2e718a 1045 if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map))
22d917d8
EB
1046 goto out;
1047
6397fac4 1048 ret = -EPERM;
22d917d8
EB
1049 /* Map the lower ids from the parent user namespace to the
1050 * kernel global id space.
1051 */
1052 for (idx = 0; idx < new_map.nr_extents; idx++) {
6397fac4 1053 struct uid_gid_extent *e;
22d917d8 1054 u32 lower_first;
6397fac4
CB
1055
1056 if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
1057 e = &new_map.extent[idx];
1058 else
1059 e = &new_map.forward[idx];
22d917d8
EB
1060
1061 lower_first = map_id_range_down(parent_map,
6397fac4
CB
1062 e->lower_first,
1063 e->count);
22d917d8
EB
1064
1065 /* Fail if we can not map the specified extent to
1066 * the kernel global id space.
1067 */
1068 if (lower_first == (u32) -1)
1069 goto out;
1070
6397fac4 1071 e->lower_first = lower_first;
22d917d8
EB
1072 }
1073
d2f007db
JH
1074 /*
1075 * If we want to use binary search for lookup, this clones the extent
1076 * array and sorts both copies.
1077 */
1078 ret = sort_idmaps(&new_map);
1079 if (ret < 0)
1080 goto out;
1081
22d917d8 1082 /* Install the map */
6397fac4
CB
1083 if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
1084 memcpy(map->extent, new_map.extent,
1085 new_map.nr_extents * sizeof(new_map.extent[0]));
1086 } else {
1087 map->forward = new_map.forward;
1088 map->reverse = new_map.reverse;
1089 }
22d917d8
EB
1090 smp_wmb();
1091 map->nr_extents = new_map.nr_extents;
1092
1093 *ppos = count;
1094 ret = count;
1095out:
6397fac4
CB
1096 if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
1097 kfree(new_map.forward);
1098 kfree(new_map.reverse);
1099 map->forward = NULL;
1100 map->reverse = NULL;
1101 map->nr_extents = 0;
1102 }
1103
f0d62aec 1104 mutex_unlock(&userns_state_mutex);
70f6cbb6 1105 kfree(kbuf);
22d917d8
EB
1106 return ret;
1107}
1108
68a9a435
FF
1109ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
1110 size_t size, loff_t *ppos)
22d917d8
EB
1111{
1112 struct seq_file *seq = file->private_data;
1113 struct user_namespace *ns = seq->private;
c450f371 1114 struct user_namespace *seq_ns = seq_user_ns(seq);
22d917d8
EB
1115
1116 if (!ns->parent)
1117 return -EPERM;
1118
c450f371
EB
1119 if ((seq_ns != ns) && (seq_ns != ns->parent))
1120 return -EPERM;
1121
22d917d8
EB
1122 return map_write(file, buf, size, ppos, CAP_SETUID,
1123 &ns->uid_map, &ns->parent->uid_map);
1124}
1125
68a9a435
FF
1126ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
1127 size_t size, loff_t *ppos)
22d917d8
EB
1128{
1129 struct seq_file *seq = file->private_data;
1130 struct user_namespace *ns = seq->private;
c450f371 1131 struct user_namespace *seq_ns = seq_user_ns(seq);
22d917d8
EB
1132
1133 if (!ns->parent)
1134 return -EPERM;
1135
c450f371
EB
1136 if ((seq_ns != ns) && (seq_ns != ns->parent))
1137 return -EPERM;
1138
22d917d8
EB
1139 return map_write(file, buf, size, ppos, CAP_SETGID,
1140 &ns->gid_map, &ns->parent->gid_map);
1141}
1142
68a9a435
FF
1143ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
1144 size_t size, loff_t *ppos)
f76d207a
EB
1145{
1146 struct seq_file *seq = file->private_data;
1147 struct user_namespace *ns = seq->private;
1148 struct user_namespace *seq_ns = seq_user_ns(seq);
1149
1150 if (!ns->parent)
1151 return -EPERM;
1152
1153 if ((seq_ns != ns) && (seq_ns != ns->parent))
1154 return -EPERM;
1155
1156 /* Anyone can set any valid project id no capability needed */
1157 return map_write(file, buf, size, ppos, -1,
1158 &ns->projid_map, &ns->parent->projid_map);
1159}
1160
/*
 * new_idmap_permitted - may the opener of @file install @new_map on @ns?
 * @file:      the open map file being written
 * @ns:        the user namespace receiving the mapping
 * @cap_setid: CAP_SETUID, CAP_SETGID, or an invalid cap (projid case)
 * @new_map:   the fully parsed mapping about to be installed
 *
 * Returns true when any of the following holds:
 *  - a single one-id extent maps the opener's own euid (or egid, but
 *    only once setgroups has been disabled) and the opener owns @ns;
 *  - no privilege is required at all (@cap_setid is not a valid cap);
 *  - the opener held @cap_setid over the parent namespace both when the
 *    file was opened and now.
 */
static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *new_map)
{
	/* NOTE(review): f_cred is the opener's credentials, not the
	 * writer's — presumably to prevent confused-deputy writes via a
	 * privileged process holding the fd; confirm against callers.
	 */
	const struct cred *cred = file->f_cred;

	/* uid mappings must additionally pass verify_root_map(). */
	if (cap_setid == CAP_SETUID && !verify_root_map(file, ns, new_map))
		return false;

	/* Don't allow mappings that would allow anything that wouldn't
	 * be allowed without the establishment of unprivileged mappings.
	 */
	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
	    uid_eq(ns->owner, cred->euid)) {
		u32 id = new_map->extent[0].lower_first;
		if (cap_setid == CAP_SETUID) {
			kuid_t uid = make_kuid(ns->parent, id);
			if (uid_eq(uid, cred->euid))
				return true;
		} else if (cap_setid == CAP_SETGID) {
			kgid_t gid = make_kgid(ns->parent, id);
			/* An unprivileged gid self-map is only allowed
			 * after setgroups() has been disabled in @ns.
			 */
			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
			    gid_eq(gid, cred->egid))
				return true;
		}
	}

	/* Allow anyone to set a mapping that doesn't require privilege */
	if (!cap_valid(cap_setid))
		return true;

	/* Allow the specified ids if we have the appropriate capability
	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
	 * And the opener of the id file also has the appropriate capability.
	 */
	if (ns_capable(ns->parent, cap_setid) &&
	    file_ns_capable(file, ns->parent, cap_setid))
		return true;

	return false;
}
6164281a 1202
9cc46516
EB
1203int proc_setgroups_show(struct seq_file *seq, void *v)
1204{
1205 struct user_namespace *ns = seq->private;
6aa7de05 1206 unsigned long userns_flags = READ_ONCE(ns->flags);
9cc46516
EB
1207
1208 seq_printf(seq, "%s\n",
1209 (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
1210 "allow" : "deny");
1211 return 0;
1212}
1213
/*
 * Handle a write to /proc/<pid>/setgroups: accept exactly "allow" or
 * "deny" (optionally followed by whitespace) and update the namespace's
 * setgroups policy under userns_state_mutex.
 */
ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	char kbuf[8], *pos;
	bool setgroups_allowed;
	ssize_t ret;

	/* Only allow a very narrow range of strings to be written */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= sizeof(kbuf)))
		goto out;

	/* What was written? */
	ret = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';
	pos = kbuf;

	/* What is being requested? */
	ret = -EINVAL;
	if (strncmp(pos, "allow", 5) == 0) {
		pos += 5;
		setgroups_allowed = true;
	}
	else if (strncmp(pos, "deny", 4) == 0) {
		pos += 4;
		setgroups_allowed = false;
	}
	else
		goto out;

	/* Verify there is not trailing junk on the line */
	pos = skip_spaces(pos);
	if (*pos != '\0')
		goto out;

	ret = -EPERM;
	mutex_lock(&userns_state_mutex);
	if (setgroups_allowed) {
		/* Enabling setgroups after setgroups has been disabled
		 * is not allowed.
		 */
		if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
			goto out_unlock;
		/* Otherwise "allow" is a no-op: it is the default state. */
	} else {
		/* Permanently disabling setgroups after setgroups has
		 * been enabled by writing the gid_map is not allowed.
		 */
		if (ns->gid_map.nr_extents != 0)
			goto out_unlock;
		ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
	}
	mutex_unlock(&userns_state_mutex);

	/* Report a successful write */
	*ppos = count;
	ret = count;
out:
	return ret;
out_unlock:
	mutex_unlock(&userns_state_mutex);
	goto out;
}
1280
273d2c67
EB
1281bool userns_may_setgroups(const struct user_namespace *ns)
1282{
1283 bool allowed;
1284
f0d62aec 1285 mutex_lock(&userns_state_mutex);
273d2c67
EB
1286 /* It is not safe to use setgroups until a gid mapping in
1287 * the user namespace has been established.
1288 */
1289 allowed = ns->gid_map.nr_extents != 0;
9cc46516
EB
1290 /* Is setgroups allowed? */
1291 allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
f0d62aec 1292 mutex_unlock(&userns_state_mutex);
273d2c67
EB
1293
1294 return allowed;
1295}
1296
d07b846f 1297/*
a2b42626
EB
1298 * Returns true if @child is the same namespace or a descendant of
1299 * @ancestor.
d07b846f 1300 */
a2b42626
EB
1301bool in_userns(const struct user_namespace *ancestor,
1302 const struct user_namespace *child)
1303{
1304 const struct user_namespace *ns;
1305 for (ns = child; ns->level > ancestor->level; ns = ns->parent)
1306 ;
1307 return (ns == ancestor);
1308}
1309
/* Is the caller's user namespace @target_ns or a descendant of it? */
bool current_in_userns(const struct user_namespace *target_ns)
{
	return in_userns(target_ns, current_user_ns());
}
EXPORT_SYMBOL(current_in_userns);
d07b846f 1315
/* Map an embedded ns_common back to its containing user_namespace. */
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
	return container_of(ns, struct user_namespace, ns);
}
1320
/*
 * Take a reference on @task's user namespace (for /proc/<pid>/ns/user).
 * Returns the embedded ns_common, or NULL if no namespace was obtained.
 */
static struct ns_common *userns_get(struct task_struct *task)
{
	struct user_namespace *user_ns;

	/* RCU keeps the task's cred stable while we grab our reference. */
	rcu_read_lock();
	user_ns = get_user_ns(__task_cred(task)->user_ns);
	rcu_read_unlock();

	return user_ns ? &user_ns->ns : NULL;
}
1331
/* Drop the reference taken by userns_get(). */
static void userns_put(struct ns_common *ns)
{
	put_user_ns(to_user_ns(ns));
}
1336
/*
 * setns(2) backend: switch the credentials in @nsset over to @ns.
 * All permission checks happen before any state is modified.
 */
static int userns_install(struct nsset *nsset, struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);
	struct cred *cred;

	/* Don't allow gaining capabilities by reentering
	 * the same user namespace.
	 */
	if (user_ns == current_user_ns())
		return -EINVAL;

	/* Tasks that share a thread group must share a user namespace */
	if (!thread_group_empty(current))
		return -EINVAL;

	/* NOTE(review): refuse while fs_struct is shared — presumably so
	 * the namespace switch cannot affect another task's root/cwd;
	 * confirm against the unshare/setns call paths.
	 */
	if (current->fs->users != 1)
		return -EINVAL;

	/* Require admin rights over the target namespace itself. */
	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cred = nsset_cred(nsset);
	if (!cred)
		return -EINVAL;

	/* Swap the cred's namespace reference to the target. */
	put_user_ns(cred->user_ns);
	set_cred_user_ns(cred, get_user_ns(user_ns));

	if (set_cred_ucounts(cred) < 0)
		return -EINVAL;

	return 0;
}
1370
bcac25a5
AV
1371struct ns_common *ns_get_owner(struct ns_common *ns)
1372{
1373 struct user_namespace *my_user_ns = current_user_ns();
1374 struct user_namespace *owner, *p;
1375
1376 /* See if the owner is in the current user namespace */
1377 owner = p = ns->ops->owner(ns);
1378 for (;;) {
1379 if (!p)
1380 return ERR_PTR(-EPERM);
1381 if (p == my_user_ns)
1382 break;
1383 p = p->parent;
1384 }
1385
1386 return &get_user_ns(owner)->ns;
1387}
1388
/* A user namespace is owned by its parent user namespace. */
static struct user_namespace *userns_owner(struct ns_common *ns)
{
	return to_user_ns(ns)->parent;
}
1393
cde1975b
EB
/* nsfs operations backing /proc/<pid>/ns/user. */
const struct proc_ns_operations userns_operations = {
	.name		= "user",
	.type		= CLONE_NEWUSER,
	.get		= userns_get,
	.put		= userns_put,
	.install	= userns_install,
	.owner		= userns_owner,
	.get_parent	= ns_get_owner,
};
1403
6164281a
PE
/* Create the slab cache used for struct user_namespace allocations. */
static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC | SLAB_ACCOUNT);
	return 0;
}
subsys_initcall(user_namespaces_init);