]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - kernel/user_namespace.c
UBUNTU: SAUCE: LSM stacking: LSM: manage credential security blobs
[mirror_ubuntu-artful-kernel.git] / kernel / user_namespace.c
1 /*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License as
4 * published by the Free Software Foundation, version 2 of the
5 * License.
6 */
7
8 #include <linux/export.h>
9 #include <linux/nsproxy.h>
10 #include <linux/slab.h>
11 #include <linux/sched/signal.h>
12 #include <linux/user_namespace.h>
13 #include <linux/proc_ns.h>
14 #include <linux/highuid.h>
15 #include <linux/cred.h>
16 #include <linux/securebits.h>
17 #include <linux/keyctl.h>
18 #include <linux/key-type.h>
19 #include <keys/user-type.h>
20 #include <linux/seq_file.h>
21 #include <linux/fs.h>
22 #include <linux/uaccess.h>
23 #include <linux/ctype.h>
24 #include <linux/projid.h>
25 #include <linux/fs_struct.h>
26
/*
 * sysctl determining whether unprivileged users may unshare a new
 * userns. Allowed by default (1); 0 restricts user namespace
 * creation to privileged tasks.
 */
int unprivileged_userns_clone = 1;

static struct kmem_cache *user_ns_cachep __read_mostly;
/* Serializes all map writes and the USERNS_* flag transitions. */
static DEFINE_MUTEX(userns_state_mutex);

static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *map);
static void free_user_ns(struct work_struct *work);
/* Charge one user namespace against @uid's UCOUNT_USER_NAMESPACES count
 * in @ns; returns NULL on failure (callers treat NULL as "limit hit").
 */
static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
{
	return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
}
45
46 static void dec_user_namespaces(struct ucounts *ucounts)
47 {
48 return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
49 }
50
/* Reset @cred's capability and security state for entry into @user_ns.
 *
 * Start with the same capabilities as init but useless for doing
 * anything as the capabilities are bound to the new user namespace.
 */
static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
{
	cred->securebits = SECUREBITS_DEFAULT;
	cred->cap_inheritable = CAP_EMPTY_SET;
	cred->cap_permitted = CAP_FULL_SET;
	cred->cap_effective = CAP_FULL_SET;
	cred->cap_ambient = CAP_EMPTY_SET;
	cred->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
	/* Drop any request_key() authorisation key held by the old creds. */
	key_put(cred->request_key_auth);
	cred->request_key_auth = NULL;
#endif
	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
	cred->user_ns = user_ns;
}
69
/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 */
int create_user_ns(struct cred *new)
{
	struct user_namespace *ns, *parent_ns = new->user_ns;
	kuid_t owner = new->euid;
	kgid_t group = new->egid;
	struct ucounts *ucounts;
	int ret, i;

	/* Bound the nesting depth of user namespaces. */
	ret = -ENOSPC;
	if (parent_ns->level > 32)
		goto fail;

	ucounts = inc_user_namespaces(parent_ns, owner);
	if (!ucounts)
		goto fail;

	/*
	 * Verify that we can not violate the policy of which files
	 * may be accessed that is specified by the root directory,
	 * by verifying that the root directory is at the root of the
	 * mount namespace which allows all files to be accessed.
	 */
	ret = -EPERM;
	if (current_chrooted())
		goto fail_dec;

	/* The creator needs a mapping in the parent user namespace
	 * or else we won't be able to reasonably tell userspace who
	 * created a user_namespace.
	 */
	ret = -EPERM;
	if (!kuid_has_mapping(parent_ns, owner) ||
	    !kgid_has_mapping(parent_ns, group))
		goto fail_dec;

	ret = -ENOMEM;
	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
	if (!ns)
		goto fail_dec;

	ret = ns_alloc_inum(&ns->ns);
	if (ret)
		goto fail_free;
	ns->ns.ops = &userns_operations;

	atomic_set(&ns->count, 1);
	/* Leave the new->user_ns reference with the new user namespace. */
	ns->parent = parent_ns;
	ns->level = parent_ns->level + 1;
	ns->owner = owner;
	ns->group = group;
	INIT_WORK(&ns->work, free_user_ns);
	/* Start with unlimited ucounts in the new namespace. */
	for (i = 0; i < UCOUNT_COUNTS; i++) {
		ns->ucount_max[i] = INT_MAX;
	}
	ns->ucounts = ucounts;

	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
	mutex_lock(&userns_state_mutex);
	ns->flags = parent_ns->flags;
	mutex_unlock(&userns_state_mutex);

#ifdef CONFIG_PERSISTENT_KEYRINGS
	init_rwsem(&ns->persistent_keyring_register_sem);
#endif
	ret = -ENOMEM;
	if (!setup_userns_sysctls(ns))
		goto fail_keyring;

	set_cred_user_ns(new, ns);
	return 0;
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
	key_put(ns->persistent_keyring_register);
#endif
	ns_free_inum(&ns->ns);
fail_free:
	kmem_cache_free(user_ns_cachep, ns);
fail_dec:
	dec_user_namespaces(ucounts);
fail:
	return ret;
}
161
162 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
163 {
164 struct cred *cred;
165 int err = -ENOMEM;
166
167 if (!(unshare_flags & CLONE_NEWUSER))
168 return 0;
169
170 cred = prepare_creds();
171 if (cred) {
172 err = create_user_ns(cred);
173 if (err)
174 put_cred(cred);
175 else
176 *new_cred = cred;
177 }
178
179 return err;
180 }
181
/* Deferred destructor, run from the work item scheduled by
 * __put_user_ns().
 *
 * Frees the namespace and then walks up the parent chain: releasing a
 * child drops a reference on its parent, and each parent whose count
 * hits zero is freed in the same pass.
 */
static void free_user_ns(struct work_struct *work)
{
	struct user_namespace *parent, *ns =
		container_of(work, struct user_namespace, work);

	do {
		struct ucounts *ucounts = ns->ucounts;
		parent = ns->parent;
		retire_userns_sysctls(ns);
#ifdef CONFIG_PERSISTENT_KEYRINGS
		key_put(ns->persistent_keyring_register);
#endif
		ns_free_inum(&ns->ns);
		kmem_cache_free(user_ns_cachep, ns);
		/* Uncharge against the *parent's* ucounts saved above. */
		dec_user_namespaces(ucounts);
		ns = parent;
	} while (atomic_dec_and_test(&parent->count));
}
200
/* Called when the last reference to @ns is dropped; defer the actual
 * teardown to process context via the workqueue (see free_user_ns()).
 */
void __put_user_ns(struct user_namespace *ns)
{
	schedule_work(&ns->work);
}
EXPORT_SYMBOL(__put_user_ns);
206
207 static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
208 {
209 unsigned idx, extents;
210 u32 first, last, id2;
211
212 id2 = id + count - 1;
213
214 /* Find the matching extent */
215 extents = map->nr_extents;
216 smp_rmb();
217 for (idx = 0; idx < extents; idx++) {
218 first = map->extent[idx].first;
219 last = first + map->extent[idx].count - 1;
220 if (id >= first && id <= last &&
221 (id2 >= first && id2 <= last))
222 break;
223 }
224 /* Map the id or note failure */
225 if (idx < extents)
226 id = (id - first) + map->extent[idx].lower_first;
227 else
228 id = (u32) -1;
229
230 return id;
231 }
232
233 static u32 map_id_down(struct uid_gid_map *map, u32 id)
234 {
235 unsigned idx, extents;
236 u32 first, last;
237
238 /* Find the matching extent */
239 extents = map->nr_extents;
240 smp_rmb();
241 for (idx = 0; idx < extents; idx++) {
242 first = map->extent[idx].first;
243 last = first + map->extent[idx].count - 1;
244 if (id >= first && id <= last)
245 break;
246 }
247 /* Map the id or note failure */
248 if (idx < extents)
249 id = (id - first) + map->extent[idx].lower_first;
250 else
251 id = (u32) -1;
252
253 return id;
254 }
255
256 static u32 map_id_up(struct uid_gid_map *map, u32 id)
257 {
258 unsigned idx, extents;
259 u32 first, last;
260
261 /* Find the matching extent */
262 extents = map->nr_extents;
263 smp_rmb();
264 for (idx = 0; idx < extents; idx++) {
265 first = map->extent[idx].lower_first;
266 last = first + map->extent[idx].count - 1;
267 if (id >= first && id <= last)
268 break;
269 }
270 /* Map the id or note failure */
271 if (idx < extents)
272 id = (id - first) + map->extent[idx].first;
273 else
274 id = (u32) -1;
275
276 return id;
277 }
278
/**
 * make_kuid - Map a user-namespace uid pair into a kuid.
 * @ns: User namespace that the uid is in
 * @uid: User identifier
 *
 * Maps a user-namespace uid pair into a kernel internal kuid,
 * and returns that kuid.
 *
 * When there is no mapping defined for the user-namespace uid
 * pair INVALID_UID is returned. Callers are expected to test
 * for and handle INVALID_UID being returned. INVALID_UID
 * may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
	/* Map the uid to a global kernel uid; an unmapped uid yields
	 * (u32)-1, i.e. INVALID_UID. */
	return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);
298
/**
 * from_kuid - Create a uid from a kuid user-namespace pair.
 * @targ: The user namespace we want a uid in.
 * @kuid: The kernel internal uid to start with.
 *
 * Map @kuid into the user-namespace specified by @targ and
 * return the resulting uid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
	/* Map the uid from a global kernel uid */
	return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);
317
318 /**
319 * from_kuid_munged - Create a uid from a kuid user-namespace pair.
320 * @targ: The user namespace we want a uid in.
321 * @kuid: The kernel internal uid to start with.
322 *
323 * Map @kuid into the user-namespace specified by @targ and
324 * return the resulting uid.
325 *
326 * There is always a mapping into the initial user_namespace.
327 *
328 * Unlike from_kuid from_kuid_munged never fails and always
329 * returns a valid uid. This makes from_kuid_munged appropriate
330 * for use in syscalls like stat and getuid where failing the
331 * system call and failing to provide a valid uid are not an
332 * options.
333 *
334 * If @kuid has no mapping in @targ overflowuid is returned.
335 */
336 uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
337 {
338 uid_t uid;
339 uid = from_kuid(targ, kuid);
340
341 if (uid == (uid_t) -1)
342 uid = overflowuid;
343 return uid;
344 }
345 EXPORT_SYMBOL(from_kuid_munged);
346
/**
 * make_kgid - Map a user-namespace gid pair into a kgid.
 * @ns: User namespace that the gid is in
 * @gid: group identifier
 *
 * Maps a user-namespace gid pair into a kernel internal kgid,
 * and returns that kgid.
 *
 * When there is no mapping defined for the user-namespace gid
 * pair INVALID_GID is returned. Callers are expected to test
 * for and handle INVALID_GID being returned. INVALID_GID may be
 * tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
	/* Map the gid to a global kernel gid; an unmapped gid yields
	 * (u32)-1, i.e. INVALID_GID. */
	return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);
366
/**
 * from_kgid - Create a gid from a kgid user-namespace pair.
 * @targ: The user namespace we want a gid in.
 * @kgid: The kernel internal gid to start with.
 *
 * Map @kgid into the user-namespace specified by @targ and
 * return the resulting gid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
	/* Map the gid from a global kernel gid */
	return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);
385
386 /**
387 * from_kgid_munged - Create a gid from a kgid user-namespace pair.
388 * @targ: The user namespace we want a gid in.
389 * @kgid: The kernel internal gid to start with.
390 *
391 * Map @kgid into the user-namespace specified by @targ and
392 * return the resulting gid.
393 *
394 * There is always a mapping into the initial user_namespace.
395 *
396 * Unlike from_kgid from_kgid_munged never fails and always
397 * returns a valid gid. This makes from_kgid_munged appropriate
398 * for use in syscalls like stat and getgid where failing the
399 * system call and failing to provide a valid gid are not options.
400 *
401 * If @kgid has no mapping in @targ overflowgid is returned.
402 */
403 gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
404 {
405 gid_t gid;
406 gid = from_kgid(targ, kgid);
407
408 if (gid == (gid_t) -1)
409 gid = overflowgid;
410 return gid;
411 }
412 EXPORT_SYMBOL(from_kgid_munged);
413
/**
 * make_kprojid - Map a user-namespace projid pair into a kprojid.
 * @ns: User namespace that the projid is in
 * @projid: Project identifier
 *
 * Maps a user-namespace projid pair into a kernel internal kprojid,
 * and returns that kprojid.
 *
 * When there is no mapping defined for the user-namespace projid
 * pair INVALID_PROJID is returned. Callers are expected to test
 * for and handle INVALID_PROJID being returned. INVALID_PROJID
 * may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
	/* Map the projid to a global kernel projid */
	return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);
433
/**
 * from_kprojid - Create a projid from a kprojid user-namespace pair.
 * @targ: The user namespace we want a projid in.
 * @kprojid: The kernel internal project identifier to start with.
 *
 * Map @kprojid into the user-namespace specified by @targ and
 * return the resulting projid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
	/* Map the projid from a global kernel projid */
	return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);
452
453 /**
454 * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair.
455 * @targ: The user namespace we want a projid in.
456 * @kprojid: The kernel internal projid to start with.
457 *
458 * Map @kprojid into the user-namespace specified by @targ and
459 * return the resulting projid.
460 *
461 * There is always a mapping into the initial user_namespace.
462 *
463 * Unlike from_kprojid from_kprojid_munged never fails and always
464 * returns a valid projid. This makes from_kprojid_munged
465 * appropriate for use in syscalls like stat and where
466 * failing the system call and failing to provide a valid projid are
467 * not an options.
468 *
469 * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
470 */
471 projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
472 {
473 projid_t projid;
474 projid = from_kprojid(targ, kprojid);
475
476 if (projid == (projid_t) -1)
477 projid = OVERFLOW_PROJID;
478 return projid;
479 }
480 EXPORT_SYMBOL(from_kprojid_munged);
481
482
/* seq_file ->show() for /proc/<pid>/uid_map: print one extent as
 * "first lower count", with the lower id expressed relative to the
 * reader's namespace. */
static int uid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	uid_t lower;

	/* If the reader is in the mapped namespace itself, show lower
	 * ids relative to its parent instead. */
	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}
503
/* seq_file ->show() for /proc/<pid>/gid_map: print one extent as
 * "first lower count", with the lower id expressed relative to the
 * reader's namespace. */
static int gid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	gid_t lower;

	/* If the reader is in the mapped namespace itself, show lower
	 * ids relative to its parent instead. */
	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}
524
/* seq_file ->show() for /proc/<pid>/projid_map: print one extent as
 * "first lower count", with the lower id expressed relative to the
 * reader's namespace. */
static int projid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	projid_t lower;

	/* If the reader is in the mapped namespace itself, show lower
	 * ids relative to its parent instead. */
	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}
545
546 static void *m_start(struct seq_file *seq, loff_t *ppos,
547 struct uid_gid_map *map)
548 {
549 struct uid_gid_extent *extent = NULL;
550 loff_t pos = *ppos;
551
552 if (pos < map->nr_extents)
553 extent = &map->extent[pos];
554
555 return extent;
556 }
557
/* seq_file ->start() for /proc/<pid>/uid_map. */
static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->uid_map);
}
564
/* seq_file ->start() for /proc/<pid>/gid_map. */
static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->gid_map);
}
571
/* seq_file ->start() for /proc/<pid>/projid_map. */
static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->projid_map);
}
578
/* seq_file ->next(): advance the position and delegate to ->start(),
 * which returns the extent at the new position or NULL at the end. */
static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return seq->op->start(seq, pos);
}
584
/* seq_file ->stop(): nothing to release — ->start() hands out pointers
 * into the namespace's own map, not allocations. */
static void m_stop(struct seq_file *seq, void *v)
{
}
589
/* seq_file operation tables backing /proc/<pid>/{uid,gid,projid}_map. */
const struct seq_operations proc_uid_seq_operations = {
	.start = uid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = uid_m_show,
};

const struct seq_operations proc_gid_seq_operations = {
	.start = gid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = gid_m_show,
};

const struct seq_operations proc_projid_seq_operations = {
	.start = projid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = projid_m_show,
};
610
611 static bool mappings_overlap(struct uid_gid_map *new_map,
612 struct uid_gid_extent *extent)
613 {
614 u32 upper_first, lower_first, upper_last, lower_last;
615 unsigned idx;
616
617 upper_first = extent->first;
618 lower_first = extent->lower_first;
619 upper_last = upper_first + extent->count - 1;
620 lower_last = lower_first + extent->count - 1;
621
622 for (idx = 0; idx < new_map->nr_extents; idx++) {
623 u32 prev_upper_first, prev_lower_first;
624 u32 prev_upper_last, prev_lower_last;
625 struct uid_gid_extent *prev;
626
627 prev = &new_map->extent[idx];
628
629 prev_upper_first = prev->first;
630 prev_lower_first = prev->lower_first;
631 prev_upper_last = prev_upper_first + prev->count - 1;
632 prev_lower_last = prev_lower_first + prev->count - 1;
633
634 /* Does the upper range intersect a previous extent? */
635 if ((prev_upper_first <= upper_last) &&
636 (prev_upper_last >= upper_first))
637 return true;
638
639 /* Does the lower range intersect a previous extent? */
640 if ((prev_lower_first <= lower_last) &&
641 (prev_lower_last >= lower_first))
642 return true;
643 }
644 return false;
645 }
646
/*
 * map_write - parse and install an id map written to /proc/<pid>/*_map.
 * @file:	opened map file; its opener's creds gate permission
 * @buf:	user buffer of "first lower_first count" lines
 * @count:	bytes in @buf (must be < PAGE_SIZE)
 * @ppos:	file position; writes are only accepted at offset 0
 * @cap_setid:	capability the writer needs (CAP_SETUID/CAP_SETGID,
 *		or an invalid cap for the projid map)
 * @map:	the map being written; writable at most once
 * @parent_map:	parent namespace's map used to translate lower ids
 *
 * Returns the number of bytes consumed, or a negative errno.
 */
static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct uid_gid_map new_map;
	unsigned idx;
	struct uid_gid_extent *extent = NULL;
	char *kbuf = NULL, *pos, *next_line;
	ssize_t ret = -EINVAL;

	/*
	 * The userns_state_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents. The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this smp_wmb() is used to guarantee the write
	 * order and smp_rmb() guarantees that we don't have crazy
	 * architectures returning stale data.
	 */
	mutex_lock(&userns_state_mutex);

	ret = -EPERM;
	/* Only allow one successful write to the map */
	if (map->nr_extents != 0)
		goto out;

	/*
	 * Adjusting namespace settings requires capabilities on the target.
	 */
	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
		goto out;

	/* Only allow < page size writes at the beginning of the file */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		goto out;

	/* Slurp in the user data */
	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf)) {
		ret = PTR_ERR(kbuf);
		kbuf = NULL;
		goto out;
	}

	/* Parse the user data */
	ret = -EINVAL;
	pos = kbuf;
	new_map.nr_extents = 0;
	for (; pos; pos = next_line) {
		extent = &new_map.extent[new_map.nr_extents];

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			if (*next_line == '\0')
				next_line = NULL;
		}

		pos = skip_spaces(pos);
		extent->first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->lower_first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->count = simple_strtoul(pos, &pos, 10);
		if (*pos && !isspace(*pos))
			goto out;

		/* Verify there is not trailing junk on the line */
		pos = skip_spaces(pos);
		if (*pos != '\0')
			goto out;

		/* Verify we have been given valid starting values */
		if ((extent->first == (u32) -1) ||
		    (extent->lower_first == (u32) -1))
			goto out;

		/* Verify count is not zero and does not cause the
		 * extent to wrap
		 */
		if ((extent->first + extent->count) <= extent->first)
			goto out;
		if ((extent->lower_first + extent->count) <=
		     extent->lower_first)
			goto out;

		/* Do the ranges in extent overlap any previous extents? */
		if (mappings_overlap(&new_map, extent))
			goto out;

		new_map.nr_extents++;

		/* Fail if the file contains too many extents */
		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
		    (next_line != NULL))
			goto out;
	}
	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	ret = -EPERM;
	/* Validate the user is allowed to use user id's mapped to. */
	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
		goto out;

	/* Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (idx = 0; idx < new_map.nr_extents; idx++) {
		u32 lower_first;
		extent = &new_map.extent[idx];

		lower_first = map_id_range_down(parent_map,
						extent->lower_first,
						extent->count);

		/* Fail if we can not map the specified extent to
		 * the kernel global id space.
		 */
		if (lower_first == (u32) -1)
			goto out;

		extent->lower_first = lower_first;
	}

	/* Install the map: publish the extents before the count so a
	 * reader (paired smp_rmb()) never sees a count that covers
	 * unwritten extents. */
	memcpy(map->extent, new_map.extent,
	       new_map.nr_extents*sizeof(new_map.extent[0]));
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	ret = count;
out:
	mutex_unlock(&userns_state_mutex);
	kfree(kbuf);
	return ret;
}
809
810 ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
811 size_t size, loff_t *ppos)
812 {
813 struct seq_file *seq = file->private_data;
814 struct user_namespace *ns = seq->private;
815 struct user_namespace *seq_ns = seq_user_ns(seq);
816
817 if (!ns->parent)
818 return -EPERM;
819
820 if ((seq_ns != ns) && (seq_ns != ns->parent))
821 return -EPERM;
822
823 return map_write(file, buf, size, ppos, CAP_SETUID,
824 &ns->uid_map, &ns->parent->uid_map);
825 }
826
827 ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
828 size_t size, loff_t *ppos)
829 {
830 struct seq_file *seq = file->private_data;
831 struct user_namespace *ns = seq->private;
832 struct user_namespace *seq_ns = seq_user_ns(seq);
833
834 if (!ns->parent)
835 return -EPERM;
836
837 if ((seq_ns != ns) && (seq_ns != ns->parent))
838 return -EPERM;
839
840 return map_write(file, buf, size, ppos, CAP_SETGID,
841 &ns->gid_map, &ns->parent->gid_map);
842 }
843
844 ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
845 size_t size, loff_t *ppos)
846 {
847 struct seq_file *seq = file->private_data;
848 struct user_namespace *ns = seq->private;
849 struct user_namespace *seq_ns = seq_user_ns(seq);
850
851 if (!ns->parent)
852 return -EPERM;
853
854 if ((seq_ns != ns) && (seq_ns != ns->parent))
855 return -EPERM;
856
857 /* Anyone can set any valid project id no capability needed */
858 return map_write(file, buf, size, ppos, -1,
859 &ns->projid_map, &ns->parent->projid_map);
860 }
861
/*
 * new_idmap_permitted - may @file's opener install @new_map on @ns?
 * @file:	the opened map file; permission is judged by its f_cred
 * @ns:		the namespace whose map is being written
 * @cap_setid:	CAP_SETUID/CAP_SETGID, or an invalid cap for projid maps
 * @new_map:	the parsed map about to be installed
 */
static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *new_map)
{
	const struct cred *cred = file->f_cred;
	/* Don't allow mappings that would allow anything that wouldn't
	 * be allowed without the establishment of unprivileged mappings.
	 */
	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
	    uid_eq(ns->owner, cred->euid)) {
		u32 id = new_map->extent[0].lower_first;
		if (cap_setid == CAP_SETUID) {
			kuid_t uid = make_kuid(ns->parent, id);
			if (uid_eq(uid, cred->euid))
				return true;
		} else if (cap_setid == CAP_SETGID) {
			kgid_t gid = make_kgid(ns->parent, id);
			/* An unprivileged gid self-map is only allowed
			 * after setgroups has been disabled for @ns. */
			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
			    gid_eq(gid, cred->egid))
				return true;
		}
	}

	/* Allow anyone to set a mapping that doesn't require privilege */
	if (!cap_valid(cap_setid))
		return true;

	/* Allow the specified ids if we have the appropriate capability
	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
	 * And the opener of the id file also had the appropriate capability.
	 */
	if (ns_capable(ns->parent, cap_setid) &&
	    file_ns_capable(file, ns->parent, cap_setid))
		return true;

	return false;
}
899
/* Show "allow" or "deny" for /proc/<pid>/setgroups.  ACCESS_ONCE
 * snapshots ns->flags, which is written under userns_state_mutex. */
int proc_setgroups_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	unsigned long userns_flags = ACCESS_ONCE(ns->flags);

	seq_printf(seq, "%s\n",
		   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
		   "allow" : "deny");
	return 0;
}
910
/*
 * Write handler for /proc/<pid>/setgroups: accepts exactly "allow" or
 * "deny" (optionally followed by whitespace).  "deny" permanently
 * clears USERNS_SETGROUPS_ALLOWED; it is refused once a gid map has
 * been installed, and a prior "deny" can never be re-allowed.
 */
ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	char kbuf[8], *pos;
	bool setgroups_allowed;
	ssize_t ret;

	/* Only allow a very narrow range of strings to be written */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= sizeof(kbuf)))
		goto out;

	/* What was written? */
	ret = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';
	pos = kbuf;

	/* What is being requested? */
	ret = -EINVAL;
	if (strncmp(pos, "allow", 5) == 0) {
		pos += 5;
		setgroups_allowed = true;
	}
	else if (strncmp(pos, "deny", 4) == 0) {
		pos += 4;
		setgroups_allowed = false;
	}
	else
		goto out;

	/* Verify there is not trailing junk on the line */
	pos = skip_spaces(pos);
	if (*pos != '\0')
		goto out;

	ret = -EPERM;
	mutex_lock(&userns_state_mutex);
	if (setgroups_allowed) {
		/* Enabling setgroups after setgroups has been disabled
		 * is not allowed.
		 */
		if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
			goto out_unlock;
	} else {
		/* Permanently disabling setgroups after setgroups has
		 * been enabled by writing the gid_map is not allowed.
		 */
		if (ns->gid_map.nr_extents != 0)
			goto out_unlock;
		ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
	}
	mutex_unlock(&userns_state_mutex);

	/* Report a successful write */
	*ppos = count;
	ret = count;
out:
	return ret;
out_unlock:
	mutex_unlock(&userns_state_mutex);
	goto out;
}
977
978 bool userns_may_setgroups(const struct user_namespace *ns)
979 {
980 bool allowed;
981
982 mutex_lock(&userns_state_mutex);
983 /* It is not safe to use setgroups until a gid mapping in
984 * the user namespace has been established.
985 */
986 allowed = ns->gid_map.nr_extents != 0;
987 /* Is setgroups allowed? */
988 allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
989 mutex_unlock(&userns_state_mutex);
990
991 return allowed;
992 }
993
994 /*
995 * Returns true if @ns is the same namespace as or a descendant of
996 * @target_ns.
997 */
998 bool current_in_userns(const struct user_namespace *target_ns)
999 {
1000 struct user_namespace *ns;
1001 for (ns = current_user_ns(); ns; ns = ns->parent) {
1002 if (ns == target_ns)
1003 return true;
1004 }
1005 return false;
1006 }
1007 EXPORT_SYMBOL(current_in_userns);
1008
/* Convert a generic ns_common pointer back to its containing
 * user_namespace. */
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
	return container_of(ns, struct user_namespace, ns);
}
1013
/* proc_ns ->get(): take a reference on @task's user namespace. */
static struct ns_common *userns_get(struct task_struct *task)
{
	struct user_namespace *user_ns;

	/* __task_cred() must be read under RCU. */
	rcu_read_lock();
	user_ns = get_user_ns(__task_cred(task)->user_ns);
	rcu_read_unlock();

	return user_ns ? &user_ns->ns : NULL;
}
1024
/* proc_ns ->put(): drop the reference taken by userns_get(). */
static void userns_put(struct ns_common *ns)
{
	put_user_ns(to_user_ns(ns));
}
1029
/* proc_ns ->install() for setns(CLONE_NEWUSER): switch the calling
 * task's credentials into the target user namespace.
 * Returns 0 on success or a negative errno. */
static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);
	struct cred *cred;

	/* Don't allow gaining capabilities by reentering
	 * the same user namespace.
	 */
	if (user_ns == current_user_ns())
		return -EINVAL;

	/* Tasks that share a thread group must share a user namespace */
	if (!thread_group_empty(current))
		return -EINVAL;

	/* The task's fs_struct must not be shared with other tasks. */
	if (current->fs->users != 1)
		return -EINVAL;

	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cred = prepare_creds();
	if (!cred)
		return -ENOMEM;

	/* Swap the cred's namespace reference for one on @user_ns. */
	put_user_ns(cred->user_ns);
	set_cred_user_ns(cred, get_user_ns(user_ns));

	return commit_creds(cred);
}
1060
1061 struct ns_common *ns_get_owner(struct ns_common *ns)
1062 {
1063 struct user_namespace *my_user_ns = current_user_ns();
1064 struct user_namespace *owner, *p;
1065
1066 /* See if the owner is in the current user namespace */
1067 owner = p = ns->ops->owner(ns);
1068 for (;;) {
1069 if (!p)
1070 return ERR_PTR(-EPERM);
1071 if (p == my_user_ns)
1072 break;
1073 p = p->parent;
1074 }
1075
1076 return &get_user_ns(owner)->ns;
1077 }
1078
/* A user namespace is owned by its parent user namespace. */
static struct user_namespace *userns_owner(struct ns_common *ns)
{
	return to_user_ns(ns)->parent;
}
1083
/* proc_ns operations wiring /proc/<pid>/ns/user to the handlers above. */
const struct proc_ns_operations userns_operations = {
	.name		= "user",
	.type		= CLONE_NEWUSER,
	.get		= userns_get,
	.put		= userns_put,
	.install	= userns_install,
	.owner		= userns_owner,
	.get_parent	= ns_get_owner,
};
1093
/* Create the user_namespace slab cache at boot.  SLAB_PANIC makes a
 * failure here panic, so no error handling is needed. */
static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
	return 0;
}
subsys_initcall(user_namespaces_init);