/* net/core/net_namespace.c */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
#include <linux/sched/task.h>
#include <linux/uidgid.h>

#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 * Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

struct net init_net = {
        .count          = ATOMIC_INIT(1),
        .dev_base_head  = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;

#define MIN_PERNET_OPS_ID       \
        ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

static struct net_generic *net_alloc_generic(void)
{
        struct net_generic *ng;
        unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

        ng = kzalloc(generic_size, GFP_KERNEL);
        if (ng)
                ng->s.len = max_gen_ptrs;

        return ng;
}

static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
        struct net_generic *ng, *old_ng;

        BUG_ON(!mutex_is_locked(&net_mutex));
        BUG_ON(id < MIN_PERNET_OPS_ID);

        old_ng = rcu_dereference_protected(net->gen,
                                           lockdep_is_held(&net_mutex));
        if (old_ng->s.len > id) {
                old_ng->ptr[id] = data;
                return 0;
        }

        ng = net_alloc_generic();
        if (ng == NULL)
                return -ENOMEM;

        /*
         * Some synchronisation notes:
         *
         * net_generic() explores the net->gen array inside an RCU
         * read-side section. Besides, once set, the net->gen->ptr[x]
         * pointer never changes (see rules in netns/generic.h).
         *
         * That said, we simply duplicate this array and schedule
         * the old copy for kfree after a grace period.
         */

        memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
               (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
        ng->ptr[id] = data;

        rcu_assign_pointer(net->gen, ng);
        kfree_rcu(old_ng, s.rcu);
        return 0;
}
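
/*
 * For reference, the read side is net_generic() in
 * include/net/netns/generic.h, which does roughly (a sketch, not a
 * verbatim copy):
 *
 *        rcu_read_lock();
 *        ng = rcu_dereference(net->gen);
 *        ptr = ng->ptr[id];
 *        rcu_read_unlock();
 *
 * The returned pointer stays valid after the unlock because, once set,
 * ptr[id] never changes; the array is only ever grown by replacement.
 */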

static int ops_init(const struct pernet_operations *ops, struct net *net)
{
        int err = -ENOMEM;
        void *data = NULL;

        if (ops->id && ops->size) {
                data = kzalloc(ops->size, GFP_KERNEL);
                if (!data)
                        goto out;

                err = net_assign_generic(net, *ops->id, data);
                if (err)
                        goto cleanup;
        }
        err = 0;
        if (ops->init)
                err = ops->init(net);
        if (!err)
                return 0;

cleanup:
        kfree(data);

out:
        return err;
}

static void ops_free(const struct pernet_operations *ops, struct net *net)
{
        if (ops->id && ops->size)
                kfree(net_generic(net, *ops->id));
}

static void ops_exit_list(const struct pernet_operations *ops,
                          struct list_head *net_exit_list)
{
        struct net *net;

        if (ops->exit) {
                list_for_each_entry(net, net_exit_list, exit_list)
                        ops->exit(net);
        }
        if (ops->exit_batch)
                ops->exit_batch(net_exit_list);
}

static void ops_free_list(const struct pernet_operations *ops,
                          struct list_head *net_exit_list)
{
        struct net *net;

        if (ops->size && ops->id) {
                list_for_each_entry(net, net_exit_list, exit_list)
                        ops_free(ops, net);
        }
}

/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
        int min = 0, max = 0;

        if (reqid >= 0) {
                min = reqid;
                max = reqid + 1;
        }

        return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}

/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() will not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
        if (net_eq(net, peer))
                return id ? : NET_ID_ZERO;
        return 0;
}

/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
 * is set to true, thus the caller knows that the new id must be notified via
 * rtnl.
 */
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
        int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
        bool alloc_it = *alloc;

        *alloc = false;

        /* Magic value for id 0. */
        if (id == NET_ID_ZERO)
                return 0;
        if (id > 0)
                return id;

        if (alloc_it) {
                id = alloc_netid(net, peer, -1);
                *alloc = true;
                return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
        }

        return NETNSA_NSID_NOT_ASSIGNED;
}

/* should be called with nsid_lock held */
static int __peernet2id(struct net *net, struct net *peer)
{
        bool no = false;

        return __peernet2id_alloc(net, peer, &no);
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id, gfp_t gfp);
/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 */
int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
{
        bool alloc;
        int id;

        if (atomic_read(&net->count) == 0)
                return NETNSA_NSID_NOT_ASSIGNED;
        spin_lock_bh(&net->nsid_lock);
        alloc = atomic_read(&peer->count) != 0;
        id = __peernet2id_alloc(net, peer, &alloc);
        spin_unlock_bh(&net->nsid_lock);
        if (alloc && id >= 0)
                rtnl_net_notifyid(net, RTM_NEWNSID, id, gfp);
        return id;
}
EXPORT_SYMBOL_GPL(peernet2id_alloc);
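
/*
 * Typical use, sketched roughly after rtnetlink's IFLA_LINK_NETNSID
 * handling (not copied verbatim from it):
 *
 *        id = peernet2id_alloc(dev_net(dev), link_net, GFP_KERNEL);
 *        nla_put_s32(skb, IFLA_LINK_NETNSID, id);
 *
 * The first lookup allocates the id and announces it via RTM_NEWNSID.
 */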

/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
        int id;

        spin_lock_bh(&net->nsid_lock);
        id = __peernet2id(net, peer);
        spin_unlock_bh(&net->nsid_lock);
        return id;
}
EXPORT_SYMBOL(peernet2id);

/* This function returns true if the peer netns has an id assigned into the
 * current netns.
 */
bool peernet_has_id(struct net *net, struct net *peer)
{
        return peernet2id(net, peer) >= 0;
}

struct net *get_net_ns_by_id(struct net *net, int id)
{
        struct net *peer;

        if (id < 0)
                return NULL;

        rcu_read_lock();
        spin_lock_bh(&net->nsid_lock);
        peer = idr_find(&net->netns_ids, id);
        if (peer)
                peer = maybe_get_net(peer);
        spin_unlock_bh(&net->nsid_lock);
        rcu_read_unlock();

        return peer;
}
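
/*
 * Caller-side sketch: a non-NULL return carries a reference taken with
 * maybe_get_net() above, so it must be balanced with put_net():
 *
 *        peer = get_net_ns_by_id(net, nsid);
 *        if (peer) {
 *                ...
 *                put_net(peer);
 *        }
 */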

/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
        /* Must be called with net_mutex held */
        const struct pernet_operations *ops, *saved_ops;
        int error = 0;
        LIST_HEAD(net_exit_list);

        atomic_set(&net->count, 1);
        refcount_set(&net->passive, 1);
        get_random_bytes(&net->hash_mix, sizeof(u32));
        net->dev_base_seq = 1;
        net->user_ns = user_ns;
        idr_init(&net->netns_ids);
        spin_lock_init(&net->nsid_lock);

        list_for_each_entry(ops, &pernet_list, list) {
                error = ops_init(ops, net);
                if (error < 0)
                        goto out_undo;
        }
out:
        return error;

out_undo:
        /* Walk through the list backwards calling the exit functions
         * for the pernet modules whose init functions did not fail.
         */
        list_add(&net->exit_list, &net_exit_list);
        saved_ops = ops;
        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);

        ops = saved_ops;
        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);

        rcu_barrier();
        goto out;
}

static int __net_init net_defaults_init_net(struct net *net)
{
        net->core.sysctl_somaxconn = SOMAXCONN;
        return 0;
}

static struct pernet_operations net_defaults_ops = {
        .init = net_defaults_init_net,
};

static __init int net_defaults_init(void)
{
        if (register_pernet_subsys(&net_defaults_ops))
                panic("Cannot initialize net default settings");

        return 0;
}

core_initcall(net_defaults_init);

#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
        return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

static void dec_net_namespaces(struct ucounts *ucounts)
{
        dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}

static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
        struct net *net = NULL;
        struct net_generic *ng;

        ng = net_alloc_generic();
        if (!ng)
                goto out;

        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
        if (!net)
                goto out_free;

        rcu_assign_pointer(net->gen, ng);
out:
        return net;

out_free:
        kfree(ng);
        goto out;
}

static void net_free(struct net *net)
{
        kfree(rcu_access_pointer(net->gen));
        kmem_cache_free(net_cachep, net);
}

void net_drop_ns(void *p)
{
        struct net *ns = p;

        if (ns && refcount_dec_and_test(&ns->passive))
                net_free(ns);
}
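
/*
 * Rough summary of the two counters (not normative): net->count is the
 * active reference count driven by get_net()/put_net(), while net->passive
 * only pins the struct net memory itself, e.g. so lingering pointers can
 * still be compared with net_eq() while the namespace is being torn down.
 */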

struct net *copy_net_ns(unsigned long flags,
                        struct user_namespace *user_ns, struct net *old_net)
{
        struct ucounts *ucounts;
        struct net *net;
        int rv;

        if (!(flags & CLONE_NEWNET))
                return get_net(old_net);

        ucounts = inc_net_namespaces(user_ns);
        if (!ucounts)
                return ERR_PTR(-ENOSPC);

        net = net_alloc();
        if (!net) {
                dec_net_namespaces(ucounts);
                return ERR_PTR(-ENOMEM);
        }

        get_user_ns(user_ns);

        rv = mutex_lock_killable(&net_mutex);
        if (rv < 0) {
                net_free(net);
                dec_net_namespaces(ucounts);
                put_user_ns(user_ns);
                return ERR_PTR(rv);
        }

        net->ucounts = ucounts;
        rv = setup_net(net, user_ns);
        if (rv == 0) {
                rtnl_lock();
                list_add_tail_rcu(&net->list, &net_namespace_list);
                rtnl_unlock();
        }
        mutex_unlock(&net_mutex);
        if (rv < 0) {
                dec_net_namespaces(ucounts);
                put_user_ns(user_ns);
                net_drop_ns(net);
                return ERR_PTR(rv);
        }
        return net;
}
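
/*
 * copy_net_ns() is reached from clone(2)/unshare(2) with CLONE_NEWNET via
 * create_new_namespaces() in kernel/nsproxy.c; the call there is roughly:
 *
 *        new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
 */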

/**
 * net_ns_get_ownership - get sysfs ownership data for @net
 * @net: network namespace in question (can be NULL)
 * @uid: kernel user ID for sysfs objects
 * @gid: kernel group ID for sysfs objects
 *
 * Returns the uid/gid pair of root in the user namespace associated with the
 * given network namespace.
 */
void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
{
        if (net) {
                kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
                kgid_t ns_root_gid = make_kgid(net->user_ns, 0);

                if (uid_valid(ns_root_uid))
                        *uid = ns_root_uid;

                if (gid_valid(ns_root_gid))
                        *gid = ns_root_gid;
        } else {
                *uid = GLOBAL_ROOT_UID;
                *gid = GLOBAL_ROOT_GID;
        }
}
EXPORT_SYMBOL_GPL(net_ns_get_ownership);
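
/*
 * Usage sketch (the surrounding code is hypothetical, but this is the
 * shape of the net-sysfs callers):
 *
 *        kuid_t uid = GLOBAL_ROOT_UID;
 *        kgid_t gid = GLOBAL_ROOT_GID;
 *
 *        net_ns_get_ownership(net, &uid, &gid);
 *        ... pass uid/gid to the kobject/sysfs ownership hooks ...
 */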

static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

static void cleanup_net(struct work_struct *work)
{
        const struct pernet_operations *ops;
        struct net *net, *tmp;
        struct list_head net_kill_list;
        LIST_HEAD(net_exit_list);

        /* Atomically snapshot the list of namespaces to cleanup */
        spin_lock_irq(&cleanup_list_lock);
        list_replace_init(&cleanup_list, &net_kill_list);
        spin_unlock_irq(&cleanup_list_lock);

        mutex_lock(&net_mutex);

        /* Don't let anyone else find us. */
        rtnl_lock();
        list_for_each_entry(net, &net_kill_list, cleanup_list) {
                list_del_rcu(&net->list);
                list_add_tail(&net->exit_list, &net_exit_list);
                for_each_net(tmp) {
                        int id;

                        spin_lock_bh(&tmp->nsid_lock);
                        id = __peernet2id(tmp, net);
                        if (id >= 0)
                                idr_remove(&tmp->netns_ids, id);
                        spin_unlock_bh(&tmp->nsid_lock);
                        if (id >= 0)
                                rtnl_net_notifyid(tmp, RTM_DELNSID, id,
                                                  GFP_KERNEL);
                }
                spin_lock_bh(&net->nsid_lock);
                idr_destroy(&net->netns_ids);
                spin_unlock_bh(&net->nsid_lock);
        }
        rtnl_unlock();

        /*
         * Another CPU might be rcu-iterating the list, wait for it.
         * This needs to be before calling the exit() notifiers, so
         * the rcu_barrier() below isn't sufficient alone.
         */
        synchronize_rcu();

        /* Run all of the network namespace exit methods */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);

        /* Free the net generic variables */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);

        mutex_unlock(&net_mutex);

        /* Ensure there are no outstanding rcu callbacks using this
         * network namespace.
         */
        rcu_barrier();

        /* Finally it is safe to free my network namespace structure */
        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
                list_del_init(&net->exit_list);
                dec_net_namespaces(net->ucounts);
                put_user_ns(net->user_ns);
                net_drop_ns(net);
        }
}

/**
 * net_ns_barrier - wait until concurrent net_cleanup_work is done
 *
 * cleanup_net runs from work queue and will first remove namespaces
 * from the global list, then run net exit functions.
 *
 * Call this in module exit path to make sure that all netns
 * ->exit ops have been invoked before the function is removed.
 */
void net_ns_barrier(void)
{
        mutex_lock(&net_mutex);
        mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL(net_ns_barrier);
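
/*
 * Module-exit sketch matching the kernel-doc above (the foo_* names are
 * hypothetical):
 *
 *        static void __exit foo_exit(void)
 *        {
 *                unregister_pernet_subsys(&foo_net_ops);
 *                net_ns_barrier();
 *        }
 *
 * After net_ns_barrier() returns, no namespace can still be inside a
 * concurrently running cleanup_net().
 */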

static DECLARE_WORK(net_cleanup_work, cleanup_net);

void __put_net(struct net *net)
{
        /* Cleanup the network namespace in process context */
        unsigned long flags;

        spin_lock_irqsave(&cleanup_list_lock, flags);
        list_add(&net->cleanup_list, &cleanup_list);
        spin_unlock_irqrestore(&cleanup_list_lock, flags);

        queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
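
/*
 * For reference, put_net() in include/net/net_namespace.h is roughly:
 *
 *        if (atomic_dec_and_test(&net->count))
 *                __put_net(net);
 *
 * so the final put only queues cleanup_net(); nothing is torn down in the
 * caller's context.
 */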

struct net *get_net_ns_by_fd(int fd)
{
        struct file *file;
        struct ns_common *ns;
        struct net *net;

        file = proc_ns_fget(fd);
        if (IS_ERR(file))
                return ERR_CAST(file);

        ns = get_proc_ns(file_inode(file));
        if (ns->ops == &netns_operations)
                net = get_net(container_of(ns, struct net, ns));
        else
                net = ERR_PTR(-EINVAL);

        fput(file);
        return net;
}

#else
struct net *get_net_ns_by_fd(int fd)
{
        return ERR_PTR(-EINVAL);
}
#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
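
/*
 * Caller sketch: the fd typically comes from opening /proc/<pid>/ns/net
 * or a bind-mounted /var/run/netns/<name> file, as iproute2 does:
 *
 *        net = get_net_ns_by_fd(fd);
 *        if (IS_ERR(net))
 *                return PTR_ERR(net);
 *        ...
 *        put_net(net);
 */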

struct net *get_net_ns_by_pid(pid_t pid)
{
        struct task_struct *tsk;
        struct net *net;

        /* Lookup the network namespace */
        net = ERR_PTR(-ESRCH);
        rcu_read_lock();
        tsk = find_task_by_vpid(pid);
        if (tsk) {
                struct nsproxy *nsproxy;

                task_lock(tsk);
                nsproxy = tsk->nsproxy;
                if (nsproxy)
                        net = get_net(nsproxy->net_ns);
                task_unlock(tsk);
        }
        rcu_read_unlock();
        return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
        net->ns.ops = &netns_operations;
#endif
        return ns_alloc_inum(&net->ns);
}

static __net_exit void net_ns_net_exit(struct net *net)
{
        ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
        .init = net_ns_net_init,
        .exit = net_ns_net_exit,
};

static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
        [NETNSA_NONE]   = { .type = NLA_UNSPEC },
        [NETNSA_NSID]   = { .type = NLA_S32 },
        [NETNSA_PID]    = { .type = NLA_U32 },
        [NETNSA_FD]     = { .type = NLA_U32 },
};

static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
                          struct netlink_ext_ack *extack)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr *tb[NETNSA_MAX + 1];
        struct nlattr *nla;
        struct net *peer;
        int nsid, err;

        err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
                          rtnl_net_policy, extack);
        if (err < 0)
                return err;
        if (!tb[NETNSA_NSID]) {
                NL_SET_ERR_MSG(extack, "nsid is missing");
                return -EINVAL;
        }
        nsid = nla_get_s32(tb[NETNSA_NSID]);

        if (tb[NETNSA_PID]) {
                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
                nla = tb[NETNSA_PID];
        } else if (tb[NETNSA_FD]) {
                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
                nla = tb[NETNSA_FD];
        } else {
                NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
                return -EINVAL;
        }
        if (IS_ERR(peer)) {
                NL_SET_BAD_ATTR(extack, nla);
                NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
                return PTR_ERR(peer);
        }

        spin_lock_bh(&net->nsid_lock);
        if (__peernet2id(net, peer) >= 0) {
                spin_unlock_bh(&net->nsid_lock);
                err = -EEXIST;
                NL_SET_BAD_ATTR(extack, nla);
                NL_SET_ERR_MSG(extack,
                               "Peer netns already has a nsid assigned");
                goto out;
        }

        err = alloc_netid(net, peer, nsid);
        spin_unlock_bh(&net->nsid_lock);
        if (err >= 0) {
                rtnl_net_notifyid(net, RTM_NEWNSID, err, GFP_KERNEL);
                err = 0;
        } else if (err == -ENOSPC && nsid >= 0) {
                err = -EEXIST;
                NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
                NL_SET_ERR_MSG(extack, "The specified nsid is already used");
        }
out:
        put_net(peer);
        return err;
}
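
/*
 * For orientation: "ip netns set <name> <nsid>" sends an RTM_NEWNSID
 * request carrying NETNSA_FD (or NETNSA_PID) plus the requested
 * NETNSA_NSID, which is handled by rtnl_net_newid() above.
 */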

static int rtnl_net_get_size(void)
{
        return NLMSG_ALIGN(sizeof(struct rtgenmsg))
               + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
               ;
}

static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
                         int cmd, struct net *net, int nsid)
{
        struct nlmsghdr *nlh;
        struct rtgenmsg *rth;

        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
        if (!nlh)
                return -EMSGSIZE;

        rth = nlmsg_data(nlh);
        rth->rtgen_family = AF_UNSPEC;

        if (nla_put_s32(skb, NETNSA_NSID, nsid))
                goto nla_put_failure;

        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
                          struct netlink_ext_ack *extack)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr *tb[NETNSA_MAX + 1];
        struct nlattr *nla;
        struct sk_buff *msg;
        struct net *peer;
        int err, id;

        err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
                          rtnl_net_policy, extack);
        if (err < 0)
                return err;
        if (tb[NETNSA_PID]) {
                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
                nla = tb[NETNSA_PID];
        } else if (tb[NETNSA_FD]) {
                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
                nla = tb[NETNSA_FD];
        } else {
                NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
                return -EINVAL;
        }

        if (IS_ERR(peer)) {
                NL_SET_BAD_ATTR(extack, nla);
                NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
                return PTR_ERR(peer);
        }

        msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
        if (!msg) {
                err = -ENOMEM;
                goto out;
        }

        id = peernet2id(net, peer);
        err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
                            RTM_NEWNSID, net, id);
        if (err < 0)
                goto err_out;

        err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
        goto out;

err_out:
        nlmsg_free(msg);
out:
        put_net(peer);
        return err;
}

struct rtnl_net_dump_cb {
        struct net *net;
        struct sk_buff *skb;
        struct netlink_callback *cb;
        int idx;
        int s_idx;
};

static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
        struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
        int ret;

        if (net_cb->idx < net_cb->s_idx)
                goto cont;

        ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
                            net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
                            RTM_NEWNSID, net_cb->net, id);
        if (ret < 0)
                return ret;

cont:
        net_cb->idx++;
        return 0;
}

static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct rtnl_net_dump_cb net_cb = {
                .net = net,
                .skb = skb,
                .cb = cb,
                .idx = 0,
                .s_idx = cb->args[0],
        };

        spin_lock_bh(&net->nsid_lock);
        idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
        spin_unlock_bh(&net->nsid_lock);

        cb->args[0] = net_cb.idx;
        return skb->len;
}
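
/*
 * For orientation: "ip netns list-id" issues an RTM_GETNSID dump, which is
 * served by rtnl_net_dumpid() above, one RTM_NEWNSID message per assigned
 * id.
 */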

static void rtnl_net_notifyid(struct net *net, int cmd, int id, gfp_t gfp)
{
        struct sk_buff *msg;
        int err = -ENOMEM;

        msg = nlmsg_new(rtnl_net_get_size(), gfp);
        if (!msg)
                goto out;

        err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
        if (err < 0)
                goto err_out;

        rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, gfp);
        return;

err_out:
        nlmsg_free(msg);
out:
        rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}

static int __init net_ns_init(void)
{
        struct net_generic *ng;

#ifdef CONFIG_NET_NS
        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
                                       SMP_CACHE_BYTES,
                                       SLAB_PANIC, NULL);

        /* Create workqueue for cleanup */
        netns_wq = create_singlethread_workqueue("netns");
        if (!netns_wq)
                panic("Could not create netns workq");
#endif

        ng = net_alloc_generic();
        if (!ng)
                panic("Could not allocate generic netns");

        rcu_assign_pointer(init_net.gen, ng);

        mutex_lock(&net_mutex);
        if (setup_net(&init_net, &init_user_ns))
                panic("Could not setup the initial network namespace");

        init_net_initialized = true;

        rtnl_lock();
        list_add_tail_rcu(&init_net.list, &net_namespace_list);
        rtnl_unlock();

        mutex_unlock(&net_mutex);

        if (register_pernet_subsys(&net_ns_ops))
                panic("Could not register network namespace subsystems");

        rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
                      RTNL_FLAG_DOIT_UNLOCKED);
        rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
                      RTNL_FLAG_DOIT_UNLOCKED);

        return 0;
}

pure_initcall(net_ns_init);

#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
{
        struct net *net;
        int error;
        LIST_HEAD(net_exit_list);

        list_add_tail(&ops->list, list);
        if (ops->init || (ops->id && ops->size)) {
                for_each_net(net) {
                        error = ops_init(ops, net);
                        if (error)
                                goto out_undo;
                        list_add_tail(&net->exit_list, &net_exit_list);
                }
        }
        return 0;

out_undo:
        /* On error, clean up all the namespaces we initialized */
        list_del(&ops->list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
        return error;
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
        struct net *net;
        LIST_HEAD(net_exit_list);

        list_del(&ops->list);
        for_each_net(net)
                list_add_tail(&net->exit_list, &net_exit_list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
}

#else

static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
{
        if (!init_net_initialized) {
                list_add_tail(&ops->list, list);
                return 0;
        }

        return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
        if (!init_net_initialized) {
                list_del(&ops->list);
        } else {
                LIST_HEAD(net_exit_list);

                list_add(&init_net.exit_list, &net_exit_list);
                ops_exit_list(ops, &net_exit_list);
                ops_free_list(ops, &net_exit_list);
        }
}

#endif /* CONFIG_NET_NS */

static DEFINE_IDA(net_generic_ids);

static int register_pernet_operations(struct list_head *list,
                                      struct pernet_operations *ops)
{
        int error;

        if (ops->id) {
again:
                error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID, ops->id);
                if (error < 0) {
                        if (error == -EAGAIN) {
                                ida_pre_get(&net_generic_ids, GFP_KERNEL);
                                goto again;
                        }
                        return error;
                }
                max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
        }
        error = __register_pernet_operations(list, ops);
        if (error) {
                rcu_barrier();
                if (ops->id)
                        ida_remove(&net_generic_ids, *ops->id);
        }

        return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
        __unregister_pernet_operations(ops);
        rcu_barrier();
        if (ops->id)
                ida_remove(&net_generic_ids, *ops->id);
}

/**
 * register_pernet_subsys - register a network namespace subsystem
 * @ops: pernet operations structure for the subsystem
 *
 * Register a subsystem which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order with which they were
 * registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
        int error;

        mutex_lock(&net_mutex);
        error = register_pernet_operations(first_device, ops);
        mutex_unlock(&net_mutex);
        return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
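
/*
 * Illustrative sketch of a subsystem using the ops->id/ops->size
 * mechanism (all foo_* names are hypothetical):
 *
 *        struct foo_net {
 *                int some_setting;
 *        };
 *
 *        static unsigned int foo_net_id;
 *
 *        static int __net_init foo_init_net(struct net *net)
 *        {
 *                struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *                fn->some_setting = 1;
 *                return 0;
 *        }
 *
 *        static struct pernet_operations foo_net_ops = {
 *                .init = foo_init_net,
 *                .id   = &foo_net_id,
 *                .size = sizeof(struct foo_net),
 *        };
 *
 *        err = register_pernet_subsys(&foo_net_ops);
 *
 * ops_init() allocates the ops->size bytes (zeroed), publishes them under
 * *ops->id via net_assign_generic(), and only then calls ->init.
 */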

/**
 * unregister_pernet_subsys - unregister a network namespace subsystem
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
        mutex_lock(&net_mutex);
        unregister_pernet_operations(ops);
        mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 * register_pernet_device - register a network namespace device
 * @ops: pernet operations structure for the subsystem
 *
 * Register a device which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order with which they were
 * registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
        int error;

        mutex_lock(&net_mutex);
        error = register_pernet_operations(&pernet_list, ops);
        if (!error && (first_device == &pernet_list))
                first_device = &ops->list;
        mutex_unlock(&net_mutex);
        return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 * unregister_pernet_device - unregister a network namespace netdevice
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
        mutex_lock(&net_mutex);
        if (&ops->list == first_device)
                first_device = first_device->next;
        unregister_pernet_operations(ops);
        mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
static struct ns_common *netns_get(struct task_struct *task)
{
        struct net *net = NULL;
        struct nsproxy *nsproxy;

        task_lock(task);
        nsproxy = task->nsproxy;
        if (nsproxy)
                net = get_net(nsproxy->net_ns);
        task_unlock(task);

        return net ? &net->ns : NULL;
}

static inline struct net *to_net_ns(struct ns_common *ns)
{
        return container_of(ns, struct net, ns);
}

static void netns_put(struct ns_common *ns)
{
        put_net(to_net_ns(ns));
}

static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
        struct net *net = to_net_ns(ns);

        if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
            !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
                return -EPERM;

        put_net(nsproxy->net_ns);
        nsproxy->net_ns = get_net(net);
        return 0;
}
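
/*
 * netns_install() is the kernel side of setns(2) for a network-namespace
 * fd, reached through the .install hook of netns_operations below; both
 * CAP_SYS_ADMIN checks must pass before the caller's nsproxy is switched.
 */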

static struct user_namespace *netns_owner(struct ns_common *ns)
{
        return to_net_ns(ns)->user_ns;
}

const struct proc_ns_operations netns_operations = {
        .name           = "net",
        .type           = CLONE_NEWNET,
        .get            = netns_get,
        .put            = netns_put,
        .install        = netns_install,
        .owner          = netns_owner,
};
#endif
1151 #endif