]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/core/net_namespace.c
proc: Move proc_fd() to fs/proc/fd.h
[mirror_ubuntu-artful-kernel.git] / net / core / net_namespace.c
CommitLineData
e005d193
JP
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
5f256bec
EB
3#include <linux/workqueue.h>
4#include <linux/rtnetlink.h>
5#include <linux/cache.h>
6#include <linux/slab.h>
7#include <linux/list.h>
8#include <linux/delay.h>
9dd776b6 9#include <linux/sched.h>
c93cf61f 10#include <linux/idr.h>
11a28d37 11#include <linux/rculist.h>
30ffee84 12#include <linux/nsproxy.h>
f0630529
EB
13#include <linux/proc_fs.h>
14#include <linux/file.h>
bc3b2d7f 15#include <linux/export.h>
038e7332 16#include <linux/user_namespace.h>
5f256bec 17#include <net/net_namespace.h>
dec827d1 18#include <net/netns/generic.h>
5f256bec
EB
19
20/*
21 * Our network namespace constructor/destructor lists
22 */
23
24static LIST_HEAD(pernet_list);
25static struct list_head *first_device = &pernet_list;
26static DEFINE_MUTEX(net_mutex);
27
5f256bec 28LIST_HEAD(net_namespace_list);
b76a461f 29EXPORT_SYMBOL_GPL(net_namespace_list);
5f256bec 30
734b6541
RM
31struct net init_net = {
32 .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
33};
ff4b9502 34EXPORT_SYMBOL(init_net);
5f256bec 35
dec827d1
PE
36#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
37
073862ba
ED
38static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
39
40static struct net_generic *net_alloc_generic(void)
41{
42 struct net_generic *ng;
43 size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
44
45 ng = kzalloc(generic_size, GFP_KERNEL);
46 if (ng)
47 ng->len = max_gen_ptrs;
48
49 return ng;
50}
51
05fceb4a
JP
52static int net_assign_generic(struct net *net, int id, void *data)
53{
54 struct net_generic *ng, *old_ng;
55
56 BUG_ON(!mutex_is_locked(&net_mutex));
57 BUG_ON(id == 0);
58
1c87733d
ED
59 old_ng = rcu_dereference_protected(net->gen,
60 lockdep_is_held(&net_mutex));
61 ng = old_ng;
05fceb4a
JP
62 if (old_ng->len >= id)
63 goto assign;
64
073862ba 65 ng = net_alloc_generic();
05fceb4a
JP
66 if (ng == NULL)
67 return -ENOMEM;
68
69 /*
70 * Some synchronisation notes:
71 *
72 * The net_generic explores the net->gen array inside rcu
73 * read section. Besides once set the net->gen->ptr[x]
74 * pointer never changes (see rules in netns/generic.h).
75 *
76 * That said, we simply duplicate this array and schedule
77 * the old copy for kfree after a grace period.
78 */
79
05fceb4a
JP
80 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
81
82 rcu_assign_pointer(net->gen, ng);
04d4dfed 83 kfree_rcu(old_ng, rcu);
05fceb4a
JP
84assign:
85 ng->ptr[id - 1] = data;
86 return 0;
87}
88
f875bae0
EB
89static int ops_init(const struct pernet_operations *ops, struct net *net)
90{
b922934d
JA
91 int err = -ENOMEM;
92 void *data = NULL;
93
f875bae0 94 if (ops->id && ops->size) {
b922934d 95 data = kzalloc(ops->size, GFP_KERNEL);
f875bae0 96 if (!data)
b922934d 97 goto out;
f875bae0
EB
98
99 err = net_assign_generic(net, *ops->id, data);
b922934d
JA
100 if (err)
101 goto cleanup;
f875bae0 102 }
b922934d 103 err = 0;
f875bae0 104 if (ops->init)
b922934d
JA
105 err = ops->init(net);
106 if (!err)
107 return 0;
108
109cleanup:
110 kfree(data);
111
112out:
113 return err;
f875bae0
EB
114}
115
116static void ops_free(const struct pernet_operations *ops, struct net *net)
117{
118 if (ops->id && ops->size) {
119 int id = *ops->id;
120 kfree(net_generic(net, id));
121 }
122}
123
72ad937a
EB
124static void ops_exit_list(const struct pernet_operations *ops,
125 struct list_head *net_exit_list)
126{
127 struct net *net;
128 if (ops->exit) {
129 list_for_each_entry(net, net_exit_list, exit_list)
130 ops->exit(net);
131 }
72ad937a
EB
132 if (ops->exit_batch)
133 ops->exit_batch(net_exit_list);
134}
135
136static void ops_free_list(const struct pernet_operations *ops,
137 struct list_head *net_exit_list)
138{
139 struct net *net;
140 if (ops->size && ops->id) {
141 list_for_each_entry(net, net_exit_list, exit_list)
142 ops_free(ops, net);
143 }
144}
145
5f256bec
EB
146/*
147 * setup_net runs the initializers for the network namespace object.
148 */
038e7332 149static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
5f256bec
EB
150{
151 /* Must be called with net_mutex held */
f875bae0 152 const struct pernet_operations *ops, *saved_ops;
486a87f1 153 int error = 0;
72ad937a 154 LIST_HEAD(net_exit_list);
5f256bec 155
5f256bec 156 atomic_set(&net->count, 1);
a685e089 157 atomic_set(&net->passive, 1);
4e985ada 158 net->dev_base_seq = 1;
038e7332 159 net->user_ns = user_ns;
486a87f1 160
5d1e4468 161#ifdef NETNS_REFCNT_DEBUG
5f256bec 162 atomic_set(&net->use_count, 0);
5d1e4468 163#endif
5f256bec 164
768f3591 165 list_for_each_entry(ops, &pernet_list, list) {
f875bae0
EB
166 error = ops_init(ops, net);
167 if (error < 0)
168 goto out_undo;
5f256bec
EB
169 }
170out:
171 return error;
768f3591 172
5f256bec
EB
173out_undo:
174 /* Walk through the list backwards calling the exit functions
175 * for the pernet modules whose init functions did not fail.
176 */
72ad937a 177 list_add(&net->exit_list, &net_exit_list);
f875bae0 178 saved_ops = ops;
72ad937a
EB
179 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
180 ops_exit_list(ops, &net_exit_list);
181
f875bae0
EB
182 ops = saved_ops;
183 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
72ad937a 184 ops_free_list(ops, &net_exit_list);
310928d9
DL
185
186 rcu_barrier();
5f256bec
EB
187 goto out;
188}
189
6a1a3b9f 190
ebe47d47
CN
191#ifdef CONFIG_NET_NS
/* Slab cache that struct net objects are allocated from and freed to. */
static struct kmem_cache *net_cachep;
/* Workqueue on which namespace cleanup work is queued by __put_net(). */
static struct workqueue_struct *netns_wq;
194
486a87f1 195static struct net *net_alloc(void)
45a19b0a 196{
486a87f1
DL
197 struct net *net = NULL;
198 struct net_generic *ng;
199
200 ng = net_alloc_generic();
201 if (!ng)
202 goto out;
203
204 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
45a19b0a 205 if (!net)
486a87f1 206 goto out_free;
45a19b0a 207
486a87f1
DL
208 rcu_assign_pointer(net->gen, ng);
209out:
210 return net;
211
212out_free:
213 kfree(ng);
214 goto out;
215}
216
217static void net_free(struct net *net)
218{
5d1e4468 219#ifdef NETNS_REFCNT_DEBUG
45a19b0a 220 if (unlikely(atomic_read(&net->use_count) != 0)) {
e005d193
JP
221 pr_emerg("network namespace not free! Usage: %d\n",
222 atomic_read(&net->use_count));
45a19b0a
JFS
223 return;
224 }
5d1e4468 225#endif
4ef079cc 226 kfree(net->gen);
45a19b0a
JFS
227 kmem_cache_free(net_cachep, net);
228}
229
a685e089
AV
230void net_drop_ns(void *p)
231{
232 struct net *ns = p;
233 if (ns && atomic_dec_and_test(&ns->passive))
234 net_free(ns);
235}
236
038e7332
EB
237struct net *copy_net_ns(unsigned long flags,
238 struct user_namespace *user_ns, struct net *old_net)
9dd776b6 239{
088eb2d9
AD
240 struct net *net;
241 int rv;
9dd776b6 242
911cb193
RL
243 if (!(flags & CLONE_NEWNET))
244 return get_net(old_net);
245
088eb2d9
AD
246 net = net_alloc();
247 if (!net)
248 return ERR_PTR(-ENOMEM);
038e7332
EB
249
250 get_user_ns(user_ns);
251
9dd776b6 252 mutex_lock(&net_mutex);
038e7332 253 rv = setup_net(net, user_ns);
088eb2d9 254 if (rv == 0) {
486a87f1 255 rtnl_lock();
11a28d37 256 list_add_tail_rcu(&net->list, &net_namespace_list);
486a87f1
DL
257 rtnl_unlock();
258 }
9dd776b6 259 mutex_unlock(&net_mutex);
088eb2d9 260 if (rv < 0) {
038e7332 261 put_user_ns(user_ns);
a685e089 262 net_drop_ns(net);
088eb2d9
AD
263 return ERR_PTR(rv);
264 }
265 return net;
266}
486a87f1 267
2b035b39
EB
268static DEFINE_SPINLOCK(cleanup_list_lock);
269static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
270
6a1a3b9f
PE
271static void cleanup_net(struct work_struct *work)
272{
f875bae0 273 const struct pernet_operations *ops;
2b035b39
EB
274 struct net *net, *tmp;
275 LIST_HEAD(net_kill_list);
72ad937a 276 LIST_HEAD(net_exit_list);
6a1a3b9f 277
2b035b39
EB
278 /* Atomically snapshot the list of namespaces to cleanup */
279 spin_lock_irq(&cleanup_list_lock);
280 list_replace_init(&cleanup_list, &net_kill_list);
281 spin_unlock_irq(&cleanup_list_lock);
6a1a3b9f
PE
282
283 mutex_lock(&net_mutex);
284
285 /* Don't let anyone else find us. */
286 rtnl_lock();
72ad937a 287 list_for_each_entry(net, &net_kill_list, cleanup_list) {
2b035b39 288 list_del_rcu(&net->list);
72ad937a
EB
289 list_add_tail(&net->exit_list, &net_exit_list);
290 }
6a1a3b9f
PE
291 rtnl_unlock();
292
11a28d37
JB
293 /*
294 * Another CPU might be rcu-iterating the list, wait for it.
295 * This needs to be before calling the exit() notifiers, so
296 * the rcu_barrier() below isn't sufficient alone.
297 */
298 synchronize_rcu();
299
6a1a3b9f 300 /* Run all of the network namespace exit methods */
72ad937a
EB
301 list_for_each_entry_reverse(ops, &pernet_list, list)
302 ops_exit_list(ops, &net_exit_list);
303
f875bae0 304 /* Free the net generic variables */
72ad937a
EB
305 list_for_each_entry_reverse(ops, &pernet_list, list)
306 ops_free_list(ops, &net_exit_list);
6a1a3b9f
PE
307
308 mutex_unlock(&net_mutex);
309
310 /* Ensure there are no outstanding rcu callbacks using this
311 * network namespace.
312 */
313 rcu_barrier();
314
315 /* Finally it is safe to free my network namespace structure */
72ad937a
EB
316 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
317 list_del_init(&net->exit_list);
038e7332 318 put_user_ns(net->user_ns);
a685e089 319 net_drop_ns(net);
2b035b39 320 }
6a1a3b9f 321}
2b035b39 322static DECLARE_WORK(net_cleanup_work, cleanup_net);
6a1a3b9f
PE
323
324void __put_net(struct net *net)
325{
326 /* Cleanup the network namespace in process context */
2b035b39
EB
327 unsigned long flags;
328
329 spin_lock_irqsave(&cleanup_list_lock, flags);
330 list_add(&net->cleanup_list, &cleanup_list);
331 spin_unlock_irqrestore(&cleanup_list_lock, flags);
332
333 queue_work(netns_wq, &net_cleanup_work);
6a1a3b9f
PE
334}
335EXPORT_SYMBOL_GPL(__put_net);
336
956c9207
SR
337struct net *get_net_ns_by_fd(int fd)
338{
339 struct proc_inode *ei;
340 struct file *file;
341 struct net *net;
342
956c9207 343 file = proc_ns_fget(fd);
c316e6a3
AV
344 if (IS_ERR(file))
345 return ERR_CAST(file);
956c9207 346
496ad9aa 347 ei = PROC_I(file_inode(file));
c316e6a3
AV
348 if (ei->ns_ops == &netns_operations)
349 net = get_net(ei->ns);
350 else
351 net = ERR_PTR(-EINVAL);
956c9207 352
c316e6a3 353 fput(file);
956c9207
SR
354 return net;
355}
356
6a1a3b9f 357#else
956c9207
SR
358struct net *get_net_ns_by_fd(int fd)
359{
360 return ERR_PTR(-EINVAL);
361}
6a1a3b9f
PE
362#endif
363
30ffee84
JB
364struct net *get_net_ns_by_pid(pid_t pid)
365{
366 struct task_struct *tsk;
367 struct net *net;
368
369 /* Lookup the network namespace */
370 net = ERR_PTR(-ESRCH);
371 rcu_read_lock();
372 tsk = find_task_by_vpid(pid);
373 if (tsk) {
374 struct nsproxy *nsproxy;
375 nsproxy = task_nsproxy(tsk);
376 if (nsproxy)
377 net = get_net(nsproxy->net_ns);
378 }
379 rcu_read_unlock();
380 return net;
381}
382EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
383
98f842e6
EB
384static __net_init int net_ns_net_init(struct net *net)
385{
386 return proc_alloc_inum(&net->proc_inum);
387}
388
389static __net_exit void net_ns_net_exit(struct net *net)
390{
391 proc_free_inum(net->proc_inum);
392}
393
394static struct pernet_operations __net_initdata net_ns_ops = {
395 .init = net_ns_net_init,
396 .exit = net_ns_net_exit,
397};
398
5f256bec
EB
399static int __init net_ns_init(void)
400{
486a87f1 401 struct net_generic *ng;
5f256bec 402
d57a9212 403#ifdef CONFIG_NET_NS
5f256bec
EB
404 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
405 SMP_CACHE_BYTES,
406 SLAB_PANIC, NULL);
3ef1355d
BT
407
408 /* Create workqueue for cleanup */
409 netns_wq = create_singlethread_workqueue("netns");
410 if (!netns_wq)
411 panic("Could not create netns workq");
d57a9212 412#endif
3ef1355d 413
486a87f1
DL
414 ng = net_alloc_generic();
415 if (!ng)
416 panic("Could not allocate generic netns");
417
418 rcu_assign_pointer(init_net.gen, ng);
419
5f256bec 420 mutex_lock(&net_mutex);
038e7332 421 if (setup_net(&init_net, &init_user_ns))
ca0f3112 422 panic("Could not setup the initial network namespace");
5f256bec 423
f4618d39 424 rtnl_lock();
11a28d37 425 list_add_tail_rcu(&init_net.list, &net_namespace_list);
f4618d39 426 rtnl_unlock();
5f256bec
EB
427
428 mutex_unlock(&net_mutex);
5f256bec 429
98f842e6
EB
430 register_pernet_subsys(&net_ns_ops);
431
5f256bec
EB
432 return 0;
433}
434
435pure_initcall(net_ns_init);
436
ed160e83 437#ifdef CONFIG_NET_NS
f875bae0
EB
438static int __register_pernet_operations(struct list_head *list,
439 struct pernet_operations *ops)
5f256bec 440{
72ad937a 441 struct net *net;
5f256bec 442 int error;
72ad937a 443 LIST_HEAD(net_exit_list);
5f256bec 444
5f256bec 445 list_add_tail(&ops->list, list);
f875bae0 446 if (ops->init || (ops->id && ops->size)) {
1dba323b 447 for_each_net(net) {
f875bae0 448 error = ops_init(ops, net);
5f256bec
EB
449 if (error)
450 goto out_undo;
72ad937a 451 list_add_tail(&net->exit_list, &net_exit_list);
5f256bec
EB
452 }
453 }
1dba323b 454 return 0;
5f256bec
EB
455
456out_undo:
457 /* If I have an error cleanup all namespaces I initialized */
458 list_del(&ops->list);
72ad937a
EB
459 ops_exit_list(ops, &net_exit_list);
460 ops_free_list(ops, &net_exit_list);
1dba323b 461 return error;
5f256bec
EB
462}
463
f875bae0 464static void __unregister_pernet_operations(struct pernet_operations *ops)
5f256bec
EB
465{
466 struct net *net;
72ad937a 467 LIST_HEAD(net_exit_list);
5f256bec
EB
468
469 list_del(&ops->list);
72ad937a
EB
470 for_each_net(net)
471 list_add_tail(&net->exit_list, &net_exit_list);
472 ops_exit_list(ops, &net_exit_list);
473 ops_free_list(ops, &net_exit_list);
5f256bec
EB
474}
475
ed160e83
DL
476#else
477
f875bae0
EB
478static int __register_pernet_operations(struct list_head *list,
479 struct pernet_operations *ops)
ed160e83 480{
b922934d 481 return ops_init(ops, &init_net);
ed160e83
DL
482}
483
f875bae0 484static void __unregister_pernet_operations(struct pernet_operations *ops)
ed160e83 485{
72ad937a
EB
486 LIST_HEAD(net_exit_list);
487 list_add(&init_net.exit_list, &net_exit_list);
488 ops_exit_list(ops, &net_exit_list);
489 ops_free_list(ops, &net_exit_list);
ed160e83 490}
f875bae0
EB
491
492#endif /* CONFIG_NET_NS */
ed160e83 493
c93cf61f
PE
494static DEFINE_IDA(net_generic_ids);
495
f875bae0
EB
496static int register_pernet_operations(struct list_head *list,
497 struct pernet_operations *ops)
498{
499 int error;
500
501 if (ops->id) {
502again:
503 error = ida_get_new_above(&net_generic_ids, 1, ops->id);
504 if (error < 0) {
505 if (error == -EAGAIN) {
506 ida_pre_get(&net_generic_ids, GFP_KERNEL);
507 goto again;
508 }
509 return error;
510 }
073862ba 511 max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
f875bae0
EB
512 }
513 error = __register_pernet_operations(list, ops);
3a765eda
EB
514 if (error) {
515 rcu_barrier();
516 if (ops->id)
517 ida_remove(&net_generic_ids, *ops->id);
518 }
f875bae0
EB
519
520 return error;
521}
522
523static void unregister_pernet_operations(struct pernet_operations *ops)
524{
525
526 __unregister_pernet_operations(ops);
3a765eda 527 rcu_barrier();
f875bae0
EB
528 if (ops->id)
529 ida_remove(&net_generic_ids, *ops->id);
530}
531
5f256bec
EB
532/**
533 * register_pernet_subsys - register a network namespace subsystem
534 * @ops: pernet operations structure for the subsystem
535 *
536 * Register a subsystem which has init and exit functions
537 * that are called when network namespaces are created and
538 * destroyed respectively.
539 *
540 * When registered all network namespace init functions are
541 * called for every existing network namespace. Allowing kernel
542 * modules to have a race free view of the set of network namespaces.
543 *
544 * When a new network namespace is created all of the init
545 * methods are called in the order in which they were registered.
546 *
547 * When a network namespace is destroyed all of the exit methods
548 * are called in the reverse of the order with which they were
549 * registered.
550 */
551int register_pernet_subsys(struct pernet_operations *ops)
552{
553 int error;
554 mutex_lock(&net_mutex);
555 error = register_pernet_operations(first_device, ops);
556 mutex_unlock(&net_mutex);
557 return error;
558}
559EXPORT_SYMBOL_GPL(register_pernet_subsys);
560
561/**
562 * unregister_pernet_subsys - unregister a network namespace subsystem
563 * @ops: pernet operations structure to manipulate
564 *
565 * Remove the pernet operations structure from the list to be
53379e57 566 * used when network namespaces are created or destroyed. In
5f256bec
EB
567 * addition run the exit method for all existing network
568 * namespaces.
569 */
b3c981d2 570void unregister_pernet_subsys(struct pernet_operations *ops)
5f256bec
EB
571{
572 mutex_lock(&net_mutex);
b3c981d2 573 unregister_pernet_operations(ops);
5f256bec
EB
574 mutex_unlock(&net_mutex);
575}
576EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
577
578/**
579 * register_pernet_device - register a network namespace device
580 * @ops: pernet operations structure for the subsystem
581 *
582 * Register a device which has init and exit functions
583 * that are called when network namespaces are created and
584 * destroyed respectively.
585 *
586 * When registered all network namespace init functions are
587 * called for every existing network namespace. Allowing kernel
588 * modules to have a race free view of the set of network namespaces.
589 *
590 * When a new network namespace is created all of the init
591 * methods are called in the order in which they were registered.
592 *
593 * When a network namespace is destroyed all of the exit methods
594 * are called in the reverse of the order with which they were
595 * registered.
596 */
597int register_pernet_device(struct pernet_operations *ops)
598{
599 int error;
600 mutex_lock(&net_mutex);
601 error = register_pernet_operations(&pernet_list, ops);
602 if (!error && (first_device == &pernet_list))
603 first_device = &ops->list;
604 mutex_unlock(&net_mutex);
605 return error;
606}
607EXPORT_SYMBOL_GPL(register_pernet_device);
608
609/**
610 * unregister_pernet_device - unregister a network namespace netdevice
611 * @ops: pernet operations structure to manipulate
612 *
613 * Remove the pernet operations structure from the list to be
53379e57 614 * used when network namespaces are created or destroyed. In
5f256bec
EB
615 * addition run the exit method for all existing network
616 * namespaces.
617 */
618void unregister_pernet_device(struct pernet_operations *ops)
619{
620 mutex_lock(&net_mutex);
621 if (&ops->list == first_device)
622 first_device = first_device->next;
623 unregister_pernet_operations(ops);
624 mutex_unlock(&net_mutex);
625}
626EXPORT_SYMBOL_GPL(unregister_pernet_device);
13b6f576
EB
627
628#ifdef CONFIG_NET_NS
629static void *netns_get(struct task_struct *task)
630{
f0630529
EB
631 struct net *net = NULL;
632 struct nsproxy *nsproxy;
633
13b6f576 634 rcu_read_lock();
f0630529
EB
635 nsproxy = task_nsproxy(task);
636 if (nsproxy)
637 net = get_net(nsproxy->net_ns);
13b6f576 638 rcu_read_unlock();
f0630529 639
13b6f576
EB
640 return net;
641}
642
/* proc_ns_operations ->put: drop the reference on the namespace @ns. */
static void netns_put(void *ns)
{
	struct net *net = ns;

	put_net(net);
}
647
648static int netns_install(struct nsproxy *nsproxy, void *ns)
649{
142e1d1d
EB
650 struct net *net = ns;
651
5e4a0847
EB
652 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
653 !nsown_capable(CAP_SYS_ADMIN))
142e1d1d
EB
654 return -EPERM;
655
13b6f576 656 put_net(nsproxy->net_ns);
142e1d1d 657 nsproxy->net_ns = get_net(net);
13b6f576
EB
658 return 0;
659}
660
98f842e6
EB
661static unsigned int netns_inum(void *ns)
662{
663 struct net *net = ns;
664 return net->proc_inum;
665}
666
13b6f576
EB
667const struct proc_ns_operations netns_operations = {
668 .name = "net",
669 .type = CLONE_NEWNET,
670 .get = netns_get,
671 .put = netns_put,
672 .install = netns_install,
98f842e6 673 .inum = netns_inum,
13b6f576
EB
674};
675#endif