]>
Commit | Line | Data |
---|---|---|
1 | #include <linux/workqueue.h> | |
2 | #include <linux/rtnetlink.h> | |
3 | #include <linux/cache.h> | |
4 | #include <linux/slab.h> | |
5 | #include <linux/list.h> | |
6 | #include <linux/delay.h> | |
7 | #include <linux/sched.h> | |
8 | #include <linux/idr.h> | |
9 | #include <linux/rculist.h> | |
10 | #include <linux/nsproxy.h> | |
11 | #include <net/net_namespace.h> | |
12 | #include <net/netns/generic.h> | |
13 | ||
14 | /* | |
15 | * Our network namespace constructor/destructor lists | |
16 | */ | |
17 | ||
18 | static LIST_HEAD(pernet_list); | |
19 | static struct list_head *first_device = &pernet_list; | |
20 | static DEFINE_MUTEX(net_mutex); | |
21 | ||
22 | LIST_HEAD(net_namespace_list); | |
23 | EXPORT_SYMBOL_GPL(net_namespace_list); | |
24 | ||
25 | struct net init_net; | |
26 | EXPORT_SYMBOL(init_net); | |
27 | ||
28 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ | |
29 | ||
30 | static void net_generic_release(struct rcu_head *rcu) | |
31 | { | |
32 | struct net_generic *ng; | |
33 | ||
34 | ng = container_of(rcu, struct net_generic, rcu); | |
35 | kfree(ng); | |
36 | } | |
37 | ||
38 | static int net_assign_generic(struct net *net, int id, void *data) | |
39 | { | |
40 | struct net_generic *ng, *old_ng; | |
41 | ||
42 | BUG_ON(!mutex_is_locked(&net_mutex)); | |
43 | BUG_ON(id == 0); | |
44 | ||
45 | old_ng = rcu_dereference_protected(net->gen, | |
46 | lockdep_is_held(&net_mutex)); | |
47 | ng = old_ng; | |
48 | if (old_ng->len >= id) | |
49 | goto assign; | |
50 | ||
51 | ng = kzalloc(sizeof(struct net_generic) + | |
52 | id * sizeof(void *), GFP_KERNEL); | |
53 | if (ng == NULL) | |
54 | return -ENOMEM; | |
55 | ||
56 | /* | |
57 | * Some synchronisation notes: | |
58 | * | |
59 | * The net_generic explores the net->gen array inside rcu | |
60 | * read section. Besides once set the net->gen->ptr[x] | |
61 | * pointer never changes (see rules in netns/generic.h). | |
62 | * | |
63 | * That said, we simply duplicate this array and schedule | |
64 | * the old copy for kfree after a grace period. | |
65 | */ | |
66 | ||
67 | ng->len = id; | |
68 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); | |
69 | ||
70 | rcu_assign_pointer(net->gen, ng); | |
71 | call_rcu(&old_ng->rcu, net_generic_release); | |
72 | assign: | |
73 | ng->ptr[id - 1] = data; | |
74 | return 0; | |
75 | } | |
76 | ||
77 | static int ops_init(const struct pernet_operations *ops, struct net *net) | |
78 | { | |
79 | int err; | |
80 | if (ops->id && ops->size) { | |
81 | void *data = kzalloc(ops->size, GFP_KERNEL); | |
82 | if (!data) | |
83 | return -ENOMEM; | |
84 | ||
85 | err = net_assign_generic(net, *ops->id, data); | |
86 | if (err) { | |
87 | kfree(data); | |
88 | return err; | |
89 | } | |
90 | } | |
91 | if (ops->init) | |
92 | return ops->init(net); | |
93 | return 0; | |
94 | } | |
95 | ||
96 | static void ops_free(const struct pernet_operations *ops, struct net *net) | |
97 | { | |
98 | if (ops->id && ops->size) { | |
99 | int id = *ops->id; | |
100 | kfree(net_generic(net, id)); | |
101 | } | |
102 | } | |
103 | ||
104 | static void ops_exit_list(const struct pernet_operations *ops, | |
105 | struct list_head *net_exit_list) | |
106 | { | |
107 | struct net *net; | |
108 | if (ops->exit) { | |
109 | list_for_each_entry(net, net_exit_list, exit_list) | |
110 | ops->exit(net); | |
111 | } | |
112 | if (ops->exit_batch) | |
113 | ops->exit_batch(net_exit_list); | |
114 | } | |
115 | ||
116 | static void ops_free_list(const struct pernet_operations *ops, | |
117 | struct list_head *net_exit_list) | |
118 | { | |
119 | struct net *net; | |
120 | if (ops->size && ops->id) { | |
121 | list_for_each_entry(net, net_exit_list, exit_list) | |
122 | ops_free(ops, net); | |
123 | } | |
124 | } | |
125 | ||
126 | /* | |
127 | * setup_net runs the initializers for the network namespace object. | |
128 | */ | |
129 | static __net_init int setup_net(struct net *net) | |
130 | { | |
131 | /* Must be called with net_mutex held */ | |
132 | const struct pernet_operations *ops, *saved_ops; | |
133 | int error = 0; | |
134 | LIST_HEAD(net_exit_list); | |
135 | ||
136 | atomic_set(&net->count, 1); | |
137 | ||
138 | #ifdef NETNS_REFCNT_DEBUG | |
139 | atomic_set(&net->use_count, 0); | |
140 | #endif | |
141 | ||
142 | list_for_each_entry(ops, &pernet_list, list) { | |
143 | error = ops_init(ops, net); | |
144 | if (error < 0) | |
145 | goto out_undo; | |
146 | } | |
147 | out: | |
148 | return error; | |
149 | ||
150 | out_undo: | |
151 | /* Walk through the list backwards calling the exit functions | |
152 | * for the pernet modules whose init functions did not fail. | |
153 | */ | |
154 | list_add(&net->exit_list, &net_exit_list); | |
155 | saved_ops = ops; | |
156 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) | |
157 | ops_exit_list(ops, &net_exit_list); | |
158 | ||
159 | ops = saved_ops; | |
160 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) | |
161 | ops_free_list(ops, &net_exit_list); | |
162 | ||
163 | rcu_barrier(); | |
164 | goto out; | |
165 | } | |
166 | ||
167 | static struct net_generic *net_alloc_generic(void) | |
168 | { | |
169 | struct net_generic *ng; | |
170 | size_t generic_size = sizeof(struct net_generic) + | |
171 | INITIAL_NET_GEN_PTRS * sizeof(void *); | |
172 | ||
173 | ng = kzalloc(generic_size, GFP_KERNEL); | |
174 | if (ng) | |
175 | ng->len = INITIAL_NET_GEN_PTRS; | |
176 | ||
177 | return ng; | |
178 | } | |
179 | ||
180 | #ifdef CONFIG_NET_NS | |
181 | static struct kmem_cache *net_cachep; | |
182 | static struct workqueue_struct *netns_wq; | |
183 | ||
184 | static struct net *net_alloc(void) | |
185 | { | |
186 | struct net *net = NULL; | |
187 | struct net_generic *ng; | |
188 | ||
189 | ng = net_alloc_generic(); | |
190 | if (!ng) | |
191 | goto out; | |
192 | ||
193 | net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); | |
194 | if (!net) | |
195 | goto out_free; | |
196 | ||
197 | rcu_assign_pointer(net->gen, ng); | |
198 | out: | |
199 | return net; | |
200 | ||
201 | out_free: | |
202 | kfree(ng); | |
203 | goto out; | |
204 | } | |
205 | ||
206 | static void net_free(struct net *net) | |
207 | { | |
208 | #ifdef NETNS_REFCNT_DEBUG | |
209 | if (unlikely(atomic_read(&net->use_count) != 0)) { | |
210 | printk(KERN_EMERG "network namespace not free! Usage: %d\n", | |
211 | atomic_read(&net->use_count)); | |
212 | return; | |
213 | } | |
214 | #endif | |
215 | kfree(net->gen); | |
216 | kmem_cache_free(net_cachep, net); | |
217 | } | |
218 | ||
219 | static struct net *net_create(void) | |
220 | { | |
221 | struct net *net; | |
222 | int rv; | |
223 | ||
224 | net = net_alloc(); | |
225 | if (!net) | |
226 | return ERR_PTR(-ENOMEM); | |
227 | mutex_lock(&net_mutex); | |
228 | rv = setup_net(net); | |
229 | if (rv == 0) { | |
230 | rtnl_lock(); | |
231 | list_add_tail_rcu(&net->list, &net_namespace_list); | |
232 | rtnl_unlock(); | |
233 | } | |
234 | mutex_unlock(&net_mutex); | |
235 | if (rv < 0) { | |
236 | net_free(net); | |
237 | return ERR_PTR(rv); | |
238 | } | |
239 | return net; | |
240 | } | |
241 | ||
242 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) | |
243 | { | |
244 | if (!(flags & CLONE_NEWNET)) | |
245 | return get_net(old_net); | |
246 | return net_create(); | |
247 | } | |
248 | ||
249 | static DEFINE_SPINLOCK(cleanup_list_lock); | |
250 | static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ | |
251 | ||
252 | static void cleanup_net(struct work_struct *work) | |
253 | { | |
254 | const struct pernet_operations *ops; | |
255 | struct net *net, *tmp; | |
256 | LIST_HEAD(net_kill_list); | |
257 | LIST_HEAD(net_exit_list); | |
258 | ||
259 | /* Atomically snapshot the list of namespaces to cleanup */ | |
260 | spin_lock_irq(&cleanup_list_lock); | |
261 | list_replace_init(&cleanup_list, &net_kill_list); | |
262 | spin_unlock_irq(&cleanup_list_lock); | |
263 | ||
264 | mutex_lock(&net_mutex); | |
265 | ||
266 | /* Don't let anyone else find us. */ | |
267 | rtnl_lock(); | |
268 | list_for_each_entry(net, &net_kill_list, cleanup_list) { | |
269 | list_del_rcu(&net->list); | |
270 | list_add_tail(&net->exit_list, &net_exit_list); | |
271 | } | |
272 | rtnl_unlock(); | |
273 | ||
274 | /* | |
275 | * Another CPU might be rcu-iterating the list, wait for it. | |
276 | * This needs to be before calling the exit() notifiers, so | |
277 | * the rcu_barrier() below isn't sufficient alone. | |
278 | */ | |
279 | synchronize_rcu(); | |
280 | ||
281 | /* Run all of the network namespace exit methods */ | |
282 | list_for_each_entry_reverse(ops, &pernet_list, list) | |
283 | ops_exit_list(ops, &net_exit_list); | |
284 | ||
285 | /* Free the net generic variables */ | |
286 | list_for_each_entry_reverse(ops, &pernet_list, list) | |
287 | ops_free_list(ops, &net_exit_list); | |
288 | ||
289 | mutex_unlock(&net_mutex); | |
290 | ||
291 | /* Ensure there are no outstanding rcu callbacks using this | |
292 | * network namespace. | |
293 | */ | |
294 | rcu_barrier(); | |
295 | ||
296 | /* Finally it is safe to free my network namespace structure */ | |
297 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { | |
298 | list_del_init(&net->exit_list); | |
299 | net_free(net); | |
300 | } | |
301 | } | |
302 | static DECLARE_WORK(net_cleanup_work, cleanup_net); | |
303 | ||
304 | void __put_net(struct net *net) | |
305 | { | |
306 | /* Cleanup the network namespace in process context */ | |
307 | unsigned long flags; | |
308 | ||
309 | spin_lock_irqsave(&cleanup_list_lock, flags); | |
310 | list_add(&net->cleanup_list, &cleanup_list); | |
311 | spin_unlock_irqrestore(&cleanup_list_lock, flags); | |
312 | ||
313 | queue_work(netns_wq, &net_cleanup_work); | |
314 | } | |
315 | EXPORT_SYMBOL_GPL(__put_net); | |
316 | ||
317 | #else | |
318 | struct net *copy_net_ns(unsigned long flags, struct net *old_net) | |
319 | { | |
320 | if (flags & CLONE_NEWNET) | |
321 | return ERR_PTR(-EINVAL); | |
322 | return old_net; | |
323 | } | |
324 | #endif | |
325 | ||
326 | struct net *get_net_ns_by_pid(pid_t pid) | |
327 | { | |
328 | struct task_struct *tsk; | |
329 | struct net *net; | |
330 | ||
331 | /* Lookup the network namespace */ | |
332 | net = ERR_PTR(-ESRCH); | |
333 | rcu_read_lock(); | |
334 | tsk = find_task_by_vpid(pid); | |
335 | if (tsk) { | |
336 | struct nsproxy *nsproxy; | |
337 | nsproxy = task_nsproxy(tsk); | |
338 | if (nsproxy) | |
339 | net = get_net(nsproxy->net_ns); | |
340 | } | |
341 | rcu_read_unlock(); | |
342 | return net; | |
343 | } | |
344 | EXPORT_SYMBOL_GPL(get_net_ns_by_pid); | |
345 | ||
346 | static int __init net_ns_init(void) | |
347 | { | |
348 | struct net_generic *ng; | |
349 | ||
350 | #ifdef CONFIG_NET_NS | |
351 | net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), | |
352 | SMP_CACHE_BYTES, | |
353 | SLAB_PANIC, NULL); | |
354 | ||
355 | /* Create workqueue for cleanup */ | |
356 | netns_wq = create_singlethread_workqueue("netns"); | |
357 | if (!netns_wq) | |
358 | panic("Could not create netns workq"); | |
359 | #endif | |
360 | ||
361 | ng = net_alloc_generic(); | |
362 | if (!ng) | |
363 | panic("Could not allocate generic netns"); | |
364 | ||
365 | rcu_assign_pointer(init_net.gen, ng); | |
366 | ||
367 | mutex_lock(&net_mutex); | |
368 | if (setup_net(&init_net)) | |
369 | panic("Could not setup the initial network namespace"); | |
370 | ||
371 | rtnl_lock(); | |
372 | list_add_tail_rcu(&init_net.list, &net_namespace_list); | |
373 | rtnl_unlock(); | |
374 | ||
375 | mutex_unlock(&net_mutex); | |
376 | ||
377 | return 0; | |
378 | } | |
379 | ||
380 | pure_initcall(net_ns_init); | |
381 | ||
382 | #ifdef CONFIG_NET_NS | |
383 | static int __register_pernet_operations(struct list_head *list, | |
384 | struct pernet_operations *ops) | |
385 | { | |
386 | struct net *net; | |
387 | int error; | |
388 | LIST_HEAD(net_exit_list); | |
389 | ||
390 | list_add_tail(&ops->list, list); | |
391 | if (ops->init || (ops->id && ops->size)) { | |
392 | for_each_net(net) { | |
393 | error = ops_init(ops, net); | |
394 | if (error) | |
395 | goto out_undo; | |
396 | list_add_tail(&net->exit_list, &net_exit_list); | |
397 | } | |
398 | } | |
399 | return 0; | |
400 | ||
401 | out_undo: | |
402 | /* If I have an error cleanup all namespaces I initialized */ | |
403 | list_del(&ops->list); | |
404 | ops_exit_list(ops, &net_exit_list); | |
405 | ops_free_list(ops, &net_exit_list); | |
406 | return error; | |
407 | } | |
408 | ||
409 | static void __unregister_pernet_operations(struct pernet_operations *ops) | |
410 | { | |
411 | struct net *net; | |
412 | LIST_HEAD(net_exit_list); | |
413 | ||
414 | list_del(&ops->list); | |
415 | for_each_net(net) | |
416 | list_add_tail(&net->exit_list, &net_exit_list); | |
417 | ops_exit_list(ops, &net_exit_list); | |
418 | ops_free_list(ops, &net_exit_list); | |
419 | } | |
420 | ||
421 | #else | |
422 | ||
423 | static int __register_pernet_operations(struct list_head *list, | |
424 | struct pernet_operations *ops) | |
425 | { | |
426 | int err = 0; | |
427 | err = ops_init(ops, &init_net); | |
428 | if (err) | |
429 | ops_free(ops, &init_net); | |
430 | return err; | |
431 | ||
432 | } | |
433 | ||
434 | static void __unregister_pernet_operations(struct pernet_operations *ops) | |
435 | { | |
436 | LIST_HEAD(net_exit_list); | |
437 | list_add(&init_net.exit_list, &net_exit_list); | |
438 | ops_exit_list(ops, &net_exit_list); | |
439 | ops_free_list(ops, &net_exit_list); | |
440 | } | |
441 | ||
442 | #endif /* CONFIG_NET_NS */ | |
443 | ||
444 | static DEFINE_IDA(net_generic_ids); | |
445 | ||
446 | static int register_pernet_operations(struct list_head *list, | |
447 | struct pernet_operations *ops) | |
448 | { | |
449 | int error; | |
450 | ||
451 | if (ops->id) { | |
452 | again: | |
453 | error = ida_get_new_above(&net_generic_ids, 1, ops->id); | |
454 | if (error < 0) { | |
455 | if (error == -EAGAIN) { | |
456 | ida_pre_get(&net_generic_ids, GFP_KERNEL); | |
457 | goto again; | |
458 | } | |
459 | return error; | |
460 | } | |
461 | } | |
462 | error = __register_pernet_operations(list, ops); | |
463 | if (error) { | |
464 | rcu_barrier(); | |
465 | if (ops->id) | |
466 | ida_remove(&net_generic_ids, *ops->id); | |
467 | } | |
468 | ||
469 | return error; | |
470 | } | |
471 | ||
472 | static void unregister_pernet_operations(struct pernet_operations *ops) | |
473 | { | |
474 | ||
475 | __unregister_pernet_operations(ops); | |
476 | rcu_barrier(); | |
477 | if (ops->id) | |
478 | ida_remove(&net_generic_ids, *ops->id); | |
479 | } | |
480 | ||
481 | /** | |
482 | * register_pernet_subsys - register a network namespace subsystem | |
483 | * @ops: pernet operations structure for the subsystem | |
484 | * | |
485 | * Register a subsystem which has init and exit functions | |
486 | * that are called when network namespaces are created and | |
487 | * destroyed respectively. | |
488 | * | |
489 | * When registered all network namespace init functions are | |
490 | * called for every existing network namespace. Allowing kernel | |
491 | * modules to have a race free view of the set of network namespaces. | |
492 | * | |
493 | * When a new network namespace is created all of the init | |
494 | * methods are called in the order in which they were registered. | |
495 | * | |
496 | * When a network namespace is destroyed all of the exit methods | |
497 | * are called in the reverse of the order with which they were | |
498 | * registered. | |
499 | */ | |
500 | int register_pernet_subsys(struct pernet_operations *ops) | |
501 | { | |
502 | int error; | |
503 | mutex_lock(&net_mutex); | |
504 | error = register_pernet_operations(first_device, ops); | |
505 | mutex_unlock(&net_mutex); | |
506 | return error; | |
507 | } | |
508 | EXPORT_SYMBOL_GPL(register_pernet_subsys); | |
509 | ||
510 | /** | |
511 | * unregister_pernet_subsys - unregister a network namespace subsystem | |
512 | * @ops: pernet operations structure to manipulate | |
513 | * | |
514 | * Remove the pernet operations structure from the list to be | |
515 | * used when network namespaces are created or destroyed. In | |
516 | * addition run the exit method for all existing network | |
517 | * namespaces. | |
518 | */ | |
519 | void unregister_pernet_subsys(struct pernet_operations *ops) | |
520 | { | |
521 | mutex_lock(&net_mutex); | |
522 | unregister_pernet_operations(ops); | |
523 | mutex_unlock(&net_mutex); | |
524 | } | |
525 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); | |
526 | ||
527 | /** | |
528 | * register_pernet_device - register a network namespace device | |
529 | * @ops: pernet operations structure for the subsystem | |
530 | * | |
531 | * Register a device which has init and exit functions | |
532 | * that are called when network namespaces are created and | |
533 | * destroyed respectively. | |
534 | * | |
535 | * When registered all network namespace init functions are | |
536 | * called for every existing network namespace. Allowing kernel | |
537 | * modules to have a race free view of the set of network namespaces. | |
538 | * | |
539 | * When a new network namespace is created all of the init | |
540 | * methods are called in the order in which they were registered. | |
541 | * | |
542 | * When a network namespace is destroyed all of the exit methods | |
543 | * are called in the reverse of the order with which they were | |
544 | * registered. | |
545 | */ | |
546 | int register_pernet_device(struct pernet_operations *ops) | |
547 | { | |
548 | int error; | |
549 | mutex_lock(&net_mutex); | |
550 | error = register_pernet_operations(&pernet_list, ops); | |
551 | if (!error && (first_device == &pernet_list)) | |
552 | first_device = &ops->list; | |
553 | mutex_unlock(&net_mutex); | |
554 | return error; | |
555 | } | |
556 | EXPORT_SYMBOL_GPL(register_pernet_device); | |
557 | ||
558 | /** | |
559 | * unregister_pernet_device - unregister a network namespace netdevice | |
560 | * @ops: pernet operations structure to manipulate | |
561 | * | |
562 | * Remove the pernet operations structure from the list to be | |
563 | * used when network namespaces are created or destroyed. In | |
564 | * addition run the exit method for all existing network | |
565 | * namespaces. | |
566 | */ | |
567 | void unregister_pernet_device(struct pernet_operations *ops) | |
568 | { | |
569 | mutex_lock(&net_mutex); | |
570 | if (&ops->list == first_device) | |
571 | first_device = first_device->next; | |
572 | unregister_pernet_operations(ops); | |
573 | mutex_unlock(&net_mutex); | |
574 | } | |
575 | EXPORT_SYMBOL_GPL(unregister_pernet_device); |