]>
Commit | Line | Data |
---|---|---|
e005d193 JP |
1 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
2 | ||
5f256bec EB |
3 | #include <linux/workqueue.h> |
4 | #include <linux/rtnetlink.h> | |
5 | #include <linux/cache.h> | |
6 | #include <linux/slab.h> | |
7 | #include <linux/list.h> | |
8 | #include <linux/delay.h> | |
9dd776b6 | 9 | #include <linux/sched.h> |
c93cf61f | 10 | #include <linux/idr.h> |
11a28d37 | 11 | #include <linux/rculist.h> |
30ffee84 | 12 | #include <linux/nsproxy.h> |
0bb80f24 DH |
13 | #include <linux/fs.h> |
14 | #include <linux/proc_ns.h> | |
f0630529 | 15 | #include <linux/file.h> |
bc3b2d7f | 16 | #include <linux/export.h> |
038e7332 | 17 | #include <linux/user_namespace.h> |
0c7aecd4 | 18 | #include <linux/net_namespace.h> |
f719ff9b | 19 | #include <linux/sched/task.h> |
fbdeaed4 | 20 | #include <linux/uidgid.h> |
f719ff9b | 21 | |
0c7aecd4 ND |
22 | #include <net/sock.h> |
23 | #include <net/netlink.h> | |
5f256bec | 24 | #include <net/net_namespace.h> |
dec827d1 | 25 | #include <net/netns/generic.h> |
5f256bec EB |
26 | |
27 | /* | |
28 | * Our network namespace constructor/destructor lists | |
29 | */ | |
30 | ||
31 | static LIST_HEAD(pernet_list); | |
32 | static struct list_head *first_device = &pernet_list; | |
5f256bec | 33 | |
5f256bec | 34 | LIST_HEAD(net_namespace_list); |
b76a461f | 35 | EXPORT_SYMBOL_GPL(net_namespace_list); |
5f256bec | 36 | |
f0b07bb1 KT |
37 | /* Protects net_namespace_list. Nests inside rtnl_lock() */ |
38 | DECLARE_RWSEM(net_rwsem); | |
39 | EXPORT_SYMBOL_GPL(net_rwsem); | |
40 | ||
734b6541 | 41 | struct net init_net = { |
273c28bc | 42 | .count = REFCOUNT_INIT(1), |
b5082df8 | 43 | .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), |
734b6541 | 44 | }; |
ff4b9502 | 45 | EXPORT_SYMBOL(init_net); |
5f256bec | 46 | |
f8c46cb3 | 47 | static bool init_net_initialized; |
1a57feb8 | 48 | /* |
4420bf21 | 49 | * pernet_ops_rwsem: protects: pernet_list, net_generic_ids, |
1a57feb8 | 50 | * init_net_initialized and first_device pointer. |
8518e9bb KT |
51 | * This is internal net namespace object. Please, don't use it |
52 | * outside. | |
1a57feb8 | 53 | */ |
4420bf21 | 54 | DECLARE_RWSEM(pernet_ops_rwsem); |
554873e5 | 55 | EXPORT_SYMBOL_GPL(pernet_ops_rwsem); |
f8c46cb3 | 56 | |
6af2d5ff AD |
57 | #define MIN_PERNET_OPS_ID \ |
58 | ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) | |
59 | ||
dec827d1 PE |
60 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ |
61 | ||
073862ba ED |
62 | static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; |
63 | ||
64 | static struct net_generic *net_alloc_generic(void) | |
65 | { | |
66 | struct net_generic *ng; | |
6af2d5ff | 67 | unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); |
073862ba ED |
68 | |
69 | ng = kzalloc(generic_size, GFP_KERNEL); | |
70 | if (ng) | |
9bfc7b99 | 71 | ng->s.len = max_gen_ptrs; |
073862ba ED |
72 | |
73 | return ng; | |
74 | } | |
75 | ||
c7d03a00 | 76 | static int net_assign_generic(struct net *net, unsigned int id, void *data) |
05fceb4a JP |
77 | { |
78 | struct net_generic *ng, *old_ng; | |
79 | ||
6af2d5ff | 80 | BUG_ON(id < MIN_PERNET_OPS_ID); |
05fceb4a | 81 | |
1c87733d | 82 | old_ng = rcu_dereference_protected(net->gen, |
4420bf21 | 83 | lockdep_is_held(&pernet_ops_rwsem)); |
6af2d5ff AD |
84 | if (old_ng->s.len > id) { |
85 | old_ng->ptr[id] = data; | |
1a9a0592 AD |
86 | return 0; |
87 | } | |
05fceb4a | 88 | |
073862ba | 89 | ng = net_alloc_generic(); |
05fceb4a JP |
90 | if (ng == NULL) |
91 | return -ENOMEM; | |
92 | ||
93 | /* | |
94 | * Some synchronisation notes: | |
95 | * | |
96 | * The net_generic explores the net->gen array inside rcu | |
97 | * read section. Besides once set the net->gen->ptr[x] | |
98 | * pointer never changes (see rules in netns/generic.h). | |
99 | * | |
100 | * That said, we simply duplicate this array and schedule | |
101 | * the old copy for kfree after a grace period. | |
102 | */ | |
103 | ||
6af2d5ff AD |
104 | memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID], |
105 | (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *)); | |
106 | ng->ptr[id] = data; | |
05fceb4a JP |
107 | |
108 | rcu_assign_pointer(net->gen, ng); | |
9bfc7b99 | 109 | kfree_rcu(old_ng, s.rcu); |
05fceb4a JP |
110 | return 0; |
111 | } | |
112 | ||
f875bae0 EB |
113 | static int ops_init(const struct pernet_operations *ops, struct net *net) |
114 | { | |
b922934d JA |
115 | int err = -ENOMEM; |
116 | void *data = NULL; | |
117 | ||
f875bae0 | 118 | if (ops->id && ops->size) { |
b922934d | 119 | data = kzalloc(ops->size, GFP_KERNEL); |
f875bae0 | 120 | if (!data) |
b922934d | 121 | goto out; |
f875bae0 EB |
122 | |
123 | err = net_assign_generic(net, *ops->id, data); | |
b922934d JA |
124 | if (err) |
125 | goto cleanup; | |
f875bae0 | 126 | } |
b922934d | 127 | err = 0; |
f875bae0 | 128 | if (ops->init) |
b922934d JA |
129 | err = ops->init(net); |
130 | if (!err) | |
131 | return 0; | |
132 | ||
133 | cleanup: | |
134 | kfree(data); | |
135 | ||
136 | out: | |
137 | return err; | |
f875bae0 EB |
138 | } |
139 | ||
140 | static void ops_free(const struct pernet_operations *ops, struct net *net) | |
141 | { | |
142 | if (ops->id && ops->size) { | |
c7d03a00 | 143 | kfree(net_generic(net, *ops->id)); |
f875bae0 EB |
144 | } |
145 | } | |
146 | ||
72ad937a EB |
147 | static void ops_exit_list(const struct pernet_operations *ops, |
148 | struct list_head *net_exit_list) | |
149 | { | |
150 | struct net *net; | |
151 | if (ops->exit) { | |
152 | list_for_each_entry(net, net_exit_list, exit_list) | |
153 | ops->exit(net); | |
154 | } | |
72ad937a EB |
155 | if (ops->exit_batch) |
156 | ops->exit_batch(net_exit_list); | |
157 | } | |
158 | ||
159 | static void ops_free_list(const struct pernet_operations *ops, | |
160 | struct list_head *net_exit_list) | |
161 | { | |
162 | struct net *net; | |
163 | if (ops->size && ops->id) { | |
164 | list_for_each_entry(net, net_exit_list, exit_list) | |
165 | ops_free(ops, net); | |
166 | } | |
167 | } | |
168 | ||
95f38411 | 169 | /* should be called with nsid_lock held */ |
0c7aecd4 ND |
170 | static int alloc_netid(struct net *net, struct net *peer, int reqid) |
171 | { | |
3138dbf8 | 172 | int min = 0, max = 0; |
0c7aecd4 | 173 | |
0c7aecd4 ND |
174 | if (reqid >= 0) { |
175 | min = reqid; | |
176 | max = reqid + 1; | |
177 | } | |
178 | ||
95f38411 | 179 | return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC); |
0c7aecd4 ND |
180 | } |
181 | ||
/* Callback for idr_for_each(). When net equals peer, the id is returned so
 * that idr_for_each() stops. Because id 0 cannot be returned that way
 * (idr_for_each() would not stop), the magic value NET_ID_ZERO (-1) is
 * returned in its place.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (!net_eq(net, peer))
		return 0;

	return id ? id : NET_ID_ZERO;
}
194 | ||
95f38411 ND |
195 | /* Should be called with nsid_lock held. If a new id is assigned, the bool alloc |
196 | * is set to true, thus the caller knows that the new id must be notified via | |
197 | * rtnl. | |
198 | */ | |
3138dbf8 | 199 | static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) |
0c7aecd4 ND |
200 | { |
201 | int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); | |
3138dbf8 | 202 | bool alloc_it = *alloc; |
0c7aecd4 | 203 | |
3138dbf8 ND |
204 | *alloc = false; |
205 | ||
0c7aecd4 ND |
206 | /* Magic value for id 0. */ |
207 | if (id == NET_ID_ZERO) | |
208 | return 0; | |
209 | if (id > 0) | |
210 | return id; | |
211 | ||
3138dbf8 | 212 | if (alloc_it) { |
109582af | 213 | id = alloc_netid(net, peer, -1); |
3138dbf8 | 214 | *alloc = true; |
109582af ND |
215 | return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; |
216 | } | |
0c7aecd4 | 217 | |
109582af | 218 | return NETNSA_NSID_NOT_ASSIGNED; |
0c7aecd4 ND |
219 | } |
220 | ||
95f38411 | 221 | /* should be called with nsid_lock held */ |
3138dbf8 ND |
222 | static int __peernet2id(struct net *net, struct net *peer) |
223 | { | |
224 | bool no = false; | |
225 | ||
226 | return __peernet2id_alloc(net, peer, &no); | |
227 | } | |
228 | ||
229 | static void rtnl_net_notifyid(struct net *net, int cmd, int id); | |
0c7aecd4 ND |
230 | /* This function returns the id of a peer netns. If no id is assigned, one will |
231 | * be allocated and returned. | |
232 | */ | |
7a0877d4 | 233 | int peernet2id_alloc(struct net *net, struct net *peer) |
0c7aecd4 | 234 | { |
0c06bea9 | 235 | bool alloc = false, alive = false; |
3138dbf8 | 236 | int id; |
0c7aecd4 | 237 | |
273c28bc | 238 | if (refcount_read(&net->count) == 0) |
cfc44a4d | 239 | return NETNSA_NSID_NOT_ASSIGNED; |
fba143c6 | 240 | spin_lock_bh(&net->nsid_lock); |
0c06bea9 KT |
241 | /* |
242 | * When peer is obtained from RCU lists, we may race with | |
243 | * its cleanup. Check whether it's alive, and this guarantees | |
244 | * we never hash a peer back to net->netns_ids, after it has | |
245 | * just been idr_remove()'d from there in cleanup_net(). | |
246 | */ | |
247 | if (maybe_get_net(peer)) | |
248 | alive = alloc = true; | |
3138dbf8 | 249 | id = __peernet2id_alloc(net, peer, &alloc); |
fba143c6 | 250 | spin_unlock_bh(&net->nsid_lock); |
3138dbf8 ND |
251 | if (alloc && id >= 0) |
252 | rtnl_net_notifyid(net, RTM_NEWNSID, id); | |
0c06bea9 KT |
253 | if (alive) |
254 | put_net(peer); | |
3138dbf8 | 255 | return id; |
0c7aecd4 | 256 | } |
7cbebc8a | 257 | EXPORT_SYMBOL_GPL(peernet2id_alloc); |
0c7aecd4 | 258 | |
95f38411 | 259 | /* This function returns, if assigned, the id of a peer netns. */ |
59324cf3 | 260 | int peernet2id(struct net *net, struct net *peer) |
95f38411 | 261 | { |
95f38411 ND |
262 | int id; |
263 | ||
fba143c6 | 264 | spin_lock_bh(&net->nsid_lock); |
95f38411 | 265 | id = __peernet2id(net, peer); |
fba143c6 | 266 | spin_unlock_bh(&net->nsid_lock); |
95f38411 ND |
267 | return id; |
268 | } | |
38f507f1 | 269 | EXPORT_SYMBOL(peernet2id); |
95f38411 | 270 | |
59324cf3 ND |
271 | /* This function returns true is the peer netns has an id assigned into the |
272 | * current netns. | |
273 | */ | |
274 | bool peernet_has_id(struct net *net, struct net *peer) | |
275 | { | |
276 | return peernet2id(net, peer) >= 0; | |
277 | } | |
278 | ||
0c7aecd4 ND |
279 | struct net *get_net_ns_by_id(struct net *net, int id) |
280 | { | |
281 | struct net *peer; | |
282 | ||
283 | if (id < 0) | |
284 | return NULL; | |
285 | ||
286 | rcu_read_lock(); | |
287 | peer = idr_find(&net->netns_ids, id); | |
288 | if (peer) | |
21b59443 | 289 | peer = maybe_get_net(peer); |
0c7aecd4 ND |
290 | rcu_read_unlock(); |
291 | ||
292 | return peer; | |
293 | } | |
294 | ||
5f256bec EB |
295 | /* |
296 | * setup_net runs the initializers for the network namespace object. | |
297 | */ | |
038e7332 | 298 | static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) |
5f256bec | 299 | { |
4420bf21 | 300 | /* Must be called with pernet_ops_rwsem held */ |
f875bae0 | 301 | const struct pernet_operations *ops, *saved_ops; |
486a87f1 | 302 | int error = 0; |
72ad937a | 303 | LIST_HEAD(net_exit_list); |
5f256bec | 304 | |
273c28bc | 305 | refcount_set(&net->count, 1); |
c122e14d | 306 | refcount_set(&net->passive, 1); |
4e985ada | 307 | net->dev_base_seq = 1; |
038e7332 | 308 | net->user_ns = user_ns; |
0c7aecd4 | 309 | idr_init(&net->netns_ids); |
de133464 | 310 | spin_lock_init(&net->nsid_lock); |
d9ff3049 | 311 | mutex_init(&net->ipv4.ra_mutex); |
486a87f1 | 312 | |
768f3591 | 313 | list_for_each_entry(ops, &pernet_list, list) { |
f875bae0 EB |
314 | error = ops_init(ops, net); |
315 | if (error < 0) | |
316 | goto out_undo; | |
5f256bec | 317 | } |
f0b07bb1 | 318 | down_write(&net_rwsem); |
98f6c533 | 319 | list_add_tail_rcu(&net->list, &net_namespace_list); |
f0b07bb1 | 320 | up_write(&net_rwsem); |
5f256bec EB |
321 | out: |
322 | return error; | |
768f3591 | 323 | |
5f256bec EB |
324 | out_undo: |
325 | /* Walk through the list backwards calling the exit functions | |
326 | * for the pernet modules whose init functions did not fail. | |
327 | */ | |
72ad937a | 328 | list_add(&net->exit_list, &net_exit_list); |
f875bae0 | 329 | saved_ops = ops; |
72ad937a EB |
330 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) |
331 | ops_exit_list(ops, &net_exit_list); | |
332 | ||
f875bae0 EB |
333 | ops = saved_ops; |
334 | list_for_each_entry_continue_reverse(ops, &pernet_list, list) | |
72ad937a | 335 | ops_free_list(ops, &net_exit_list); |
310928d9 DL |
336 | |
337 | rcu_barrier(); | |
5f256bec EB |
338 | goto out; |
339 | } | |
340 | ||
7c3f1875 RK |
341 | static int __net_init net_defaults_init_net(struct net *net) |
342 | { | |
343 | net->core.sysctl_somaxconn = SOMAXCONN; | |
344 | return 0; | |
345 | } | |
346 | ||
347 | static struct pernet_operations net_defaults_ops = { | |
348 | .init = net_defaults_init_net, | |
349 | }; | |
350 | ||
351 | static __init int net_defaults_init(void) | |
352 | { | |
353 | if (register_pernet_subsys(&net_defaults_ops)) | |
354 | panic("Cannot initialize net default settings"); | |
355 | ||
356 | return 0; | |
357 | } | |
358 | ||
359 | core_initcall(net_defaults_init); | |
6a1a3b9f | 360 | |
ebe47d47 | 361 | #ifdef CONFIG_NET_NS |
2ed6afde AB |
362 | static struct ucounts *inc_net_namespaces(struct user_namespace *ns) |
363 | { | |
364 | return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES); | |
365 | } | |
366 | ||
367 | static void dec_net_namespaces(struct ucounts *ucounts) | |
368 | { | |
369 | dec_ucount(ucounts, UCOUNT_NET_NAMESPACES); | |
370 | } | |
371 | ||
08009a76 | 372 | static struct kmem_cache *net_cachep __ro_after_init; |
ebe47d47 CN |
373 | static struct workqueue_struct *netns_wq; |
374 | ||
486a87f1 | 375 | static struct net *net_alloc(void) |
45a19b0a | 376 | { |
486a87f1 DL |
377 | struct net *net = NULL; |
378 | struct net_generic *ng; | |
379 | ||
380 | ng = net_alloc_generic(); | |
381 | if (!ng) | |
382 | goto out; | |
383 | ||
384 | net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); | |
45a19b0a | 385 | if (!net) |
486a87f1 | 386 | goto out_free; |
45a19b0a | 387 | |
486a87f1 DL |
388 | rcu_assign_pointer(net->gen, ng); |
389 | out: | |
390 | return net; | |
391 | ||
392 | out_free: | |
393 | kfree(ng); | |
394 | goto out; | |
395 | } | |
396 | ||
397 | static void net_free(struct net *net) | |
398 | { | |
416c51e1 | 399 | kfree(rcu_access_pointer(net->gen)); |
45a19b0a JFS |
400 | kmem_cache_free(net_cachep, net); |
401 | } | |
402 | ||
a685e089 AV |
403 | void net_drop_ns(void *p) |
404 | { | |
405 | struct net *ns = p; | |
c122e14d | 406 | if (ns && refcount_dec_and_test(&ns->passive)) |
a685e089 AV |
407 | net_free(ns); |
408 | } | |
409 | ||
038e7332 EB |
410 | struct net *copy_net_ns(unsigned long flags, |
411 | struct user_namespace *user_ns, struct net *old_net) | |
9dd776b6 | 412 | { |
70328660 | 413 | struct ucounts *ucounts; |
088eb2d9 AD |
414 | struct net *net; |
415 | int rv; | |
9dd776b6 | 416 | |
911cb193 RL |
417 | if (!(flags & CLONE_NEWNET)) |
418 | return get_net(old_net); | |
419 | ||
70328660 EB |
420 | ucounts = inc_net_namespaces(user_ns); |
421 | if (!ucounts) | |
df75e774 | 422 | return ERR_PTR(-ENOSPC); |
70328660 | 423 | |
088eb2d9 | 424 | net = net_alloc(); |
70328660 | 425 | if (!net) { |
5ba049a5 KT |
426 | rv = -ENOMEM; |
427 | goto dec_ucounts; | |
70328660 | 428 | } |
5ba049a5 KT |
429 | refcount_set(&net->passive, 1); |
430 | net->ucounts = ucounts; | |
038e7332 | 431 | get_user_ns(user_ns); |
094374e5 | 432 | |
4420bf21 | 433 | rv = down_read_killable(&pernet_ops_rwsem); |
5ba049a5 KT |
434 | if (rv < 0) |
435 | goto put_userns; | |
19efbd93 | 436 | |
038e7332 | 437 | rv = setup_net(net, user_ns); |
19efbd93 | 438 | |
4420bf21 | 439 | up_read(&pernet_ops_rwsem); |
19efbd93 | 440 | |
088eb2d9 | 441 | if (rv < 0) { |
5ba049a5 | 442 | put_userns: |
038e7332 | 443 | put_user_ns(user_ns); |
a685e089 | 444 | net_drop_ns(net); |
5ba049a5 KT |
445 | dec_ucounts: |
446 | dec_net_namespaces(ucounts); | |
088eb2d9 AD |
447 | return ERR_PTR(rv); |
448 | } | |
449 | return net; | |
450 | } | |
486a87f1 | 451 | |
fbdeaed4 TH |
452 | /** |
453 | * net_ns_get_ownership - get sysfs ownership data for @net | |
454 | * @net: network namespace in question (can be NULL) | |
455 | * @uid: kernel user ID for sysfs objects | |
456 | * @gid: kernel group ID for sysfs objects | |
457 | * | |
458 | * Returns the uid/gid pair of root in the user namespace associated with the | |
459 | * given network namespace. | |
460 | */ | |
461 | void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid) | |
462 | { | |
463 | if (net) { | |
464 | kuid_t ns_root_uid = make_kuid(net->user_ns, 0); | |
465 | kgid_t ns_root_gid = make_kgid(net->user_ns, 0); | |
466 | ||
467 | if (uid_valid(ns_root_uid)) | |
468 | *uid = ns_root_uid; | |
469 | ||
470 | if (gid_valid(ns_root_gid)) | |
471 | *gid = ns_root_gid; | |
472 | } else { | |
473 | *uid = GLOBAL_ROOT_UID; | |
474 | *gid = GLOBAL_ROOT_GID; | |
475 | } | |
476 | } | |
477 | EXPORT_SYMBOL_GPL(net_ns_get_ownership); | |
478 | ||
fb07a820 KT |
479 | static void unhash_nsid(struct net *net, struct net *last) |
480 | { | |
481 | struct net *tmp; | |
482 | /* This function is only called from cleanup_net() work, | |
483 | * and this work is the only process, that may delete | |
484 | * a net from net_namespace_list. So, when the below | |
485 | * is executing, the list may only grow. Thus, we do not | |
f0b07bb1 | 486 | * use for_each_net_rcu() or net_rwsem. |
fb07a820 KT |
487 | */ |
488 | for_each_net(tmp) { | |
489 | int id; | |
490 | ||
491 | spin_lock_bh(&tmp->nsid_lock); | |
492 | id = __peernet2id(tmp, net); | |
493 | if (id >= 0) | |
494 | idr_remove(&tmp->netns_ids, id); | |
495 | spin_unlock_bh(&tmp->nsid_lock); | |
496 | if (id >= 0) | |
497 | rtnl_net_notifyid(tmp, RTM_DELNSID, id); | |
498 | if (tmp == last) | |
499 | break; | |
500 | } | |
501 | spin_lock_bh(&net->nsid_lock); | |
502 | idr_destroy(&net->netns_ids); | |
503 | spin_unlock_bh(&net->nsid_lock); | |
504 | } | |
505 | ||
65b7b5b9 | 506 | static LLIST_HEAD(cleanup_list); |
2b035b39 | 507 | |
6a1a3b9f PE |
508 | static void cleanup_net(struct work_struct *work) |
509 | { | |
f875bae0 | 510 | const struct pernet_operations *ops; |
fb07a820 | 511 | struct net *net, *tmp, *last; |
65b7b5b9 | 512 | struct llist_node *net_kill_list; |
72ad937a | 513 | LIST_HEAD(net_exit_list); |
6a1a3b9f | 514 | |
2b035b39 | 515 | /* Atomically snapshot the list of namespaces to cleanup */ |
65b7b5b9 | 516 | net_kill_list = llist_del_all(&cleanup_list); |
6a1a3b9f | 517 | |
4420bf21 | 518 | down_read(&pernet_ops_rwsem); |
6a1a3b9f PE |
519 | |
520 | /* Don't let anyone else find us. */ | |
f0b07bb1 | 521 | down_write(&net_rwsem); |
65b7b5b9 | 522 | llist_for_each_entry(net, net_kill_list, cleanup_list) |
2b035b39 | 523 | list_del_rcu(&net->list); |
fb07a820 KT |
524 | /* Cache last net. After we unlock rtnl, no one new net |
525 | * added to net_namespace_list can assign nsid pointer | |
526 | * to a net from net_kill_list (see peernet2id_alloc()). | |
527 | * So, we skip them in unhash_nsid(). | |
528 | * | |
529 | * Note, that unhash_nsid() does not delete nsid links | |
530 | * between net_kill_list's nets, as they've already | |
531 | * deleted from net_namespace_list. But, this would be | |
532 | * useless anyway, as netns_ids are destroyed there. | |
533 | */ | |
534 | last = list_last_entry(&net_namespace_list, struct net, list); | |
f0b07bb1 | 535 | up_write(&net_rwsem); |
6d458f5b | 536 | |
65b7b5b9 | 537 | llist_for_each_entry(net, net_kill_list, cleanup_list) { |
fb07a820 KT |
538 | unhash_nsid(net, last); |
539 | list_add_tail(&net->exit_list, &net_exit_list); | |
72ad937a | 540 | } |
6a1a3b9f | 541 | |
11a28d37 JB |
542 | /* |
543 | * Another CPU might be rcu-iterating the list, wait for it. | |
544 | * This needs to be before calling the exit() notifiers, so | |
545 | * the rcu_barrier() below isn't sufficient alone. | |
546 | */ | |
547 | synchronize_rcu(); | |
548 | ||
6a1a3b9f | 549 | /* Run all of the network namespace exit methods */ |
72ad937a EB |
550 | list_for_each_entry_reverse(ops, &pernet_list, list) |
551 | ops_exit_list(ops, &net_exit_list); | |
552 | ||
f875bae0 | 553 | /* Free the net generic variables */ |
72ad937a EB |
554 | list_for_each_entry_reverse(ops, &pernet_list, list) |
555 | ops_free_list(ops, &net_exit_list); | |
6a1a3b9f | 556 | |
4420bf21 | 557 | up_read(&pernet_ops_rwsem); |
6a1a3b9f PE |
558 | |
559 | /* Ensure there are no outstanding rcu callbacks using this | |
560 | * network namespace. | |
561 | */ | |
562 | rcu_barrier(); | |
563 | ||
564 | /* Finally it is safe to free my network namespace structure */ | |
72ad937a EB |
565 | list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { |
566 | list_del_init(&net->exit_list); | |
70328660 | 567 | dec_net_namespaces(net->ucounts); |
038e7332 | 568 | put_user_ns(net->user_ns); |
a685e089 | 569 | net_drop_ns(net); |
2b035b39 | 570 | } |
6a1a3b9f | 571 | } |
7866cc57 FW |
572 | |
573 | /** | |
574 | * net_ns_barrier - wait until concurrent net_cleanup_work is done | |
575 | * | |
576 | * cleanup_net runs from work queue and will first remove namespaces | |
577 | * from the global list, then run net exit functions. | |
578 | * | |
579 | * Call this in module exit path to make sure that all netns | |
580 | * ->exit ops have been invoked before the function is removed. | |
581 | */ | |
582 | void net_ns_barrier(void) | |
583 | { | |
4420bf21 KT |
584 | down_write(&pernet_ops_rwsem); |
585 | up_write(&pernet_ops_rwsem); | |
7866cc57 FW |
586 | } |
587 | EXPORT_SYMBOL(net_ns_barrier); | |
588 | ||
2b035b39 | 589 | static DECLARE_WORK(net_cleanup_work, cleanup_net); |
6a1a3b9f PE |
590 | |
591 | void __put_net(struct net *net) | |
592 | { | |
593 | /* Cleanup the network namespace in process context */ | |
8349efd9 KT |
594 | if (llist_add(&net->cleanup_list, &cleanup_list)) |
595 | queue_work(netns_wq, &net_cleanup_work); | |
6a1a3b9f PE |
596 | } |
597 | EXPORT_SYMBOL_GPL(__put_net); | |
598 | ||
956c9207 SR |
599 | struct net *get_net_ns_by_fd(int fd) |
600 | { | |
956c9207 | 601 | struct file *file; |
33c42940 | 602 | struct ns_common *ns; |
956c9207 SR |
603 | struct net *net; |
604 | ||
956c9207 | 605 | file = proc_ns_fget(fd); |
c316e6a3 AV |
606 | if (IS_ERR(file)) |
607 | return ERR_CAST(file); | |
956c9207 | 608 | |
f77c8014 | 609 | ns = get_proc_ns(file_inode(file)); |
33c42940 AV |
610 | if (ns->ops == &netns_operations) |
611 | net = get_net(container_of(ns, struct net, ns)); | |
c316e6a3 AV |
612 | else |
613 | net = ERR_PTR(-EINVAL); | |
956c9207 | 614 | |
c316e6a3 | 615 | fput(file); |
956c9207 SR |
616 | return net; |
617 | } | |
618 | ||
6a1a3b9f | 619 | #else |
956c9207 SR |
620 | struct net *get_net_ns_by_fd(int fd) |
621 | { | |
622 | return ERR_PTR(-EINVAL); | |
623 | } | |
6a1a3b9f | 624 | #endif |
4b681c82 | 625 | EXPORT_SYMBOL_GPL(get_net_ns_by_fd); |
6a1a3b9f | 626 | |
30ffee84 JB |
627 | struct net *get_net_ns_by_pid(pid_t pid) |
628 | { | |
629 | struct task_struct *tsk; | |
630 | struct net *net; | |
631 | ||
632 | /* Lookup the network namespace */ | |
633 | net = ERR_PTR(-ESRCH); | |
634 | rcu_read_lock(); | |
635 | tsk = find_task_by_vpid(pid); | |
636 | if (tsk) { | |
637 | struct nsproxy *nsproxy; | |
728dba3a EB |
638 | task_lock(tsk); |
639 | nsproxy = tsk->nsproxy; | |
30ffee84 JB |
640 | if (nsproxy) |
641 | net = get_net(nsproxy->net_ns); | |
728dba3a | 642 | task_unlock(tsk); |
30ffee84 JB |
643 | } |
644 | rcu_read_unlock(); | |
645 | return net; | |
646 | } | |
647 | EXPORT_SYMBOL_GPL(get_net_ns_by_pid); | |
648 | ||
98f842e6 EB |
649 | static __net_init int net_ns_net_init(struct net *net) |
650 | { | |
33c42940 AV |
651 | #ifdef CONFIG_NET_NS |
652 | net->ns.ops = &netns_operations; | |
653 | #endif | |
6344c433 | 654 | return ns_alloc_inum(&net->ns); |
98f842e6 EB |
655 | } |
656 | ||
657 | static __net_exit void net_ns_net_exit(struct net *net) | |
658 | { | |
6344c433 | 659 | ns_free_inum(&net->ns); |
98f842e6 EB |
660 | } |
661 | ||
662 | static struct pernet_operations __net_initdata net_ns_ops = { | |
663 | .init = net_ns_net_init, | |
664 | .exit = net_ns_net_exit, | |
665 | }; | |
666 | ||
3ee5256d | 667 | static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { |
0c7aecd4 ND |
668 | [NETNSA_NONE] = { .type = NLA_UNSPEC }, |
669 | [NETNSA_NSID] = { .type = NLA_S32 }, | |
670 | [NETNSA_PID] = { .type = NLA_U32 }, | |
671 | [NETNSA_FD] = { .type = NLA_U32 }, | |
cff478b9 | 672 | [NETNSA_TARGET_NSID] = { .type = NLA_S32 }, |
0c7aecd4 ND |
673 | }; |
674 | ||
c21ef3e3 DA |
675 | static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, |
676 | struct netlink_ext_ack *extack) | |
0c7aecd4 ND |
677 | { |
678 | struct net *net = sock_net(skb->sk); | |
679 | struct nlattr *tb[NETNSA_MAX + 1]; | |
4a7f7bc6 | 680 | struct nlattr *nla; |
0c7aecd4 ND |
681 | struct net *peer; |
682 | int nsid, err; | |
683 | ||
684 | err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, | |
c21ef3e3 | 685 | rtnl_net_policy, extack); |
0c7aecd4 ND |
686 | if (err < 0) |
687 | return err; | |
4a7f7bc6 ND |
688 | if (!tb[NETNSA_NSID]) { |
689 | NL_SET_ERR_MSG(extack, "nsid is missing"); | |
0c7aecd4 | 690 | return -EINVAL; |
4a7f7bc6 | 691 | } |
0c7aecd4 ND |
692 | nsid = nla_get_s32(tb[NETNSA_NSID]); |
693 | ||
4a7f7bc6 | 694 | if (tb[NETNSA_PID]) { |
0c7aecd4 | 695 | peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); |
4a7f7bc6 ND |
696 | nla = tb[NETNSA_PID]; |
697 | } else if (tb[NETNSA_FD]) { | |
0c7aecd4 | 698 | peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); |
4a7f7bc6 ND |
699 | nla = tb[NETNSA_FD]; |
700 | } else { | |
701 | NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); | |
0c7aecd4 | 702 | return -EINVAL; |
4a7f7bc6 ND |
703 | } |
704 | if (IS_ERR(peer)) { | |
705 | NL_SET_BAD_ATTR(extack, nla); | |
706 | NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); | |
0c7aecd4 | 707 | return PTR_ERR(peer); |
4a7f7bc6 | 708 | } |
0c7aecd4 | 709 | |
fba143c6 | 710 | spin_lock_bh(&net->nsid_lock); |
3138dbf8 | 711 | if (__peernet2id(net, peer) >= 0) { |
fba143c6 | 712 | spin_unlock_bh(&net->nsid_lock); |
0c7aecd4 | 713 | err = -EEXIST; |
4a7f7bc6 ND |
714 | NL_SET_BAD_ATTR(extack, nla); |
715 | NL_SET_ERR_MSG(extack, | |
716 | "Peer netns already has a nsid assigned"); | |
0c7aecd4 ND |
717 | goto out; |
718 | } | |
719 | ||
720 | err = alloc_netid(net, peer, nsid); | |
fba143c6 | 721 | spin_unlock_bh(&net->nsid_lock); |
3138dbf8 ND |
722 | if (err >= 0) { |
723 | rtnl_net_notifyid(net, RTM_NEWNSID, err); | |
0c7aecd4 | 724 | err = 0; |
4a7f7bc6 | 725 | } else if (err == -ENOSPC && nsid >= 0) { |
10d486a3 | 726 | err = -EEXIST; |
4a7f7bc6 ND |
727 | NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]); |
728 | NL_SET_ERR_MSG(extack, "The specified nsid is already used"); | |
3138dbf8 | 729 | } |
0c7aecd4 ND |
730 | out: |
731 | put_net(peer); | |
732 | return err; | |
733 | } | |
734 | ||
735 | static int rtnl_net_get_size(void) | |
736 | { | |
737 | return NLMSG_ALIGN(sizeof(struct rtgenmsg)) | |
738 | + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ | |
288f06a0 | 739 | + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */ |
0c7aecd4 ND |
740 | ; |
741 | } | |
742 | ||
a0732ad1 ND |
743 | struct net_fill_args { |
744 | u32 portid; | |
745 | u32 seq; | |
746 | int flags; | |
747 | int cmd; | |
748 | int nsid; | |
288f06a0 ND |
749 | bool add_ref; |
750 | int ref_nsid; | |
a0732ad1 ND |
751 | }; |
752 | ||
753 | static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args) | |
0c7aecd4 ND |
754 | { |
755 | struct nlmsghdr *nlh; | |
756 | struct rtgenmsg *rth; | |
0c7aecd4 | 757 | |
a0732ad1 ND |
758 | nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth), |
759 | args->flags); | |
0c7aecd4 ND |
760 | if (!nlh) |
761 | return -EMSGSIZE; | |
762 | ||
763 | rth = nlmsg_data(nlh); | |
764 | rth->rtgen_family = AF_UNSPEC; | |
765 | ||
a0732ad1 | 766 | if (nla_put_s32(skb, NETNSA_NSID, args->nsid)) |
0c7aecd4 ND |
767 | goto nla_put_failure; |
768 | ||
288f06a0 ND |
769 | if (args->add_ref && |
770 | nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid)) | |
771 | goto nla_put_failure; | |
772 | ||
0c7aecd4 ND |
773 | nlmsg_end(skb, nlh); |
774 | return 0; | |
775 | ||
776 | nla_put_failure: | |
777 | nlmsg_cancel(skb, nlh); | |
778 | return -EMSGSIZE; | |
779 | } | |
780 | ||
c21ef3e3 DA |
781 | static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, |
782 | struct netlink_ext_ack *extack) | |
0c7aecd4 ND |
783 | { |
784 | struct net *net = sock_net(skb->sk); | |
785 | struct nlattr *tb[NETNSA_MAX + 1]; | |
a0732ad1 ND |
786 | struct net_fill_args fillargs = { |
787 | .portid = NETLINK_CB(skb).portid, | |
788 | .seq = nlh->nlmsg_seq, | |
789 | .cmd = RTM_NEWNSID, | |
790 | }; | |
cff478b9 | 791 | struct net *peer, *target = net; |
4a7f7bc6 | 792 | struct nlattr *nla; |
0c7aecd4 | 793 | struct sk_buff *msg; |
a0732ad1 | 794 | int err; |
0c7aecd4 ND |
795 | |
796 | err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, | |
c21ef3e3 | 797 | rtnl_net_policy, extack); |
0c7aecd4 ND |
798 | if (err < 0) |
799 | return err; | |
4a7f7bc6 | 800 | if (tb[NETNSA_PID]) { |
0c7aecd4 | 801 | peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); |
4a7f7bc6 ND |
802 | nla = tb[NETNSA_PID]; |
803 | } else if (tb[NETNSA_FD]) { | |
0c7aecd4 | 804 | peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); |
4a7f7bc6 | 805 | nla = tb[NETNSA_FD]; |
3a4f68bf ND |
806 | } else if (tb[NETNSA_NSID]) { |
807 | peer = get_net_ns_by_id(net, nla_get_u32(tb[NETNSA_NSID])); | |
808 | if (!peer) | |
809 | peer = ERR_PTR(-ENOENT); | |
810 | nla = tb[NETNSA_NSID]; | |
4a7f7bc6 ND |
811 | } else { |
812 | NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); | |
0c7aecd4 | 813 | return -EINVAL; |
4a7f7bc6 | 814 | } |
0c7aecd4 | 815 | |
4a7f7bc6 ND |
816 | if (IS_ERR(peer)) { |
817 | NL_SET_BAD_ATTR(extack, nla); | |
818 | NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); | |
0c7aecd4 | 819 | return PTR_ERR(peer); |
4a7f7bc6 | 820 | } |
0c7aecd4 | 821 | |
cff478b9 ND |
822 | if (tb[NETNSA_TARGET_NSID]) { |
823 | int id = nla_get_s32(tb[NETNSA_TARGET_NSID]); | |
824 | ||
825 | target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id); | |
826 | if (IS_ERR(target)) { | |
827 | NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]); | |
828 | NL_SET_ERR_MSG(extack, | |
829 | "Target netns reference is invalid"); | |
830 | err = PTR_ERR(target); | |
831 | goto out; | |
832 | } | |
288f06a0 ND |
833 | fillargs.add_ref = true; |
834 | fillargs.ref_nsid = peernet2id(net, peer); | |
cff478b9 ND |
835 | } |
836 | ||
0c7aecd4 ND |
837 | msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); |
838 | if (!msg) { | |
839 | err = -ENOMEM; | |
840 | goto out; | |
841 | } | |
842 | ||
cff478b9 | 843 | fillargs.nsid = peernet2id(target, peer); |
a0732ad1 | 844 | err = rtnl_net_fill(msg, &fillargs); |
0c7aecd4 ND |
845 | if (err < 0) |
846 | goto err_out; | |
847 | ||
848 | err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid); | |
849 | goto out; | |
850 | ||
851 | err_out: | |
852 | nlmsg_free(msg); | |
853 | out: | |
288f06a0 | 854 | if (fillargs.add_ref) |
cff478b9 | 855 | put_net(target); |
0c7aecd4 ND |
856 | put_net(peer); |
857 | return err; | |
858 | } | |
859 | ||
a143c40c | 860 | struct rtnl_net_dump_cb { |
cff478b9 | 861 | struct net *tgt_net; |
288f06a0 | 862 | struct net *ref_net; |
a143c40c | 863 | struct sk_buff *skb; |
a0732ad1 | 864 | struct net_fill_args fillargs; |
a143c40c ND |
865 | int idx; |
866 | int s_idx; | |
867 | }; | |
868 | ||
869 | static int rtnl_net_dumpid_one(int id, void *peer, void *data) | |
870 | { | |
871 | struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; | |
872 | int ret; | |
873 | ||
874 | if (net_cb->idx < net_cb->s_idx) | |
875 | goto cont; | |
876 | ||
a0732ad1 | 877 | net_cb->fillargs.nsid = id; |
288f06a0 ND |
878 | if (net_cb->fillargs.add_ref) |
879 | net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer); | |
a0732ad1 | 880 | ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs); |
a143c40c ND |
881 | if (ret < 0) |
882 | return ret; | |
883 | ||
884 | cont: | |
885 | net_cb->idx++; | |
886 | return 0; | |
887 | } | |
888 | ||
cff478b9 ND |
889 | static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk, |
890 | struct rtnl_net_dump_cb *net_cb, | |
891 | struct netlink_callback *cb) | |
892 | { | |
893 | struct netlink_ext_ack *extack = cb->extack; | |
894 | struct nlattr *tb[NETNSA_MAX + 1]; | |
895 | int err, i; | |
896 | ||
897 | err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, | |
898 | rtnl_net_policy, extack); | |
899 | if (err < 0) | |
900 | return err; | |
901 | ||
902 | for (i = 0; i <= NETNSA_MAX; i++) { | |
903 | if (!tb[i]) | |
904 | continue; | |
905 | ||
906 | if (i == NETNSA_TARGET_NSID) { | |
907 | struct net *net; | |
908 | ||
909 | net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i])); | |
910 | if (IS_ERR(net)) { | |
911 | NL_SET_BAD_ATTR(extack, tb[i]); | |
912 | NL_SET_ERR_MSG(extack, | |
913 | "Invalid target network namespace id"); | |
914 | return PTR_ERR(net); | |
915 | } | |
288f06a0 ND |
916 | net_cb->fillargs.add_ref = true; |
917 | net_cb->ref_net = net_cb->tgt_net; | |
cff478b9 | 918 | net_cb->tgt_net = net; |
cff478b9 ND |
919 | } else { |
920 | NL_SET_BAD_ATTR(extack, tb[i]); | |
921 | NL_SET_ERR_MSG(extack, | |
922 | "Unsupported attribute in dump request"); | |
923 | return -EINVAL; | |
924 | } | |
925 | } | |
926 | ||
927 | return 0; | |
928 | } | |
929 | ||
a143c40c ND |
930 | static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) |
931 | { | |
a143c40c | 932 | struct rtnl_net_dump_cb net_cb = { |
cff478b9 | 933 | .tgt_net = sock_net(skb->sk), |
a143c40c | 934 | .skb = skb, |
a0732ad1 ND |
935 | .fillargs = { |
936 | .portid = NETLINK_CB(cb->skb).portid, | |
937 | .seq = cb->nlh->nlmsg_seq, | |
938 | .flags = NLM_F_MULTI, | |
939 | .cmd = RTM_NEWNSID, | |
940 | }, | |
a143c40c ND |
941 | .idx = 0, |
942 | .s_idx = cb->args[0], | |
943 | }; | |
cff478b9 | 944 | int err = 0; |
a143c40c | 945 | |
cff478b9 ND |
946 | if (cb->strict_check) { |
947 | err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb); | |
948 | if (err < 0) | |
949 | goto end; | |
f80f14c3 DA |
950 | } |
951 | ||
cff478b9 | 952 | spin_lock_bh(&net_cb.tgt_net->nsid_lock); |
288f06a0 ND |
953 | if (net_cb.fillargs.add_ref && |
954 | !net_eq(net_cb.ref_net, net_cb.tgt_net) && | |
955 | !spin_trylock_bh(&net_cb.ref_net->nsid_lock)) { | |
956 | spin_unlock_bh(&net_cb.tgt_net->nsid_lock); | |
957 | err = -EAGAIN; | |
958 | goto end; | |
959 | } | |
cff478b9 | 960 | idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb); |
288f06a0 ND |
961 | if (net_cb.fillargs.add_ref && |
962 | !net_eq(net_cb.ref_net, net_cb.tgt_net)) | |
963 | spin_unlock_bh(&net_cb.ref_net->nsid_lock); | |
cff478b9 | 964 | spin_unlock_bh(&net_cb.tgt_net->nsid_lock); |
a143c40c ND |
965 | |
966 | cb->args[0] = net_cb.idx; | |
cff478b9 | 967 | end: |
288f06a0 | 968 | if (net_cb.fillargs.add_ref) |
cff478b9 ND |
969 | put_net(net_cb.tgt_net); |
970 | return err < 0 ? err : skb->len; | |
a143c40c ND |
971 | } |
972 | ||
cab3c8ec | 973 | static void rtnl_net_notifyid(struct net *net, int cmd, int id) |
9a963454 | 974 | { |
a0732ad1 ND |
975 | struct net_fill_args fillargs = { |
976 | .cmd = cmd, | |
977 | .nsid = id, | |
978 | }; | |
9a963454 ND |
979 | struct sk_buff *msg; |
980 | int err = -ENOMEM; | |
981 | ||
982 | msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); | |
983 | if (!msg) | |
984 | goto out; | |
985 | ||
a0732ad1 | 986 | err = rtnl_net_fill(msg, &fillargs); |
9a963454 ND |
987 | if (err < 0) |
988 | goto err_out; | |
989 | ||
990 | rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0); | |
991 | return; | |
992 | ||
993 | err_out: | |
994 | nlmsg_free(msg); | |
995 | out: | |
996 | rtnl_set_sk_err(net, RTNLGRP_NSID, err); | |
997 | } | |
998 | ||
5f256bec EB |
999 | static int __init net_ns_init(void) |
1000 | { | |
486a87f1 | 1001 | struct net_generic *ng; |
5f256bec | 1002 | |
d57a9212 | 1003 | #ifdef CONFIG_NET_NS |
5f256bec EB |
1004 | net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), |
1005 | SMP_CACHE_BYTES, | |
30855ffc | 1006 | SLAB_PANIC|SLAB_ACCOUNT, NULL); |
3ef1355d BT |
1007 | |
1008 | /* Create workqueue for cleanup */ | |
1009 | netns_wq = create_singlethread_workqueue("netns"); | |
1010 | if (!netns_wq) | |
1011 | panic("Could not create netns workq"); | |
d57a9212 | 1012 | #endif |
3ef1355d | 1013 | |
486a87f1 DL |
1014 | ng = net_alloc_generic(); |
1015 | if (!ng) | |
1016 | panic("Could not allocate generic netns"); | |
1017 | ||
1018 | rcu_assign_pointer(init_net.gen, ng); | |
1019 | ||
4420bf21 | 1020 | down_write(&pernet_ops_rwsem); |
038e7332 | 1021 | if (setup_net(&init_net, &init_user_ns)) |
ca0f3112 | 1022 | panic("Could not setup the initial network namespace"); |
5f256bec | 1023 | |
f8c46cb3 | 1024 | init_net_initialized = true; |
4420bf21 | 1025 | up_write(&pernet_ops_rwsem); |
5f256bec | 1026 | |
0eb987c8 AP |
1027 | if (register_pernet_subsys(&net_ns_ops)) |
1028 | panic("Could not register network namespace subsystems"); | |
98f842e6 | 1029 | |
165b9117 FW |
1030 | rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, |
1031 | RTNL_FLAG_DOIT_UNLOCKED); | |
a143c40c | 1032 | rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, |
165b9117 | 1033 | RTNL_FLAG_DOIT_UNLOCKED); |
0c7aecd4 | 1034 | |
5f256bec EB |
1035 | return 0; |
1036 | } | |
1037 | ||
1038 | pure_initcall(net_ns_init); | |
1039 | ||
ed160e83 | 1040 | #ifdef CONFIG_NET_NS |
f875bae0 EB |
1041 | static int __register_pernet_operations(struct list_head *list, |
1042 | struct pernet_operations *ops) | |
5f256bec | 1043 | { |
72ad937a | 1044 | struct net *net; |
5f256bec | 1045 | int error; |
72ad937a | 1046 | LIST_HEAD(net_exit_list); |
5f256bec | 1047 | |
5f256bec | 1048 | list_add_tail(&ops->list, list); |
f875bae0 | 1049 | if (ops->init || (ops->id && ops->size)) { |
f0b07bb1 KT |
1050 | /* We held write locked pernet_ops_rwsem, and parallel |
1051 | * setup_net() and cleanup_net() are not possible. | |
1052 | */ | |
1dba323b | 1053 | for_each_net(net) { |
f875bae0 | 1054 | error = ops_init(ops, net); |
5f256bec EB |
1055 | if (error) |
1056 | goto out_undo; | |
72ad937a | 1057 | list_add_tail(&net->exit_list, &net_exit_list); |
5f256bec EB |
1058 | } |
1059 | } | |
1dba323b | 1060 | return 0; |
5f256bec EB |
1061 | |
1062 | out_undo: | |
1063 | /* If I have an error cleanup all namespaces I initialized */ | |
1064 | list_del(&ops->list); | |
72ad937a EB |
1065 | ops_exit_list(ops, &net_exit_list); |
1066 | ops_free_list(ops, &net_exit_list); | |
1dba323b | 1067 | return error; |
5f256bec EB |
1068 | } |
1069 | ||
f875bae0 | 1070 | static void __unregister_pernet_operations(struct pernet_operations *ops) |
5f256bec EB |
1071 | { |
1072 | struct net *net; | |
72ad937a | 1073 | LIST_HEAD(net_exit_list); |
5f256bec EB |
1074 | |
1075 | list_del(&ops->list); | |
f0b07bb1 | 1076 | /* See comment in __register_pernet_operations() */ |
72ad937a EB |
1077 | for_each_net(net) |
1078 | list_add_tail(&net->exit_list, &net_exit_list); | |
1079 | ops_exit_list(ops, &net_exit_list); | |
1080 | ops_free_list(ops, &net_exit_list); | |
5f256bec EB |
1081 | } |
1082 | ||
ed160e83 DL |
1083 | #else |
1084 | ||
f875bae0 EB |
1085 | static int __register_pernet_operations(struct list_head *list, |
1086 | struct pernet_operations *ops) | |
ed160e83 | 1087 | { |
f8c46cb3 DT |
1088 | if (!init_net_initialized) { |
1089 | list_add_tail(&ops->list, list); | |
1090 | return 0; | |
1091 | } | |
1092 | ||
b922934d | 1093 | return ops_init(ops, &init_net); |
ed160e83 DL |
1094 | } |
1095 | ||
f875bae0 | 1096 | static void __unregister_pernet_operations(struct pernet_operations *ops) |
ed160e83 | 1097 | { |
f8c46cb3 DT |
1098 | if (!init_net_initialized) { |
1099 | list_del(&ops->list); | |
1100 | } else { | |
1101 | LIST_HEAD(net_exit_list); | |
1102 | list_add(&init_net.exit_list, &net_exit_list); | |
1103 | ops_exit_list(ops, &net_exit_list); | |
1104 | ops_free_list(ops, &net_exit_list); | |
1105 | } | |
ed160e83 | 1106 | } |
f875bae0 EB |
1107 | |
1108 | #endif /* CONFIG_NET_NS */ | |
ed160e83 | 1109 | |
c93cf61f PE |
1110 | static DEFINE_IDA(net_generic_ids); |
1111 | ||
f875bae0 EB |
1112 | static int register_pernet_operations(struct list_head *list, |
1113 | struct pernet_operations *ops) | |
1114 | { | |
1115 | int error; | |
1116 | ||
1117 | if (ops->id) { | |
6e77cc47 MW |
1118 | error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID, |
1119 | GFP_KERNEL); | |
1120 | if (error < 0) | |
f875bae0 | 1121 | return error; |
6e77cc47 | 1122 | *ops->id = error; |
6af2d5ff | 1123 | max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); |
f875bae0 EB |
1124 | } |
1125 | error = __register_pernet_operations(list, ops); | |
3a765eda EB |
1126 | if (error) { |
1127 | rcu_barrier(); | |
1128 | if (ops->id) | |
6e77cc47 | 1129 | ida_free(&net_generic_ids, *ops->id); |
3a765eda | 1130 | } |
f875bae0 EB |
1131 | |
1132 | return error; | |
1133 | } | |
1134 | ||
1135 | static void unregister_pernet_operations(struct pernet_operations *ops) | |
1136 | { | |
f875bae0 | 1137 | __unregister_pernet_operations(ops); |
3a765eda | 1138 | rcu_barrier(); |
f875bae0 | 1139 | if (ops->id) |
6e77cc47 | 1140 | ida_free(&net_generic_ids, *ops->id); |
f875bae0 EB |
1141 | } |
1142 | ||
5f256bec EB |
1143 | /** |
1144 | * register_pernet_subsys - register a network namespace subsystem | |
1145 | * @ops: pernet operations structure for the subsystem | |
1146 | * | |
1147 | * Register a subsystem which has init and exit functions | |
1148 | * that are called when network namespaces are created and | |
1149 | * destroyed respectively. | |
1150 | * | |
1151 | * When registered all network namespace init functions are | |
1152 | * called for every existing network namespace. Allowing kernel | |
1153 | * modules to have a race free view of the set of network namespaces. | |
1154 | * | |
1155 | * When a new network namespace is created all of the init | |
1156 | * methods are called in the order in which they were registered. | |
1157 | * | |
1158 | * When a network namespace is destroyed all of the exit methods | |
1159 | * are called in the reverse of the order with which they were | |
1160 | * registered. | |
1161 | */ | |
1162 | int register_pernet_subsys(struct pernet_operations *ops) | |
1163 | { | |
1164 | int error; | |
4420bf21 | 1165 | down_write(&pernet_ops_rwsem); |
5f256bec | 1166 | error = register_pernet_operations(first_device, ops); |
4420bf21 | 1167 | up_write(&pernet_ops_rwsem); |
5f256bec EB |
1168 | return error; |
1169 | } | |
1170 | EXPORT_SYMBOL_GPL(register_pernet_subsys); | |
1171 | ||
1172 | /** | |
1173 | * unregister_pernet_subsys - unregister a network namespace subsystem | |
1174 | * @ops: pernet operations structure to manipulate | |
1175 | * | |
1176 | * Remove the pernet operations structure from the list to be | |
53379e57 | 1177 | * used when network namespaces are created or destroyed. In |
5f256bec EB |
1178 | * addition run the exit method for all existing network |
1179 | * namespaces. | |
1180 | */ | |
b3c981d2 | 1181 | void unregister_pernet_subsys(struct pernet_operations *ops) |
5f256bec | 1182 | { |
4420bf21 | 1183 | down_write(&pernet_ops_rwsem); |
b3c981d2 | 1184 | unregister_pernet_operations(ops); |
4420bf21 | 1185 | up_write(&pernet_ops_rwsem); |
5f256bec EB |
1186 | } |
1187 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); | |
1188 | ||
1189 | /** | |
1190 | * register_pernet_device - register a network namespace device | |
1191 | * @ops: pernet operations structure for the subsystem | |
1192 | * | |
1193 | * Register a device which has init and exit functions | |
1194 | * that are called when network namespaces are created and | |
1195 | * destroyed respectively. | |
1196 | * | |
1197 | * When registered all network namespace init functions are | |
1198 | * called for every existing network namespace. Allowing kernel | |
1199 | * modules to have a race free view of the set of network namespaces. | |
1200 | * | |
1201 | * When a new network namespace is created all of the init | |
1202 | * methods are called in the order in which they were registered. | |
1203 | * | |
1204 | * When a network namespace is destroyed all of the exit methods | |
1205 | * are called in the reverse of the order with which they were | |
1206 | * registered. | |
1207 | */ | |
1208 | int register_pernet_device(struct pernet_operations *ops) | |
1209 | { | |
1210 | int error; | |
4420bf21 | 1211 | down_write(&pernet_ops_rwsem); |
5f256bec EB |
1212 | error = register_pernet_operations(&pernet_list, ops); |
1213 | if (!error && (first_device == &pernet_list)) | |
1214 | first_device = &ops->list; | |
4420bf21 | 1215 | up_write(&pernet_ops_rwsem); |
5f256bec EB |
1216 | return error; |
1217 | } | |
1218 | EXPORT_SYMBOL_GPL(register_pernet_device); | |
1219 | ||
1220 | /** | |
1221 | * unregister_pernet_device - unregister a network namespace netdevice | |
1222 | * @ops: pernet operations structure to manipulate | |
1223 | * | |
1224 | * Remove the pernet operations structure from the list to be | |
53379e57 | 1225 | * used when network namespaces are created or destroyed. In |
5f256bec EB |
1226 | * addition run the exit method for all existing network |
1227 | * namespaces. | |
1228 | */ | |
1229 | void unregister_pernet_device(struct pernet_operations *ops) | |
1230 | { | |
4420bf21 | 1231 | down_write(&pernet_ops_rwsem); |
5f256bec EB |
1232 | if (&ops->list == first_device) |
1233 | first_device = first_device->next; | |
1234 | unregister_pernet_operations(ops); | |
4420bf21 | 1235 | up_write(&pernet_ops_rwsem); |
5f256bec EB |
1236 | } |
1237 | EXPORT_SYMBOL_GPL(unregister_pernet_device); | |
13b6f576 EB |
1238 | |
1239 | #ifdef CONFIG_NET_NS | |
64964528 | 1240 | static struct ns_common *netns_get(struct task_struct *task) |
13b6f576 | 1241 | { |
f0630529 EB |
1242 | struct net *net = NULL; |
1243 | struct nsproxy *nsproxy; | |
1244 | ||
728dba3a EB |
1245 | task_lock(task); |
1246 | nsproxy = task->nsproxy; | |
f0630529 EB |
1247 | if (nsproxy) |
1248 | net = get_net(nsproxy->net_ns); | |
728dba3a | 1249 | task_unlock(task); |
f0630529 | 1250 | |
ff24870f AV |
1251 | return net ? &net->ns : NULL; |
1252 | } | |
1253 | ||
1254 | static inline struct net *to_net_ns(struct ns_common *ns) | |
1255 | { | |
1256 | return container_of(ns, struct net, ns); | |
13b6f576 EB |
1257 | } |
1258 | ||
/* Call put_net() on the network namespace that embeds @ns. */
static void netns_put(struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	put_net(net);
}
1263 | ||
64964528 | 1264 | static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns) |
13b6f576 | 1265 | { |
ff24870f | 1266 | struct net *net = to_net_ns(ns); |
142e1d1d | 1267 | |
5e4a0847 | 1268 | if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || |
c7b96acf | 1269 | !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) |
142e1d1d EB |
1270 | return -EPERM; |
1271 | ||
13b6f576 | 1272 | put_net(nsproxy->net_ns); |
142e1d1d | 1273 | nsproxy->net_ns = get_net(net); |
13b6f576 EB |
1274 | return 0; |
1275 | } | |
1276 | ||
bcac25a5 AV |
1277 | static struct user_namespace *netns_owner(struct ns_common *ns) |
1278 | { | |
1279 | return to_net_ns(ns)->user_ns; | |
1280 | } | |
1281 | ||
13b6f576 EB |
1282 | const struct proc_ns_operations netns_operations = { |
1283 | .name = "net", | |
1284 | .type = CLONE_NEWNET, | |
1285 | .get = netns_get, | |
1286 | .put = netns_put, | |
1287 | .install = netns_install, | |
bcac25a5 | 1288 | .owner = netns_owner, |
13b6f576 EB |
1289 | }; |
1290 | #endif |