]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/netlink/af_netlink.c
netlink: add netlink_skb_set_owner_r()
[mirror_ubuntu-jammy-kernel.git] / net / netlink / af_netlink.c
CommitLineData
1da177e4
LT
1/*
2 * NETLINK Kernel-user communication protocol.
3 *
113aa838 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
746fac4d 11 *
1da177e4
LT
12 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
13 * added netlink_proto_exit
14 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
15 * use nlk_sk, as sk->protinfo is on a diet 8)
4fdb3bb7
HW
16 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
17 * - inc module use count of module that owns
18 * the kernel socket in case userspace opens
19 * socket of same protocol
20 * - remove all module support, since netlink is
21 * mandatory if CONFIG_NET=y these days
1da177e4
LT
22 */
23
1da177e4
LT
24#include <linux/module.h>
25
4fc268d2 26#include <linux/capability.h>
1da177e4
LT
27#include <linux/kernel.h>
28#include <linux/init.h>
1da177e4
LT
29#include <linux/signal.h>
30#include <linux/sched.h>
31#include <linux/errno.h>
32#include <linux/string.h>
33#include <linux/stat.h>
34#include <linux/socket.h>
35#include <linux/un.h>
36#include <linux/fcntl.h>
37#include <linux/termios.h>
38#include <linux/sockios.h>
39#include <linux/net.h>
40#include <linux/fs.h>
41#include <linux/slab.h>
42#include <asm/uaccess.h>
43#include <linux/skbuff.h>
44#include <linux/netdevice.h>
45#include <linux/rtnetlink.h>
46#include <linux/proc_fs.h>
47#include <linux/seq_file.h>
1da177e4
LT
48#include <linux/notifier.h>
49#include <linux/security.h>
50#include <linux/jhash.h>
51#include <linux/jiffies.h>
52#include <linux/random.h>
53#include <linux/bitops.h>
54#include <linux/mm.h>
55#include <linux/types.h>
54e0f520 56#include <linux/audit.h>
af65bdfc 57#include <linux/mutex.h>
54e0f520 58
457c4cbc 59#include <net/net_namespace.h>
1da177e4
LT
60#include <net/sock.h>
61#include <net/scm.h>
82ace47a 62#include <net/netlink.h>
1da177e4 63
0f29c768 64#include "af_netlink.h"
1da177e4 65
5c398dc8
ED
66struct listeners {
67 struct rcu_head rcu;
68 unsigned long masks[0];
6c04bb18
JB
69};
70
cd967e05
PM
71/* state bits */
72#define NETLINK_CONGESTED 0x0
73
74/* flags */
77247bbb 75#define NETLINK_KERNEL_SOCKET 0x1
9a4595bc 76#define NETLINK_RECV_PKTINFO 0x2
be0c22a4 77#define NETLINK_BROADCAST_SEND_ERROR 0x4
38938bfe 78#define NETLINK_RECV_NO_ENOBUFS 0x8
77247bbb 79
035c4c16 80static inline int netlink_is_kernel(struct sock *sk)
aed81560
DL
81{
82 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
83}
84
0f29c768
AV
85struct netlink_table *nl_table;
86EXPORT_SYMBOL_GPL(nl_table);
1da177e4
LT
87
88static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
89
90static int netlink_dump(struct sock *sk);
1da177e4 91
0f29c768
AV
92DEFINE_RWLOCK(nl_table_lock);
93EXPORT_SYMBOL_GPL(nl_table_lock);
1da177e4
LT
94static atomic_t nl_table_users = ATOMIC_INIT(0);
95
6d772ac5
ED
/* Dereference an RCU pointer while nl_table_lock is held.  No trailing
 * semicolon in the expansion, so the macro is usable inside expressions
 * (the original definition ended in ';', which would break e.g.
 * `if (nl_deref_protected(p))`). */
#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))
97
e041c683 98static ATOMIC_NOTIFIER_HEAD(netlink_chain);
1da177e4 99
b57ef81f 100static inline u32 netlink_group_mask(u32 group)
d629b836
PM
101{
102 return group ? 1 << (group - 1) : 0;
103}
104
15e47304 105static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
1da177e4 106{
15e47304 107 return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
1da177e4
LT
108}
109
658cb354
ED
110static void netlink_destroy_callback(struct netlink_callback *cb)
111{
112 kfree_skb(cb->skb);
113 kfree(cb);
114}
115
bfb253c9
ED
116static void netlink_consume_callback(struct netlink_callback *cb)
117{
118 consume_skb(cb->skb);
119 kfree(cb);
120}
121
cf0a018a
PM
/* skb destructor for skbs charged to a netlink socket by
 * netlink_skb_set_owner_r(); uncharges sk_rmem_alloc via sock_rfree(). */
static void netlink_skb_destructor(struct sk_buff *skb)
{
	sock_rfree(skb);
}
126
127static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
128{
129 WARN_ON(skb->sk != NULL);
130 skb->sk = sk;
131 skb->destructor = netlink_skb_destructor;
132 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
133 sk_mem_charge(sk, skb->truesize);
134}
135
1da177e4
LT
/* sk->sk_destruct hook: runs when the last reference to a netlink
 * socket is dropped.  Finishes any pending dump and sanity-checks that
 * the socket is dead and fully uncharged. */
static void netlink_sock_destruct(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	/* A dump may still be in flight: give its owner the ->done()
	 * callback, then release the callback's module ref and memory. */
	if (nlk->cb) {
		if (nlk->cb->done)
			nlk->cb->done(nlk->cb);

		module_put(nlk->cb->module);
		netlink_destroy_callback(nlk->cb);
	}

	skb_queue_purge(&sk->sk_receive_queue);

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
		return;
	}

	/* All queued memory must have been uncharged by now. */
	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(nlk_sk(sk)->groups);
}
159
6ac552fd
PM
160/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
161 * SMP. Look, when several writers sleep and reader wakes them up, all but one
1da177e4
LT
162 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
163 * this, _but_ remember, it adds useless work on UP machines.
164 */
165
/* Take the netlink table for exclusive (writer) access.  May sleep
 * until every reader that entered via netlink_lock_table() has left. */
void netlink_table_grab(void)
	__acquires(nl_table_lock)
{
	might_sleep();

	write_lock_irq(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		/* Exclusive wait: wake one writer at a time, avoiding a
		 * thundering herd on the write lock (see comment above). */
		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			/* Drop the lock so readers can drain, then
			 * re-take and re-check. */
			write_unlock_irq(&nl_table_lock);
			schedule();
			write_lock_irq(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}
190
/* Release exclusive access taken by netlink_table_grab() and wake any
 * tasks waiting to grab or re-enter the table. */
void netlink_table_ungrab(void)
	__releases(nl_table_lock)
{
	write_unlock_irq(&nl_table_lock);
	wake_up(&nl_table_wait);
}
197
/* Enter the table as a reader: bump the reader count while briefly
 * holding the read lock so a concurrent grab cannot miss us. */
static inline void
netlink_lock_table(void)
{
	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock(&nl_table_lock);
	atomic_inc(&nl_table_users);
	read_unlock(&nl_table_lock);
}
207
/* Leave reader mode; the last reader out wakes a waiting writer. */
static inline void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}
214
/* Find the socket bound to @portid for @protocol in @net and take a
 * reference on it.  Returns NULL if no such socket is bound. */
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
	struct nl_portid_hash *hash = &nl_table[protocol].hash;
	struct hlist_head *head;
	struct sock *sk;

	read_lock(&nl_table_lock);
	head = nl_portid_hashfn(hash, portid);
	sk_for_each(sk, head) {
		if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
			sock_hold(sk);	/* reference handed to the caller */
			goto found;
		}
	}
	sk = NULL;
found:
	read_unlock(&nl_table_lock);
	return sk;
}
234
15e47304 235static struct hlist_head *nl_portid_hash_zalloc(size_t size)
1da177e4
LT
236{
237 if (size <= PAGE_SIZE)
ea72912c 238 return kzalloc(size, GFP_ATOMIC);
1da177e4
LT
239 else
240 return (struct hlist_head *)
ea72912c
ED
241 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
242 get_order(size));
1da177e4
LT
243}
244
15e47304 245static void nl_portid_hash_free(struct hlist_head *table, size_t size)
1da177e4
LT
246{
247 if (size <= PAGE_SIZE)
248 kfree(table);
249 else
250 free_pages((unsigned long)table, get_order(size));
251}
252
/* Resize (when @grow) or merely re-seed the portid hash table and move
 * every socket into the new buckets.  Returns 1 on success, 0 if the
 * table is at maximum size or the new array could not be allocated.
 * Caller must hold the table grabbed. */
static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
{
	unsigned int omask, mask, shift;
	size_t osize, size;
	struct hlist_head *otable, *table;
	int i;

	omask = mask = hash->mask;
	osize = size = (mask + 1) * sizeof(*table);
	shift = hash->shift;

	if (grow) {
		if (++shift > hash->max_shift)
			return 0;	/* already at maximum size */
		mask = mask * 2 + 1;
		size *= 2;
	}

	table = nl_portid_hash_zalloc(size);
	if (!table)
		return 0;

	otable = hash->table;
	hash->table = table;
	hash->mask = mask;
	hash->shift = shift;
	/* Fresh seed: redistributes long chains and prevents attacks
	 * against a predictable hash. */
	get_random_bytes(&hash->rnd, sizeof(hash->rnd));

	for (i = 0; i <= omask; i++) {
		struct sock *sk;
		struct hlist_node *tmp;

		sk_for_each_safe(sk, tmp, &otable[i])
			__sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
	}

	nl_portid_hash_free(otable, osize);
	/* Rate-limit reseed-only rehashes to one per 10 minutes. */
	hash->rehash_time = jiffies + 10 * 60 * HZ;
	return 1;
}
293
15e47304 294static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
1da177e4
LT
295{
296 int avg = hash->entries >> hash->shift;
297
15e47304 298 if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
1da177e4
LT
299 return 1;
300
301 if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
15e47304 302 nl_portid_hash_rehash(hash, 0);
1da177e4
LT
303 return 1;
304 }
305
306 return 0;
307}
308
90ddc4f0 309static const struct proto_ops netlink_ops;
1da177e4 310
4277a083
PM
/* Recompute the per-group listener bitmap for @sk's protocol by OR-ing
 * the subscriptions of every bound socket.  Caller must hold the table
 * grabbed via netlink_table_grab(). */
static void
netlink_update_listeners(struct sock *sk)
{
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	unsigned long mask;
	unsigned int i;
	struct listeners *listeners;

	/* No listeners array: last kernel socket already gone. */
	listeners = nl_deref_protected(tbl->listeners);
	if (!listeners)
		return;

	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
		mask = 0;
		sk_for_each_bound(sk, &tbl->mc_list) {
			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
				mask |= nlk_sk(sk)->groups[i];
		}
		listeners->masks[i] = mask;
	}
	/* this function is only called with the netlink table "grabbed", which
	 * makes sure updates are visible before bind or setsockopt return. */
}
334
/* Bind @sk to @portid in its protocol's hash table.
 * Returns 0 on success, -EADDRINUSE if the portid is taken in @net,
 * -EBUSY if @sk is already bound, -ENOMEM if the table is full. */
static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
{
	struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	int err = -EADDRINUSE;
	struct sock *osk;
	int len;

	netlink_table_grab();
	head = nl_portid_hashfn(hash, portid);
	len = 0;
	sk_for_each(osk, head) {
		if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
			break;
		len++;	/* track chain length for dilution below */
	}
	if (osk)
		goto err;	/* portid already in use */

	err = -EBUSY;
	if (nlk_sk(sk)->portid)
		goto err;	/* socket already bound */

	err = -ENOMEM;
	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
		goto err;

	/* A long chain may trigger a grow/reseed; recompute the bucket. */
	if (len && nl_portid_hash_dilute(hash, len))
		head = nl_portid_hashfn(hash, portid);
	hash->entries++;
	nlk_sk(sk)->portid = portid;
	sk_add_node(sk, head);
	err = 0;

err:
	netlink_table_ungrab();
	return err;
}
373
/* Unhash @sk from the portid table and, if it subscribed to any
 * multicast groups, from the protocol's bound-socket list. */
static void netlink_remove(struct sock *sk)
{
	netlink_table_grab();
	if (sk_del_node_init(sk))
		nl_table[sk->sk_protocol].hash.entries--;
	if (nlk_sk(sk)->subscriptions)
		__sk_del_bind_node(sk);
	netlink_table_ungrab();
}
383
384static struct proto netlink_proto = {
385 .name = "NETLINK",
386 .owner = THIS_MODULE,
387 .obj_size = sizeof(struct netlink_sock),
388};
389
1b8d7ae4
EB
/* Allocate and initialize the struct sock half of a netlink socket.
 * @cb_mutex: dump-callback mutex supplied by a kernel-side creator, or
 * NULL to fall back to the socket's own default mutex. */
static int __netlink_create(struct net *net, struct socket *sock,
			    struct mutex *cb_mutex, int protocol)
{
	struct sock *sk;
	struct netlink_sock *nlk;

	sock->ops = &netlink_ops;

	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	nlk = nlk_sk(sk);
	if (cb_mutex) {
		nlk->cb_mutex = cb_mutex;
	} else {
		nlk->cb_mutex = &nlk->cb_def_mutex;
		mutex_init(nlk->cb_mutex);
	}
	init_waitqueue_head(&nlk->wait);

	sk->sk_destruct = netlink_sock_destruct;
	sk->sk_protocol = protocol;
	return 0;
}
417
3f378b68
EP
/* socket(2) handler for PF_NETLINK: validate type and protocol, load
 * the owning protocol module on demand, then build the socket. */
static int netlink_create(struct net *net, struct socket *sock, int protocol,
			  int kern)
{
	struct module *module = NULL;
	struct mutex *cb_mutex;
	struct netlink_sock *nlk;
	void (*bind)(int group);
	int err = 0;

	sock->state = SS_UNCONNECTED;

	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
		return -ESOCKTNOSUPPORT;

	if (protocol < 0 || protocol >= MAX_LINKS)
		return -EPROTONOSUPPORT;

	netlink_lock_table();
#ifdef CONFIG_MODULES
	if (!nl_table[protocol].registered) {
		/* Drop the table lock while loading the module, then
		 * re-take it and re-check registration. */
		netlink_unlock_table();
		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
		netlink_lock_table();
	}
#endif
	if (nl_table[protocol].registered &&
	    try_module_get(nl_table[protocol].module))
		module = nl_table[protocol].module;
	else
		err = -EPROTONOSUPPORT;
	cb_mutex = nl_table[protocol].cb_mutex;
	bind = nl_table[protocol].bind;
	netlink_unlock_table();

	if (err < 0)
		goto out;

	err = __netlink_create(net, sock, cb_mutex, protocol);
	if (err < 0)
		goto out_module;

	local_bh_disable();
	sock_prot_inuse_add(net, &netlink_proto, 1);
	local_bh_enable();

	nlk = nlk_sk(sock->sk);
	nlk->module = module;	/* module ref released in netlink_release() */
	nlk->netlink_bind = bind;
out:
	return err;

out_module:
	module_put(module);
	goto out;
}
473
/* close(2) handler: unhash the socket, notify NETLINK_URELEASE
 * listeners, tear down per-protocol kernel-socket state when the last
 * kernel socket goes away, and drop the final reference. */
static int netlink_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk;

	if (!sk)
		return 0;

	netlink_remove(sk);
	sock_orphan(sk);
	nlk = nlk_sk(sk);

	/*
	 * OK. Socket is unlinked, any packets that arrive now
	 * will be purged.
	 */

	sock->sk = NULL;
	wake_up_interruptible_all(&nlk->wait);

	skb_queue_purge(&sk->sk_write_queue);

	/* Tell interested parties (e.g. genetlink) that a bound socket
	 * went away. */
	if (nlk->portid) {
		struct netlink_notify n = {
			.net = sock_net(sk),
			.protocol = sk->sk_protocol,
			.portid = nlk->portid,
		};
		atomic_notifier_call_chain(&netlink_chain,
				NETLINK_URELEASE, &n);
	}

	module_put(nlk->module);

	netlink_table_grab();
	if (netlink_is_kernel(sk)) {
		BUG_ON(nl_table[sk->sk_protocol].registered == 0);
		/* Last kernel socket for this protocol: retire the
		 * protocol entry and free the listener bitmap via RCU. */
		if (--nl_table[sk->sk_protocol].registered == 0) {
			struct listeners *old;

			old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
			RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
			kfree_rcu(old, rcu);
			nl_table[sk->sk_protocol].module = NULL;
			nl_table[sk->sk_protocol].bind = NULL;
			nl_table[sk->sk_protocol].flags = 0;
			nl_table[sk->sk_protocol].registered = 0;
		}
	} else if (nlk->subscriptions) {
		netlink_update_listeners(sk);
	}
	netlink_table_ungrab();

	kfree(nlk->groups);
	nlk->groups = NULL;

	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
	local_bh_enable();
	sock_put(sk);
	return 0;
}
536
/* Pick a local portid automatically: try the caller's tgid first, then
 * scan downward through negative values until an unused one is found. */
static int netlink_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	struct sock *osk;
	s32 portid = task_tgid_vnr(current);
	int err;
	/* Shared rover: serialized by the grabbed table below. */
	static s32 rover = -4097;

retry:
	cond_resched();
	netlink_table_grab();
	head = nl_portid_hashfn(hash, portid);
	sk_for_each(osk, head) {
		if (!net_eq(sock_net(osk), net))
			continue;
		if (nlk_sk(osk)->portid == portid) {
			/* Bind collision, search negative portid values. */
			portid = rover--;
			if (rover > -4097)
				rover = -4097;	/* wrapped around: restart */
			netlink_table_ungrab();
			goto retry;
		}
	}
	netlink_table_ungrab();

	err = netlink_insert(sk, net, portid);
	if (err == -EADDRINUSE)
		goto retry;	/* raced with another binder */

	/* If 2 threads race to autobind, that is fine. */
	if (err == -EBUSY)
		err = 0;

	return err;
}
576
b57ef81f 577static inline int netlink_capable(const struct socket *sock, unsigned int flag)
746fac4d 578{
9785e10a 579 return (nl_table[sock->sk->sk_protocol].flags & flag) ||
df008c91 580 ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
746fac4d 581}
1da177e4 582
f7fa9b10
PM
583static void
584netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
585{
586 struct netlink_sock *nlk = nlk_sk(sk);
587
588 if (nlk->subscriptions && !subscriptions)
589 __sk_del_bind_node(sk);
590 else if (!nlk->subscriptions && subscriptions)
591 sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
592 nlk->subscriptions = subscriptions;
593}
594
/* Grow nlk->groups so it covers all multicast groups currently
 * registered for the socket's protocol; newly added bits are cleared.
 * Grabs the table itself, so the caller must not hold it. */
static int netlink_realloc_groups(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	unsigned int groups;
	unsigned long *new_groups;
	int err = 0;

	netlink_table_grab();

	groups = nl_table[sk->sk_protocol].groups;
	if (!nl_table[sk->sk_protocol].registered) {
		err = -ENOENT;
		goto out_unlock;
	}

	if (nlk->ngroups >= groups)
		goto out_unlock;	/* already large enough */

	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
	if (new_groups == NULL) {
		err = -ENOMEM;
		goto out_unlock;
	}
	/* Zero only the newly added tail; krealloc keeps existing bits. */
	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

	nlk->groups = new_groups;
	nlk->ngroups = groups;
 out_unlock:
	netlink_table_ungrab();
	return err;
}
627
6ac552fd
PM
/* bind(2) handler: set the local portid (or autobind) and replace the
 * first 32 multicast group subscriptions. */
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
			int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
	int err;

	if (addr_len < sizeof(struct sockaddr_nl))
		return -EINVAL;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to listen multicasts */
	if (nladdr->nl_groups) {
		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
			return -EPERM;
		err = netlink_realloc_groups(sk);
		if (err)
			return err;
	}

	if (nlk->portid) {
		/* Already bound: rebinding to another portid is an error. */
		if (nladdr->nl_pid != nlk->portid)
			return -EINVAL;
	} else {
		err = nladdr->nl_pid ?
			netlink_insert(sk, net, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err)
			return err;
	}

	/* Nothing more to do if no groups requested and none subscribed. */
	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		return 0;

	netlink_table_grab();
	netlink_update_subscriptions(sk, nlk->subscriptions +
					 hweight32(nladdr->nl_groups) -
					 hweight32(nlk->groups[0]));
	/* Replace only the low 32 group bits; higher groups are managed
	 * via setsockopt(NETLINK_ADD_MEMBERSHIP). */
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	/* Let the protocol react to each group we are now a member of. */
	if (nlk->netlink_bind && nlk->groups[0]) {
		int i;

		for (i = 0; i < nlk->ngroups; i++) {
			if (test_bit(i, nlk->groups))
				nlk->netlink_bind(i);
		}
	}

	return 0;
}
685
/* connect(2) handler: record the default unicast destination and
 * multicast group; AF_UNSPEC disconnects. */
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

	if (alen < sizeof(addr->sa_family))
		return -EINVAL;

	if (addr->sa_family == AF_UNSPEC) {
		sk->sk_state = NETLINK_UNCONNECTED;
		nlk->dst_portid = 0;
		nlk->dst_group = 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to send multicasts */
	if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
		return -EPERM;

	/* Connecting implies having a local portid. */
	if (!nlk->portid)
		err = netlink_autobind(sock);

	if (err == 0) {
		sk->sk_state = NETLINK_CONNECTED;
		nlk->dst_portid = nladdr->nl_pid;
		/* Only the lowest requested group is remembered. */
		nlk->dst_group = ffs(nladdr->nl_groups);
	}

	return err;
}
721
6ac552fd
PM
/* getsockname(2)/getpeername(2) handler: report the local or connected
 * peer address. */
static int netlink_getname(struct socket *sock, struct sockaddr *addr,
			   int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		nladdr->nl_pid = nlk->dst_portid;
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		nladdr->nl_pid = nlk->portid;
		/* Only the first 32 groups are representable here. */
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
	}
	return 0;
}
742
/* Receive queue overflowed: unless the socket opted out via
 * NETLINK_RECV_NO_ENOBUFS, raise ENOBUFS once per congestion episode
 * (the NETLINK_CONGESTED bit is cleared again in netlink_rcv_wake()). */
static void netlink_overrun(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
		/* test_and_set ensures the error fires only on the first
		 * overrun of an episode. */
		if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
			sk->sk_err = ENOBUFS;
			sk->sk_error_report(sk);
		}
	}
	atomic_inc(&sk->sk_drops);
}
755
/* Look up the unicast destination for a send from @ssk, taking a
 * reference.  Refuses delivery (-ECONNREFUSED) when the target is
 * connected to a peer other than @ssk. */
static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if (sock->sk_state == NETLINK_CONNECTED &&
	    nlk->dst_portid != nlk_sk(ssk)->portid) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}
774
775struct sock *netlink_getsockbyfilp(struct file *filp)
776{
496ad9aa 777 struct inode *inode = file_inode(filp);
1da177e4
LT
778 struct sock *sock;
779
780 if (!S_ISSOCK(inode->i_mode))
781 return ERR_PTR(-ENOTSOCK);
782
783 sock = SOCKET_I(inode)->sk;
784 if (sock->sk_family != AF_NETLINK)
785 return ERR_PTR(-EINVAL);
786
787 sock_hold(sock);
788 return sock;
789}
790
791/*
792 * Attach a skb to a netlink socket.
793 * The caller must hold a reference to the destination socket. On error, the
794 * reference is dropped. The skb is not send to the destination, just all
795 * all error checks are performed and memory in the queue is reserved.
796 * Return values:
797 * < 0: error. skb freed, reference to sock dropped.
798 * 0: continue
799 * 1: repeat lookup - reference dropped while waiting for socket memory.
800 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
		      long *timeo, struct sock *ssk)
{
	struct netlink_sock *nlk;

	nlk = nlk_sk(sk);

	/* Receiver over its buffer budget or marked congested: wait,
	 * or fail immediately for non-blocking callers. */
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	    test_bit(NETLINK_CONGESTED, &nlk->state)) {
		DECLARE_WAITQUEUE(wait, current);
		if (!*timeo) {
			/* Overruns are only reported for kernel-originated
			 * messages; user senders just see -EAGAIN. */
			if (!ssk || netlink_is_kernel(ssk))
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		__set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&nlk->wait, &wait);

		/* Re-check after queueing ourselves to avoid a lost wakeup. */
		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
		     test_bit(NETLINK_CONGESTED, &nlk->state)) &&
		    !sock_flag(sk, SOCK_DEAD))
			*timeo = schedule_timeout(*timeo);

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nlk->wait, &wait);
		sock_put(sk);

		if (signal_pending(current)) {
			kfree_skb(skb);
			return sock_intr_errno(*timeo);
		}
		return 1;	/* caller must redo the socket lookup */
	}
	netlink_skb_set_owner_r(skb, sk);
	return 0;
}
840
4a7e7c2a 841static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1da177e4 842{
1da177e4
LT
843 int len = skb->len;
844
1da177e4
LT
845 skb_queue_tail(&sk->sk_receive_queue, skb);
846 sk->sk_data_ready(sk, len);
4a7e7c2a
ED
847 return len;
848}
849
/* Queue @skb on @sk, then drop the caller's socket reference.
 * Returns the number of bytes queued. */
int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int queued;

	queued = __netlink_sendskb(sk, skb);
	sock_put(sk);
	return queued;
}
857
/* Abort a pending send prepared by netlink_attachskb(): free the skb
 * and drop the destination socket reference. */
void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sock_put(sk);
}
863
/* Shrink an over-allocated skb before queueing so receive-buffer
 * accounting reflects the actual payload.  May return a clone when the
 * original skb is shared. */
static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
	int delta;

	WARN_ON(skb->sk != NULL);	/* must not be owned yet */

	delta = skb->end - skb->tail;
	if (delta * 2 < skb->truesize)
		return skb;	/* modest tailroom: not worth a realloc */

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, allocation);
		if (!nskb)
			return skb;	/* best effort: keep the original */
		consume_skb(skb);
		skb = nskb;
	}

	/* Failure here is harmless: we just keep the larger buffer. */
	if (!pskb_expand_head(skb, 0, -delta, allocation))
		skb->truesize -= delta;

	return skb;
}
887
/* Called after a receiver dequeued data: clear the congested flag once
 * the queue drains, and wake senders blocked in netlink_attachskb(). */
static void netlink_rcv_wake(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (skb_queue_empty(&sk->sk_receive_queue))
		clear_bit(NETLINK_CONGESTED, &nlk->state);
	if (!test_bit(NETLINK_CONGESTED, &nlk->state))
		wake_up_interruptible(&nlk->wait);
}
897
3fbc2905
EB
/* Deliver @skb straight into a kernel socket's input callback.
 * Consumes the skb and the caller's reference on @sk.  Returns the
 * delivered length, or -ECONNREFUSED if the kernel socket has no input
 * function. */
static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
				  struct sock *ssk)
{
	int ret;
	struct netlink_sock *nlk = nlk_sk(sk);

	ret = -ECONNREFUSED;
	if (nlk->netlink_rcv != NULL) {
		ret = skb->len;	/* sample before the callback runs */
		netlink_skb_set_owner_r(skb, sk);
		NETLINK_CB(skb).sk = ssk;	/* let the handler see the sender */
		nlk->netlink_rcv(skb);
		consume_skb(skb);
	} else {
		kfree_skb(skb);
	}
	sock_put(sk);
	return ret;
}
917
/* Send @skb to the socket bound to @portid.  Sleeps (unless @nonblock)
 * while the receiver's buffer is full.  Consumes the skb in all cases;
 * returns the number of bytes queued or a negative errno. */
int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
		    u32 portid, int nonblock)
{
	struct sock *sk;
	int err;
	long timeo;

	skb = netlink_trim(skb, gfp_any());

	timeo = sock_sndtimeo(ssk, nonblock);
retry:
	sk = netlink_getsockbyportid(ssk, portid);
	if (IS_ERR(sk)) {
		kfree_skb(skb);
		return PTR_ERR(sk);
	}
	if (netlink_is_kernel(sk))
		return netlink_unicast_kernel(sk, skb, ssk);

	/* Socket filter rejected the message: report success-like length
	 * so the sender does not retry. */
	if (sk_filter(sk, skb)) {
		err = skb->len;
		kfree_skb(skb);
		sock_put(sk);
		return err;
	}

	err = netlink_attachskb(sk, skb, &timeo, ssk);
	if (err == 1)
		goto retry;	/* woke up: target may have changed */
	if (err)
		return err;

	return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);
1da177e4 953
4277a083
PM
/* For kernel sockets only: report whether any socket is currently
 * subscribed to multicast @group, letting callers skip building
 * messages nobody would receive. */
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
	int res = 0;
	struct listeners *listeners;

	BUG_ON(!netlink_is_kernel(sk));

	rcu_read_lock();
	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

	/* group is 1-based; the subtraction also rejects group == 0. */
	if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
		res = test_bit(group - 1, listeners->masks);

	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);
972
/* Try to queue @skb on @sk without blocking.  Returns -1 when the
 * socket is over budget or congested; otherwise 0/1, where 1 signals
 * the receive buffer is more than half full (congestion feedback for
 * the broadcast loop). */
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    !test_bit(NETLINK_CONGESTED, &nlk->state)) {
		netlink_skb_set_owner_r(skb, sk);
		__netlink_sendskb(sk, skb);
		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
	}
	return -1;
}
985
/* State shared across do_one_broadcast() calls for a single
 * netlink_broadcast_filtered() invocation. */
struct netlink_broadcast_data {
	struct sock *exclude_sk;	/* originator; never delivered to */
	struct net *net;
	u32 portid;			/* sender's portid; skipped on delivery */
	u32 group;			/* 1-based target multicast group */
	int failure;			/* clone failed: overrun all listeners */
	int delivery_failure;
	int congested;			/* OR of per-socket congestion feedback */
	int delivered;			/* at least one socket got the skb */
	gfp_t allocation;
	struct sk_buff *skb, *skb2;	/* original and per-recipient copy */
	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
	void *tx_data;			/* opaque cookie for tx_filter */
};
1000
/* Attempt delivery of the broadcast described by @p to one candidate
 * socket.  Always returns 0; per-socket outcomes are accumulated in @p. */
static int do_one_broadcast(struct sock *sk,
			    struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	if (p->exclude_sk == sk)
		goto out;

	/* Skip the sender itself and sockets not subscribed to the group. */
	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	if (!net_eq(sock_net(sk), p->net))
		goto out;

	/* A previous clone failure dooms the whole broadcast: report an
	 * overrun to every remaining listener. */
	if (p->failure) {
		netlink_overrun(sk);
		goto out;
	}

	sock_hold(sk);
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
			p->delivery_failure = 1;
	} else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
		/* Caller-supplied filter rejected this recipient. */
		kfree_skb(p->skb2);
		p->skb2 = NULL;
	} else if (sk_filter(sk, p->skb2)) {
		/* Socket's own BPF filter rejected the message. */
		kfree_skb(p->skb2);
		p->skb2 = NULL;
	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
		netlink_overrun(sk);
		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
			p->delivery_failure = 1;
	} else {
		/* Delivered: skb2 now belongs to the receiver. */
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;
	}
	sock_put(sk);

out:
	return 0;
}
1061
/* Broadcast @skb to every member of @group (except @ssk), skipping
 * sockets rejected by @filter when given.  Consumes the skb.  Returns 0
 * when at least one socket received it, -ESRCH when none did, and
 * -ENOBUFS when a recipient with NETLINK_BROADCAST_SEND_ERROR set could
 * not be delivered to. */
int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
	u32 group, gfp_t allocation,
	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
	void *filter_data)
{
	struct net *net = sock_net(ssk);
	struct netlink_broadcast_data info;
	struct sock *sk;

	skb = netlink_trim(skb, allocation);

	info.exclude_sk = ssk;
	info.net = net;
	info.portid = portid;
	info.group = group;
	info.failure = 0;
	info.delivery_failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;
	info.tx_filter = filter;
	info.tx_data = filter_data;

	/* While we sleep in clone, do not allow to change socket list */

	netlink_lock_table();

	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	consume_skb(skb);

	netlink_unlock_table();

	if (info.delivery_failure) {
		kfree_skb(info.skb2);	/* leftover undelivered copy */
		return -ENOBUFS;
	}
	consume_skb(info.skb2);	/* NULL-safe: may have been handed off */

	if (info.delivered) {
		/* Give congested receivers a chance to drain. */
		if (info.congested && (allocation & __GFP_WAIT))
			yield();
		return 0;
	}
	return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);
1112
15e47304 1113int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
910a7e90
EB
1114 u32 group, gfp_t allocation)
1115{
15e47304 1116 return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
910a7e90
EB
1117 NULL, NULL);
1118}
6ac552fd 1119EXPORT_SYMBOL(netlink_broadcast);
1da177e4
LT
1120
1121struct netlink_set_err_data {
1122 struct sock *exclude_sk;
15e47304 1123 u32 portid;
1da177e4
LT
1124 u32 group;
1125 int code;
1126};
1127
b57ef81f 1128static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
1da177e4
LT
1129{
1130 struct netlink_sock *nlk = nlk_sk(sk);
1a50307b 1131 int ret = 0;
1da177e4
LT
1132
1133 if (sk == p->exclude_sk)
1134 goto out;
1135
09ad9bc7 1136 if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
b4b51029
EB
1137 goto out;
1138
15e47304 1139 if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
f7fa9b10 1140 !test_bit(p->group - 1, nlk->groups))
1da177e4
LT
1141 goto out;
1142
1a50307b
PNA
1143 if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
1144 ret = 1;
1145 goto out;
1146 }
1147
1da177e4
LT
1148 sk->sk_err = p->code;
1149 sk->sk_error_report(sk);
1150out:
1a50307b 1151 return ret;
1da177e4
LT
1152}
1153
4843b93c
PNA
1154/**
1155 * netlink_set_err - report error to broadcast listeners
1156 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
15e47304 1157 * @portid: the PORTID of a process that we want to skip (if any)
4843b93c
PNA
1158 * @groups: the broadcast group that will notice the error
1159 * @code: error code, must be negative (as usual in kernelspace)
1a50307b
PNA
1160 *
1161 * This function returns the number of broadcast listeners that have set the
1162 * NETLINK_RECV_NO_ENOBUFS socket option.
4843b93c 1163 */
15e47304 1164int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
1da177e4
LT
1165{
1166 struct netlink_set_err_data info;
1da177e4 1167 struct sock *sk;
1a50307b 1168 int ret = 0;
1da177e4
LT
1169
1170 info.exclude_sk = ssk;
15e47304 1171 info.portid = portid;
1da177e4 1172 info.group = group;
4843b93c
PNA
1173 /* sk->sk_err wants a positive error value */
1174 info.code = -code;
1da177e4
LT
1175
1176 read_lock(&nl_table_lock);
1177
b67bfe0d 1178 sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
1a50307b 1179 ret += do_one_set_err(sk, &info);
1da177e4
LT
1180
1181 read_unlock(&nl_table_lock);
1a50307b 1182 return ret;
1da177e4 1183}
dd5b6ce6 1184EXPORT_SYMBOL(netlink_set_err);
1da177e4 1185
84659eb5
JB
1186/* must be called with netlink table grabbed */
1187static void netlink_update_socket_mc(struct netlink_sock *nlk,
1188 unsigned int group,
1189 int is_new)
1190{
1191 int old, new = !!is_new, subscriptions;
1192
1193 old = test_bit(group - 1, nlk->groups);
1194 subscriptions = nlk->subscriptions - old + new;
1195 if (new)
1196 __set_bit(group - 1, nlk->groups);
1197 else
1198 __clear_bit(group - 1, nlk->groups);
1199 netlink_update_subscriptions(&nlk->sk, subscriptions);
1200 netlink_update_listeners(&nlk->sk);
1201}
1202
9a4595bc 1203static int netlink_setsockopt(struct socket *sock, int level, int optname,
b7058842 1204 char __user *optval, unsigned int optlen)
9a4595bc
PM
1205{
1206 struct sock *sk = sock->sk;
1207 struct netlink_sock *nlk = nlk_sk(sk);
eb496534
JB
1208 unsigned int val = 0;
1209 int err;
9a4595bc
PM
1210
1211 if (level != SOL_NETLINK)
1212 return -ENOPROTOOPT;
1213
1214 if (optlen >= sizeof(int) &&
eb496534 1215 get_user(val, (unsigned int __user *)optval))
9a4595bc
PM
1216 return -EFAULT;
1217
1218 switch (optname) {
1219 case NETLINK_PKTINFO:
1220 if (val)
1221 nlk->flags |= NETLINK_RECV_PKTINFO;
1222 else
1223 nlk->flags &= ~NETLINK_RECV_PKTINFO;
1224 err = 0;
1225 break;
1226 case NETLINK_ADD_MEMBERSHIP:
1227 case NETLINK_DROP_MEMBERSHIP: {
9785e10a 1228 if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
9a4595bc 1229 return -EPERM;
b4ff4f04
JB
1230 err = netlink_realloc_groups(sk);
1231 if (err)
1232 return err;
9a4595bc
PM
1233 if (!val || val - 1 >= nlk->ngroups)
1234 return -EINVAL;
1235 netlink_table_grab();
84659eb5
JB
1236 netlink_update_socket_mc(nlk, val,
1237 optname == NETLINK_ADD_MEMBERSHIP);
9a4595bc 1238 netlink_table_ungrab();
03292745
PNA
1239
1240 if (nlk->netlink_bind)
1241 nlk->netlink_bind(val);
1242
9a4595bc
PM
1243 err = 0;
1244 break;
1245 }
be0c22a4
PNA
1246 case NETLINK_BROADCAST_ERROR:
1247 if (val)
1248 nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
1249 else
1250 nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
1251 err = 0;
1252 break;
38938bfe
PNA
1253 case NETLINK_NO_ENOBUFS:
1254 if (val) {
1255 nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
cd967e05 1256 clear_bit(NETLINK_CONGESTED, &nlk->state);
38938bfe 1257 wake_up_interruptible(&nlk->wait);
658cb354 1258 } else {
38938bfe 1259 nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
658cb354 1260 }
38938bfe
PNA
1261 err = 0;
1262 break;
9a4595bc
PM
1263 default:
1264 err = -ENOPROTOOPT;
1265 }
1266 return err;
1267}
1268
1269static int netlink_getsockopt(struct socket *sock, int level, int optname,
746fac4d 1270 char __user *optval, int __user *optlen)
9a4595bc
PM
1271{
1272 struct sock *sk = sock->sk;
1273 struct netlink_sock *nlk = nlk_sk(sk);
1274 int len, val, err;
1275
1276 if (level != SOL_NETLINK)
1277 return -ENOPROTOOPT;
1278
1279 if (get_user(len, optlen))
1280 return -EFAULT;
1281 if (len < 0)
1282 return -EINVAL;
1283
1284 switch (optname) {
1285 case NETLINK_PKTINFO:
1286 if (len < sizeof(int))
1287 return -EINVAL;
1288 len = sizeof(int);
1289 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
a27b58fe
HC
1290 if (put_user(len, optlen) ||
1291 put_user(val, optval))
1292 return -EFAULT;
9a4595bc
PM
1293 err = 0;
1294 break;
be0c22a4
PNA
1295 case NETLINK_BROADCAST_ERROR:
1296 if (len < sizeof(int))
1297 return -EINVAL;
1298 len = sizeof(int);
1299 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
1300 if (put_user(len, optlen) ||
1301 put_user(val, optval))
1302 return -EFAULT;
1303 err = 0;
1304 break;
38938bfe
PNA
1305 case NETLINK_NO_ENOBUFS:
1306 if (len < sizeof(int))
1307 return -EINVAL;
1308 len = sizeof(int);
1309 val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
1310 if (put_user(len, optlen) ||
1311 put_user(val, optval))
1312 return -EFAULT;
1313 err = 0;
1314 break;
9a4595bc
PM
1315 default:
1316 err = -ENOPROTOOPT;
1317 }
1318 return err;
1319}
1320
1321static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1322{
1323 struct nl_pktinfo info;
1324
1325 info.group = NETLINK_CB(skb).dst_group;
1326 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1327}
1328
1da177e4
LT
1329static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1330 struct msghdr *msg, size_t len)
1331{
1332 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1333 struct sock *sk = sock->sk;
1334 struct netlink_sock *nlk = nlk_sk(sk);
6ac552fd 1335 struct sockaddr_nl *addr = msg->msg_name;
15e47304 1336 u32 dst_portid;
d629b836 1337 u32 dst_group;
1da177e4
LT
1338 struct sk_buff *skb;
1339 int err;
1340 struct scm_cookie scm;
1341
1342 if (msg->msg_flags&MSG_OOB)
1343 return -EOPNOTSUPP;
1344
16e57262 1345 if (NULL == siocb->scm)
1da177e4 1346 siocb->scm = &scm;
16e57262 1347
e0e3cea4 1348 err = scm_send(sock, msg, siocb->scm, true);
1da177e4
LT
1349 if (err < 0)
1350 return err;
1351
1352 if (msg->msg_namelen) {
b47030c7 1353 err = -EINVAL;
1da177e4 1354 if (addr->nl_family != AF_NETLINK)
b47030c7 1355 goto out;
15e47304 1356 dst_portid = addr->nl_pid;
d629b836 1357 dst_group = ffs(addr->nl_groups);
b47030c7 1358 err = -EPERM;
15e47304 1359 if ((dst_group || dst_portid) &&
9785e10a 1360 !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
b47030c7 1361 goto out;
1da177e4 1362 } else {
15e47304 1363 dst_portid = nlk->dst_portid;
d629b836 1364 dst_group = nlk->dst_group;
1da177e4
LT
1365 }
1366
15e47304 1367 if (!nlk->portid) {
1da177e4
LT
1368 err = netlink_autobind(sock);
1369 if (err)
1370 goto out;
1371 }
1372
1373 err = -EMSGSIZE;
1374 if (len > sk->sk_sndbuf - 32)
1375 goto out;
1376 err = -ENOBUFS;
339bf98f 1377 skb = alloc_skb(len, GFP_KERNEL);
6ac552fd 1378 if (skb == NULL)
1da177e4
LT
1379 goto out;
1380
15e47304 1381 NETLINK_CB(skb).portid = nlk->portid;
d629b836 1382 NETLINK_CB(skb).dst_group = dst_group;
dbe9a417 1383 NETLINK_CB(skb).creds = siocb->scm->creds;
1da177e4 1384
1da177e4 1385 err = -EFAULT;
6ac552fd 1386 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1da177e4
LT
1387 kfree_skb(skb);
1388 goto out;
1389 }
1390
1391 err = security_netlink_send(sk, skb);
1392 if (err) {
1393 kfree_skb(skb);
1394 goto out;
1395 }
1396
d629b836 1397 if (dst_group) {
1da177e4 1398 atomic_inc(&skb->users);
15e47304 1399 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
1da177e4 1400 }
15e47304 1401 err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
1da177e4
LT
1402
1403out:
b47030c7 1404 scm_destroy(siocb->scm);
1da177e4
LT
1405 return err;
1406}
1407
1408static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1409 struct msghdr *msg, size_t len,
1410 int flags)
1411{
1412 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1413 struct scm_cookie scm;
1414 struct sock *sk = sock->sk;
1415 struct netlink_sock *nlk = nlk_sk(sk);
1416 int noblock = flags&MSG_DONTWAIT;
1417 size_t copied;
68d6ac6d 1418 struct sk_buff *skb, *data_skb;
b44d211e 1419 int err, ret;
1da177e4
LT
1420
1421 if (flags&MSG_OOB)
1422 return -EOPNOTSUPP;
1423
1424 copied = 0;
1425
6ac552fd
PM
1426 skb = skb_recv_datagram(sk, flags, noblock, &err);
1427 if (skb == NULL)
1da177e4
LT
1428 goto out;
1429
68d6ac6d
JB
1430 data_skb = skb;
1431
1dacc76d
JB
1432#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
1433 if (unlikely(skb_shinfo(skb)->frag_list)) {
1dacc76d 1434 /*
68d6ac6d
JB
1435 * If this skb has a frag_list, then here that means that we
1436 * will have to use the frag_list skb's data for compat tasks
1437 * and the regular skb's data for normal (non-compat) tasks.
1dacc76d 1438 *
68d6ac6d
JB
1439 * If we need to send the compat skb, assign it to the
1440 * 'data_skb' variable so that it will be used below for data
1441 * copying. We keep 'skb' for everything else, including
1442 * freeing both later.
1dacc76d 1443 */
68d6ac6d
JB
1444 if (flags & MSG_CMSG_COMPAT)
1445 data_skb = skb_shinfo(skb)->frag_list;
1dacc76d
JB
1446 }
1447#endif
1448
1da177e4
LT
1449 msg->msg_namelen = 0;
1450
68d6ac6d 1451 copied = data_skb->len;
1da177e4
LT
1452 if (len < copied) {
1453 msg->msg_flags |= MSG_TRUNC;
1454 copied = len;
1455 }
1456
68d6ac6d
JB
1457 skb_reset_transport_header(data_skb);
1458 err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
1da177e4
LT
1459
1460 if (msg->msg_name) {
6ac552fd 1461 struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
1da177e4
LT
1462 addr->nl_family = AF_NETLINK;
1463 addr->nl_pad = 0;
15e47304 1464 addr->nl_pid = NETLINK_CB(skb).portid;
d629b836 1465 addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
1da177e4
LT
1466 msg->msg_namelen = sizeof(*addr);
1467 }
1468
cc9a06cd
PM
1469 if (nlk->flags & NETLINK_RECV_PKTINFO)
1470 netlink_cmsg_recv_pktinfo(msg, skb);
1471
1da177e4
LT
1472 if (NULL == siocb->scm) {
1473 memset(&scm, 0, sizeof(scm));
1474 siocb->scm = &scm;
1475 }
1476 siocb->scm->creds = *NETLINK_CREDS(skb);
188ccb55 1477 if (flags & MSG_TRUNC)
68d6ac6d 1478 copied = data_skb->len;
daa3766e 1479
1da177e4
LT
1480 skb_free_datagram(sk, skb);
1481
b44d211e
AV
1482 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1483 ret = netlink_dump(sk);
1484 if (ret) {
1485 sk->sk_err = ret;
1486 sk->sk_error_report(sk);
1487 }
1488 }
1da177e4
LT
1489
1490 scm_recv(sock, msg, siocb->scm, flags);
1da177e4
LT
1491out:
1492 netlink_rcv_wake(sk);
1493 return err ? : copied;
1494}
1495
/*
 * Kernel-side netlink sockets are fed through netlink_rcv; data must never
 * arrive via the generic data_ready path, so reaching this is a hard bug.
 */
static void netlink_data_ready(struct sock *sk, int len)
{
	BUG();
}
1500
1501/*
746fac4d 1502 * We export these functions to other modules. They provide a
1da177e4
LT
1503 * complete set of kernel non-blocking support for message
1504 * queueing.
1505 */
1506
1507struct sock *
9f00d977
PNA
1508__netlink_kernel_create(struct net *net, int unit, struct module *module,
1509 struct netlink_kernel_cfg *cfg)
1da177e4
LT
1510{
1511 struct socket *sock;
1512 struct sock *sk;
77247bbb 1513 struct netlink_sock *nlk;
5c398dc8 1514 struct listeners *listeners = NULL;
a31f2d17
PNA
1515 struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
1516 unsigned int groups;
1da177e4 1517
fab2caf6 1518 BUG_ON(!nl_table);
1da177e4 1519
6ac552fd 1520 if (unit < 0 || unit >= MAX_LINKS)
1da177e4
LT
1521 return NULL;
1522
1523 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1524 return NULL;
1525
23fe1866
PE
1526 /*
1527 * We have to just have a reference on the net from sk, but don't
1528 * get_net it. Besides, we cannot get and then put the net here.
1529 * So we create one inside init_net and the move it to net.
1530 */
1531
1532 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
1533 goto out_sock_release_nosk;
1534
1535 sk = sock->sk;
edf02087 1536 sk_change_net(sk, net);
4fdb3bb7 1537
a31f2d17 1538 if (!cfg || cfg->groups < 32)
4277a083 1539 groups = 32;
a31f2d17
PNA
1540 else
1541 groups = cfg->groups;
4277a083 1542
5c398dc8 1543 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
4277a083
PM
1544 if (!listeners)
1545 goto out_sock_release;
1546
1da177e4 1547 sk->sk_data_ready = netlink_data_ready;
a31f2d17
PNA
1548 if (cfg && cfg->input)
1549 nlk_sk(sk)->netlink_rcv = cfg->input;
1da177e4 1550
b4b51029 1551 if (netlink_insert(sk, net, 0))
77247bbb 1552 goto out_sock_release;
4fdb3bb7 1553
77247bbb
PM
1554 nlk = nlk_sk(sk);
1555 nlk->flags |= NETLINK_KERNEL_SOCKET;
4fdb3bb7 1556
4fdb3bb7 1557 netlink_table_grab();
b4b51029
EB
1558 if (!nl_table[unit].registered) {
1559 nl_table[unit].groups = groups;
5c398dc8 1560 rcu_assign_pointer(nl_table[unit].listeners, listeners);
b4b51029
EB
1561 nl_table[unit].cb_mutex = cb_mutex;
1562 nl_table[unit].module = module;
9785e10a
PNA
1563 if (cfg) {
1564 nl_table[unit].bind = cfg->bind;
1565 nl_table[unit].flags = cfg->flags;
1566 }
b4b51029 1567 nl_table[unit].registered = 1;
f937f1f4
JJ
1568 } else {
1569 kfree(listeners);
869e58f8 1570 nl_table[unit].registered++;
b4b51029 1571 }
4fdb3bb7 1572 netlink_table_ungrab();
77247bbb
PM
1573 return sk;
1574
4fdb3bb7 1575out_sock_release:
4277a083 1576 kfree(listeners);
9dfbec1f 1577 netlink_kernel_release(sk);
23fe1866
PE
1578 return NULL;
1579
1580out_sock_release_nosk:
4fdb3bb7 1581 sock_release(sock);
77247bbb 1582 return NULL;
1da177e4 1583}
9f00d977 1584EXPORT_SYMBOL(__netlink_kernel_create);
b7c6ba6e
DL
1585
/* Destroy a kernel netlink socket created by __netlink_kernel_create(). */
void
netlink_kernel_release(struct sock *sk)
{
	sk_release_kernel(sk);
}
EXPORT_SYMBOL(netlink_kernel_release);
1592
d136f1bd 1593int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
b4ff4f04 1594{
5c398dc8 1595 struct listeners *new, *old;
b4ff4f04 1596 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
b4ff4f04
JB
1597
1598 if (groups < 32)
1599 groups = 32;
1600
b4ff4f04 1601 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
5c398dc8
ED
1602 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1603 if (!new)
d136f1bd 1604 return -ENOMEM;
6d772ac5 1605 old = nl_deref_protected(tbl->listeners);
5c398dc8
ED
1606 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1607 rcu_assign_pointer(tbl->listeners, new);
1608
37b6b935 1609 kfree_rcu(old, rcu);
b4ff4f04
JB
1610 }
1611 tbl->groups = groups;
1612
d136f1bd
JB
1613 return 0;
1614}
1615
/**
 * netlink_change_ngroups - change number of multicast groups
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	int err;

	netlink_table_grab();
	err = __netlink_change_ngroups(sk, groups);
	netlink_table_ungrab();

	return err;
}
b4ff4f04 1638
b8273570
JB
1639void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1640{
1641 struct sock *sk;
b8273570
JB
1642 struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
1643
b67bfe0d 1644 sk_for_each_bound(sk, &tbl->mc_list)
b8273570
JB
1645 netlink_update_socket_mc(nlk_sk(sk), group, 0);
1646}
1647
84659eb5
JB
/**
 * netlink_clear_multicast_users - kick off multicast listeners
 *
 * This function removes all listeners from the given group.
 * @ksk: The kernel netlink socket, as returned by
 *	netlink_kernel_create().
 * @group: The multicast group to clear.
 */
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
	netlink_table_grab();
	__netlink_clear_multicast_users(ksk, group);
	netlink_table_ungrab();
}
84659eb5 1662
a46621a3 1663struct nlmsghdr *
15e47304 1664__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
a46621a3
DV
1665{
1666 struct nlmsghdr *nlh;
573ce260 1667 int size = nlmsg_msg_size(len);
a46621a3
DV
1668
1669 nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
1670 nlh->nlmsg_type = type;
1671 nlh->nlmsg_len = size;
1672 nlh->nlmsg_flags = flags;
15e47304 1673 nlh->nlmsg_pid = portid;
a46621a3
DV
1674 nlh->nlmsg_seq = seq;
1675 if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
573ce260 1676 memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
a46621a3
DV
1677 return nlh;
1678}
1679EXPORT_SYMBOL(__nlmsg_put);
1680
1da177e4
LT
1681/*
1682 * It looks a bit ugly.
1683 * It would be better to create kernel thread.
1684 */
1685
1686static int netlink_dump(struct sock *sk)
1687{
1688 struct netlink_sock *nlk = nlk_sk(sk);
1689 struct netlink_callback *cb;
c7ac8679 1690 struct sk_buff *skb = NULL;
1da177e4 1691 struct nlmsghdr *nlh;
bf8b79e4 1692 int len, err = -ENOBUFS;
c7ac8679 1693 int alloc_size;
1da177e4 1694
af65bdfc 1695 mutex_lock(nlk->cb_mutex);
1da177e4
LT
1696
1697 cb = nlk->cb;
1698 if (cb == NULL) {
bf8b79e4
TG
1699 err = -EINVAL;
1700 goto errout_skb;
1da177e4
LT
1701 }
1702
c7ac8679
GR
1703 alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
1704
1705 skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
1706 if (!skb)
c63d6ea3 1707 goto errout_skb;
c7ac8679 1708
1da177e4
LT
1709 len = cb->dump(skb, cb);
1710
1711 if (len > 0) {
af65bdfc 1712 mutex_unlock(nlk->cb_mutex);
b1153f29
SH
1713
1714 if (sk_filter(sk, skb))
1715 kfree_skb(skb);
4a7e7c2a
ED
1716 else
1717 __netlink_sendskb(sk, skb);
1da177e4
LT
1718 return 0;
1719 }
1720
bf8b79e4
TG
1721 nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1722 if (!nlh)
1723 goto errout_skb;
1724
670dc283
JB
1725 nl_dump_check_consistent(cb, nlh);
1726
bf8b79e4
TG
1727 memcpy(nlmsg_data(nlh), &len, sizeof(len));
1728
b1153f29
SH
1729 if (sk_filter(sk, skb))
1730 kfree_skb(skb);
4a7e7c2a
ED
1731 else
1732 __netlink_sendskb(sk, skb);
1da177e4 1733
a8f74b22
TG
1734 if (cb->done)
1735 cb->done(cb);
1da177e4 1736 nlk->cb = NULL;
af65bdfc 1737 mutex_unlock(nlk->cb_mutex);
1da177e4 1738
6dc878a8 1739 module_put(cb->module);
bfb253c9 1740 netlink_consume_callback(cb);
1da177e4 1741 return 0;
1797754e 1742
bf8b79e4 1743errout_skb:
af65bdfc 1744 mutex_unlock(nlk->cb_mutex);
bf8b79e4 1745 kfree_skb(skb);
bf8b79e4 1746 return err;
1da177e4
LT
1747}
1748
6dc878a8
G
/*
 * Begin a multi-part dump on the socket that sent @skb, installing a
 * netlink_callback and emitting the first chunk.
 * Returns -EINTR on success (suppresses the automatic ACK), -EBUSY when a
 * dump is already running, or another negative errno.
 */
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
			 const struct nlmsghdr *nlh,
			 struct netlink_dump_control *control)
{
	struct netlink_callback *cb;
	struct sock *sk;
	struct netlink_sock *nlk;
	int ret;

	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
	if (cb == NULL)
		return -ENOBUFS;

	cb->dump = control->dump;
	cb->done = control->done;
	cb->nlh = nlh;
	cb->data = control->data;
	cb->module = control->module;
	cb->min_dump_alloc = control->min_dump_alloc;
	/* The callback keeps a reference on the request skb. */
	atomic_inc(&skb->users);
	cb->skb = skb;

	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
	if (sk == NULL) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}
	nlk = nlk_sk(sk);

	mutex_lock(nlk->cb_mutex);
	/* A dump is in progress... */
	if (nlk->cb) {
		mutex_unlock(nlk->cb_mutex);
		netlink_destroy_callback(cb);
		ret = -EBUSY;
		goto out;
	}
	/* add reference of module which cb->dump belongs to */
	if (!try_module_get(cb->module)) {
		mutex_unlock(nlk->cb_mutex);
		netlink_destroy_callback(cb);
		ret = -EPROTONOSUPPORT;
		goto out;
	}

	nlk->cb = cb;
	mutex_unlock(nlk->cb_mutex);

	ret = netlink_dump(sk);
out:
	sock_put(sk);

	if (ret)
		return ret;

	/* We successfully started a dump, by returning -EINTR we
	 * signal not to send ACK even if it was requested.
	 */
	return -EINTR;
}
EXPORT_SYMBOL(__netlink_dump_start);
1da177e4
LT
1810
1811void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1812{
1813 struct sk_buff *skb;
1814 struct nlmsghdr *rep;
1815 struct nlmsgerr *errmsg;
339bf98f 1816 size_t payload = sizeof(*errmsg);
1da177e4 1817
339bf98f
TG
1818 /* error messages get the original request appened */
1819 if (err)
1820 payload += nlmsg_len(nlh);
1da177e4 1821
339bf98f 1822 skb = nlmsg_new(payload, GFP_KERNEL);
1da177e4
LT
1823 if (!skb) {
1824 struct sock *sk;
1825
3b1e0a65 1826 sk = netlink_lookup(sock_net(in_skb->sk),
b4b51029 1827 in_skb->sk->sk_protocol,
15e47304 1828 NETLINK_CB(in_skb).portid);
1da177e4
LT
1829 if (sk) {
1830 sk->sk_err = ENOBUFS;
1831 sk->sk_error_report(sk);
1832 sock_put(sk);
1833 }
1834 return;
1835 }
1836
15e47304 1837 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5dba93ae 1838 NLMSG_ERROR, payload, 0);
bf8b79e4 1839 errmsg = nlmsg_data(rep);
1da177e4 1840 errmsg->error = err;
bf8b79e4 1841 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
15e47304 1842 netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
1da177e4 1843}
6ac552fd 1844EXPORT_SYMBOL(netlink_ack);
1da177e4 1845
cd40b7d3 1846int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1d00a4eb 1847 struct nlmsghdr *))
82ace47a 1848{
82ace47a
TG
1849 struct nlmsghdr *nlh;
1850 int err;
1851
1852 while (skb->len >= nlmsg_total_size(0)) {
cd40b7d3
DL
1853 int msglen;
1854
b529ccf2 1855 nlh = nlmsg_hdr(skb);
d35b6856 1856 err = 0;
82ace47a 1857
ad8e4b75 1858 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
82ace47a
TG
1859 return 0;
1860
d35b6856
TG
1861 /* Only requests are handled by the kernel */
1862 if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
5c58298c 1863 goto ack;
45e7ae7f
TG
1864
1865 /* Skip control messages */
1866 if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
5c58298c 1867 goto ack;
d35b6856 1868
1d00a4eb 1869 err = cb(skb, nlh);
5c58298c
DL
1870 if (err == -EINTR)
1871 goto skip;
1872
1873ack:
d35b6856 1874 if (nlh->nlmsg_flags & NLM_F_ACK || err)
82ace47a 1875 netlink_ack(skb, nlh, err);
82ace47a 1876
5c58298c 1877skip:
6ac552fd 1878 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
cd40b7d3
DL
1879 if (msglen > skb->len)
1880 msglen = skb->len;
1881 skb_pull(skb, msglen);
82ace47a
TG
1882 }
1883
1884 return 0;
1885}
6ac552fd 1886EXPORT_SYMBOL(netlink_rcv_skb);
82ace47a 1887
d387f6ad
TG
1888/**
1889 * nlmsg_notify - send a notification netlink message
1890 * @sk: netlink socket to use
1891 * @skb: notification message
15e47304 1892 * @portid: destination netlink portid for reports or 0
d387f6ad
TG
1893 * @group: destination multicast group or 0
1894 * @report: 1 to report back, 0 to disable
1895 * @flags: allocation flags
1896 */
15e47304 1897int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
d387f6ad
TG
1898 unsigned int group, int report, gfp_t flags)
1899{
1900 int err = 0;
1901
1902 if (group) {
15e47304 1903 int exclude_portid = 0;
d387f6ad
TG
1904
1905 if (report) {
1906 atomic_inc(&skb->users);
15e47304 1907 exclude_portid = portid;
d387f6ad
TG
1908 }
1909
1ce85fe4
PNA
1910 /* errors reported via destination sk->sk_err, but propagate
1911 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
15e47304 1912 err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
d387f6ad
TG
1913 }
1914
1ce85fe4
PNA
1915 if (report) {
1916 int err2;
1917
15e47304 1918 err2 = nlmsg_unicast(sk, skb, portid);
1ce85fe4
PNA
1919 if (!err || err == -ESRCH)
1920 err = err2;
1921 }
d387f6ad
TG
1922
1923 return err;
1924}
6ac552fd 1925EXPORT_SYMBOL(nlmsg_notify);
d387f6ad 1926
1da177e4
LT
1927#ifdef CONFIG_PROC_FS
1928struct nl_seq_iter {
e372c414 1929 struct seq_net_private p;
1da177e4
LT
1930 int link;
1931 int hash_idx;
1932};
1933
1934static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1935{
1936 struct nl_seq_iter *iter = seq->private;
1937 int i, j;
1938 struct sock *s;
1da177e4
LT
1939 loff_t off = 0;
1940
6ac552fd 1941 for (i = 0; i < MAX_LINKS; i++) {
15e47304 1942 struct nl_portid_hash *hash = &nl_table[i].hash;
1da177e4
LT
1943
1944 for (j = 0; j <= hash->mask; j++) {
b67bfe0d 1945 sk_for_each(s, &hash->table[j]) {
1218854a 1946 if (sock_net(s) != seq_file_net(seq))
b4b51029 1947 continue;
1da177e4
LT
1948 if (off == pos) {
1949 iter->link = i;
1950 iter->hash_idx = j;
1951 return s;
1952 }
1953 ++off;
1954 }
1955 }
1956 }
1957 return NULL;
1958}
1959
1960static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 1961 __acquires(nl_table_lock)
1da177e4
LT
1962{
1963 read_lock(&nl_table_lock);
1964 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1965}
1966
1967static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1968{
1969 struct sock *s;
1970 struct nl_seq_iter *iter;
1971 int i, j;
1972
1973 ++*pos;
1974
1975 if (v == SEQ_START_TOKEN)
1976 return netlink_seq_socket_idx(seq, 0);
746fac4d 1977
b4b51029
EB
1978 iter = seq->private;
1979 s = v;
1980 do {
1981 s = sk_next(s);
1218854a 1982 } while (s && sock_net(s) != seq_file_net(seq));
1da177e4
LT
1983 if (s)
1984 return s;
1985
1da177e4
LT
1986 i = iter->link;
1987 j = iter->hash_idx + 1;
1988
1989 do {
15e47304 1990 struct nl_portid_hash *hash = &nl_table[i].hash;
1da177e4
LT
1991
1992 for (; j <= hash->mask; j++) {
1993 s = sk_head(&hash->table[j]);
1218854a 1994 while (s && sock_net(s) != seq_file_net(seq))
b4b51029 1995 s = sk_next(s);
1da177e4
LT
1996 if (s) {
1997 iter->link = i;
1998 iter->hash_idx = j;
1999 return s;
2000 }
2001 }
2002
2003 j = 0;
2004 } while (++i < MAX_LINKS);
2005
2006 return NULL;
2007}
2008
2009static void netlink_seq_stop(struct seq_file *seq, void *v)
9a429c49 2010 __releases(nl_table_lock)
1da177e4
LT
2011{
2012 read_unlock(&nl_table_lock);
2013}
2014
2015
2016static int netlink_seq_show(struct seq_file *seq, void *v)
2017{
658cb354 2018 if (v == SEQ_START_TOKEN) {
1da177e4
LT
2019 seq_puts(seq,
2020 "sk Eth Pid Groups "
cf0aa4e0 2021 "Rmem Wmem Dump Locks Drops Inode\n");
658cb354 2022 } else {
1da177e4
LT
2023 struct sock *s = v;
2024 struct netlink_sock *nlk = nlk_sk(s);
2025
9f1e0ad0 2026 seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
1da177e4
LT
2027 s,
2028 s->sk_protocol,
15e47304 2029 nlk->portid,
513c2500 2030 nlk->groups ? (u32)nlk->groups[0] : 0,
31e6d363
ED
2031 sk_rmem_alloc_get(s),
2032 sk_wmem_alloc_get(s),
1da177e4 2033 nlk->cb,
38938bfe 2034 atomic_read(&s->sk_refcnt),
cf0aa4e0
MY
2035 atomic_read(&s->sk_drops),
2036 sock_i_ino(s)
1da177e4
LT
2037 );
2038
2039 }
2040 return 0;
2041}
2042
56b3d975 2043static const struct seq_operations netlink_seq_ops = {
1da177e4
LT
2044 .start = netlink_seq_start,
2045 .next = netlink_seq_next,
2046 .stop = netlink_seq_stop,
2047 .show = netlink_seq_show,
2048};
2049
2050
2051static int netlink_seq_open(struct inode *inode, struct file *file)
2052{
e372c414
DL
2053 return seq_open_net(inode, file, &netlink_seq_ops,
2054 sizeof(struct nl_seq_iter));
b4b51029
EB
2055}
2056
da7071d7 2057static const struct file_operations netlink_seq_fops = {
1da177e4
LT
2058 .owner = THIS_MODULE,
2059 .open = netlink_seq_open,
2060 .read = seq_read,
2061 .llseek = seq_lseek,
e372c414 2062 .release = seq_release_net,
1da177e4
LT
2063};
2064
2065#endif
2066
2067int netlink_register_notifier(struct notifier_block *nb)
2068{
e041c683 2069 return atomic_notifier_chain_register(&netlink_chain, nb);
1da177e4 2070}
6ac552fd 2071EXPORT_SYMBOL(netlink_register_notifier);
1da177e4
LT
2072
2073int netlink_unregister_notifier(struct notifier_block *nb)
2074{
e041c683 2075 return atomic_notifier_chain_unregister(&netlink_chain, nb);
1da177e4 2076}
6ac552fd 2077EXPORT_SYMBOL(netlink_unregister_notifier);
746fac4d 2078
90ddc4f0 2079static const struct proto_ops netlink_ops = {
1da177e4
LT
2080 .family = PF_NETLINK,
2081 .owner = THIS_MODULE,
2082 .release = netlink_release,
2083 .bind = netlink_bind,
2084 .connect = netlink_connect,
2085 .socketpair = sock_no_socketpair,
2086 .accept = sock_no_accept,
2087 .getname = netlink_getname,
2088 .poll = datagram_poll,
2089 .ioctl = sock_no_ioctl,
2090 .listen = sock_no_listen,
2091 .shutdown = sock_no_shutdown,
9a4595bc
PM
2092 .setsockopt = netlink_setsockopt,
2093 .getsockopt = netlink_getsockopt,
1da177e4
LT
2094 .sendmsg = netlink_sendmsg,
2095 .recvmsg = netlink_recvmsg,
2096 .mmap = sock_no_mmap,
2097 .sendpage = sock_no_sendpage,
2098};
2099
ec1b4cf7 2100static const struct net_proto_family netlink_family_ops = {
1da177e4
LT
2101 .family = PF_NETLINK,
2102 .create = netlink_create,
2103 .owner = THIS_MODULE, /* for consistency 8) */
2104};
2105
4665079c 2106static int __net_init netlink_net_init(struct net *net)
b4b51029
EB
2107{
2108#ifdef CONFIG_PROC_FS
d4beaa66 2109 if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
b4b51029
EB
2110 return -ENOMEM;
2111#endif
2112 return 0;
2113}
2114
4665079c 2115static void __net_exit netlink_net_exit(struct net *net)
b4b51029
EB
2116{
2117#ifdef CONFIG_PROC_FS
ece31ffd 2118 remove_proc_entry("netlink", net->proc_net);
b4b51029
EB
2119#endif
2120}
2121
/*
 * Register the NETLINK_USERSOCK protocol entry in nl_table at boot.
 * This protocol has no in-kernel socket, so its table slot is filled in
 * here directly rather than through the usual kernel-socket creation
 * path.  Runs only at init time; panics if the allocation fails.
 */
static void __init netlink_add_usersock_entry(void)
{
	struct listeners *listeners;
	int groups = 32;	/* number of multicast groups provided */

	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		panic("netlink_add_usersock_entry: Cannot allocate listeners\n");

	/* All nl_table updates happen under the table lock. */
	netlink_table_grab();

	nl_table[NETLINK_USERSOCK].groups = groups;
	/* rcu_assign_pointer publishes the bitmap for lockless readers. */
	rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
	nl_table[NETLINK_USERSOCK].registered = 1;
	/* NL_CFG_F_NONROOT_SEND: sending does not require CAP_NET_ADMIN. */
	nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;

	netlink_table_ungrab();
}
2141
/* Per-network-namespace init/exit hooks (proc entry management above). */
static struct pernet_operations __net_initdata netlink_net_ops = {
	.init = netlink_net_init,
	.exit = netlink_net_exit,
};
2146
/*
 * Boot-time initialization of the netlink protocol family:
 *   - register netlink_proto,
 *   - allocate nl_table and size the per-protocol portid hash tables
 *     based on available memory,
 *   - install the NETLINK_USERSOCK entry,
 *   - register the socket family, pernet operations and rtnetlink.
 *
 * Returns 0 on success or the proto_register() error; panics if the
 * table allocations fail (boot cannot proceed without them).
 */
static int __init netlink_proto_init(void)
{
	int i;
	unsigned long limit;
	unsigned int order;
	int err = proto_register(&netlink_proto, 0);

	if (err != 0)
		goto out;

	/* netlink keeps its per-skb state in skb->cb[]; ensure it fits. */
	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
	if (!nl_table)
		goto panic;

	/* Scale the hash-table size budget with total RAM: a smaller
	 * shift (bigger budget) on machines with >= 128K pages. */
	if (totalram_pages >= (128 * 1024))
		limit = totalram_pages >> (21 - PAGE_SHIFT);
	else
		limit = totalram_pages >> (23 - PAGE_SHIFT);

	/* Round the budget down to a power of two of bytes, convert to a
	 * bucket count, then derive the maximum hash shift (clamped so it
	 * fits in an unsigned int). */
	order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
	limit = (1UL << order) / sizeof(struct hlist_head);
	order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_portid_hash *hash = &nl_table[i].hash;

		/* Each protocol starts with a single-bucket table; shift 0
		 * and mask 0 match that size, max_shift bounds growth. */
		hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
		if (!hash->table) {
			/* Unwind the tables allocated so far, then panic. */
			while (i-- > 0)
				nl_portid_hash_free(nl_table[i].hash.table,
						    1 * sizeof(*hash->table));
			kfree(nl_table);
			goto panic;
		}
		hash->max_shift = order;
		hash->shift = 0;
		hash->mask = 0;
		hash->rehash_time = jiffies;
	}

	netlink_add_usersock_entry();

	sock_register(&netlink_family_ops);
	register_pernet_subsys(&netlink_net_ops);
	/* The netlink device handler may be needed early. */
	rtnetlink_init();
out:
	return err;
panic:
	panic("netlink_init: Cannot allocate nl_table\n");
}
2200
/* Run at core_initcall level so netlink is available early in boot. */
core_initcall(netlink_proto_init);