/*
 *      IP multicast routing support for mrouted 3.6/3.8
 *
 *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *        Linux Consultancy and Custom Driver Development
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Fixes:
 *      Michael Chastain        :       Incorrect size of copying.
 *      Alan Cox                :       Added the cache manager code
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
 *      Mike McLagan            :       Routing by source
 *      Malcolm Beattie         :       Buffer handling fixes.
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 *      SVR Anand               :       Fixed several multicast bugs and problems.
 *      Alexey Kuznetsov        :       Status, optimisations and more.
 *      Brad Parker             :       Better behaviour on mrouted upcall
 *                                      overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 *                                      Relax this requirement to work with older peers.
 *
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>
#include <net/switchdev.h>

#include <linux/nospec.h>

struct ipmr_rule {
        struct fib_rule common;
};

struct ipmr_result {
        struct mr_table *mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
 */
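
/* Illustrative sketch (added note, not from the original file): because of
 * the scheme above, the forwarding fast path reads the resolved cache under
 * RCU only, roughly
 *
 *        rcu_read_lock();
 *        cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 *        ...
 *        rcu_read_unlock();
 *
 * so no exclusive lock is taken per packet.
 */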

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);

static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                          struct net_device *dev, struct sk_buff *skb,
                          struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                              struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
                                 int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
        list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
        struct mr_table *mrt;

        ipmr_for_each_table(mrt, net) {
                if (mrt->id == id)
                        return mrt;
        }
        return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
                           struct mr_table **mrt)
{
        int err;
        struct ipmr_result res;
        struct fib_lookup_arg arg = {
                .result = &res,
                .flags = FIB_LOOKUP_NOREF,
        };

        /* update flow if oif or iif point to device enslaved to l3mdev */
        l3mdev_update_flow(net, flowi4_to_flowi(flp4));

        err = fib_rules_lookup(net->ipv4.mr_rules_ops,
                               flowi4_to_flowi(flp4), 0, &arg);
        if (err < 0)
                return err;
        *mrt = res.mrt;
        return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
                            int flags, struct fib_lookup_arg *arg)
{
        struct ipmr_result *res = arg->result;
        struct mr_table *mrt;

        switch (rule->action) {
        case FR_ACT_TO_TBL:
                break;
        case FR_ACT_UNREACHABLE:
                return -ENETUNREACH;
        case FR_ACT_PROHIBIT:
                return -EACCES;
        case FR_ACT_BLACKHOLE:
        default:
                return -EINVAL;
        }

        arg->table = fib_rule_get_table(rule, arg);

        mrt = ipmr_get_table(rule->fr_net, arg->table);
        if (!mrt)
                return -EAGAIN;
        res->mrt = mrt;
        return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
        return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
        FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
                               struct fib_rule_hdr *frh, struct nlattr **tb)
{
        return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
                             struct nlattr **tb)
{
        return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
                          struct fib_rule_hdr *frh)
{
        frh->dst_len = 0;
        frh->src_len = 0;
        frh->tos = 0;
        return 0;
}

static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
        .family         = RTNL_FAMILY_IPMR,
        .rule_size      = sizeof(struct ipmr_rule),
        .addr_size      = sizeof(u32),
        .action         = ipmr_rule_action,
        .match          = ipmr_rule_match,
        .configure      = ipmr_rule_configure,
        .compare        = ipmr_rule_compare,
        .fill           = ipmr_rule_fill,
        .nlgroup        = RTNLGRP_IPV4_RULE,
        .policy         = ipmr_rule_policy,
        .owner          = THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
        struct fib_rules_ops *ops;
        struct mr_table *mrt;
        int err;

        ops = fib_rules_register(&ipmr_rules_ops_template, net);
        if (IS_ERR(ops))
                return PTR_ERR(ops);

        INIT_LIST_HEAD(&net->ipv4.mr_tables);

        mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
        if (IS_ERR(mrt)) {
                err = PTR_ERR(mrt);
                goto err1;
        }

        err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
        if (err < 0)
                goto err2;

        net->ipv4.mr_rules_ops = ops;
        return 0;

err2:
        ipmr_free_table(mrt);
err1:
        fib_rules_unregister(ops);
        return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
        struct mr_table *mrt, *next;

        rtnl_lock();
        list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
                list_del(&mrt->list);
                ipmr_free_table(mrt);
        }
        fib_rules_unregister(net->ipv4.mr_rules_ops);
        rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
{
        return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
        return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
        return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
}
EXPORT_SYMBOL(ipmr_rule_default);
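
/* Note (summary, assuming standard iproute2 behaviour): with multiple
 * tables enabled, the table a packet resolves to is chosen by the fib
 * rules registered above; userspace manages them as multicast rules,
 * e.g. something like "ip mrule add iif eth0 lookup 100" (illustrative
 * command, check iproute2 documentation for exact syntax).
 */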
#else
#define ipmr_for_each_table(mrt, net) \
        for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
        return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
                           struct mr_table **mrt)
{
        *mrt = net->ipv4.mrt;
        return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
        struct mr_table *mrt;

        mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
        if (IS_ERR(mrt))
                return PTR_ERR(mrt);
        net->ipv4.mrt = mrt;
        return 0;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
        rtnl_lock();
        ipmr_free_table(net->ipv4.mrt);
        net->ipv4.mrt = NULL;
        rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
{
        return 0;
}

static unsigned int ipmr_rules_seq_read(struct net *net)
{
        return 0;
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
        return true;
}
EXPORT_SYMBOL(ipmr_rule_default);
#endif

static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
                                const void *ptr)
{
        const struct mfc_cache_cmp_arg *cmparg = arg->key;
        struct mfc_cache *c = (struct mfc_cache *)ptr;

        return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
               cmparg->mfc_origin != c->mfc_origin;
}

static const struct rhashtable_params ipmr_rht_params = {
        .head_offset = offsetof(struct mfc_cache, mnode),
        .key_offset = offsetof(struct mfc_cache, cmparg),
        .key_len = sizeof(struct mfc_cache_cmp_arg),
        .nelem_hint = 3,
        .locks_mul = 1,
        .obj_cmpfn = ipmr_hash_cmp,
        .automatic_shrinking = true,
};
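
/* Note (summary, not from the original file): resolved entries live in an
 * rhltable keyed by the (mcastgrp, origin) pair, so entries sharing a key
 * (e.g. (S,G) duplicates differing only in mfc_parent) chain off a single
 * rhlist head.  A lookup is then roughly:
 *
 *        struct mfc_cache_cmp_arg arg = {
 *                .mfc_mcastgrp = mcastgrp,
 *                .mfc_origin = origin,
 *        };
 *        list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
 */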

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
        struct mr_table *mrt;
        int err;

        /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
        if (id != RT_TABLE_DEFAULT && id >= 1000000000)
                return ERR_PTR(-EINVAL);

        mrt = ipmr_get_table(net, id);
        if (mrt)
                return mrt;

        mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
        if (!mrt)
                return ERR_PTR(-ENOMEM);
        write_pnet(&mrt->net, net);
        mrt->id = id;

        err = rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
        if (err) {
                kfree(mrt);
                return ERR_PTR(err);
        }
        INIT_LIST_HEAD(&mrt->mfc_cache_list);
        INIT_LIST_HEAD(&mrt->mfc_unres_queue);

        timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);

        mrt->mroute_reg_vif_num = -1;
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
        list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
        return mrt;
}

static void ipmr_free_table(struct mr_table *mrt)
{
        del_timer_sync(&mrt->ipmr_expire_timer);
        mroute_clean_tables(mrt, true);
        rhltable_destroy(&mrt->mfc_hash);
        kfree(mrt);
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
        struct net *net = dev_net(dev);

        dev_close(dev);

        dev = __dev_get_by_name(net, "tunl0");
        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                struct ifreq ifr;
                struct ip_tunnel_parm p;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
                        set_fs(oldfs);
                }
        }
}

/* Initialize ipmr pimreg/tunnel in_device */
static bool ipmr_init_vif_indev(const struct net_device *dev)
{
        struct in_device *in_dev;

        ASSERT_RTNL();

        in_dev = __in_dev_get_rtnl(dev);
        if (!in_dev)
                return false;
        ipv4_devconf_setall(in_dev);
        neigh_parms_data_state_setall(in_dev->arp_parms);
        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

        return true;
}

static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
        struct net_device *dev;

        dev = __dev_get_by_name(net, "tunl0");

        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                int err;
                struct ifreq ifr;
                struct ip_tunnel_parm p;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                        set_fs(oldfs);
                } else {
                        err = -EOPNOTSUPP;
                }
                dev = NULL;

                if (err == 0 &&
                    (dev = __dev_get_by_name(net, p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;
                        if (!ipmr_init_vif_indev(dev))
                                goto failure;
                        if (dev_open(dev))
                                goto failure;
                        dev_hold(dev);
                }
        }
        return dev;

failure:
        unregister_netdevice(dev);
        return NULL;
}

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net *net = dev_net(dev);
        struct mr_table *mrt;
        struct flowi4 fl4 = {
                .flowi4_oif = dev->ifindex,
                .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
                .flowi4_mark = skb->mark,
        };
        int err;

        err = ipmr_fib_lookup(net, &fl4, &mrt);
        if (err < 0) {
                kfree_skb(skb);
                return err;
        }

        read_lock(&mrt_lock);
        dev->stats.tx_bytes += skb->len;
        dev->stats.tx_packets++;
        ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
        return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
        .ndo_start_xmit = reg_vif_xmit,
        .ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
        dev->type = ARPHRD_PIMREG;
        dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
        dev->flags = IFF_NOARP;
        dev->netdev_ops = &reg_vif_netdev_ops;
        dev->needs_free_netdev = true;
        dev->features |= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
        struct net_device *dev;
        char name[IFNAMSIZ];

        if (mrt->id == RT_TABLE_DEFAULT)
                sprintf(name, "pimreg");
        else
                sprintf(name, "pimreg%u", mrt->id);

        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);

        if (!dev)
                return NULL;

        dev_net_set(dev, net);

        if (register_netdevice(dev)) {
                free_netdev(dev);
                return NULL;
        }

        if (!ipmr_init_vif_indev(dev))
                goto failure;
        if (dev_open(dev))
                goto failure;

        dev_hold(dev);

        return dev;

failure:
        unregister_netdevice(dev);
        return NULL;
}

/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
                     unsigned int pimlen)
{
        struct net_device *reg_dev = NULL;
        struct iphdr *encap;

        encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
        /* Check that:
         * a. packet is really sent to a multicast group
         * b. packet is not a NULL-REGISTER
         * c. packet is not truncated
         */
        if (!ipv4_is_multicast(encap->daddr) ||
            encap->tot_len == 0 ||
            ntohs(encap->tot_len) + pimlen > skb->len)
                return 1;

        read_lock(&mrt_lock);
        if (mrt->mroute_reg_vif_num >= 0)
                reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
        read_unlock(&mrt_lock);

        if (!reg_dev)
                return 1;

        skb->mac_header = skb->network_header;
        skb_pull(skb, (u8 *)encap - skb->data);
        skb_reset_network_header(skb);
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = CHECKSUM_NONE;

        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

        netif_rx(skb);

        return NET_RX_SUCCESS;
}
#else
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
        return NULL;
}
#endif

static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
                                        struct net *net,
                                        enum fib_event_type event_type,
                                        struct vif_device *vif,
                                        vifi_t vif_index, u32 tb_id)
{
        struct vif_entry_notifier_info info = {
                .info = {
                        .family = RTNL_FAMILY_IPMR,
                        .net = net,
                },
                .dev = vif->dev,
                .vif_index = vif_index,
                .vif_flags = vif->flags,
                .tb_id = tb_id,
        };

        return call_fib_notifier(nb, net, event_type, &info.info);
}

static int call_ipmr_vif_entry_notifiers(struct net *net,
                                         enum fib_event_type event_type,
                                         struct vif_device *vif,
                                         vifi_t vif_index, u32 tb_id)
{
        struct vif_entry_notifier_info info = {
                .info = {
                        .family = RTNL_FAMILY_IPMR,
                        .net = net,
                },
                .dev = vif->dev,
                .vif_index = vif_index,
                .vif_flags = vif->flags,
                .tb_id = tb_id,
        };

        ASSERT_RTNL();
        net->ipv4.ipmr_seq++;
        return call_fib_notifiers(net, event_type, &info.info);
}

static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
                                        struct net *net,
                                        enum fib_event_type event_type,
                                        struct mfc_cache *mfc, u32 tb_id)
{
        struct mfc_entry_notifier_info info = {
                .info = {
                        .family = RTNL_FAMILY_IPMR,
                        .net = net,
                },
                .mfc = mfc,
                .tb_id = tb_id
        };

        return call_fib_notifier(nb, net, event_type, &info.info);
}

static int call_ipmr_mfc_entry_notifiers(struct net *net,
                                         enum fib_event_type event_type,
                                         struct mfc_cache *mfc, u32 tb_id)
{
        struct mfc_entry_notifier_info info = {
                .info = {
                        .family = RTNL_FAMILY_IPMR,
                        .net = net,
                },
                .mfc = mfc,
                .tb_id = tb_id
        };

        ASSERT_RTNL();
        net->ipv4.ipmr_seq++;
        return call_fib_notifiers(net, event_type, &info.info);
}

/**
 * vif_delete - Delete a VIF entry
 * @notify: Set to 1, if the caller is a notifier_call
 */
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
                      struct list_head *head)
{
        struct net *net = read_pnet(&mrt->net);
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= mrt->maxvif)
                return -EADDRNOTAVAIL;

        v = &mrt->vif_table[vifi];

        if (VIF_EXISTS(mrt, vifi))
                call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
                                              mrt->id);

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

        if (vifi == mrt->mroute_reg_vif_num)
                mrt->mroute_reg_vif_num = -1;

        if (vifi + 1 == mrt->maxvif) {
                int tmp;

                for (tmp = vifi - 1; tmp >= 0; tmp--) {
                        if (VIF_EXISTS(mrt, tmp))
                                break;
                }
                mrt->maxvif = tmp + 1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev) {
                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
                inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
                                            NETCONFA_MC_FORWARDING,
                                            dev->ifindex, &in_dev->cnf);
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
                unregister_netdevice_queue(dev, head);

        dev_put(dev);
        return 0;
}

static void ipmr_cache_free_rcu(struct rcu_head *head)
{
        struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);

        kmem_cache_free(mrt_cachep, c);
}

void ipmr_cache_free(struct mfc_cache *c)
{
        call_rcu(&c->rcu, ipmr_cache_free_rcu);
}
EXPORT_SYMBOL(ipmr_cache_free);

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */
static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
        struct net *net = read_pnet(&mrt->net);
        struct sk_buff *skb;
        struct nlmsgerr *e;

        atomic_dec(&mrt->cache_resolve_queue_len);

        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        e = nlmsg_data(nlh);
                        e->error = -ETIMEDOUT;
                        memset(&e->msg, 0, sizeof(e->msg));

                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
                } else {
                        kfree_skb(skb);
                }
        }

        ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */
static void ipmr_expire_process(struct timer_list *t)
{
        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
        unsigned long now;
        unsigned long expires;
        struct mfc_cache *c, *next;

        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ / 10);
                return;
        }

        if (list_empty(&mrt->mfc_unres_queue))
                goto out;

        now = jiffies;
        expires = 10 * HZ;

        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        unsigned long interval = c->mfc_un.unres.expires - now;

                        if (interval < expires)
                                expires = interval;
                        continue;
                }

                list_del(&c->list);
                mroute_netlink_event(mrt, c, RTM_DELROUTE);
                ipmr_destroy_unres(mrt, c);
        }

        if (!list_empty(&mrt->mfc_unres_queue))
                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
                                   unsigned char *ttls)
{
        int vifi;

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
                if (VIF_EXISTS(mrt, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
        cache->mfc_un.res.lastuse = jiffies;
}
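
/* Worked example (illustrative): with vifs 1 and 3 present and
 * ttls = {0, 2, 0, 3}, the loop above leaves minvif = 1 and maxvif = 4,
 * so forwarding for this entry only scans vifs 1..3.
 */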

static int vif_add(struct net *net, struct mr_table *mrt,
                   struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct switchdev_attr attr = {
                .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
        };
        struct vif_device *v = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;
        int err;

        /* Is vif busy ? */
        if (VIF_EXISTS(mrt, vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
        case VIFF_REGISTER:
                if (!ipmr_pimsm_enabled())
                        return -EINVAL;
                /* Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (mrt->mroute_reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif(net, mrt);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        unregister_netdevice(dev);
                        dev_put(dev);
                        return err;
                }
                break;
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(net, vifc);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        ipmr_del_tunnel(dev, vifc);
                        dev_put(dev);
                        return err;
                }
                break;
        case VIFF_USE_IFINDEX:
        case 0:
                if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
                        dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
                        if (dev && !__in_dev_get_rtnl(dev)) {
                                dev_put(dev);
                                return -EADDRNOTAVAIL;
                        }
                } else {
                        dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
                }
                if (!dev)
                        return -EADDRNOTAVAIL;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        dev_put(dev);
                        return err;
                }
                break;
        default:
                return -EINVAL;
        }

        in_dev = __in_dev_get_rtnl(dev);
        if (!in_dev) {
                dev_put(dev);
                return -EADDRNOTAVAIL;
        }
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
        inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
                                    dev->ifindex, &in_dev->cnf);
        ip_rt_multicast_event(in_dev);

        /* Fill in the VIF structures */

        attr.orig_dev = dev;
        if (!switchdev_port_attr_get(dev, &attr)) {
                memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
                v->dev_parent_id.id_len = attr.u.ppid.id_len;
        } else {
                v->dev_parent_id.id_len = 0;
        }
        v->rate_limit = vifc->vifc_rate_limit;
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
        v->flags = vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold = vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
                v->link = dev_get_iflink(dev);

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        v->dev = dev;
        if (v->flags & VIFF_REGISTER)
                mrt->mroute_reg_vif_num = vifi;
        if (vifi + 1 > mrt->maxvif)
                mrt->maxvif = vifi + 1;
        write_unlock_bh(&mrt_lock);
        call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
        return 0;
}

/* called with rcu_read_lock() */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
                                         __be32 origin,
                                         __be32 mcastgrp)
{
        struct mfc_cache_cmp_arg arg = {
                .mfc_mcastgrp = mcastgrp,
                .mfc_origin = origin
        };
        struct rhlist_head *tmp, *list;
        struct mfc_cache *c;

        list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
        rhl_for_each_entry_rcu(c, tmp, list, mnode)
                return c;

        return NULL;
}

/* Look for a (*,*,oif) entry */
static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
                                                    int vifi)
{
        struct mfc_cache_cmp_arg arg = {
                .mfc_mcastgrp = htonl(INADDR_ANY),
                .mfc_origin = htonl(INADDR_ANY)
        };
        struct rhlist_head *tmp, *list;
        struct mfc_cache *c;

        list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
        rhl_for_each_entry_rcu(c, tmp, list, mnode)
                if (c->mfc_un.res.ttls[vifi] < 255)
                        return c;

        return NULL;
}

/* Look for a (*,G) entry */
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
                                             __be32 mcastgrp, int vifi)
{
        struct mfc_cache_cmp_arg arg = {
                .mfc_mcastgrp = mcastgrp,
                .mfc_origin = htonl(INADDR_ANY)
        };
        struct rhlist_head *tmp, *list;
        struct mfc_cache *c, *proxy;

        if (mcastgrp == htonl(INADDR_ANY))
                goto skip;

        list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
        rhl_for_each_entry_rcu(c, tmp, list, mnode) {
                if (c->mfc_un.res.ttls[vifi] < 255)
                        return c;

                /* It's ok if the vifi is part of the static tree */
                proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
                if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
                        return c;
        }

skip:
        return ipmr_cache_find_any_parent(mrt, vifi);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
                                                __be32 origin, __be32 mcastgrp,
                                                int parent)
{
        struct mfc_cache_cmp_arg arg = {
                .mfc_mcastgrp = mcastgrp,
                .mfc_origin = origin,
        };
        struct rhlist_head *tmp, *list;
        struct mfc_cache *c;

        list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
        rhl_for_each_entry_rcu(c, tmp, list, mnode)
                if (parent == -1 || parent == c->mfc_parent)
                        return c;

        return NULL;
}

/* Allocate a multicast cache entry */
static struct mfc_cache *ipmr_cache_alloc(void)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

        if (c) {
                c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
                c->mfc_un.res.minvif = MAXVIFS;
                refcount_set(&c->mfc_un.res.refcount, 1);
        }
        return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

        if (c) {
                skb_queue_head_init(&c->mfc_un.unres.unresolved);
                c->mfc_un.unres.expires = jiffies + 10 * HZ;
        }
        return c;
}

/* A cache entry has gone into a resolved state from queued */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
                               struct mfc_cache *uc, struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        /* Play the pending entries through our router */
        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = skb_pull(skb,
                                                        sizeof(struct iphdr));

                        if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
                                nlh->nlmsg_len = skb_tail_pointer(skb) -
                                                 (u8 *)nlh;
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                e = nlmsg_data(nlh);
                                e->error = -EMSGSIZE;
                                memset(&e->msg, 0, sizeof(e->msg));
                        }

                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
                } else {
                        ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
                }
        }
}

/* Bounce a cache query up to mrouted and netlink.
 *
 * Called under mrt_lock.
 */
static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert)
{
        const int ihl = ip_hdrlen(pkt);
        struct sock *mroute_sk;
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        struct sk_buff *skb;
        int ret;

        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
                skb = alloc_skb(128, GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                 * Duplicate old header, fix ihl, length etc.
                 * And all this only to mangle msg->im_msgtype and
                 * to set msg->im_mbz to "mbz" :-)
                 */
                skb_push(skb, sizeof(struct iphdr));
                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                msg = (struct igmpmsg *)skb_network_header(skb);
                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = mrt->mroute_reg_vif_num;
                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                             sizeof(struct iphdr));
        } else {
                /* Copy the IP header */
                skb_set_network_header(skb, skb->len);
                skb_put(skb, ihl);
                skb_copy_to_linear_data(skb, pkt->data, ihl);
                /* Flag to the kernel this is a route add */
                ip_hdr(skb)->protocol = 0;
                msg = (struct igmpmsg *)skb_network_header(skb);
                msg->im_vif = vifi;
                skb_dst_set(skb, dst_clone(skb_dst(pkt)));
                /* Add our header */
                igmp = skb_put(skb, sizeof(struct igmphdr));
                igmp->type = assert;
                msg->im_msgtype = assert;
                igmp->code = 0;
                ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
                skb->transport_header = skb->network_header;
        }

        rcu_read_lock();
        mroute_sk = rcu_dereference(mrt->mroute_sk);
        if (!mroute_sk) {
                rcu_read_unlock();
                kfree_skb(skb);
                return -EINVAL;
        }

        igmpmsg_netlink_event(mrt, skb);

        /* Deliver to mrouted */
        ret = sock_queue_rcv_skb(mroute_sk, skb);
        rcu_read_unlock();
        if (ret < 0) {
                net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
                kfree_skb(skb);
        }

        return ret;
}
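
/* Daemon side (sketch, not part of this file): mrouted reads the upcall
 * from its raw IGMP socket as a struct igmpmsg and reacts to im_msgtype,
 * roughly
 *
 *        n = recv(mroute_fd, buf, sizeof(buf), 0);
 *        msg = (struct igmpmsg *)buf;
 *        if (msg->im_msgtype == IGMPMSG_NOCACHE)
 *                ... resolve (msg->im_src, msg->im_dst), then MRT_ADD_MFC ...
 */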

/* Queue a packet for resolution. It gets a locked cache entry! */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                                 struct sk_buff *skb, struct net_device *dev)
{
        const struct iphdr *iph = ip_hdr(skb);
        struct mfc_cache *c;
        bool found = false;
        int err;

        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
                if (c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr) {
                        found = true;
                        break;
                }
        }

        if (!found) {
                /* Create a new entry if allowable */
                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
                    (c = ipmr_cache_alloc_unres()) == NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /* Fill in the new cache entry */
                c->mfc_parent = -1;
                c->mfc_origin = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;

                /* Reflect first query at mrouted. */
                err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed throw the cache entry
                         * out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        ipmr_cache_free(c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&mrt->cache_resolve_queue_len);
                list_add(&c->list, &mrt->mfc_unres_queue);
                mroute_netlink_event(mrt, c, RTM_NEWROUTE);

                if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
                        mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
        }

        /* See if we can append the packet */
        if (c->mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                if (dev) {
                        skb->dev = dev;
                        skb->skb_iif = dev->ifindex;
                }
                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}

/* MFC cache manipulation by user space mroute daemon */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
        struct net *net = read_pnet(&mrt->net);
        struct mfc_cache *c;

        /* The entries are added/deleted only under RTNL */
        rcu_read_lock();
        c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
                                   mfc->mfcc_mcastgrp.s_addr, parent);
        rcu_read_unlock();
        if (!c)
                return -ENOENT;
        rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
        list_del_rcu(&c->list);
        call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
        mroute_netlink_event(mrt, c, RTM_DELROUTE);
        ipmr_cache_put(c);

        return 0;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
                        struct mfcctl *mfc, int mrtsock, int parent)
{
        struct mfc_cache *uc, *c;
        bool found;
        int ret;

        if (mfc->mfcc_parent >= MAXVIFS)
                return -ENFILE;

        /* The entries are added/deleted only under RTNL */
        rcu_read_lock();
        c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
                                   mfc->mfcc_mcastgrp.s_addr, parent);
        rcu_read_unlock();
        if (c) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
                                              mrt->id);
                mroute_netlink_event(mrt, c, RTM_NEWROUTE);
                return 0;
        }

        if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
            !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c = ipmr_cache_alloc();
        if (!c)
                return -ENOMEM;

        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent = mfc->mfcc_parent;
        ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
                                  ipmr_rht_params);
        if (ret) {
                pr_err("ipmr: rhtable insert error %d\n", ret);
                ipmr_cache_free(c);
                return ret;
        }
        list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
        /* Check to see if we resolved a queued list. If so we
         * need to send on the frames and tidy up.
         */
        found = false;
        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        list_del(&uc->list);
                        atomic_dec(&mrt->cache_resolve_queue_len);
                        found = true;
                        break;
                }
        }
        if (list_empty(&mrt->mfc_unres_queue))
                del_timer(&mrt->ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);

        if (found) {
                ipmr_cache_resolve(net, mrt, uc, c);
                ipmr_cache_free(uc);
        }
        call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
        mroute_netlink_event(mrt, c, RTM_NEWROUTE);
        return 0;
}

/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
        struct net *net = read_pnet(&mrt->net);
        struct mfc_cache *c, *tmp;
        LIST_HEAD(list);
        int i;

        /* Shut down all active vif entries */
        for (i = 0; i < mrt->maxvif; i++) {
                if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
                        continue;
                vif_delete(mrt, i, 0, &list);
        }
        unregister_netdevice_many(&list);

        /* Wipe the cache */
        list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
                if (!all && (c->mfc_flags & MFC_STATIC))
                        continue;
                rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
                list_del_rcu(&c->list);
                call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
                                              mrt->id);
                mroute_netlink_event(mrt, c, RTM_DELROUTE);
                ipmr_cache_put(c);
        }

        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
                spin_lock_bh(&mfc_unres_lock);
                list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
                        list_del(&c->list);
                        mroute_netlink_event(mrt, c, RTM_DELROUTE);
                        ipmr_destroy_unres(mrt, c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}

/* called from ip_ra_control(), before an RCU grace period,
 * we don't need to call synchronize_rcu() here
 */
static void mrtsock_destruct(struct sock *sk)
{
        struct net *net = sock_net(sk);
        struct mr_table *mrt;

        ASSERT_RTNL();
        ipmr_for_each_table(mrt, net) {
                if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
                                                    NETCONFA_MC_FORWARDING,
                                                    NETCONFA_IFINDEX_ALL,
                                                    net->ipv4.devconf_all);
                        RCU_INIT_POINTER(mrt->mroute_sk, NULL);
                        mroute_clean_tables(mrt, false);
                }
        }
}

/* Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
                         unsigned int optlen)
{
        struct net *net = sock_net(sk);
        int val, ret = 0, parent = 0;
        struct mr_table *mrt;
        struct vifctl vif;
        struct mfcctl mfc;
        u32 uval;

        /* There's one exception to the lock - MRT_DONE which needs to unlock */
        rtnl_lock();
        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_IGMP) {
                ret = -EOPNOTSUPP;
                goto out_unlock;
        }

        mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
        if (!mrt) {
                ret = -ENOENT;
                goto out_unlock;
        }
        if (optname != MRT_INIT) {
                if (sk != rcu_access_pointer(mrt->mroute_sk) &&
                    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
                        ret = -EACCES;
                        goto out_unlock;
                }
        }

        switch (optname) {
        case MRT_INIT:
                if (optlen != sizeof(int)) {
                        ret = -EINVAL;
                        break;
                }
                if (rtnl_dereference(mrt->mroute_sk)) {
                        ret = -EADDRINUSE;
                        break;
                }

                ret = ip_ra_control(sk, 1, mrtsock_destruct);
                if (ret == 0) {
                        rcu_assign_pointer(mrt->mroute_sk, sk);
                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
                                                    NETCONFA_MC_FORWARDING,
                                                    NETCONFA_IFINDEX_ALL,
                                                    net->ipv4.devconf_all);
                }
                break;
        case MRT_DONE:
                if (sk != rcu_access_pointer(mrt->mroute_sk)) {
                        ret = -EACCES;
                } else {
                        ret = ip_ra_control(sk, 0, NULL);
                        goto out_unlock;
                }
                break;
        case MRT_ADD_VIF:
        case MRT_DEL_VIF:
                if (optlen != sizeof(vif)) {
                        ret = -EINVAL;
                        break;
                }
                if (copy_from_user(&vif, optval, sizeof(vif))) {
                        ret = -EFAULT;
                        break;
                }
                if (vif.vifc_vifi >= MAXVIFS) {
                        ret = -ENFILE;
                        break;
                }
                if (optname == MRT_ADD_VIF) {
                        ret = vif_add(net, mrt, &vif,
                                      sk == rtnl_dereference(mrt->mroute_sk));
                } else {
                        ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
                }
                break;
        /* Manipulate the forwarding caches. These live
         * in a sort of kernel/user symbiosis.
         */
        case MRT_ADD_MFC:
        case MRT_DEL_MFC:
                parent = -1;
                /* fall through */
        case MRT_ADD_MFC_PROXY:
        case MRT_DEL_MFC_PROXY:
                if (optlen != sizeof(mfc)) {
                        ret = -EINVAL;
                        break;
                }
                if (copy_from_user(&mfc, optval, sizeof(mfc))) {
                        ret = -EFAULT;
                        break;
                }
                if (parent == 0)
                        parent = mfc.mfcc_parent;
                if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
                        ret = ipmr_mfc_delete(mrt, &mfc, parent);
                else
                        ret = ipmr_mfc_add(net, mrt, &mfc,
                                           sk == rtnl_dereference(mrt->mroute_sk),
                                           parent);
                break;
        /* Control PIM assert. */
        case MRT_ASSERT:
                if (optlen != sizeof(val)) {
                        ret = -EINVAL;
                        break;
                }
                if (get_user(val, (int __user *)optval)) {
                        ret = -EFAULT;
                        break;
                }
                mrt->mroute_do_assert = val;
                break;
        case MRT_PIM:
                if (!ipmr_pimsm_enabled()) {
                        ret = -ENOPROTOOPT;
                        break;
                }
                if (optlen != sizeof(val)) {
                        ret = -EINVAL;
                        break;
                }
                if (get_user(val, (int __user *)optval)) {
                        ret = -EFAULT;
                        break;
                }

                val = !!val;
                if (val != mrt->mroute_do_pim) {
                        mrt->mroute_do_pim = val;
                        mrt->mroute_do_assert = val;
                }
                break;
        case MRT_TABLE:
                if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
                        ret = -ENOPROTOOPT;
                        break;
                }
                if (optlen != sizeof(uval)) {
                        ret = -EINVAL;
                        break;
                }
                if (get_user(uval, (u32 __user *)optval)) {
                        ret = -EFAULT;
                        break;
                }

                if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        ret = -EBUSY;
                } else {
                        mrt = ipmr_new_table(net, uval);
                        if (IS_ERR(mrt))
                                ret = PTR_ERR(mrt);
                        else
                                raw_sk(sk)->ipmr_table = uval;
                }
                break;
        /* Spurious command, or MRT_VERSION which you cannot set. */
        default:
                ret = -ENOPROTOOPT;
        }
out_unlock:
        rtnl_unlock();
        return ret;
}

/* Getsockopt support for the multicast routing system. */
int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
        int olr;
        int val;
        struct net *net = sock_net(sk);
        struct mr_table *mrt;

        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_IGMP)
                return -EOPNOTSUPP;

        mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
        if (!mrt)
                return -ENOENT;

        switch (optname) {
        case MRT_VERSION:
                val = 0x0305;
                break;
        case MRT_PIM:
                if (!ipmr_pimsm_enabled())
                        return -ENOPROTOOPT;
                val = mrt->mroute_do_pim;
                break;
        case MRT_ASSERT:
                val = mrt->mroute_do_assert;
                break;
        default:
                return -ENOPROTOOPT;
        }

        if (get_user(olr, optlen))
                return -EFAULT;
        olr = min_t(unsigned int, olr, sizeof(int));
        if (olr < 0)
                return -EINVAL;
        if (put_user(olr, optlen))
                return -EFAULT;
        if (copy_to_user(optval, &val, olr))
                return -EFAULT;
        return 0;
}

/* The IP multicast ioctl support routines. */
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;
        struct net *net = sock_net(sk);
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
        if (!mrt)
                return -ENOENT;

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi >= mrt->maxvif)
                        return -EINVAL;
                vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
                read_lock(&mrt_lock);
                vif = &mrt->vif_table[vr.vifi];
                if (VIF_EXISTS(mrt, vr.vifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;

                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
                        rcu_read_unlock();

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
        }
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req {
        struct in_addr src;
        struct in_addr grp;
        compat_ulong_t pktcnt;
        compat_ulong_t bytecnt;
        compat_ulong_t wrong_if;
};

struct compat_sioc_vif_req {
        vifi_t vifi;            /* Which iface */
        compat_ulong_t icount;
        compat_ulong_t ocount;
        compat_ulong_t ibytes;
        compat_ulong_t obytes;
};

int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
        struct compat_sioc_sg_req sr;
        struct compat_sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;
        struct net *net = sock_net(sk);
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
        if (!mrt)
                return -ENOENT;

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi >= mrt->maxvif)
                        return -EINVAL;
                vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
                read_lock(&mrt_lock);
                vif = &mrt->vif_table[vr.vifi];
                if (VIF_EXISTS(mrt, vr.vifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;

                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
                        rcu_read_unlock();

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
        }
}
#endif

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net *net = dev_net(dev);
        struct mr_table *mrt;
        struct vif_device *v;
        int ct;

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;

        ipmr_for_each_table(mrt, net) {
                v = &mrt->vif_table[0];
                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
                        if (v->dev == dev)
                                vif_delete(mrt, ct, 1, NULL);
                }
        }
        return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};

7ef8f65d
NA
1826/* Encapsulate a packet by attaching a valid IPIP header to it.
1827 * This avoids tunnel drivers and other mess and gives us the speed so
1828 * important for multicast video.
1da177e4 1829 */
b6a7719a
HFS
1830static void ip_encap(struct net *net, struct sk_buff *skb,
1831 __be32 saddr, __be32 daddr)
1da177e4 1832{
8856dfa3 1833 struct iphdr *iph;
b71d1d42 1834 const struct iphdr *old_iph = ip_hdr(skb);
8856dfa3
ACM
1835
1836 skb_push(skb, sizeof(struct iphdr));
b0e380b1 1837 skb->transport_header = skb->network_header;
8856dfa3 1838 skb_reset_network_header(skb);
eddc9ec5 1839 iph = ip_hdr(skb);
1da177e4 1840
a8cb16dd 1841 iph->version = 4;
e023dd64
ACM
1842 iph->tos = old_iph->tos;
1843 iph->ttl = old_iph->ttl;
1da177e4
LT
1844 iph->frag_off = 0;
1845 iph->daddr = daddr;
1846 iph->saddr = saddr;
1847 iph->protocol = IPPROTO_IPIP;
1848 iph->ihl = 5;
1849 iph->tot_len = htons(skb->len);
b6a7719a 1850 ip_select_ident(net, skb, NULL);
1da177e4
LT
1851 ip_send_check(iph);
1852
1da177e4
LT
1853 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1854 nf_reset(skb);
1855}
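/* After ip_encap() the frame looks, schematically, like:
 *
 *   [ outer IPv4 hdr: proto = IPPROTO_IPIP, ihl = 5,
 *     saddr/daddr = tunnel endpoints ][ original IPv4 hdr ][ payload ]
 *
 * i.e. the whole original packet becomes the payload of a minimal,
 * option-free IPIP datagram whose TOS and TTL are copied from the
 * inner header.
 */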
1856
0c4b51f0
EB
1857static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
1858 struct sk_buff *skb)
1da177e4 1859{
a8cb16dd 1860 struct ip_options *opt = &(IPCB(skb)->opt);
1da177e4 1861
73186df8
DM
1862 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
1863 IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
1da177e4
LT
1864
1865 if (unlikely(opt->optlen))
1866 ip_forward_options(skb);
1867
13206b6b 1868 return dst_output(net, sk, skb);
1da177e4
LT
1869}
1870
a5bc9294
YG
1871#ifdef CONFIG_NET_SWITCHDEV
1872static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1873 int in_vifi, int out_vifi)
1874{
1875 struct vif_device *out_vif = &mrt->vif_table[out_vifi];
1876 struct vif_device *in_vif = &mrt->vif_table[in_vifi];
1877
1878 if (!skb->offload_mr_fwd_mark)
1879 return false;
1880 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
1881 return false;
1882 return netdev_phys_item_id_same(&out_vif->dev_parent_id,
1883 &in_vif->dev_parent_id);
1884}
1885#else
1886static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1887 int in_vifi, int out_vifi)
1888{
1889 return false;
1890}
1891#endif
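/* Put differently: if the skb is marked as already multicast-forwarded in
 * hardware and both the ingress and egress VIFs hang off the same switchdev
 * parent, the device has replicated the packet itself, so the software
 * path skips its copy rather than emit a duplicate.
 */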
1892
7ef8f65d 1893/* Processing handlers for ipmr_forward */
1da177e4 1894
0c12295a 1895static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
a5bc9294
YG
1896 int in_vifi, struct sk_buff *skb,
1897 struct mfc_cache *c, int vifi)
1da177e4 1898{
eddc9ec5 1899 const struct iphdr *iph = ip_hdr(skb);
0c12295a 1900 struct vif_device *vif = &mrt->vif_table[vifi];
1da177e4
LT
1901 struct net_device *dev;
1902 struct rtable *rt;
31e4543d 1903 struct flowi4 fl4;
1da177e4
LT
1904 int encap = 0;
1905
51456b29 1906 if (!vif->dev)
1da177e4
LT
1907 goto out_free;
1908
1da177e4
LT
1909 if (vif->flags & VIFF_REGISTER) {
1910 vif->pkt_out++;
c354e124 1911 vif->bytes_out += skb->len;
cf3677ae
PE
1912 vif->dev->stats.tx_bytes += skb->len;
1913 vif->dev->stats.tx_packets++;
0c12295a 1914 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
69ebbf58 1915 goto out_free;
1da177e4 1916 }
1da177e4 1917
a5bc9294
YG
1918 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
1919 goto out_free;
1920
a8cb16dd 1921 if (vif->flags & VIFF_TUNNEL) {
31e4543d 1922 rt = ip_route_output_ports(net, &fl4, NULL,
78fbfd8a
DM
1923 vif->remote, vif->local,
1924 0, 0,
1925 IPPROTO_IPIP,
1926 RT_TOS(iph->tos), vif->link);
b23dd4fe 1927 if (IS_ERR(rt))
1da177e4
LT
1928 goto out_free;
1929 encap = sizeof(struct iphdr);
1930 } else {
31e4543d 1931 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
78fbfd8a
DM
1932 0, 0,
1933 IPPROTO_IPIP,
1934 RT_TOS(iph->tos), vif->link);
b23dd4fe 1935 if (IS_ERR(rt))
1da177e4
LT
1936 goto out_free;
1937 }
1938
d8d1f30b 1939 dev = rt->dst.dev;
1da177e4 1940
d8d1f30b 1941 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1da177e4 1942 /* Do not fragment multicasts. Alas, IPv4 does not
a8cb16dd
ED
 1943 * allow us to send ICMP here, so such packets
 1944 * will simply disappear into a black hole.
1da177e4 1945 */
73186df8 1946 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
1947 ip_rt_put(rt);
1948 goto out_free;
1949 }
1950
d8d1f30b 1951 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1da177e4
LT
1952
1953 if (skb_cow(skb, encap)) {
e905a9ed 1954 ip_rt_put(rt);
1da177e4
LT
1955 goto out_free;
1956 }
1957
1958 vif->pkt_out++;
c354e124 1959 vif->bytes_out += skb->len;
1da177e4 1960
adf30907 1961 skb_dst_drop(skb);
d8d1f30b 1962 skb_dst_set(skb, &rt->dst);
eddc9ec5 1963 ip_decrease_ttl(ip_hdr(skb));
1da177e4
LT
1964
1965 /* FIXME: forward and output firewalls used to be called here.
a8cb16dd
ED
1966 * What do we do with netfilter? -- RR
1967 */
1da177e4 1968 if (vif->flags & VIFF_TUNNEL) {
b6a7719a 1969 ip_encap(net, skb, vif->local, vif->remote);
1da177e4 1970 /* FIXME: extra output firewall step used to be here. --RR */
2f4c02d4
PE
1971 vif->dev->stats.tx_packets++;
1972 vif->dev->stats.tx_bytes += skb->len;
1da177e4
LT
1973 }
1974
9ee6c5dc 1975 IPCB(skb)->flags |= IPSKB_FORWARDED;
1da177e4 1976
7ef8f65d 1977 /* RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
1da177e4
LT
 1978 * not only before forwarding, but after forwarding on all output
 1979 * interfaces. Clearly, if the mrouter runs a multicasting
 1980 * program, it should receive packets regardless of which
 1981 * interface the program is joined on.
 1982 * If we did not do this, the program would have to join on all
 1983 * interfaces. On the other hand, a multihomed host (or router, but
 1984 * not mrouter) cannot join on more than one interface - that would
 1985 * result in receiving multiple packets.
 1986 */
29a26a56
EB
1987 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
1988 net, NULL, skb, skb->dev, dev,
1da177e4
LT
1989 ipmr_forward_finish);
1990 return;
1991
1992out_free:
1993 kfree_skb(skb);
1da177e4
LT
1994}
1995
0c12295a 1996static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1da177e4
LT
1997{
1998 int ct;
0c12295a
PM
1999
2000 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
2001 if (mrt->vif_table[ct].dev == dev)
1da177e4
LT
2002 break;
2003 }
2004 return ct;
2005}
2006
2007/* "local" means that we should preserve one skb (for local delivery) */
c4854ec8 2008static void ip_mr_forward(struct net *net, struct mr_table *mrt,
4b1f0d33
DS
2009 struct net_device *dev, struct sk_buff *skb,
2010 struct mfc_cache *cache, int local)
1da177e4 2011{
4b1f0d33 2012 int true_vifi = ipmr_find_vif(mrt, dev);
1da177e4
LT
2013 int psend = -1;
2014 int vif, ct;
2015
2016 vif = cache->mfc_parent;
2017 cache->mfc_un.res.pkt++;
2018 cache->mfc_un.res.bytes += skb->len;
43b9e127 2019 cache->mfc_un.res.lastuse = jiffies;
1da177e4 2020
360eb5da 2021 if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
660b26dc
ND
2022 struct mfc_cache *cache_proxy;
2023
 2024 /* For an (*,G) entry, we only check that the incoming
2025 * interface is part of the static tree.
2026 */
2027 cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
2028 if (cache_proxy &&
2029 cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2030 goto forward;
2031 }
2032
7ef8f65d 2033 /* Wrong interface: drop packet and (maybe) send PIM assert. */
4b1f0d33 2034 if (mrt->vif_table[vif].dev != dev) {
c7537967 2035 if (rt_is_output_route(skb_rtable(skb))) {
1da177e4 2036 /* It is our own packet, looped back.
a8cb16dd
ED
 2037 * Very complicated situation...
 2038 *
 2039 * The best workaround until the routing daemons are
 2040 * fixed is not to redistribute a packet if it was
 2041 * sent through the wrong interface. It means that
 2042 * multicast applications WILL NOT work for
 2043 * (S,G) entries whose default multicast route points
 2044 * to the wrong oif. In any case, it is not a good
 2045 * idea to run multicasting applications on a router.
1da177e4
LT
2046 */
2047 goto dont_forward;
2048 }
2049
2050 cache->mfc_un.res.wrong_if++;
1da177e4 2051
0c12295a 2052 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1da177e4 2053 /* PIM-SM uses asserts when switching from RPT to SPT,
a8cb16dd
ED
 2054 * so we cannot check that the packet arrived on an oif.
 2055 * It is bad, but otherwise we would need to move a pretty
 2056 * large chunk of pimd into the kernel. Ough... --ANK
1da177e4 2057 */
0c12295a 2058 (mrt->mroute_do_pim ||
6f9374a9 2059 cache->mfc_un.res.ttls[true_vifi] < 255) &&
e905a9ed 2060 time_after(jiffies,
1da177e4
LT
2061 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2062 cache->mfc_un.res.last_assert = jiffies;
0c12295a 2063 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1da177e4
LT
2064 }
2065 goto dont_forward;
2066 }
2067
660b26dc 2068forward:
0c12295a
PM
2069 mrt->vif_table[vif].pkt_in++;
2070 mrt->vif_table[vif].bytes_in += skb->len;
1da177e4 2071
7ef8f65d 2072 /* Forward the frame */
360eb5da
ND
2073 if (cache->mfc_origin == htonl(INADDR_ANY) &&
2074 cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
660b26dc
ND
2075 if (true_vifi >= 0 &&
2076 true_vifi != cache->mfc_parent &&
2077 ip_hdr(skb)->ttl >
2078 cache->mfc_un.res.ttls[cache->mfc_parent]) {
2079 /* It's an (*,*) entry and the packet is not coming from
2080 * the upstream: forward the packet to the upstream
2081 * only.
2082 */
2083 psend = cache->mfc_parent;
2084 goto last_forward;
2085 }
2086 goto dont_forward;
2087 }
a8cb16dd
ED
2088 for (ct = cache->mfc_un.res.maxvif - 1;
2089 ct >= cache->mfc_un.res.minvif; ct--) {
660b26dc 2090 /* For (*,G) entry, don't forward to the incoming interface */
360eb5da
ND
2091 if ((cache->mfc_origin != htonl(INADDR_ANY) ||
2092 ct != true_vifi) &&
660b26dc 2093 ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1da177e4
LT
2094 if (psend != -1) {
2095 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
a8cb16dd 2096
1da177e4 2097 if (skb2)
a5bc9294
YG
2098 ipmr_queue_xmit(net, mrt, true_vifi,
2099 skb2, cache, psend);
1da177e4 2100 }
c354e124 2101 psend = ct;
1da177e4
LT
2102 }
2103 }
660b26dc 2104last_forward:
1da177e4
LT
2105 if (psend != -1) {
2106 if (local) {
2107 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
a8cb16dd 2108
1da177e4 2109 if (skb2)
a5bc9294
YG
2110 ipmr_queue_xmit(net, mrt, true_vifi, skb2,
2111 cache, psend);
1da177e4 2112 } else {
a5bc9294 2113 ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
c4854ec8 2114 return;
1da177e4
LT
2115 }
2116 }
2117
2118dont_forward:
2119 if (!local)
2120 kfree_skb(skb);
1da177e4
LT
2121}
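/* A worked example of the forwarding loop above, with hypothetical values:
 * for a resolved (S,G) entry with ttls[] = { 1, 255, 64 } and an arriving
 * packet whose IP TTL is 2, vif 2 is skipped (2 <= 64), vif 1 is disabled
 * (threshold 255), and vif 0 qualifies (2 > 1). The loop walks from
 * maxvif-1 down to minvif, cloning the skb for every match except the
 * last one found, which is sent the original (unless a local copy must
 * be preserved).
 */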
2122
417da66f 2123static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
ee3f1aaf 2124{
417da66f
DM
2125 struct rtable *rt = skb_rtable(skb);
2126 struct iphdr *iph = ip_hdr(skb);
da91981b 2127 struct flowi4 fl4 = {
417da66f
DM
2128 .daddr = iph->daddr,
2129 .saddr = iph->saddr,
b0fe4a31 2130 .flowi4_tos = RT_TOS(iph->tos),
4fd551d7
DM
2131 .flowi4_oif = (rt_is_output_route(rt) ?
2132 skb->dev->ifindex : 0),
2133 .flowi4_iif = (rt_is_output_route(rt) ?
1fb9489b 2134 LOOPBACK_IFINDEX :
4fd551d7 2135 skb->dev->ifindex),
b4869889 2136 .flowi4_mark = skb->mark,
ee3f1aaf
DM
2137 };
2138 struct mr_table *mrt;
2139 int err;
2140
da91981b 2141 err = ipmr_fib_lookup(net, &fl4, &mrt);
ee3f1aaf
DM
2142 if (err)
2143 return ERR_PTR(err);
2144 return mrt;
2145}
1da177e4 2146
7ef8f65d
NA
2147/* Multicast packets for forwarding arrive here
 2148 * Called with rcu_read_lock() held.
1da177e4 2149 */
1da177e4
LT
2150int ip_mr_input(struct sk_buff *skb)
2151{
2152 struct mfc_cache *cache;
4feb88e5 2153 struct net *net = dev_net(skb->dev);
511c3f92 2154 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
f0ad0860 2155 struct mr_table *mrt;
bcfc7d33
TW
2156 struct net_device *dev;
2157
2158 /* skb->dev passed in is the loX master dev for vrfs.
2159 * As there are no vifs associated with loopback devices,
2160 * get the proper interface that does have a vif associated with it.
2161 */
2162 dev = skb->dev;
2163 if (netif_is_l3_master(skb->dev)) {
2164 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2165 if (!dev) {
2166 kfree_skb(skb);
2167 return -ENODEV;
2168 }
2169 }
1da177e4
LT
2170
 2171 /* A packet that is looped back after forwarding should not be
a8cb16dd 2172 * forwarded a second time, but it can still be delivered locally.
1da177e4 2173 */
4c968709 2174 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1da177e4
LT
2175 goto dont_forward;
2176
417da66f 2177 mrt = ipmr_rt_fib_lookup(net, skb);
ee3f1aaf
DM
2178 if (IS_ERR(mrt)) {
2179 kfree_skb(skb);
2180 return PTR_ERR(mrt);
e40dbc51 2181 }
1da177e4 2182 if (!local) {
4c968709
ED
2183 if (IPCB(skb)->opt.router_alert) {
2184 if (ip_call_ra_chain(skb))
2185 return 0;
2186 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
 2187 /* IGMPv1 (and broken IGMPv2 implementations such as
 2188 * Cisco IOS <= 11.2(8)) do not put the router alert
 2189 * option into IGMP packets destined to routable
 2190 * groups. This is very bad, because it means
 2191 * that we can forward NO IGMP messages.
2192 */
2193 struct sock *mroute_sk;
2194
2195 mroute_sk = rcu_dereference(mrt->mroute_sk);
2196 if (mroute_sk) {
2197 nf_reset(skb);
2198 raw_rcv(mroute_sk, skb);
2199 return 0;
2200 }
1da177e4
LT
2201 }
2202 }
2203
a8c9486b 2204 /* already under rcu_read_lock() */
0c12295a 2205 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
51456b29 2206 if (!cache) {
bcfc7d33 2207 int vif = ipmr_find_vif(mrt, dev);
660b26dc
ND
2208
2209 if (vif >= 0)
2210 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
2211 vif);
2212 }
1da177e4 2213
7ef8f65d 2214 /* No usable cache entry */
51456b29 2215 if (!cache) {
1da177e4
LT
2216 int vif;
2217
2218 if (local) {
2219 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2220 ip_local_deliver(skb);
51456b29 2221 if (!skb2)
1da177e4 2222 return -ENOBUFS;
1da177e4
LT
2223 skb = skb2;
2224 }
2225
a8c9486b 2226 read_lock(&mrt_lock);
bcfc7d33 2227 vif = ipmr_find_vif(mrt, dev);
1da177e4 2228 if (vif >= 0) {
4b1f0d33 2229 int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
1da177e4
LT
2230 read_unlock(&mrt_lock);
2231
0eae88f3 2232 return err2;
1da177e4
LT
2233 }
2234 read_unlock(&mrt_lock);
2235 kfree_skb(skb);
2236 return -ENODEV;
2237 }
2238
a8c9486b 2239 read_lock(&mrt_lock);
4b1f0d33 2240 ip_mr_forward(net, mrt, dev, skb, cache, local);
1da177e4
LT
2241 read_unlock(&mrt_lock);
2242
2243 if (local)
2244 return ip_local_deliver(skb);
2245
2246 return 0;
2247
2248dont_forward:
2249 if (local)
2250 return ip_local_deliver(skb);
2251 kfree_skb(skb);
2252 return 0;
2253}
2254
b1879204 2255#ifdef CONFIG_IP_PIMSM_V1
7ef8f65d 2256/* Handle IGMP messages of PIMv1 */
a8cb16dd 2257int pim_rcv_v1(struct sk_buff *skb)
b1879204
IJ
2258{
2259 struct igmphdr *pim;
4feb88e5 2260 struct net *net = dev_net(skb->dev);
f0ad0860 2261 struct mr_table *mrt;
b1879204
IJ
2262
2263 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
2264 goto drop;
2265
2266 pim = igmp_hdr(skb);
2267
417da66f 2268 mrt = ipmr_rt_fib_lookup(net, skb);
ee3f1aaf
DM
2269 if (IS_ERR(mrt))
2270 goto drop;
0c12295a 2271 if (!mrt->mroute_do_pim ||
b1879204
IJ
2272 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
2273 goto drop;
2274
f0ad0860 2275 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
b1879204
IJ
2276drop:
2277 kfree_skb(skb);
2278 }
1da177e4
LT
2279 return 0;
2280}
2281#endif
2282
2283#ifdef CONFIG_IP_PIMSM_V2
a8cb16dd 2284static int pim_rcv(struct sk_buff *skb)
1da177e4
LT
2285{
2286 struct pimreghdr *pim;
f0ad0860
PM
2287 struct net *net = dev_net(skb->dev);
2288 struct mr_table *mrt;
1da177e4 2289
b1879204 2290 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1da177e4
LT
2291 goto drop;
2292
9c70220b 2293 pim = (struct pimreghdr *)skb_transport_header(skb);
56245cae 2294 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
a8cb16dd 2295 (pim->flags & PIM_NULL_REGISTER) ||
e905a9ed 2296 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
d3bc23e7 2297 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1da177e4
LT
2298 goto drop;
2299
417da66f 2300 mrt = ipmr_rt_fib_lookup(net, skb);
ee3f1aaf
DM
2301 if (IS_ERR(mrt))
2302 goto drop;
f0ad0860 2303 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
b1879204
IJ
2304drop:
2305 kfree_skb(skb);
2306 }
1da177e4
LT
2307 return 0;
2308}
2309#endif
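/* Acceptance criteria applied by pim_rcv() above: the type octet must
 * encode PIM version 2 with PIM_TYPE_REGISTER, the null-register flag
 * must be clear, and the checksum must verify either over the PIM header
 * alone or, failing that, over the entire packet.
 */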
2310
cb6a4e46
PM
2311static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2312 struct mfc_cache *c, struct rtmsg *rtm)
1da177e4 2313{
adfa85e4 2314 struct rta_mfc_stats mfcs;
43b9e127
NA
2315 struct nlattr *mp_attr;
2316 struct rtnexthop *nhp;
b5036cd4 2317 unsigned long lastuse;
43b9e127 2318 int ct;
1da177e4 2319
7438189b 2320 /* If cache is unresolved, don't try to parse IIF and OIF */
1708ebc9
NA
2321 if (c->mfc_parent >= MAXVIFS) {
2322 rtm->rtm_flags |= RTNH_F_UNRESOLVED;
7438189b 2323 return -ENOENT;
1708ebc9 2324 }
7438189b 2325
92a395e5
TG
2326 if (VIF_EXISTS(mrt, c->mfc_parent) &&
2327 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
2328 return -EMSGSIZE;
1da177e4 2329
c7c0bbea
YG
2330 if (c->mfc_flags & MFC_OFFLOAD)
2331 rtm->rtm_flags |= RTNH_F_OFFLOAD;
2332
92a395e5
TG
2333 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
2334 return -EMSGSIZE;
1da177e4
LT
2335
2336 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
0c12295a 2337 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
92a395e5
TG
2338 if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
2339 nla_nest_cancel(skb, mp_attr);
2340 return -EMSGSIZE;
2341 }
2342
1da177e4
LT
2343 nhp->rtnh_flags = 0;
2344 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
0c12295a 2345 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1da177e4
LT
2346 nhp->rtnh_len = sizeof(*nhp);
2347 }
2348 }
92a395e5
TG
2349
2350 nla_nest_end(skb, mp_attr);
2351
b5036cd4
NA
2352 lastuse = READ_ONCE(c->mfc_un.res.lastuse);
2353 lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
2354
adfa85e4
ND
2355 mfcs.mfcs_packets = c->mfc_un.res.pkt;
2356 mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2357 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
43b9e127 2358 if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
b5036cd4 2359 nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
43b9e127 2360 RTA_PAD))
adfa85e4
ND
2361 return -EMSGSIZE;
2362
1da177e4
LT
2363 rtm->rtm_type = RTN_MULTICAST;
2364 return 1;
1da177e4
LT
2365}
2366
9a1b9496
DM
2367int ipmr_get_route(struct net *net, struct sk_buff *skb,
2368 __be32 saddr, __be32 daddr,
9f09eaea 2369 struct rtmsg *rtm, u32 portid)
1da177e4 2370{
1da177e4 2371 struct mfc_cache *cache;
9a1b9496
DM
2372 struct mr_table *mrt;
2373 int err;
1da177e4 2374
f0ad0860 2375 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
51456b29 2376 if (!mrt)
f0ad0860
PM
2377 return -ENOENT;
2378
a8c9486b 2379 rcu_read_lock();
9a1b9496 2380 cache = ipmr_cache_find(mrt, saddr, daddr);
51456b29 2381 if (!cache && skb->dev) {
660b26dc 2382 int vif = ipmr_find_vif(mrt, skb->dev);
1da177e4 2383
660b26dc
ND
2384 if (vif >= 0)
2385 cache = ipmr_cache_find_any(mrt, daddr, vif);
2386 }
51456b29 2387 if (!cache) {
72287490 2388 struct sk_buff *skb2;
eddc9ec5 2389 struct iphdr *iph;
1da177e4 2390 struct net_device *dev;
a8cb16dd 2391 int vif = -1;
1da177e4 2392
1da177e4 2393 dev = skb->dev;
a8c9486b 2394 read_lock(&mrt_lock);
a8cb16dd
ED
2395 if (dev)
2396 vif = ipmr_find_vif(mrt, dev);
2397 if (vif < 0) {
1da177e4 2398 read_unlock(&mrt_lock);
a8c9486b 2399 rcu_read_unlock();
1da177e4
LT
2400 return -ENODEV;
2401 }
75e26751
GN
2402
2403 skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
72287490
AK
2404 if (!skb2) {
2405 read_unlock(&mrt_lock);
a8c9486b 2406 rcu_read_unlock();
72287490
AK
2407 return -ENOMEM;
2408 }
2409
2cf75070 2410 NETLINK_CB(skb2).portid = portid;
e2d1bca7
ACM
2411 skb_push(skb2, sizeof(struct iphdr));
2412 skb_reset_network_header(skb2);
eddc9ec5
ACM
2413 iph = ip_hdr(skb2);
2414 iph->ihl = sizeof(struct iphdr) >> 2;
9a1b9496
DM
2415 iph->saddr = saddr;
2416 iph->daddr = daddr;
eddc9ec5 2417 iph->version = 0;
4b1f0d33 2418 err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
1da177e4 2419 read_unlock(&mrt_lock);
a8c9486b 2420 rcu_read_unlock();
1da177e4
LT
2421 return err;
2422 }
2423
a8c9486b 2424 read_lock(&mrt_lock);
cb6a4e46 2425 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1da177e4 2426 read_unlock(&mrt_lock);
a8c9486b 2427 rcu_read_unlock();
1da177e4
LT
2428 return err;
2429}
2430
cb6a4e46 2431static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
65886f43
ND
2432 u32 portid, u32 seq, struct mfc_cache *c, int cmd,
2433 int flags)
cb6a4e46
PM
2434{
2435 struct nlmsghdr *nlh;
2436 struct rtmsg *rtm;
1eb99af5 2437 int err;
cb6a4e46 2438
65886f43 2439 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
51456b29 2440 if (!nlh)
cb6a4e46
PM
2441 return -EMSGSIZE;
2442
2443 rtm = nlmsg_data(nlh);
2444 rtm->rtm_family = RTNL_FAMILY_IPMR;
2445 rtm->rtm_dst_len = 32;
2446 rtm->rtm_src_len = 32;
2447 rtm->rtm_tos = 0;
2448 rtm->rtm_table = mrt->id;
f3756b79
DM
2449 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2450 goto nla_put_failure;
cb6a4e46
PM
2451 rtm->rtm_type = RTN_MULTICAST;
2452 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
9a68ac72
ND
2453 if (c->mfc_flags & MFC_STATIC)
2454 rtm->rtm_protocol = RTPROT_STATIC;
2455 else
2456 rtm->rtm_protocol = RTPROT_MROUTED;
cb6a4e46
PM
2457 rtm->rtm_flags = 0;
2458
930345ea
JB
2459 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
2460 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
f3756b79 2461 goto nla_put_failure;
1eb99af5
ND
2462 err = __ipmr_fill_mroute(mrt, skb, c, rtm);
2463 /* do not break the dump if cache is unresolved */
2464 if (err < 0 && err != -ENOENT)
cb6a4e46
PM
2465 goto nla_put_failure;
2466
053c095a
JB
2467 nlmsg_end(skb, nlh);
2468 return 0;
cb6a4e46
PM
2469
2470nla_put_failure:
2471 nlmsg_cancel(skb, nlh);
2472 return -EMSGSIZE;
2473}
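/* Sketch of the RTM_NEWROUTE message assembled above:
 *
 *   struct rtmsg   family RTNL_FAMILY_IPMR, src/dst len 32, RTN_MULTICAST
 *   RTA_TABLE      u32       mrt->id
 *   RTA_SRC        in_addr   mfc_origin
 *   RTA_DST        in_addr   mfc_mcastgrp
 *   -- appended by __ipmr_fill_mroute() for resolved entries --
 *   RTA_IIF        u32       ifindex of the parent vif
 *   RTA_MULTIPATH  nested    one rtnexthop per forwarding vif (hops = ttl)
 *   RTA_MFC_STATS  64-bit    pkt / byte / wrong_if counters
 *   RTA_EXPIRES    u64       clock ticks since last use
 */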
2474
8cd3ac9f
ND
2475static size_t mroute_msgsize(bool unresolved, int maxvif)
2476{
2477 size_t len =
2478 NLMSG_ALIGN(sizeof(struct rtmsg))
2479 + nla_total_size(4) /* RTA_TABLE */
2480 + nla_total_size(4) /* RTA_SRC */
2481 + nla_total_size(4) /* RTA_DST */
2482 ;
2483
2484 if (!unresolved)
2485 len = len
2486 + nla_total_size(4) /* RTA_IIF */
2487 + nla_total_size(0) /* RTA_MULTIPATH */
2488 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2489 /* RTA_MFC_STATS */
a9a08042 2490 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
8cd3ac9f
ND
2491 ;
2492
2493 return len;
2494}
2495
2496static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
2497 int cmd)
2498{
2499 struct net *net = read_pnet(&mrt->net);
2500 struct sk_buff *skb;
2501 int err = -ENOBUFS;
2502
2503 skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
2504 GFP_ATOMIC);
51456b29 2505 if (!skb)
8cd3ac9f
ND
2506 goto errout;
2507
65886f43 2508 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
8cd3ac9f
ND
2509 if (err < 0)
2510 goto errout;
2511
2512 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
2513 return;
2514
2515errout:
2516 kfree_skb(skb);
2517 if (err < 0)
2518 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
2519}
2520
5a645dd8
JG
2521static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
2522{
2523 size_t len =
2524 NLMSG_ALIGN(sizeof(struct rtgenmsg))
2525 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */
2526 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */
2527 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */
2528 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */
2529 /* IPMRA_CREPORT_PKT */
2530 + nla_total_size(payloadlen)
2531 ;
2532
2533 return len;
2534}
2535
2536static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2537{
2538 struct net *net = read_pnet(&mrt->net);
2539 struct nlmsghdr *nlh;
2540 struct rtgenmsg *rtgenm;
2541 struct igmpmsg *msg;
2542 struct sk_buff *skb;
2543 struct nlattr *nla;
2544 int payloadlen;
2545
2546 payloadlen = pkt->len - sizeof(struct igmpmsg);
2547 msg = (struct igmpmsg *)skb_network_header(pkt);
2548
2549 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2550 if (!skb)
2551 goto errout;
2552
2553 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2554 sizeof(struct rtgenmsg), 0);
2555 if (!nlh)
2556 goto errout;
2557 rtgenm = nlmsg_data(nlh);
2558 rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
2559 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
2560 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) ||
2561 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
2562 msg->im_src.s_addr) ||
2563 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
2564 msg->im_dst.s_addr))
2565 goto nla_put_failure;
2566
2567 nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
2568 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
2569 nla_data(nla), payloadlen))
2570 goto nla_put_failure;
2571
2572 nlmsg_end(skb, nlh);
2573
2574 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
2575 return;
2576
2577nla_put_failure:
2578 nlmsg_cancel(skb, nlh);
2579errout:
2580 kfree_skb(skb);
2581 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
2582}
2583
4f75ba69
DS
2584static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2585 struct netlink_ext_ack *extack)
2586{
2587 struct net *net = sock_net(in_skb->sk);
2588 struct nlattr *tb[RTA_MAX + 1];
2589 struct sk_buff *skb = NULL;
2590 struct mfc_cache *cache;
2591 struct mr_table *mrt;
2592 struct rtmsg *rtm;
2593 __be32 src, grp;
2594 u32 tableid;
2595 int err;
2596
2597 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
2598 rtm_ipv4_policy, extack);
2599 if (err < 0)
2600 goto errout;
2601
2602 rtm = nlmsg_data(nlh);
2603
2604 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2605 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
2606 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
2607
2608 mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
2e3d232e
DC
2609 if (!mrt) {
2610 err = -ENOENT;
4f75ba69
DS
2611 goto errout_free;
2612 }
2613
2614 /* entries are added/deleted only under RTNL */
2615 rcu_read_lock();
2616 cache = ipmr_cache_find(mrt, src, grp);
2617 rcu_read_unlock();
2618 if (!cache) {
2619 err = -ENOENT;
2620 goto errout_free;
2621 }
2622
2623 skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
2624 if (!skb) {
2625 err = -ENOBUFS;
2626 goto errout_free;
2627 }
2628
2629 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2630 nlh->nlmsg_seq, cache,
2631 RTM_NEWROUTE, 0);
2632 if (err < 0)
2633 goto errout_free;
2634
2635 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2636
2637errout:
2638 return err;
2639
2640errout_free:
2641 kfree_skb(skb);
2642 goto errout;
2643}
2644
cb6a4e46
PM
2645static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2646{
2647 struct net *net = sock_net(skb->sk);
2648 struct mr_table *mrt;
2649 struct mfc_cache *mfc;
2650 unsigned int t = 0, s_t;
cb6a4e46
PM
2651 unsigned int e = 0, s_e;
2652
2653 s_t = cb->args[0];
8fb472c0 2654 s_e = cb->args[1];
cb6a4e46 2655
a8c9486b 2656 rcu_read_lock();
cb6a4e46
PM
2657 ipmr_for_each_table(mrt, net) {
2658 if (t < s_t)
2659 goto next_table;
8fb472c0
NA
2660 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
2661 if (e < s_e)
2662 goto next_entry;
2663 if (ipmr_fill_mroute(mrt, skb,
2664 NETLINK_CB(cb->skb).portid,
2665 cb->nlh->nlmsg_seq,
2666 mfc, RTM_NEWROUTE,
2667 NLM_F_MULTI) < 0)
2668 goto done;
cb6a4e46 2669next_entry:
8fb472c0 2670 e++;
cb6a4e46 2671 }
8fb472c0 2672
1eb99af5
ND
2673 spin_lock_bh(&mfc_unres_lock);
2674 list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
2675 if (e < s_e)
2676 goto next_entry2;
2677 if (ipmr_fill_mroute(mrt, skb,
2678 NETLINK_CB(cb->skb).portid,
2679 cb->nlh->nlmsg_seq,
65886f43
ND
2680 mfc, RTM_NEWROUTE,
2681 NLM_F_MULTI) < 0) {
1eb99af5
ND
2682 spin_unlock_bh(&mfc_unres_lock);
2683 goto done;
2684 }
2685next_entry2:
2686 e++;
2687 }
2688 spin_unlock_bh(&mfc_unres_lock);
8fb472c0
NA
2689 e = 0;
2690 s_e = 0;
cb6a4e46
PM
2691next_table:
2692 t++;
2693 }
2694done:
a8c9486b 2695 rcu_read_unlock();
cb6a4e46 2696
8fb472c0 2697 cb->args[1] = e;
cb6a4e46
PM
2698 cb->args[0] = t;
2699
2700 return skb->len;
2701}
2702
ccbb0aa6
NA
2703static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
2704 [RTA_SRC] = { .type = NLA_U32 },
2705 [RTA_DST] = { .type = NLA_U32 },
2706 [RTA_IIF] = { .type = NLA_U32 },
2707 [RTA_TABLE] = { .type = NLA_U32 },
2708 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2709};
2710
2711static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
2712{
2713 switch (rtm_protocol) {
2714 case RTPROT_STATIC:
2715 case RTPROT_MROUTED:
2716 return true;
2717 }
2718 return false;
2719}
2720
2721static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
2722{
2723 struct rtnexthop *rtnh = nla_data(nla);
2724 int remaining = nla_len(nla), vifi = 0;
2725
2726 while (rtnh_ok(rtnh, remaining)) {
2727 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
2728 if (++vifi == MAXVIFS)
2729 break;
2730 rtnh = rtnh_next(rtnh, &remaining);
2731 }
2732
2733 return remaining > 0 ? -EINVAL : vifi;
2734}
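/* So an RTM_NEWROUTE request whose RTA_MULTIPATH holds rtnexthop entries
 * { .rtnh_hops = 1 } and { .rtnh_hops = 255 } yields mfcc_ttls[0] = 1 and
 * mfcc_ttls[1] = 255: rtnh_hops is reused as the per-vif TTL threshold,
 * with 255 meaning "never forward on this vif".
 */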
2735
2736/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
2737static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
2738 struct mfcctl *mfcc, int *mrtsock,
c21ef3e3
DA
2739 struct mr_table **mrtret,
2740 struct netlink_ext_ack *extack)
ccbb0aa6
NA
2741{
2742 struct net_device *dev = NULL;
2743 u32 tblid = RT_TABLE_DEFAULT;
2744 struct mr_table *mrt;
2745 struct nlattr *attr;
2746 struct rtmsg *rtm;
2747 int ret, rem;
2748
fceb6435 2749 ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy,
c21ef3e3 2750 extack);
ccbb0aa6
NA
2751 if (ret < 0)
2752 goto out;
2753 rtm = nlmsg_data(nlh);
2754
2755 ret = -EINVAL;
2756 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
2757 rtm->rtm_type != RTN_MULTICAST ||
2758 rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
2759 !ipmr_rtm_validate_proto(rtm->rtm_protocol))
2760 goto out;
2761
2762 memset(mfcc, 0, sizeof(*mfcc));
2763 mfcc->mfcc_parent = -1;
2764 ret = 0;
2765 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
2766 switch (nla_type(attr)) {
2767 case RTA_SRC:
2768 mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
2769 break;
2770 case RTA_DST:
2771 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
2772 break;
2773 case RTA_IIF:
2774 dev = __dev_get_by_index(net, nla_get_u32(attr));
2775 if (!dev) {
2776 ret = -ENODEV;
2777 goto out;
2778 }
2779 break;
2780 case RTA_MULTIPATH:
2781 if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
2782 ret = -EINVAL;
2783 goto out;
2784 }
2785 break;
2786 case RTA_PREFSRC:
2787 ret = 1;
2788 break;
2789 case RTA_TABLE:
2790 tblid = nla_get_u32(attr);
2791 break;
2792 }
2793 }
2794 mrt = ipmr_get_table(net, tblid);
2795 if (!mrt) {
2796 ret = -ENOENT;
2797 goto out;
2798 }
2799 *mrtret = mrt;
2800 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
2801 if (dev)
2802 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
2803
2804out:
2805 return ret;
2806}
2807
2808/* takes care of both newroute and delroute */
c21ef3e3
DA
2809static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
2810 struct netlink_ext_ack *extack)
ccbb0aa6
NA
2811{
2812 struct net *net = sock_net(skb->sk);
2813 int ret, mrtsock, parent;
2814 struct mr_table *tbl;
2815 struct mfcctl mfcc;
2816
2817 mrtsock = 0;
2818 tbl = NULL;
c21ef3e3 2819 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
ccbb0aa6
NA
2820 if (ret < 0)
2821 return ret;
2822
2823 parent = ret ? mfcc.mfcc_parent : -1;
2824 if (nlh->nlmsg_type == RTM_NEWROUTE)
2825 return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
2826 else
2827 return ipmr_mfc_delete(tbl, &mfcc, parent);
2828}
2829
772c344d
NA
2830static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
2831{
2832 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
2833
2834 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
2835 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
2836 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM,
2837 mrt->mroute_reg_vif_num) ||
2838 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
2839 mrt->mroute_do_assert) ||
2840 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim))
2841 return false;
2842
2843 return true;
2844}
2845
2846static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
2847{
2848 struct nlattr *vif_nest;
2849 struct vif_device *vif;
2850
 2851 /* if the VIF doesn't exist, just continue */
2852 if (!VIF_EXISTS(mrt, vifid))
2853 return true;
2854
2855 vif = &mrt->vif_table[vifid];
2856 vif_nest = nla_nest_start(skb, IPMRA_VIF);
2857 if (!vif_nest)
2858 return false;
2859 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
2860 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
2861 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
2862 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
2863 IPMRA_VIFA_PAD) ||
2864 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out,
2865 IPMRA_VIFA_PAD) ||
2866 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in,
2867 IPMRA_VIFA_PAD) ||
2868 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out,
2869 IPMRA_VIFA_PAD) ||
2870 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) ||
2871 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) {
2872 nla_nest_cancel(skb, vif_nest);
2873 return false;
2874 }
2875 nla_nest_end(skb, vif_nest);
2876
2877 return true;
2878}
2879
2880static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
2881{
2882 struct net *net = sock_net(skb->sk);
2883 struct nlmsghdr *nlh = NULL;
2884 unsigned int t = 0, s_t;
2885 unsigned int e = 0, s_e;
2886 struct mr_table *mrt;
2887
2888 s_t = cb->args[0];
2889 s_e = cb->args[1];
2890
2891 ipmr_for_each_table(mrt, net) {
2892 struct nlattr *vifs, *af;
2893 struct ifinfomsg *hdr;
2894 u32 i;
2895
2896 if (t < s_t)
2897 goto skip_table;
2898 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
2899 cb->nlh->nlmsg_seq, RTM_NEWLINK,
2900 sizeof(*hdr), NLM_F_MULTI);
2901 if (!nlh)
2902 break;
2903
2904 hdr = nlmsg_data(nlh);
2905 memset(hdr, 0, sizeof(*hdr));
2906 hdr->ifi_family = RTNL_FAMILY_IPMR;
2907
2908 af = nla_nest_start(skb, IFLA_AF_SPEC);
2909 if (!af) {
2910 nlmsg_cancel(skb, nlh);
2911 goto out;
2912 }
2913
2914 if (!ipmr_fill_table(mrt, skb)) {
2915 nlmsg_cancel(skb, nlh);
2916 goto out;
2917 }
2918
2919 vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS);
2920 if (!vifs) {
2921 nla_nest_end(skb, af);
2922 nlmsg_end(skb, nlh);
2923 goto out;
2924 }
2925 for (i = 0; i < mrt->maxvif; i++) {
2926 if (e < s_e)
2927 goto skip_entry;
2928 if (!ipmr_fill_vif(mrt, i, skb)) {
2929 nla_nest_end(skb, vifs);
2930 nla_nest_end(skb, af);
2931 nlmsg_end(skb, nlh);
2932 goto out;
2933 }
2934skip_entry:
2935 e++;
2936 }
2937 s_e = 0;
2938 e = 0;
2939 nla_nest_end(skb, vifs);
2940 nla_nest_end(skb, af);
2941 nlmsg_end(skb, nlh);
2942skip_table:
2943 t++;
2944 }
2945
2946out:
2947 cb->args[1] = e;
2948 cb->args[0] = t;
2949
2950 return skb->len;
2951}
2952
e905a9ed 2953#ifdef CONFIG_PROC_FS
7ef8f65d
NA
 2954/* The /proc interfaces to multicast routing:
2955 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
1da177e4
LT
2956 */
2957struct ipmr_vif_iter {
f6bb4514 2958 struct seq_net_private p;
f0ad0860 2959 struct mr_table *mrt;
1da177e4
LT
2960 int ct;
2961};
2962
f6bb4514
BT
2963static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2964 struct ipmr_vif_iter *iter,
1da177e4
LT
2965 loff_t pos)
2966{
f0ad0860 2967 struct mr_table *mrt = iter->mrt;
0c12295a
PM
2968
2969 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2970 if (!VIF_EXISTS(mrt, iter->ct))
1da177e4 2971 continue;
e905a9ed 2972 if (pos-- == 0)
0c12295a 2973 return &mrt->vif_table[iter->ct];
1da177e4
LT
2974 }
2975 return NULL;
2976}
2977
2978static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
ba93ef74 2979 __acquires(mrt_lock)
1da177e4 2980{
f0ad0860 2981 struct ipmr_vif_iter *iter = seq->private;
f6bb4514 2982 struct net *net = seq_file_net(seq);
f0ad0860
PM
2983 struct mr_table *mrt;
2984
2985 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
51456b29 2986 if (!mrt)
f0ad0860
PM
2987 return ERR_PTR(-ENOENT);
2988
2989 iter->mrt = mrt;
f6bb4514 2990
1da177e4 2991 read_lock(&mrt_lock);
f6bb4514 2992 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1da177e4
LT
2993 : SEQ_START_TOKEN;
2994}
2995
2996static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2997{
2998 struct ipmr_vif_iter *iter = seq->private;
f6bb4514 2999 struct net *net = seq_file_net(seq);
f0ad0860 3000 struct mr_table *mrt = iter->mrt;
1da177e4
LT
3001
3002 ++*pos;
3003 if (v == SEQ_START_TOKEN)
f6bb4514 3004 return ipmr_vif_seq_idx(net, iter, 0);
e905a9ed 3005
0c12295a
PM
3006 while (++iter->ct < mrt->maxvif) {
3007 if (!VIF_EXISTS(mrt, iter->ct))
1da177e4 3008 continue;
0c12295a 3009 return &mrt->vif_table[iter->ct];
1da177e4
LT
3010 }
3011 return NULL;
3012}
3013
3014static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
ba93ef74 3015 __releases(mrt_lock)
1da177e4
LT
3016{
3017 read_unlock(&mrt_lock);
3018}
3019
3020static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
3021{
f0ad0860
PM
3022 struct ipmr_vif_iter *iter = seq->private;
3023 struct mr_table *mrt = iter->mrt;
f6bb4514 3024
1da177e4 3025 if (v == SEQ_START_TOKEN) {
e905a9ed 3026 seq_puts(seq,
1da177e4
LT
3027 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
3028 } else {
3029 const struct vif_device *vif = v;
3030 const char *name = vif->dev ? vif->dev->name : "none";
3031
3032 seq_printf(seq,
5b5e0928 3033 "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
0c12295a 3034 vif - mrt->vif_table,
e905a9ed 3035 name, vif->bytes_in, vif->pkt_in,
1da177e4
LT
3036 vif->bytes_out, vif->pkt_out,
3037 vif->flags, vif->local, vif->remote);
3038 }
3039 return 0;
3040}
3041
f690808e 3042static const struct seq_operations ipmr_vif_seq_ops = {
1da177e4
LT
3043 .start = ipmr_vif_seq_start,
3044 .next = ipmr_vif_seq_next,
3045 .stop = ipmr_vif_seq_stop,
3046 .show = ipmr_vif_seq_show,
3047};
3048
3049static int ipmr_vif_open(struct inode *inode, struct file *file)
3050{
f6bb4514
BT
3051 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
3052 sizeof(struct ipmr_vif_iter));
1da177e4
LT
3053}
3054
9a32144e 3055static const struct file_operations ipmr_vif_fops = {
1da177e4
LT
3056 .owner = THIS_MODULE,
3057 .open = ipmr_vif_open,
3058 .read = seq_read,
3059 .llseek = seq_lseek,
f6bb4514 3060 .release = seq_release_net,
1da177e4
LT
3061};
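/* Reading /proc/net/ip_mr_vif produces one line per live vif; with
 * hypothetical counters it might look like:
 *
 *   Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *    0 eth0          123456     789    654321     987 00000 0100000A 00000000
 */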
3062
3063struct ipmr_mfc_iter {
f6bb4514 3064 struct seq_net_private p;
f0ad0860 3065 struct mr_table *mrt;
862465f2 3066 struct list_head *cache;
1da177e4
LT
3067};
3068
f6bb4514
BT
3069static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
3070 struct ipmr_mfc_iter *it, loff_t pos)
1da177e4 3071{
f0ad0860 3072 struct mr_table *mrt = it->mrt;
1da177e4
LT
3073 struct mfc_cache *mfc;
3074
a8c9486b 3075 rcu_read_lock();
8fb472c0
NA
3076 it->cache = &mrt->mfc_cache_list;
3077 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
3078 if (pos-- == 0)
3079 return mfc;
a8c9486b 3080 rcu_read_unlock();
1da177e4 3081
1da177e4 3082 spin_lock_bh(&mfc_unres_lock);
0c12295a 3083 it->cache = &mrt->mfc_unres_queue;
862465f2 3084 list_for_each_entry(mfc, it->cache, list)
e258beb2 3085 if (pos-- == 0)
1da177e4
LT
3086 return mfc;
3087 spin_unlock_bh(&mfc_unres_lock);
3088
3089 it->cache = NULL;
3090 return NULL;
3091}
3092
3093
3094static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
3095{
3096 struct ipmr_mfc_iter *it = seq->private;
f6bb4514 3097 struct net *net = seq_file_net(seq);
f0ad0860 3098 struct mr_table *mrt;
f6bb4514 3099
f0ad0860 3100 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
51456b29 3101 if (!mrt)
f0ad0860 3102 return ERR_PTR(-ENOENT);
f6bb4514 3103
f0ad0860 3104 it->mrt = mrt;
1da177e4 3105 it->cache = NULL;
f6bb4514 3106 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1da177e4
LT
3107 : SEQ_START_TOKEN;
3108}
3109
3110static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3111{
1da177e4 3112 struct ipmr_mfc_iter *it = seq->private;
f6bb4514 3113 struct net *net = seq_file_net(seq);
f0ad0860 3114 struct mr_table *mrt = it->mrt;
8fb472c0 3115 struct mfc_cache *mfc = v;
1da177e4
LT
3116
3117 ++*pos;
3118
3119 if (v == SEQ_START_TOKEN)
f6bb4514 3120 return ipmr_mfc_seq_idx(net, seq->private, 0);
1da177e4 3121
862465f2
PM
3122 if (mfc->list.next != it->cache)
3123 return list_entry(mfc->list.next, struct mfc_cache, list);
e905a9ed 3124
0c12295a 3125 if (it->cache == &mrt->mfc_unres_queue)
1da177e4
LT
3126 goto end_of_list;
3127
1da177e4 3128 /* exhausted cache_array, show unresolved */
a8c9486b 3129 rcu_read_unlock();
0c12295a 3130 it->cache = &mrt->mfc_unres_queue;
e905a9ed 3131
1da177e4 3132 spin_lock_bh(&mfc_unres_lock);
862465f2
PM
3133 if (!list_empty(it->cache))
3134 return list_first_entry(it->cache, struct mfc_cache, list);
1da177e4 3135
a8cb16dd 3136end_of_list:
1da177e4
LT
3137 spin_unlock_bh(&mfc_unres_lock);
3138 it->cache = NULL;
3139
3140 return NULL;
3141}
3142
3143static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
3144{
3145 struct ipmr_mfc_iter *it = seq->private;
f0ad0860 3146 struct mr_table *mrt = it->mrt;
1da177e4 3147
0c12295a 3148 if (it->cache == &mrt->mfc_unres_queue)
1da177e4 3149 spin_unlock_bh(&mfc_unres_lock);
8fb472c0 3150 else if (it->cache == &mrt->mfc_cache_list)
a8c9486b 3151 rcu_read_unlock();
1da177e4
LT
3152}
3153
3154static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
3155{
3156 int n;
3157
3158 if (v == SEQ_START_TOKEN) {
e905a9ed 3159 seq_puts(seq,
1da177e4
LT
3160 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
3161 } else {
3162 const struct mfc_cache *mfc = v;
3163 const struct ipmr_mfc_iter *it = seq->private;
f0ad0860 3164 const struct mr_table *mrt = it->mrt;
e905a9ed 3165
0eae88f3
ED
3166 seq_printf(seq, "%08X %08X %-3hd",
3167 (__force u32) mfc->mfc_mcastgrp,
3168 (__force u32) mfc->mfc_origin,
1ea472e2 3169 mfc->mfc_parent);
1da177e4 3170
0c12295a 3171 if (it->cache != &mrt->mfc_unres_queue) {
1ea472e2
BT
3172 seq_printf(seq, " %8lu %8lu %8lu",
3173 mfc->mfc_un.res.pkt,
3174 mfc->mfc_un.res.bytes,
3175 mfc->mfc_un.res.wrong_if);
132adf54 3176 for (n = mfc->mfc_un.res.minvif;
a8cb16dd 3177 n < mfc->mfc_un.res.maxvif; n++) {
0c12295a 3178 if (VIF_EXISTS(mrt, n) &&
cf958ae3
BT
3179 mfc->mfc_un.res.ttls[n] < 255)
3180 seq_printf(seq,
e905a9ed 3181 " %2d:%-3d",
1da177e4
LT
3182 n, mfc->mfc_un.res.ttls[n]);
3183 }
1ea472e2
BT
3184 } else {
3185 /* unresolved mfc_caches don't contain
3186 * pkt, bytes and wrong_if values
3187 */
3188 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1da177e4
LT
3189 }
3190 seq_putc(seq, '\n');
3191 }
3192 return 0;
3193}
3194
f690808e 3195static const struct seq_operations ipmr_mfc_seq_ops = {
1da177e4
LT
3196 .start = ipmr_mfc_seq_start,
3197 .next = ipmr_mfc_seq_next,
3198 .stop = ipmr_mfc_seq_stop,
3199 .show = ipmr_mfc_seq_show,
3200};
3201
3202static int ipmr_mfc_open(struct inode *inode, struct file *file)
3203{
f6bb4514
BT
3204 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
3205 sizeof(struct ipmr_mfc_iter));
1da177e4
LT
3206}
3207
9a32144e 3208static const struct file_operations ipmr_mfc_fops = {
1da177e4
LT
3209 .owner = THIS_MODULE,
3210 .open = ipmr_mfc_open,
3211 .read = seq_read,
3212 .llseek = seq_lseek,
f6bb4514 3213 .release = seq_release_net,
1da177e4 3214};
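/* /proc/net/ip_mr_cache, analogously, lists group, origin, input vif,
 * counters and the per-oif TTL thresholds; a resolved entry might read
 * (hypothetical values):
 *
 *   Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *   010000E0 0100000A 0          4      400        0  1:1
 *
 * Unresolved entries are printed with zeroed counters and no oif list.
 */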
e905a9ed 3215#endif
1da177e4
LT
3216
3217#ifdef CONFIG_IP_PIMSM_V2
32613090 3218static const struct net_protocol pim_protocol = {
1da177e4 3219 .handler = pim_rcv,
403dbb97 3220 .netns_ok = 1,
1da177e4
LT
3221};
3222#endif
3223
4d65b948
YG
3224static unsigned int ipmr_seq_read(struct net *net)
3225{
3226 ASSERT_RTNL();
3227
3228 return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
3229}
3230
3231static int ipmr_dump(struct net *net, struct notifier_block *nb)
3232{
3233 struct mr_table *mrt;
3234 int err;
3235
3236 err = ipmr_rules_dump(net, nb);
3237 if (err)
3238 return err;
3239
3240 ipmr_for_each_table(mrt, net) {
3241 struct vif_device *v = &mrt->vif_table[0];
3242 struct mfc_cache *mfc;
3243 int vifi;
3244
 3245 /* Notify on table VIF entries */
3246 read_lock(&mrt_lock);
3247 for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
3248 if (!v->dev)
3249 continue;
3250
3251 call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
3252 v, vifi, mrt->id);
3253 }
3254 read_unlock(&mrt_lock);
3255
3256 /* Notify on table MFC entries */
3257 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
3258 call_ipmr_mfc_entry_notifier(nb, net,
3259 FIB_EVENT_ENTRY_ADD, mfc,
3260 mrt->id);
3261 }
3262
3263 return 0;
3264}
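/* ipmr_dump() runs when a new FIB notifier registers: every existing vif
 * and resolved cache entry is replayed as an ADD event, so a listener
 * that attaches late (e.g. a switchdev driver) still starts from the
 * complete multicast routing state.
 */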
3265
3266static const struct fib_notifier_ops ipmr_notifier_ops_template = {
3267 .family = RTNL_FAMILY_IPMR,
3268 .fib_seq_read = ipmr_seq_read,
3269 .fib_dump = ipmr_dump,
3270 .owner = THIS_MODULE,
3271};
3272
ef739d8a 3273static int __net_init ipmr_notifier_init(struct net *net)
4d65b948
YG
3274{
3275 struct fib_notifier_ops *ops;
3276
3277 net->ipv4.ipmr_seq = 0;
3278
3279 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
3280 if (IS_ERR(ops))
3281 return PTR_ERR(ops);
3282 net->ipv4.ipmr_notifier_ops = ops;
3283
3284 return 0;
3285}
3286
3287static void __net_exit ipmr_notifier_exit(struct net *net)
3288{
3289 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
3290 net->ipv4.ipmr_notifier_ops = NULL;
3291}
3292
7ef8f65d 3293/* Setup for IP multicast routing */
cf958ae3
BT
3294static int __net_init ipmr_net_init(struct net *net)
3295{
f0ad0860 3296 int err;
cf958ae3 3297
4d65b948
YG
3298 err = ipmr_notifier_init(net);
3299 if (err)
3300 goto ipmr_notifier_fail;
3301
f0ad0860
PM
3302 err = ipmr_rules_init(net);
3303 if (err < 0)
4d65b948 3304 goto ipmr_rules_fail;
f6bb4514
BT
3305
3306#ifdef CONFIG_PROC_FS
3307 err = -ENOMEM;
d4beaa66 3308 if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops))
f6bb4514 3309 goto proc_vif_fail;
d4beaa66 3310 if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops))
f6bb4514
BT
3311 goto proc_cache_fail;
3312#endif
2bb8b26c
BT
3313 return 0;
3314
f6bb4514
BT
3315#ifdef CONFIG_PROC_FS
3316proc_cache_fail:
ece31ffd 3317 remove_proc_entry("ip_mr_vif", net->proc_net);
f6bb4514 3318proc_vif_fail:
f0ad0860 3319 ipmr_rules_exit(net);
f6bb4514 3320#endif
4d65b948
YG
3321ipmr_rules_fail:
3322 ipmr_notifier_exit(net);
3323ipmr_notifier_fail:
cf958ae3
BT
3324 return err;
3325}
3326
3327static void __net_exit ipmr_net_exit(struct net *net)
3328{
f6bb4514 3329#ifdef CONFIG_PROC_FS
ece31ffd
G
3330 remove_proc_entry("ip_mr_cache", net->proc_net);
3331 remove_proc_entry("ip_mr_vif", net->proc_net);
f6bb4514 3332#endif
4d65b948 3333 ipmr_notifier_exit(net);
f0ad0860 3334 ipmr_rules_exit(net);
cf958ae3
BT
3335}
3336
3337static struct pernet_operations ipmr_net_ops = {
3338 .init = ipmr_net_init,
3339 .exit = ipmr_net_exit,
3340};
e905a9ed 3341
03d2f897 3342int __init ip_mr_init(void)
1da177e4 3343{
03d2f897
WC
3344 int err;
3345
1da177e4
LT
3346 mrt_cachep = kmem_cache_create("ip_mrt_cache",
3347 sizeof(struct mfc_cache),
a8c9486b 3348 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
20c2df83 3349 NULL);
03d2f897 3350
cf958ae3
BT
3351 err = register_pernet_subsys(&ipmr_net_ops);
3352 if (err)
3353 goto reg_pernet_fail;
3354
03d2f897
WC
3355 err = register_netdevice_notifier(&ip_mr_notifier);
3356 if (err)
3357 goto reg_notif_fail;
403dbb97
TG
3358#ifdef CONFIG_IP_PIMSM_V2
3359 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
058bd4d2 3360 pr_err("%s: can't add PIM protocol\n", __func__);
403dbb97
TG
3361 err = -EAGAIN;
3362 goto add_proto_fail;
3363 }
3364#endif
c7ac8679 3365 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
b97bac64 3366 ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
ccbb0aa6 3367 rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
b97bac64 3368 ipmr_rtm_route, NULL, 0);
ccbb0aa6 3369 rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
b97bac64 3370 ipmr_rtm_route, NULL, 0);
772c344d
NA
3371
3372 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
b97bac64 3373 NULL, ipmr_rtm_dumplink, 0);
03d2f897 3374 return 0;
f6bb4514 3375
403dbb97
TG
3376#ifdef CONFIG_IP_PIMSM_V2
3377add_proto_fail:
3378 unregister_netdevice_notifier(&ip_mr_notifier);
3379#endif
c3e38896 3380reg_notif_fail:
cf958ae3
BT
3381 unregister_pernet_subsys(&ipmr_net_ops);
3382reg_pernet_fail:
c3e38896 3383 kmem_cache_destroy(mrt_cachep);
03d2f897 3384 return err;
1da177e4 3385}