net/ipv4/ipmr.c (git blame at commit "ipmr: RCU conversion of mroute_sk")

/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

struct mr_table {
	struct list_head	list;
#ifdef CONFIG_NET_NS
	struct net		*net;
#endif
	u32			id;
	struct sock __rcu	*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
 * entries is changed only in process context and protected
 * with weak lock mrt_lock. Queue of unresolved entries is protected
 * with strong spinlock mfc_unres_lock.
 *
 * In this case data path is free of exclusive locks at all.
 */

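/* Illustrative sketch, not part of the original file: the reader-side
 * discipline the comment above describes. The fast path only ever takes
 * mrt_lock for reading; configuration changes take it for writing with
 * BHs disabled, and the unresolved queue has its own spinlock.
 */
#if 0	/* example only */
static void locking_example(struct mr_table *mrt)
{
	read_lock(&mrt_lock);		/* data path: shared, cheap */
	/* ... walk mrt->mfc_cache_array[] or mrt->vif_table[] ... */
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);	/* unresolved queue: exclusive */
	/* ... touch mrt->mfc_unres_queue ... */
	spin_unlock_bh(&mfc_unres_lock);
}
#endif
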
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del(&mrt->list);
		kfree(mrt);
	}
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	write_pnet(&mrt->net, net);
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}

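/* Illustrative sketch, not part of the original file: how callers obtain
 * a table. With CONFIG_IP_MROUTE_MULTIPLE_TABLES, ipmr_fib_lookup()
 * consults the fib-rules engine; otherwise it degenerates to the single
 * per-namespace table. Note that ipmr_new_table() is idempotent for an
 * id that already exists.
 */
#if 0	/* example only */
static int table_example(struct net *net)
{
	struct mr_table *mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);

	if (mrt == NULL)
		return -ENOMEM;
	/* a second call with the same id returns the same table */
	return ipmr_new_table(net, RT_TABLE_DEFAULT) == mrt ? 0 : -EINVAL;
}
#endif
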
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1, if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}


/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10 * HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && __in_dev_get_rtnl(dev) == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags & VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

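/* Illustrative sketch, not part of the original file: resolving an (S,G)
 * pair under mrt_lock, the way the SIOCGETSGCNT ioctl below does. The
 * returned entry is only stable while the lock is held, so any fields a
 * caller needs must be copied out before unlocking.
 */
#if 0	/* example only */
static unsigned long example_pkt_count(struct mr_table *mrt,
				       __be32 src, __be32 grp)
{
	struct mfc_cache *c;
	unsigned long pkt = 0;

	read_lock(&mrt_lock);
	c = ipmr_cache_find(mrt, src, grp);
	if (c)
		pkt = c->mfc_un.res.pkt;	/* copy out under the lock */
	read_unlock(&mrt_lock);
	return pkt;
}
#endif
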
/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sock *mroute_sk;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

		/*
		 *	Copy the IP header
		 */

		skb->network_header = skb->tail;
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));

		/*
		 *	Add our header
		 */

		igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
		igmp->type = msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	rcu_read_lock();
	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (mroute_sk == NULL) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mroute_sk, skb);
	rcu_read_unlock();
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

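/* Illustrative note, not part of the original file: what mrouted sees.
 * The upcall above is queued on the daemon's raw IGMP socket, so a read
 * returns what looks like an IP datagram whose header has protocol == 0
 * (for IGMPMSG_NOCACHE/WRONGVIF), followed by the struct igmpmsg fields
 * (im_msgtype, im_mbz, im_vif, im_src, im_dst from <linux/mroute.h>).
 * A daemon-side dispatch loop is sketched below with hypothetical names.
 */
#if 0	/* userspace-side example only, recv_buf is hypothetical */
	struct igmpmsg *msg = (struct igmpmsg *)recv_buf;

	switch (msg->im_msgtype) {
	case IGMPMSG_NOCACHE:	/* install an MFC entry via MRT_ADD_MFC */
	case IGMPMSG_WRONGVIF:	/* PIM assert processing */
	case IGMPMSG_WHOLEPKT:	/* PIM register vif: full packet follows */
		break;
	}
#endif
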
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent = -1;
		c->mfc_origin = iph->saddr;
		c->mfc_mcastgrp = iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			 * out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags & VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags & MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

/* called from ip_ra_control(), before an RCU grace period,
 * we don't need to call synchronize_rcu() here
 */
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
			rcu_assign_pointer(mrt->mroute_sk, NULL);
			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
		    !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (rtnl_dereference(mrt->mroute_sk)) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			rcu_assign_pointer(mrt->mroute_sk, sk);
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != rcu_dereference_raw(mrt->mroute_sk))
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif,
				      sk == rtnl_dereference(mrt->mroute_sk));
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc,
					   sk == rtnl_dereference(mrt->mroute_sk));
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;

		rtnl_lock();
		ret = 0;
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			ret = -EBUSY;
		} else {
			if (!ipmr_new_table(net, v))
				ret = -ENOMEM;
			raw_sk(sk)->ipmr_table = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

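/* Illustrative userspace sketch, not part of the original file: the
 * minimal sequence a daemon such as mrouted performs against the API
 * above. Error handling is elided, the address is hypothetical, and the
 * MRT_* constants come from <linux/mroute.h>.
 */
#if 0	/* userspace-side example only */
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
	int on = 1;
	struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };

	setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on));	/* become mroute_sk */
	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");	/* hypothetical addr */
	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
	/* ... read IGMPMSG_* upcalls, answer with MRT_ADD_MFC ... */
	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
#endif
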
/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

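/* Illustrative note, not part of the original file: after ip_encap()
 * the frame laid out in memory is
 *
 *   [ outer iphdr (20 bytes, protocol = IPPROTO_IPIP,
 *     saddr = vif->local, daddr = vif->remote) ][ original datagram ]
 *
 * with skb->network_header on the outer header and skb->transport_header
 * on the inner one, so the tunnel peer can strip 20 bytes and forward.
 */
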
1505static inline int ipmr_forward_finish(struct sk_buff *skb)
1506{
1507 struct ip_options * opt = &(IPCB(skb)->opt);
1508
adf30907 1509 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1da177e4
LT
1510
1511 if (unlikely(opt->optlen))
1512 ip_forward_options(skb);
1513
1514 return dst_output(skb);
1515}
1516
1517/*
1518 * Processing handlers for ipmr_forward
1519 */
1520
0c12295a
PM
1521static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1522 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1da177e4 1523{
eddc9ec5 1524 const struct iphdr *iph = ip_hdr(skb);
0c12295a 1525 struct vif_device *vif = &mrt->vif_table[vifi];
1da177e4
LT
1526 struct net_device *dev;
1527 struct rtable *rt;
1528 int encap = 0;
1529
1530 if (vif->dev == NULL)
1531 goto out_free;
1532
1533#ifdef CONFIG_IP_PIMSM
1534 if (vif->flags & VIFF_REGISTER) {
1535 vif->pkt_out++;
c354e124 1536 vif->bytes_out += skb->len;
cf3677ae
PE
1537 vif->dev->stats.tx_bytes += skb->len;
1538 vif->dev->stats.tx_packets++;
0c12295a 1539 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
69ebbf58 1540 goto out_free;
1da177e4
LT
1541 }
1542#endif
1543
1544 if (vif->flags&VIFF_TUNNEL) {
1545 struct flowi fl = { .oif = vif->link,
1546 .nl_u = { .ip4_u =
1547 { .daddr = vif->remote,
1548 .saddr = vif->local,
1549 .tos = RT_TOS(iph->tos) } },
1550 .proto = IPPROTO_IPIP };
4feb88e5 1551 if (ip_route_output_key(net, &rt, &fl))
1da177e4
LT
1552 goto out_free;
1553 encap = sizeof(struct iphdr);
1554 } else {
1555 struct flowi fl = { .oif = vif->link,
1556 .nl_u = { .ip4_u =
1557 { .daddr = iph->daddr,
1558 .tos = RT_TOS(iph->tos) } },
1559 .proto = IPPROTO_IPIP };
4feb88e5 1560 if (ip_route_output_key(net, &rt, &fl))
1da177e4
LT
1561 goto out_free;
1562 }
1563
d8d1f30b 1564 dev = rt->dst.dev;
1da177e4 1565
d8d1f30b 1566 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1da177e4
LT
1567 /* Do not fragment multicasts. Alas, IPv4 does not
1568 allow to send ICMP, so that packets will disappear
1569 to blackhole.
1570 */
1571
7c73a6fa 1572 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
1573 ip_rt_put(rt);
1574 goto out_free;
1575 }
1576
d8d1f30b 1577 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1da177e4
LT
1578
1579 if (skb_cow(skb, encap)) {
e905a9ed 1580 ip_rt_put(rt);
1da177e4
LT
1581 goto out_free;
1582 }
1583
1584 vif->pkt_out++;
c354e124 1585 vif->bytes_out += skb->len;
1da177e4 1586
adf30907 1587 skb_dst_drop(skb);
d8d1f30b 1588 skb_dst_set(skb, &rt->dst);
eddc9ec5 1589 ip_decrease_ttl(ip_hdr(skb));
1da177e4
LT
1590
1591 /* FIXME: forward and output firewalls used to be called here.
1592 * What do we do with netfilter? -- RR */
1593 if (vif->flags & VIFF_TUNNEL) {
1594 ip_encap(skb, vif->local, vif->remote);
1595 /* FIXME: extra output firewall step used to be here. --RR */
2f4c02d4
PE
1596 vif->dev->stats.tx_packets++;
1597 vif->dev->stats.tx_bytes += skb->len;
1da177e4
LT
1598 }
1599
1600 IPCB(skb)->flags |= IPSKB_FORWARDED;
1601
1602 /*
1603 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1604 * not only before forwarding, but after forwarding on all output
1605 * interfaces. It is clear, if mrouter runs a multicasting
1606 * program, it should receive packets not depending to what interface
1607 * program is joined.
1608 * If we will not make it, the program will have to join on all
1609 * interfaces. On the other hand, multihoming host (or router, but
1610 * not mrouter) cannot join to more than one interface - it will
1611 * result in receiving multiple packets.
1612 */
9bbc768a 1613 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1da177e4
LT
1614 ipmr_forward_finish);
1615 return;
1616
1617out_free:
1618 kfree_skb(skb);
1da177e4
LT
1619}
1620
0c12295a 1621static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1da177e4
LT
1622{
1623 int ct;
0c12295a
PM
1624
1625 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1626 if (mrt->vif_table[ct].dev == dev)
1da177e4
LT
1627 break;
1628 }
1629 return ct;
1630}
1631
1632/* "local" means that we should preserve one skb (for local delivery) */
1633
0c12295a
PM
1634static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1635 struct sk_buff *skb, struct mfc_cache *cache,
1636 int local)
1da177e4
LT
1637{
1638 int psend = -1;
1639 int vif, ct;
1640
1641 vif = cache->mfc_parent;
1642 cache->mfc_un.res.pkt++;
1643 cache->mfc_un.res.bytes += skb->len;
1644
1645 /*
1646 * Wrong interface: drop packet and (maybe) send PIM assert.
1647 */
0c12295a 1648 if (mrt->vif_table[vif].dev != skb->dev) {
1da177e4
LT
1649 int true_vifi;
1650
511c3f92 1651 if (skb_rtable(skb)->fl.iif == 0) {
1da177e4
LT
1652 /* It is our own packet, looped back.
1653 Very complicated situation...
1654
1655 The best workaround until routing daemons will be
1656 fixed is not to redistribute packet, if it was
1657 send through wrong interface. It means, that
1658 multicast applications WILL NOT work for
1659 (S,G), which have default multicast route pointing
1660 to wrong oif. In any case, it is not a good
1661 idea to use multicasting applications on router.
1662 */
1663 goto dont_forward;
1664 }
1665
1666 cache->mfc_un.res.wrong_if++;
0c12295a 1667 true_vifi = ipmr_find_vif(mrt, skb->dev);
1da177e4 1668
0c12295a 1669 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1da177e4
LT
1670 /* pimsm uses asserts, when switching from RPT to SPT,
1671 so that we cannot check that packet arrived on an oif.
1672 It is bad, but otherwise we would need to move pretty
1673 large chunk of pimd to kernel. Ough... --ANK
1674 */
0c12295a 1675 (mrt->mroute_do_pim ||
6f9374a9 1676 cache->mfc_un.res.ttls[true_vifi] < 255) &&
e905a9ed 1677 time_after(jiffies,
1da177e4
LT
1678 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1679 cache->mfc_un.res.last_assert = jiffies;
0c12295a 1680 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1da177e4
LT
1681 }
1682 goto dont_forward;
1683 }
1684
0c12295a
PM
1685 mrt->vif_table[vif].pkt_in++;
1686 mrt->vif_table[vif].bytes_in += skb->len;
1da177e4
LT
1687
1688 /*
1689 * Forward the frame
1690 */
1691 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
eddc9ec5 1692 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1da177e4
LT
1693 if (psend != -1) {
1694 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1695 if (skb2)
0c12295a
PM
1696 ipmr_queue_xmit(net, mrt, skb2, cache,
1697 psend);
1da177e4 1698 }
c354e124 1699 psend = ct;
1da177e4
LT
1700 }
1701 }
1702 if (psend != -1) {
1703 if (local) {
1704 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1705 if (skb2)
0c12295a 1706 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1da177e4 1707 } else {
0c12295a 1708 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1da177e4
LT
1709 return 0;
1710 }
1711 }
1712
1713dont_forward:
1714 if (!local)
1715 kfree_skb(skb);
1716 return 0;
1717}
1718
1719
1720/*
1721 * Multicast packets for forwarding arrive here
4c968709 1722 * Called with rcu_read_lock();
1da177e4
LT
1723 */
1724
1725int ip_mr_input(struct sk_buff *skb)
1726{
1727 struct mfc_cache *cache;
4feb88e5 1728 struct net *net = dev_net(skb->dev);
511c3f92 1729 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
f0ad0860
PM
1730 struct mr_table *mrt;
1731 int err;
1da177e4
LT
1732
1733 /* Packet is looped back after forward, it should not be
1734 forwarded second time, but still can be delivered locally.
1735 */
4c968709 1736 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1da177e4
LT
1737 goto dont_forward;
1738
f0ad0860 1739 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
e40dbc51
BG
1740 if (err < 0) {
1741 kfree_skb(skb);
f0ad0860 1742 return err;
e40dbc51 1743 }
f0ad0860 1744
1da177e4 1745 if (!local) {
4c968709
ED
1746 if (IPCB(skb)->opt.router_alert) {
1747 if (ip_call_ra_chain(skb))
1748 return 0;
1749 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
1750 /* IGMPv1 (and broken IGMPv2 implementations sort of
1751 * Cisco IOS <= 11.2(8)) do not put router alert
1752 * option to IGMP packets destined to routable
1753 * groups. It is very bad, because it means
1754 * that we can forward NO IGMP messages.
1755 */
1756 struct sock *mroute_sk;
1757
1758 mroute_sk = rcu_dereference(mrt->mroute_sk);
1759 if (mroute_sk) {
1760 nf_reset(skb);
1761 raw_rcv(mroute_sk, skb);
1762 return 0;
1763 }
1da177e4
LT
1764 }
1765 }
1766
1767 read_lock(&mrt_lock);
0c12295a 1768 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1da177e4
LT
1769
1770 /*
1771 * No usable cache entry
1772 */
c354e124 1773 if (cache == NULL) {
1da177e4
LT
1774 int vif;
1775
1776 if (local) {
1777 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1778 ip_local_deliver(skb);
1779 if (skb2 == NULL) {
1780 read_unlock(&mrt_lock);
1781 return -ENOBUFS;
1782 }
1783 skb = skb2;
1784 }
1785
0c12295a 1786 vif = ipmr_find_vif(mrt, skb->dev);
1da177e4 1787 if (vif >= 0) {
0eae88f3 1788 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1da177e4
LT
1789 read_unlock(&mrt_lock);
1790
0eae88f3 1791 return err2;
1da177e4
LT
1792 }
1793 read_unlock(&mrt_lock);
1794 kfree_skb(skb);
1795 return -ENODEV;
1796 }
1797
0c12295a 1798 ip_mr_forward(net, mrt, skb, cache, local);
1da177e4
LT
1799
1800 read_unlock(&mrt_lock);
1801
1802 if (local)
1803 return ip_local_deliver(skb);
1804
1805 return 0;
1806
1807dont_forward:
1808 if (local)
1809 return ip_local_deliver(skb);
1810 kfree_skb(skb);
1811 return 0;
1812}
1813
b1879204 1814#ifdef CONFIG_IP_PIMSM
55747a0a 1815/* called with rcu_read_lock() */
f0ad0860
PM
1816static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1817 unsigned int pimlen)
1da177e4 1818{
b1879204
IJ
1819 struct net_device *reg_dev = NULL;
1820 struct iphdr *encap;
1da177e4 1821
b1879204 1822 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1da177e4
LT
1823 /*
1824 Check that:
1825 a. packet is really destinted to a multicast group
1826 b. packet is not a NULL-REGISTER
1827 c. packet is not truncated
1828 */
f97c1e0c 1829 if (!ipv4_is_multicast(encap->daddr) ||
1da177e4 1830 encap->tot_len == 0 ||
b1879204
IJ
1831 ntohs(encap->tot_len) + pimlen > skb->len)
1832 return 1;
1da177e4
LT
1833
1834 read_lock(&mrt_lock);
0c12295a
PM
1835 if (mrt->mroute_reg_vif_num >= 0)
1836 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1da177e4
LT
1837 read_unlock(&mrt_lock);
1838
e905a9ed 1839 if (reg_dev == NULL)
b1879204 1840 return 1;
1da177e4 1841
b0e380b1 1842 skb->mac_header = skb->network_header;
55747a0a 1843 skb_pull(skb, (u8 *)encap - skb->data);
31c7711b 1844 skb_reset_network_header(skb);
1da177e4 1845 skb->protocol = htons(ETH_P_IP);
55747a0a 1846 skb->ip_summed = CHECKSUM_NONE;
1da177e4 1847 skb->pkt_type = PACKET_HOST;
d19d56dd
ED
1848
1849 skb_tunnel_rx(skb, reg_dev);
1850
1da177e4 1851 netif_rx(skb);
b1879204 1852
55747a0a 1853 return NET_RX_SUCCESS;
b1879204
IJ
1854}
1855#endif
1856
1857#ifdef CONFIG_IP_PIMSM_V1
1858/*
1859 * Handle IGMP messages of PIMv1
1860 */
1861
1862int pim_rcv_v1(struct sk_buff * skb)
1863{
1864 struct igmphdr *pim;
4feb88e5 1865 struct net *net = dev_net(skb->dev);
f0ad0860 1866 struct mr_table *mrt;
b1879204
IJ
1867
1868 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1869 goto drop;
1870
1871 pim = igmp_hdr(skb);
1872
f0ad0860
PM
1873 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1874 goto drop;
1875
0c12295a 1876 if (!mrt->mroute_do_pim ||
b1879204
IJ
1877 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1878 goto drop;
1879
f0ad0860 1880 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
b1879204
IJ
1881drop:
1882 kfree_skb(skb);
1883 }
1da177e4
LT
1884 return 0;
1885}
1886#endif
1887
1888#ifdef CONFIG_IP_PIMSM_V2
1889static int pim_rcv(struct sk_buff * skb)
1890{
1891 struct pimreghdr *pim;
f0ad0860
PM
1892 struct net *net = dev_net(skb->dev);
1893 struct mr_table *mrt;
1da177e4 1894
b1879204 1895 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1da177e4
LT
1896 goto drop;
1897
9c70220b 1898 pim = (struct pimreghdr *)skb_transport_header(skb);
e905a9ed 1899 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1da177e4 1900 (pim->flags&PIM_NULL_REGISTER) ||
e905a9ed 1901 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
d3bc23e7 1902 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1da177e4
LT
1903 goto drop;
1904
f0ad0860
PM
1905 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1906 goto drop;
1907
1908 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
b1879204
IJ
1909drop:
1910 kfree_skb(skb);
1911 }
1da177e4
LT
1912 return 0;
1913}
1914#endif

static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			      struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
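/*
 * The attribute built above packs one struct rtnexthop per active output
 * interface into RTA_MULTIPATH, reusing rtnh_hops for the TTL threshold.
 * A userspace sketch of walking it back out (walk_oifs is this example's
 * name; the RTNH_ and RTA_ accessor macros are the stock ones from
 * <linux/rtnetlink.h>):
 */
#if 0	/* example only, never compiled here */
#include <stdio.h>
#include <linux/rtnetlink.h>

static void walk_oifs(const struct rtattr *mp)
{
	const struct rtnexthop *nhp = RTA_DATA(mp);
	int len = RTA_PAYLOAD(mp);

	while (RTNH_OK(nhp, len)) {
		printf("oif %d ttl %d\n", nhp->rtnh_ifindex, nhp->rtnh_hops);
		len -= NLMSG_ALIGN(nhp->rtnh_len);
		nhp = RTNH_NEXT(nhp);
	}
}
#endif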

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
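		/* Annotation (not an original comment): version 0 is never
		 * valid IPv4, so this stub header, of which only saddr and
		 * daddr are meaningful, marks the clone as a kernel-built
		 * resolution request; ipmr_cache_unresolved() queues it and
		 * reports IGMPMSG_NOCACHE to the mroute daemon.
		 */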
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    u32 pid, u32 seq, struct mfc_cache *c)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = RTNL_FAMILY_IPMR;
	rtm->rtm_dst_len = 32;
	rtm->rtm_src_len = 32;
	rtm->rtm_tos = 0;
	rtm->rtm_table = mrt->id;
	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = RTPROT_UNSPEC;
	rtm->rtm_flags = 0;

	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	struct mfc_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ipmr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ipmr_fill_mroute(mrt, skb,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     mfc) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}

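/*
 * To exercise ipmr_rtm_dumproute() from userspace, send a dump request on
 * a NETLINK_ROUTE socket with rtm_family set to RTNL_FAMILY_IPMR; replies
 * come back as NLM_F_MULTI RTM_NEWROUTE messages built by
 * ipmr_fill_mroute(). A minimal sketch (receive loop and error handling
 * trimmed; request_ipmr_dump is this example's name):
 */
#if 0	/* example only, never compiled here */
#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static int request_ipmr_dump(int nlfd)	/* socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) */
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
	} req;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.nlh.nlmsg_seq = 1;
	req.rtm.rtm_family = RTNL_FAMILY_IPMR;	/* selects this dumper */

	return send(nlfd, &req, req.nlh.nlmsg_len, 0);
}
#endif
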
#ifdef CONFIG_PROC_FS
/*
 * The /proc interfaces to multicast routing:
 * /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
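/*
 * For orientation: rendered through the seq_ops above, /proc/net/ip_mr_vif
 * comes out column-aligned like the following (values illustrative, not
 * from a real run; flags 00004 would be VIFF_REGISTER):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 pimreg            0       0         0       0 00004 00000000 00000000
 */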

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};

static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		    : SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08X %08X %-3hd",
			   (__force u32) mfc->mfc_mcastgrp,
			   (__force u32) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif
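/*
 * Likewise for /proc/net/ip_mr_cache (values illustrative): a resolved
 * entry lists its "oif:TTL" pairs, while an unresolved one prints the
 * three zeroed counters from the else-branch above. Addresses are raw
 * hex, e.g. group E10000FB is 225.0.0.251:
 *
 *	Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *	E10000FB 0A000001 0          12     1176        0  1:1  2:1
 */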

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler = pim_rcv,
	.netns_ok = 1,
};
#endif

/*
 * Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}