/* net/ipv4/ipmr.c */
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

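/*
 * The split locking scheme above -- a read/write lock on the read-mostly
 * resolved table, a plain spinlock on the short unresolved queue -- is a
 * common pattern. Below is a minimal userspace analogue using pthreads
 * (a sketch, not part of this file; `resolved_tbl`, `unres_queue` and the
 * entry type are hypothetical names chosen for illustration): lookups on
 * the fast path take only the read lock, while the rarely touched
 * unresolved queue is fully serialized.
 */

#include <pthread.h>

struct entry { unsigned int key; struct entry *next; };

#define TBL_SIZE 64
static struct entry *resolved_tbl[TBL_SIZE];	/* read-mostly hash table */
static pthread_rwlock_t tbl_lock = PTHREAD_RWLOCK_INITIALIZER;

static struct entry *unres_queue;		/* short, rarely used queue */
static pthread_mutex_t unres_lock = PTHREAD_MUTEX_INITIALIZER;

/* Fast path: many concurrent readers, no exclusive locking. */
static struct entry *lookup(unsigned int key)
{
	struct entry *e;

	pthread_rwlock_rdlock(&tbl_lock);
	for (e = resolved_tbl[key % TBL_SIZE]; e; e = e->next)
		if (e->key == key)
			break;
	pthread_rwlock_unlock(&tbl_lock);
	return e;	/* caller must ensure entries outlive the lookup */
}

/* Slow path: queue manipulation is fully serialized. */
static void enqueue_unresolved(struct entry *e)
{
	pthread_mutex_lock(&unres_lock);
	e->next = unres_queue;
	unres_queue = e;
	pthread_mutex_unlock(&unres_lock);
}
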
static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev_close(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}

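/*
 * For comparison, the same SIOCADDTUNNEL request can be issued from
 * userspace against the base "tunl0" device; that is essentially what
 * ipmr_new_tunnel() does internally, minus the KERNEL_DS address-space
 * dance. A hedged sketch (not part of this file; addresses are
 * placeholders and the exact header mix may need adjusting per libc):
 */

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <linux/if_tunnel.h>

static int add_dvmrp_tunnel(const char *local, const char *remote, int vifi)
{
	struct ip_tunnel_parm p;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;

	memset(&p, 0, sizeof(p));
	p.iph.version = 4;
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_IPIP;
	p.iph.saddr = inet_addr(local);
	p.iph.daddr = inet_addr(remote);
	snprintf(p.name, sizeof(p.name), "dvmrp%d", vifi);

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "tunl0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&p;		/* same ifru_data slot as above */

	return ioctl(fd, SIOCADDTUNNEL, &ifr);	/* creates dvmrp<N> */
}
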
#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
			  IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net)
{
	struct net_device *dev;
	struct in_device *in_dev;

	dev = alloc_netdev(0, "pimreg", reg_vif_setup);

	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= net->ipv4.maxvif)
		return -EADDRNOTAVAIL;

	v = &net->ipv4.vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == net->ipv4.mroute_reg_vif_num)
		net->ipv4.mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == net->ipv4.maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(net, tmp))
				break;
		}
		net->ipv4.maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
	release_net(mfc_net(c));
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;
	struct net *net = mfc_net(c);

	atomic_dec(&net->ipv4.cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}

/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (mfc_unres_queue == NULL)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (mfc_unres_queue != NULL)
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

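/*
 * The cp = &c->next / *cp = c->next walk above is the classic
 * pointer-to-pointer idiom for deleting from a singly linked list without
 * special-casing the head. A standalone sketch of the same pattern
 * (illustrative only, not part of this file):
 */

struct node { int expired; struct node *next; };

/* Remove every expired node; the head itself may be removed. */
static void prune(struct node **head)
{
	struct node **cp = head;
	struct node *n;

	while ((n = *cp) != NULL) {
		if (!n->expired) {
			cp = &n->next;	/* keep: advance the link we edit */
			continue;
		}
		*cp = n->next;		/* unlink without a "prev" pointer */
		/* free(n) in a real program */
	}
}
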
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;
	struct net *net = mfc_net(cache);

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
		if (VIF_EXISTS(net, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(net, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (net->ipv4.mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		net->ipv4.mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > net->ipv4.maxvif)
		net->ipv4.maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

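/*
 * vif_add() is reached from ip_mroute_setsockopt() below. The daemon side
 * of the exchange looks roughly like this -- a hedged userspace sketch
 * (not part of this file) assuming a raw IGMP socket that has already
 * completed MRT_INIT; the interface address is a placeholder:
 */

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/mroute.h>

/* Register physical interface 10.0.0.1 as VIF 0 on an mrouted socket. */
static int add_vif(int mrt_sock)
{
	struct vifctl vc;

	memset(&vc, 0, sizeof(vc));
	vc.vifc_vifi = 0;			/* slot in the kernel vif_table */
	vc.vifc_flags = 0;			/* plain interface, not tunnel/register */
	vc.vifc_threshold = 1;			/* minimum TTL to forward out this VIF */
	vc.vifc_rate_limit = 0;
	vc.vifc_lcl_addr.s_addr = inet_addr("10.0.0.1");

	return setsockopt(mrt_sock, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
}
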
static struct mfc_cache *ipmr_cache_find(struct net *net,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
			break;
	}
	return c;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	mfc_net_set(c, net);
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	mfc_net_set(c, net);
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = net->ipv4.mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->network_header = skb->tail;
	skb_put(skb, ihl);
	skb_copy_to_linear_data(skb, pkt->data, ihl);
	ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg *)skb_network_header(skb);
	msg->im_vif = vifi;
	skb_dst_set(skb, dst_clone(skb_dst(pkt)));

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
	skb->transport_header = skb->network_header;
	}

	if (net->ipv4.mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

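/*
 * On the other end of this "bizarre scheme", the routing daemon simply
 * read()s its raw IGMP socket and distinguishes genuine IGMP traffic from
 * kernel upcalls by the message type. A hedged userspace sketch of that
 * receive loop (not part of this file; buffer size and handlers are
 * illustrative):
 */

#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <linux/mroute.h>

static void upcall_loop(int mrt_sock)
{
	char buf[2048];
	ssize_t n;

	while ((n = read(mrt_sock, buf, sizeof(buf))) > 0) {
		struct igmpmsg *msg = (struct igmpmsg *)buf;

		/* Kernel upcalls carry im_mbz == 0; real IGMP does not. */
		if (msg->im_mbz != 0)
			continue;		/* ordinary IGMP packet */

		switch (msg->im_msgtype) {
		case IGMPMSG_NOCACHE:	/* unresolved (S,G): install an MFC entry */
			printf("NOCACHE on vif %d\n", msg->im_vif);
			break;
		case IGMPMSG_WRONGVIF:	/* possible PIM assert condition */
			printf("WRONGVIF on vif %d\n", msg->im_vif);
			break;
		default:
			break;
		}
	}
}
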
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	for (c=mfc_unres_queue; c; c=c->next) {
		if (net_eq(mfc_net(c), net) &&
		    c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&net->ipv4.cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		if (atomic_read(&net->ipv4.cache_resolve_queue_len) == 1)
			mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &net->ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc(net);
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = net->ipv4.mfc_cache_array[line];
	net->ipv4.mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (net_eq(mfc_net(uc), net) &&
		    uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			atomic_dec(&net->ipv4.cache_resolve_queue_len);
			break;
		}
	}
	if (mfc_unres_queue == NULL)
		del_timer(&ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}

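/*
 * The daemon installs the route that resolves an IGMPMSG_NOCACHE upcall
 * with MRT_ADD_MFC; per-VIF TTL thresholds arrive in mfcc_ttls, exactly
 * the array ipmr_update_thresholds() condenses above. A hedged userspace
 * sketch (not part of this file; addresses and VIF numbers are
 * placeholders):
 */

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/mroute.h>

/* Forward (10.0.0.99, 224.1.2.3) arriving on VIF 0 out of VIF 1. */
static int add_mfc(int mrt_sock)
{
	struct mfcctl mc;

	memset(&mc, 0, sizeof(mc));
	mc.mfcc_origin.s_addr   = inet_addr("10.0.0.99");
	mc.mfcc_mcastgrp.s_addr = inet_addr("224.1.2.3");
	mc.mfcc_parent = 0;		/* expected input VIF (iif) */
	mc.mfcc_ttls[1] = 1;		/* forward on VIF 1 if pkt TTL > 1 */
	/* all other slots stay 0 == do not forward */

	return setsockopt(mrt_sock, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
}
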
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
	int i;
	LIST_HEAD(list);

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < net->ipv4.maxvif; i++) {
		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
			vif_delete(net, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i=0; i<MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &net->ipv4.mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
		struct mfc_cache *c, **cp;

		spin_lock_bh(&mfc_unres_lock);
		cp = &mfc_unres_queue;
		while ((c = *cp) != NULL) {
			if (!net_eq(mfc_net(c), net)) {
				cp = &c->next;
				continue;
			}
			*cp = c->next;

			ipmr_destroy_unres(c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);

	rtnl_lock();
	if (sk == net->ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

		write_lock_bh(&mrt_lock);
		net->ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(net);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);

	if (optname != MRT_INIT) {
		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (net->ipv4.mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			net->ipv4.mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != net->ipv4.mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
		} else {
			ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
		}
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(net, &mfc);
		else
			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != net->ipv4.mroute_do_pim) {
			net->ipv4.mroute_do_pim = v;
			net->ipv4.mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

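/*
 * Only a raw IGMP socket may become the mroute control socket, and only
 * one per namespace. A hedged userspace sketch of the MRT_INIT handshake
 * (not part of this file; must run with CAP_NET_ADMIN):
 */

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute.h>

/* Open the (single) multicast-routing control socket. */
static int open_mrt_socket(void)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);

	if (fd < 0)
		return -1;

	/* Fails with EADDRINUSE if another daemon already did MRT_INIT. */
	if (setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0) {
		perror("MRT_INIT");
		close(fd);
		return -1;
	}
	return fd;	/* MRT_DONE (or close) tears the tables back down */
}
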
/*
 *	Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = net->ipv4.mroute_do_pim;
#endif
	else
		val = net->ipv4.mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= net->ipv4.maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &net->ipv4.vif_table[vr.vifi];
		if (VIF_EXISTS(net, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

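/*
 * Userspace can poll these counters on the same control socket. A hedged
 * sketch querying per-flow statistics with SIOCGETSGCNT (not part of this
 * file; group and source addresses are placeholders):
 */

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/mroute.h>

static int print_sg_counters(int mrt_sock)
{
	struct sioc_sg_req sr;

	memset(&sr, 0, sizeof(sr));
	sr.src.s_addr = inet_addr("10.0.0.99");
	sr.grp.s_addr = inet_addr("224.1.2.3");

	if (ioctl(mrt_sock, SIOCGETSGCNT, &sr) < 0)
		return -1;	/* EADDRNOTAVAIL: no such (S,G) entry */

	printf("pkts=%lu bytes=%lu wrong_if=%lu\n",
	       sr.pktcnt, sr.bytecnt, sr.wrong_if);
	return 0;
}
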
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &net->ipv4.vif_table[0];
	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
		if (v->dev == dev)
			vif_delete(net, ct, 1, &list);
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	=	4;
	iph->tos	=	old_iph->tos;
	iph->ttl	=	old_iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}

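/*
 * ip_send_check() above fills iph->check with the standard RFC 1071
 * one's-complement sum over the header. A freestanding version of the
 * same computation, for reference (a sketch, not part of this file):
 */

#include <stdint.h>
#include <stddef.h>

/* One's-complement checksum over an IPv4 header of `ihl` 32-bit words.
 * The checksum field itself must be zeroed before calling. */
static uint16_t ip_header_checksum(const void *hdr, size_t ihl)
{
	const uint16_t *p = hdr;
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < ihl * 2; i++)	/* ihl words == ihl*2 halfwords */
		sum += p[i];

	while (sum >> 16)		/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}
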
/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	struct net *net = mfc_net(c);
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &net->ipv4.vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. Clearly, if the mrouter runs a multicasting program,
	 * that program should receive packets regardless of which interface
	 * it joined on. If we did not do this, the program would have to join
	 * on all interfaces. On the other hand, a multihomed host (or router,
	 * but not mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
	return;
}

static int ipmr_find_vif(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	int ct;
	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
		if (net->ipv4.vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;
	struct net *net = mfc_net(cache);

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (net->ipv4.vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (skb_rtable(skb)->fl.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) entries whose default multicast route points
			   to the wrong oif. In any case, it is not a good
			   idea to run multicasting applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
		    /* PIM-SM uses asserts when switching from RPT to SPT,
		       so we cannot check that a packet arrived on an oif.
		       It is bad, but otherwise we would need to move a pretty
		       large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (net->ipv4.mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	net->ipv4.vif_table[vif].pkt_in++;
	net->ipv4.vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ipmr_queue_xmit(skb2, cache, psend);
			}
			psend = ct;
		}
	}
	if (psend != -1) {
		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (skb2)
				ipmr_queue_xmit(skb2, cache, psend);
		} else {
			ipmr_queue_xmit(skb, cache, psend);
			return 0;
		}
	}

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}

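/*
 * The forwarding loop above avoids one skb_clone() by deferring each
 * transmit by one iteration: every hit clones and sends the previous
 * candidate, and the final candidate consumes the original skb. The same
 * last-reference optimization in miniature (illustrative sketch, not part
 * of this file):
 */

#include <stdio.h>

/* Fan a buffer out to every flagged target, copying n-1 times
 * instead of n: the last target consumes the original. */
static void fan_out(const int *targets, int ntargets, const char *buf)
{
	int psend = -1;
	int ct;

	for (ct = 0; ct < ntargets; ct++) {
		if (!targets[ct])
			continue;	/* not a forwarding target */
		if (psend != -1)
			printf("send copy of %s to %d\n", buf, psend);
		psend = ct;		/* defer: it may be the last one */
	}
	if (psend != -1)
		printf("send original %s to %d\n", buf, psend);
}
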
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations, such as
			   Cisco IOS <= 11.2(8)) do not put the router alert
			   option into IGMP packets destined to routable
			   groups. It is very bad, because it means
			   that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (net->ipv4.mroute_sk) {
				nf_reset(skb);
				raw_rcv(net->ipv4.mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(net, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	struct net *net = dev_net(skb->dev);

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (net->ipv4.mroute_reg_vif_num >= 0)
		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (!net->ipv4.mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc_net(c);
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent > MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(net, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};


static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc_net(mfc), net) &&
		    pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}


static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->next)
		return mfc->next;

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	while (mfc && !net_eq(mfc_net(mfc), net))
		mfc = mfc->next;
	if (mfc)
		return mfc;

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif
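
/*
 * The files registered below in ipmr_net_init() appear as
 * /proc/net/ip_mr_vif and /proc/net/ip_mr_cache, in the column layout
 * produced by the seq_printf() calls above. A hedged userspace sketch
 * that parses the resolved-entry lines (not part of this file):
 */

#include <stdio.h>

/* Dump resolved (S,G) entries as printed by ipmr_mfc_seq_show() above. */
static int dump_mfc_cache(void)
{
	unsigned long grp, origin, pkts, bytes, wrong;
	int iif;
	char line[256];
	FILE *f = fopen("/proc/net/ip_mr_cache", "r");

	if (!f)
		return -1;

	fgets(line, sizeof(line), f);		/* skip header row */
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "%lX %lX %d %lu %lu %lu",
			   &grp, &origin, &iif, &pkts, &bytes, &wrong) == 6)
			printf("group %08lX src %08lX iif %d pkts %lu\n",
			       grp, origin, iif, pkts);
	}
	fclose(f);
	return 0;
}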

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif


/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
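
/*
 * ip_mr_init() uses the kernel's standard layered-unwind idiom: each
 * successfully acquired resource gets a label just above its release, so
 * a failure at step N falls through and releases steps N-1..1 in reverse
 * order. The same pattern in miniature (illustrative sketch, not part of
 * this file):
 */

#include <stdlib.h>

struct ctx { void *a, *b, *c; };

static int setup(struct ctx *x)
{
	x->a = malloc(16);
	if (!x->a)
		goto fail_a;
	x->b = malloc(16);
	if (!x->b)
		goto fail_b;
	x->c = malloc(16);
	if (!x->c)
		goto fail_c;
	return 0;

fail_c:				/* unwind in reverse acquisition order */
	free(x->b);
fail_b:
	free(x->a);
fail_a:
	return -1;
}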