net/ipv4/ipmr.c (Ubuntu-4.15.0-96.97, mirror_ubuntu-bionic-kernel)
1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requirement to work with older peers.
26 *
27 */
28
29 #include <linux/uaccess.h>
30 #include <linux/types.h>
31 #include <linux/capability.h>
32 #include <linux/errno.h>
33 #include <linux/timer.h>
34 #include <linux/mm.h>
35 #include <linux/kernel.h>
36 #include <linux/fcntl.h>
37 #include <linux/stat.h>
38 #include <linux/socket.h>
39 #include <linux/in.h>
40 #include <linux/inet.h>
41 #include <linux/netdevice.h>
42 #include <linux/inetdevice.h>
43 #include <linux/igmp.h>
44 #include <linux/proc_fs.h>
45 #include <linux/seq_file.h>
46 #include <linux/mroute.h>
47 #include <linux/init.h>
48 #include <linux/if_ether.h>
49 #include <linux/slab.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <linux/compat.h>
63 #include <linux/export.h>
64 #include <net/ip_tunnels.h>
65 #include <net/checksum.h>
66 #include <net/netlink.h>
67 #include <net/fib_rules.h>
68 #include <linux/netconf.h>
69 #include <net/nexthop.h>
70 #include <net/switchdev.h>
71
72 #include <linux/nospec.h>
73
74 struct ipmr_rule {
75 struct fib_rule common;
76 };
77
78 struct ipmr_result {
79 struct mr_table *mrt;
80 };
81
82 /* Big lock, protecting vif table, mrt cache and mroute socket state.
83 * Note that the changes are semaphored via rtnl_lock.
84 */
85
86 static DEFINE_RWLOCK(mrt_lock);
87
88 /* Multicast router control variables */
89
90 /* Special spinlock for queue of unresolved entries */
91 static DEFINE_SPINLOCK(mfc_unres_lock);
92
93 /* We return to Alan's original scheme. The hash table of resolved
94 * entries is changed only in process context and protected
95 * with the weak lock mrt_lock. The queue of unresolved entries is
96 * protected with the strong spinlock mfc_unres_lock.
97 *
98 * In this case the data path is entirely free of exclusive locks.
99 */
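/* Illustrative summary of how the code below uses these locks (not new code,
 * just the discipline the functions in this file follow):
 *
 *   control path (e.g. vif_add/vif_delete, ipmr_mfc_add):
 *       ASSERT_RTNL();                          changes serialized by rtnl_lock
 *       write_lock_bh(&mrt_lock);               publish vif/MFC updates
 *       ...
 *       write_unlock_bh(&mrt_lock);
 *
 *   data path (e.g. ip_mr_input):
 *       rcu_read_lock();                        no exclusive locks taken
 *       c = ipmr_cache_find(mrt, saddr, daddr);
 *       ...
 *       rcu_read_unlock();
 */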
100
101 static struct kmem_cache *mrt_cachep __read_mostly;
102
103 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
104 static void ipmr_free_table(struct mr_table *mrt);
105
106 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
107 struct net_device *dev, struct sk_buff *skb,
108 struct mfc_cache *cache, int local);
109 static int ipmr_cache_report(struct mr_table *mrt,
110 struct sk_buff *pkt, vifi_t vifi, int assert);
111 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
112 struct mfc_cache *c, struct rtmsg *rtm);
113 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
114 int cmd);
115 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
116 static void mroute_clean_tables(struct mr_table *mrt, bool all);
117 static void ipmr_expire_process(struct timer_list *t);
118
119 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
120 #define ipmr_for_each_table(mrt, net) \
121 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
122
123 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
124 {
125 struct mr_table *mrt;
126
127 ipmr_for_each_table(mrt, net) {
128 if (mrt->id == id)
129 return mrt;
130 }
131 return NULL;
132 }
133
134 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
135 struct mr_table **mrt)
136 {
137 int err;
138 struct ipmr_result res;
139 struct fib_lookup_arg arg = {
140 .result = &res,
141 .flags = FIB_LOOKUP_NOREF,
142 };
143
144 /* update flow if oif or iif point to device enslaved to l3mdev */
145 l3mdev_update_flow(net, flowi4_to_flowi(flp4));
146
147 err = fib_rules_lookup(net->ipv4.mr_rules_ops,
148 flowi4_to_flowi(flp4), 0, &arg);
149 if (err < 0)
150 return err;
151 *mrt = res.mrt;
152 return 0;
153 }
154
155 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
156 int flags, struct fib_lookup_arg *arg)
157 {
158 struct ipmr_result *res = arg->result;
159 struct mr_table *mrt;
160
161 switch (rule->action) {
162 case FR_ACT_TO_TBL:
163 break;
164 case FR_ACT_UNREACHABLE:
165 return -ENETUNREACH;
166 case FR_ACT_PROHIBIT:
167 return -EACCES;
168 case FR_ACT_BLACKHOLE:
169 default:
170 return -EINVAL;
171 }
172
173 arg->table = fib_rule_get_table(rule, arg);
174
175 mrt = ipmr_get_table(rule->fr_net, arg->table);
176 if (!mrt)
177 return -EAGAIN;
178 res->mrt = mrt;
179 return 0;
180 }
181
182 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
183 {
184 return 1;
185 }
186
187 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
188 FRA_GENERIC_POLICY,
189 };
190
191 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
192 struct fib_rule_hdr *frh, struct nlattr **tb)
193 {
194 return 0;
195 }
196
197 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
198 struct nlattr **tb)
199 {
200 return 1;
201 }
202
203 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
204 struct fib_rule_hdr *frh)
205 {
206 frh->dst_len = 0;
207 frh->src_len = 0;
208 frh->tos = 0;
209 return 0;
210 }
211
212 static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
213 .family = RTNL_FAMILY_IPMR,
214 .rule_size = sizeof(struct ipmr_rule),
215 .addr_size = sizeof(u32),
216 .action = ipmr_rule_action,
217 .match = ipmr_rule_match,
218 .configure = ipmr_rule_configure,
219 .compare = ipmr_rule_compare,
220 .fill = ipmr_rule_fill,
221 .nlgroup = RTNLGRP_IPV4_RULE,
222 .policy = ipmr_rule_policy,
223 .owner = THIS_MODULE,
224 };
225
226 static int __net_init ipmr_rules_init(struct net *net)
227 {
228 struct fib_rules_ops *ops;
229 struct mr_table *mrt;
230 int err;
231
232 ops = fib_rules_register(&ipmr_rules_ops_template, net);
233 if (IS_ERR(ops))
234 return PTR_ERR(ops);
235
236 INIT_LIST_HEAD(&net->ipv4.mr_tables);
237
238 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
239 if (IS_ERR(mrt)) {
240 err = PTR_ERR(mrt);
241 goto err1;
242 }
243
244 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
245 if (err < 0)
246 goto err2;
247
248 net->ipv4.mr_rules_ops = ops;
249 return 0;
250
251 err2:
252 ipmr_free_table(mrt);
253 err1:
254 fib_rules_unregister(ops);
255 return err;
256 }
257
258 static void __net_exit ipmr_rules_exit(struct net *net)
259 {
260 struct mr_table *mrt, *next;
261
262 rtnl_lock();
263 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
264 list_del(&mrt->list);
265 ipmr_free_table(mrt);
266 }
267 fib_rules_unregister(net->ipv4.mr_rules_ops);
268 rtnl_unlock();
269 }
270
271 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
272 {
273 return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
274 }
275
276 static unsigned int ipmr_rules_seq_read(struct net *net)
277 {
278 return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
279 }
280
281 bool ipmr_rule_default(const struct fib_rule *rule)
282 {
283 return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
284 }
285 EXPORT_SYMBOL(ipmr_rule_default);
286 #else
287 #define ipmr_for_each_table(mrt, net) \
288 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
289
290 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
291 {
292 return net->ipv4.mrt;
293 }
294
295 static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
296 struct mr_table **mrt)
297 {
298 *mrt = net->ipv4.mrt;
299 return 0;
300 }
301
302 static int __net_init ipmr_rules_init(struct net *net)
303 {
304 struct mr_table *mrt;
305
306 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
307 if (IS_ERR(mrt))
308 return PTR_ERR(mrt);
309 net->ipv4.mrt = mrt;
310 return 0;
311 }
312
313 static void __net_exit ipmr_rules_exit(struct net *net)
314 {
315 rtnl_lock();
316 ipmr_free_table(net->ipv4.mrt);
317 net->ipv4.mrt = NULL;
318 rtnl_unlock();
319 }
320
321 static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
322 {
323 return 0;
324 }
325
326 static unsigned int ipmr_rules_seq_read(struct net *net)
327 {
328 return 0;
329 }
330
331 bool ipmr_rule_default(const struct fib_rule *rule)
332 {
333 return true;
334 }
335 EXPORT_SYMBOL(ipmr_rule_default);
336 #endif
337
338 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
339 const void *ptr)
340 {
341 const struct mfc_cache_cmp_arg *cmparg = arg->key;
342 struct mfc_cache *c = (struct mfc_cache *)ptr;
343
344 return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
345 cmparg->mfc_origin != c->mfc_origin;
346 }
347
348 static const struct rhashtable_params ipmr_rht_params = {
349 .head_offset = offsetof(struct mfc_cache, mnode),
350 .key_offset = offsetof(struct mfc_cache, cmparg),
351 .key_len = sizeof(struct mfc_cache_cmp_arg),
352 .nelem_hint = 3,
353 .locks_mul = 1,
354 .obj_cmpfn = ipmr_hash_cmp,
355 .automatic_shrinking = true,
356 };
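/* MFC entries are keyed by (group, origin). Wildcard entries store
 * INADDR_ANY in the corresponding field: (*,G) entries have origin 0.0.0.0,
 * and (*,*) entries have both fields set to 0.0.0.0; see
 * ipmr_cache_find_any() and ipmr_cache_find_any_parent() below.
 */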
357
358 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
359 {
360 struct mr_table *mrt;
361 int err;
362
363 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
364 if (id != RT_TABLE_DEFAULT && id >= 1000000000)
365 return ERR_PTR(-EINVAL);
366
367 mrt = ipmr_get_table(net, id);
368 if (mrt)
369 return mrt;
370
371 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
372 if (!mrt)
373 return ERR_PTR(-ENOMEM);
374 write_pnet(&mrt->net, net);
375 mrt->id = id;
376
377 err = rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
378 if (err) {
379 kfree(mrt);
380 return ERR_PTR(err);
381 }
382 INIT_LIST_HEAD(&mrt->mfc_cache_list);
383 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
384
385 timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
386
387 mrt->mroute_reg_vif_num = -1;
388 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
389 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
390 #endif
391 return mrt;
392 }
393
394 static void ipmr_free_table(struct mr_table *mrt)
395 {
396 del_timer_sync(&mrt->ipmr_expire_timer);
397 mroute_clean_tables(mrt, true);
398 rhltable_destroy(&mrt->mfc_hash);
399 kfree(mrt);
400 }
401
402 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
403
404 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
405 {
406 struct net *net = dev_net(dev);
407
408 dev_close(dev);
409
410 dev = __dev_get_by_name(net, "tunl0");
411 if (dev) {
412 const struct net_device_ops *ops = dev->netdev_ops;
413 struct ifreq ifr;
414 struct ip_tunnel_parm p;
415
416 memset(&p, 0, sizeof(p));
417 p.iph.daddr = v->vifc_rmt_addr.s_addr;
418 p.iph.saddr = v->vifc_lcl_addr.s_addr;
419 p.iph.version = 4;
420 p.iph.ihl = 5;
421 p.iph.protocol = IPPROTO_IPIP;
422 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
423 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
424
425 if (ops->ndo_do_ioctl) {
426 mm_segment_t oldfs = get_fs();
427
428 set_fs(KERNEL_DS);
429 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
430 set_fs(oldfs);
431 }
432 }
433 }
434
435 /* Initialize ipmr pimreg/tunnel in_device */
436 static bool ipmr_init_vif_indev(const struct net_device *dev)
437 {
438 struct in_device *in_dev;
439
440 ASSERT_RTNL();
441
442 in_dev = __in_dev_get_rtnl(dev);
443 if (!in_dev)
444 return false;
445 ipv4_devconf_setall(in_dev);
446 neigh_parms_data_state_setall(in_dev->arp_parms);
447 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
448
449 return true;
450 }
451
452 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
453 {
454 struct net_device *dev;
455
456 dev = __dev_get_by_name(net, "tunl0");
457
458 if (dev) {
459 const struct net_device_ops *ops = dev->netdev_ops;
460 int err;
461 struct ifreq ifr;
462 struct ip_tunnel_parm p;
463
464 memset(&p, 0, sizeof(p));
465 p.iph.daddr = v->vifc_rmt_addr.s_addr;
466 p.iph.saddr = v->vifc_lcl_addr.s_addr;
467 p.iph.version = 4;
468 p.iph.ihl = 5;
469 p.iph.protocol = IPPROTO_IPIP;
470 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
471 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
472
473 if (ops->ndo_do_ioctl) {
474 mm_segment_t oldfs = get_fs();
475
476 set_fs(KERNEL_DS);
477 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
478 set_fs(oldfs);
479 } else {
480 err = -EOPNOTSUPP;
481 }
482 dev = NULL;
483
484 if (err == 0 &&
485 (dev = __dev_get_by_name(net, p.name)) != NULL) {
486 dev->flags |= IFF_MULTICAST;
487 if (!ipmr_init_vif_indev(dev))
488 goto failure;
489 if (dev_open(dev))
490 goto failure;
491 dev_hold(dev);
492 }
493 }
494 return dev;
495
496 failure:
497 unregister_netdevice(dev);
498 return NULL;
499 }
500
501 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
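/* The PIM register VIF ("pimreg" device): packets routed out of this VIF are
 * never transmitted on the wire. reg_vif_xmit() below hands the whole packet
 * to the PIM daemon as an IGMPMSG_WHOLEPKT upcall, and the daemon builds the
 * actual PIM Register message in user space.
 */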
502 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
503 {
504 struct net *net = dev_net(dev);
505 struct mr_table *mrt;
506 struct flowi4 fl4 = {
507 .flowi4_oif = dev->ifindex,
508 .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
509 .flowi4_mark = skb->mark,
510 };
511 int err;
512
513 err = ipmr_fib_lookup(net, &fl4, &mrt);
514 if (err < 0) {
515 kfree_skb(skb);
516 return err;
517 }
518
519 read_lock(&mrt_lock);
520 dev->stats.tx_bytes += skb->len;
521 dev->stats.tx_packets++;
522 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
523 read_unlock(&mrt_lock);
524 kfree_skb(skb);
525 return NETDEV_TX_OK;
526 }
527
528 static int reg_vif_get_iflink(const struct net_device *dev)
529 {
530 return 0;
531 }
532
533 static const struct net_device_ops reg_vif_netdev_ops = {
534 .ndo_start_xmit = reg_vif_xmit,
535 .ndo_get_iflink = reg_vif_get_iflink,
536 };
537
538 static void reg_vif_setup(struct net_device *dev)
539 {
540 dev->type = ARPHRD_PIMREG;
541 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
542 dev->flags = IFF_NOARP;
543 dev->netdev_ops = &reg_vif_netdev_ops;
544 dev->needs_free_netdev = true;
545 dev->features |= NETIF_F_NETNS_LOCAL;
546 }
547
548 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
549 {
550 struct net_device *dev;
551 char name[IFNAMSIZ];
552
553 if (mrt->id == RT_TABLE_DEFAULT)
554 sprintf(name, "pimreg");
555 else
556 sprintf(name, "pimreg%u", mrt->id);
557
558 dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
559
560 if (!dev)
561 return NULL;
562
563 dev_net_set(dev, net);
564
565 if (register_netdevice(dev)) {
566 free_netdev(dev);
567 return NULL;
568 }
569
570 if (!ipmr_init_vif_indev(dev))
571 goto failure;
572 if (dev_open(dev))
573 goto failure;
574
575 dev_hold(dev);
576
577 return dev;
578
579 failure:
580 unregister_netdevice(dev);
581 return NULL;
582 }
583
584 /* called with rcu_read_lock() */
585 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
586 unsigned int pimlen)
587 {
588 struct net_device *reg_dev = NULL;
589 struct iphdr *encap;
590
591 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
592 /* Check that:
593 * a. packet is really sent to a multicast group
594 * b. packet is not a NULL-REGISTER
595 * c. packet is not truncated
596 */
597 if (!ipv4_is_multicast(encap->daddr) ||
598 encap->tot_len == 0 ||
599 ntohs(encap->tot_len) + pimlen > skb->len)
600 return 1;
601
602 read_lock(&mrt_lock);
603 if (mrt->mroute_reg_vif_num >= 0)
604 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
605 read_unlock(&mrt_lock);
606
607 if (!reg_dev)
608 return 1;
609
610 skb->mac_header = skb->network_header;
611 skb_pull(skb, (u8 *)encap - skb->data);
612 skb_reset_network_header(skb);
613 skb->protocol = htons(ETH_P_IP);
614 skb->ip_summed = CHECKSUM_NONE;
615
616 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
617
618 netif_rx(skb);
619
620 return NET_RX_SUCCESS;
621 }
622 #else
623 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
624 {
625 return NULL;
626 }
627 #endif
628
629 static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
630 struct net *net,
631 enum fib_event_type event_type,
632 struct vif_device *vif,
633 vifi_t vif_index, u32 tb_id)
634 {
635 struct vif_entry_notifier_info info = {
636 .info = {
637 .family = RTNL_FAMILY_IPMR,
638 .net = net,
639 },
640 .dev = vif->dev,
641 .vif_index = vif_index,
642 .vif_flags = vif->flags,
643 .tb_id = tb_id,
644 };
645
646 return call_fib_notifier(nb, net, event_type, &info.info);
647 }
648
649 static int call_ipmr_vif_entry_notifiers(struct net *net,
650 enum fib_event_type event_type,
651 struct vif_device *vif,
652 vifi_t vif_index, u32 tb_id)
653 {
654 struct vif_entry_notifier_info info = {
655 .info = {
656 .family = RTNL_FAMILY_IPMR,
657 .net = net,
658 },
659 .dev = vif->dev,
660 .vif_index = vif_index,
661 .vif_flags = vif->flags,
662 .tb_id = tb_id,
663 };
664
665 ASSERT_RTNL();
666 net->ipv4.ipmr_seq++;
667 return call_fib_notifiers(net, event_type, &info.info);
668 }
669
670 static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
671 struct net *net,
672 enum fib_event_type event_type,
673 struct mfc_cache *mfc, u32 tb_id)
674 {
675 struct mfc_entry_notifier_info info = {
676 .info = {
677 .family = RTNL_FAMILY_IPMR,
678 .net = net,
679 },
680 .mfc = mfc,
681 .tb_id = tb_id
682 };
683
684 return call_fib_notifier(nb, net, event_type, &info.info);
685 }
686
687 static int call_ipmr_mfc_entry_notifiers(struct net *net,
688 enum fib_event_type event_type,
689 struct mfc_cache *mfc, u32 tb_id)
690 {
691 struct mfc_entry_notifier_info info = {
692 .info = {
693 .family = RTNL_FAMILY_IPMR,
694 .net = net,
695 },
696 .mfc = mfc,
697 .tb_id = tb_id
698 };
699
700 ASSERT_RTNL();
701 net->ipv4.ipmr_seq++;
702 return call_fib_notifiers(net, event_type, &info.info);
703 }
704
705 /**
706 * vif_delete - Delete a VIF entry
707 * @notify: Set to 1 if the caller is a notifier_call
708 */
709 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
710 struct list_head *head)
711 {
712 struct net *net = read_pnet(&mrt->net);
713 struct vif_device *v;
714 struct net_device *dev;
715 struct in_device *in_dev;
716
717 if (vifi < 0 || vifi >= mrt->maxvif)
718 return -EADDRNOTAVAIL;
719
720 v = &mrt->vif_table[vifi];
721
722 if (VIF_EXISTS(mrt, vifi))
723 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
724 mrt->id);
725
726 write_lock_bh(&mrt_lock);
727 dev = v->dev;
728 v->dev = NULL;
729
730 if (!dev) {
731 write_unlock_bh(&mrt_lock);
732 return -EADDRNOTAVAIL;
733 }
734
735 if (vifi == mrt->mroute_reg_vif_num)
736 mrt->mroute_reg_vif_num = -1;
737
738 if (vifi + 1 == mrt->maxvif) {
739 int tmp;
740
741 for (tmp = vifi - 1; tmp >= 0; tmp--) {
742 if (VIF_EXISTS(mrt, tmp))
743 break;
744 }
745 mrt->maxvif = tmp+1;
746 }
747
748 write_unlock_bh(&mrt_lock);
749
750 dev_set_allmulti(dev, -1);
751
752 in_dev = __in_dev_get_rtnl(dev);
753 if (in_dev) {
754 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
755 inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
756 NETCONFA_MC_FORWARDING,
757 dev->ifindex, &in_dev->cnf);
758 ip_rt_multicast_event(in_dev);
759 }
760
761 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
762 unregister_netdevice_queue(dev, head);
763
764 dev_put(dev);
765 return 0;
766 }
767
768 static void ipmr_cache_free_rcu(struct rcu_head *head)
769 {
770 struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
771
772 kmem_cache_free(mrt_cachep, c);
773 }
774
775 void ipmr_cache_free(struct mfc_cache *c)
776 {
777 call_rcu(&c->rcu, ipmr_cache_free_rcu);
778 }
779 EXPORT_SYMBOL(ipmr_cache_free);
780
781 /* Destroy an unresolved cache entry, killing queued skbs
782 * and reporting error to netlink readers.
783 */
784 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
785 {
786 struct net *net = read_pnet(&mrt->net);
787 struct sk_buff *skb;
788 struct nlmsgerr *e;
789
790 atomic_dec(&mrt->cache_resolve_queue_len);
791
792 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
793 if (ip_hdr(skb)->version == 0) {
794 struct nlmsghdr *nlh = skb_pull(skb,
795 sizeof(struct iphdr));
796 nlh->nlmsg_type = NLMSG_ERROR;
797 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
798 skb_trim(skb, nlh->nlmsg_len);
799 e = nlmsg_data(nlh);
800 e->error = -ETIMEDOUT;
801 memset(&e->msg, 0, sizeof(e->msg));
802
803 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
804 } else {
805 kfree_skb(skb);
806 }
807 }
808
809 ipmr_cache_free(c);
810 }
811
812 /* Timer process for the unresolved queue. */
813 static void ipmr_expire_process(struct timer_list *t)
814 {
815 struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
816 unsigned long now;
817 unsigned long expires;
818 struct mfc_cache *c, *next;
819
820 if (!spin_trylock(&mfc_unres_lock)) {
821 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
822 return;
823 }
824
825 if (list_empty(&mrt->mfc_unres_queue))
826 goto out;
827
828 now = jiffies;
829 expires = 10*HZ;
830
831 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
832 if (time_after(c->mfc_un.unres.expires, now)) {
833 unsigned long interval = c->mfc_un.unres.expires - now;
834 if (interval < expires)
835 expires = interval;
836 continue;
837 }
838
839 list_del(&c->list);
840 mroute_netlink_event(mrt, c, RTM_DELROUTE);
841 ipmr_destroy_unres(mrt, c);
842 }
843
844 if (!list_empty(&mrt->mfc_unres_queue))
845 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
846
847 out:
848 spin_unlock(&mfc_unres_lock);
849 }
850
851 /* Fill oifs list. It is called under write locked mrt_lock. */
852 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
853 unsigned char *ttls)
854 {
855 int vifi;
856
857 cache->mfc_un.res.minvif = MAXVIFS;
858 cache->mfc_un.res.maxvif = 0;
859 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
860
861 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
862 if (VIF_EXISTS(mrt, vifi) &&
863 ttls[vifi] && ttls[vifi] < 255) {
864 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
865 if (cache->mfc_un.res.minvif > vifi)
866 cache->mfc_un.res.minvif = vifi;
867 if (cache->mfc_un.res.maxvif <= vifi)
868 cache->mfc_un.res.maxvif = vifi + 1;
869 }
870 }
871 cache->mfc_un.res.lastuse = jiffies;
872 }
873
874 static int vif_add(struct net *net, struct mr_table *mrt,
875 struct vifctl *vifc, int mrtsock)
876 {
877 int vifi = vifc->vifc_vifi;
878 struct switchdev_attr attr = {
879 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
880 };
881 struct vif_device *v = &mrt->vif_table[vifi];
882 struct net_device *dev;
883 struct in_device *in_dev;
884 int err;
885
886 /* Is vif busy ? */
887 if (VIF_EXISTS(mrt, vifi))
888 return -EADDRINUSE;
889
890 switch (vifc->vifc_flags) {
891 case VIFF_REGISTER:
892 if (!ipmr_pimsm_enabled())
893 return -EINVAL;
894 /* Special Purpose VIF in PIM
895 * All the packets will be sent to the daemon
896 */
897 if (mrt->mroute_reg_vif_num >= 0)
898 return -EADDRINUSE;
899 dev = ipmr_reg_vif(net, mrt);
900 if (!dev)
901 return -ENOBUFS;
902 err = dev_set_allmulti(dev, 1);
903 if (err) {
904 unregister_netdevice(dev);
905 dev_put(dev);
906 return err;
907 }
908 break;
909 case VIFF_TUNNEL:
910 dev = ipmr_new_tunnel(net, vifc);
911 if (!dev)
912 return -ENOBUFS;
913 err = dev_set_allmulti(dev, 1);
914 if (err) {
915 ipmr_del_tunnel(dev, vifc);
916 dev_put(dev);
917 return err;
918 }
919 break;
920 case VIFF_USE_IFINDEX:
921 case 0:
922 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
923 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
924 if (dev && !__in_dev_get_rtnl(dev)) {
925 dev_put(dev);
926 return -EADDRNOTAVAIL;
927 }
928 } else {
929 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
930 }
931 if (!dev)
932 return -EADDRNOTAVAIL;
933 err = dev_set_allmulti(dev, 1);
934 if (err) {
935 dev_put(dev);
936 return err;
937 }
938 break;
939 default:
940 return -EINVAL;
941 }
942
943 in_dev = __in_dev_get_rtnl(dev);
944 if (!in_dev) {
945 dev_put(dev);
946 return -EADDRNOTAVAIL;
947 }
948 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
949 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
950 dev->ifindex, &in_dev->cnf);
951 ip_rt_multicast_event(in_dev);
952
953 /* Fill in the VIF structures */
954
955 attr.orig_dev = dev;
956 if (!switchdev_port_attr_get(dev, &attr)) {
957 memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
958 v->dev_parent_id.id_len = attr.u.ppid.id_len;
959 } else {
960 v->dev_parent_id.id_len = 0;
961 }
962 v->rate_limit = vifc->vifc_rate_limit;
963 v->local = vifc->vifc_lcl_addr.s_addr;
964 v->remote = vifc->vifc_rmt_addr.s_addr;
965 v->flags = vifc->vifc_flags;
966 if (!mrtsock)
967 v->flags |= VIFF_STATIC;
968 v->threshold = vifc->vifc_threshold;
969 v->bytes_in = 0;
970 v->bytes_out = 0;
971 v->pkt_in = 0;
972 v->pkt_out = 0;
973 v->link = dev->ifindex;
974 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
975 v->link = dev_get_iflink(dev);
976
977 /* And finish update writing critical data */
978 write_lock_bh(&mrt_lock);
979 v->dev = dev;
980 if (v->flags & VIFF_REGISTER)
981 mrt->mroute_reg_vif_num = vifi;
982 if (vifi+1 > mrt->maxvif)
983 mrt->maxvif = vifi+1;
984 write_unlock_bh(&mrt_lock);
985 call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
986 return 0;
987 }
988
989 /* called with rcu_read_lock() */
990 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
991 __be32 origin,
992 __be32 mcastgrp)
993 {
994 struct mfc_cache_cmp_arg arg = {
995 .mfc_mcastgrp = mcastgrp,
996 .mfc_origin = origin
997 };
998 struct rhlist_head *tmp, *list;
999 struct mfc_cache *c;
1000
1001 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
1002 rhl_for_each_entry_rcu(c, tmp, list, mnode)
1003 return c;
1004
1005 return NULL;
1006 }
1007
1008 /* Look for a (*,*,oif) entry */
1009 static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
1010 int vifi)
1011 {
1012 struct mfc_cache_cmp_arg arg = {
1013 .mfc_mcastgrp = htonl(INADDR_ANY),
1014 .mfc_origin = htonl(INADDR_ANY)
1015 };
1016 struct rhlist_head *tmp, *list;
1017 struct mfc_cache *c;
1018
1019 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
1020 rhl_for_each_entry_rcu(c, tmp, list, mnode)
1021 if (c->mfc_un.res.ttls[vifi] < 255)
1022 return c;
1023
1024 return NULL;
1025 }
1026
1027 /* Look for a (*,G) entry */
1028 static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
1029 __be32 mcastgrp, int vifi)
1030 {
1031 struct mfc_cache_cmp_arg arg = {
1032 .mfc_mcastgrp = mcastgrp,
1033 .mfc_origin = htonl(INADDR_ANY)
1034 };
1035 struct rhlist_head *tmp, *list;
1036 struct mfc_cache *c, *proxy;
1037
1038 if (mcastgrp == htonl(INADDR_ANY))
1039 goto skip;
1040
1041 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
1042 rhl_for_each_entry_rcu(c, tmp, list, mnode) {
1043 if (c->mfc_un.res.ttls[vifi] < 255)
1044 return c;
1045
1046 /* It's ok if the vifi is part of the static tree */
1047 proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
1048 if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
1049 return c;
1050 }
1051
1052 skip:
1053 return ipmr_cache_find_any_parent(mrt, vifi);
1054 }
1055
1056 /* Look for a (S,G,iif) entry if parent != -1 */
1057 static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
1058 __be32 origin, __be32 mcastgrp,
1059 int parent)
1060 {
1061 struct mfc_cache_cmp_arg arg = {
1062 .mfc_mcastgrp = mcastgrp,
1063 .mfc_origin = origin,
1064 };
1065 struct rhlist_head *tmp, *list;
1066 struct mfc_cache *c;
1067
1068 list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
1069 rhl_for_each_entry_rcu(c, tmp, list, mnode)
1070 if (parent == -1 || parent == c->mfc_parent)
1071 return c;
1072
1073 return NULL;
1074 }
1075
1076 /* Allocate a multicast cache entry */
1077 static struct mfc_cache *ipmr_cache_alloc(void)
1078 {
1079 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1080
1081 if (c) {
1082 c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1083 c->mfc_un.res.minvif = MAXVIFS;
1084 refcount_set(&c->mfc_un.res.refcount, 1);
1085 }
1086 return c;
1087 }
1088
1089 static struct mfc_cache *ipmr_cache_alloc_unres(void)
1090 {
1091 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1092
1093 if (c) {
1094 skb_queue_head_init(&c->mfc_un.unres.unresolved);
1095 c->mfc_un.unres.expires = jiffies + 10*HZ;
1096 }
1097 return c;
1098 }
1099
1100 /* A cache entry has gone into a resolved state from queued */
1101 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
1102 struct mfc_cache *uc, struct mfc_cache *c)
1103 {
1104 struct sk_buff *skb;
1105 struct nlmsgerr *e;
1106
1107 /* Play the pending entries through our router */
1108 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1109 if (ip_hdr(skb)->version == 0) {
1110 struct nlmsghdr *nlh = skb_pull(skb,
1111 sizeof(struct iphdr));
1112
1113 if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1114 nlh->nlmsg_len = skb_tail_pointer(skb) -
1115 (u8 *)nlh;
1116 } else {
1117 nlh->nlmsg_type = NLMSG_ERROR;
1118 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1119 skb_trim(skb, nlh->nlmsg_len);
1120 e = nlmsg_data(nlh);
1121 e->error = -EMSGSIZE;
1122 memset(&e->msg, 0, sizeof(e->msg));
1123 }
1124
1125 rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1126 } else {
1127 ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
1128 }
1129 }
1130 }
1131
1132 /* Bounce a cache query up to mrouted and netlink.
1133 *
1134 * Called under mrt_lock.
1135 */
1136 static int ipmr_cache_report(struct mr_table *mrt,
1137 struct sk_buff *pkt, vifi_t vifi, int assert)
1138 {
1139 const int ihl = ip_hdrlen(pkt);
1140 struct sock *mroute_sk;
1141 struct igmphdr *igmp;
1142 struct igmpmsg *msg;
1143 struct sk_buff *skb;
1144 int ret;
1145
1146 if (assert == IGMPMSG_WHOLEPKT)
1147 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
1148 else
1149 skb = alloc_skb(128, GFP_ATOMIC);
1150
1151 if (!skb)
1152 return -ENOBUFS;
1153
1154 if (assert == IGMPMSG_WHOLEPKT) {
1155 /* Ugly, but we have no choice with this interface.
1156 * Duplicate old header, fix ihl, length etc.
1157 * And all this only to mangle msg->im_msgtype and
1158 * to set msg->im_mbz to "mbz" :-)
1159 */
1160 skb_push(skb, sizeof(struct iphdr));
1161 skb_reset_network_header(skb);
1162 skb_reset_transport_header(skb);
1163 msg = (struct igmpmsg *)skb_network_header(skb);
1164 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
1165 msg->im_msgtype = IGMPMSG_WHOLEPKT;
1166 msg->im_mbz = 0;
1167 msg->im_vif = mrt->mroute_reg_vif_num;
1168 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
1169 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
1170 sizeof(struct iphdr));
1171 } else {
1172 /* Copy the IP header */
1173 skb_set_network_header(skb, skb->len);
1174 skb_put(skb, ihl);
1175 skb_copy_to_linear_data(skb, pkt->data, ihl);
1176 /* Flag to the kernel this is a route add */
1177 ip_hdr(skb)->protocol = 0;
1178 msg = (struct igmpmsg *)skb_network_header(skb);
1179 msg->im_vif = vifi;
1180 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1181 /* Add our header */
1182 igmp = skb_put(skb, sizeof(struct igmphdr));
1183 igmp->type = assert;
1184 msg->im_msgtype = assert;
1185 igmp->code = 0;
1186 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
1187 skb->transport_header = skb->network_header;
1188 }
1189
1190 rcu_read_lock();
1191 mroute_sk = rcu_dereference(mrt->mroute_sk);
1192 if (!mroute_sk) {
1193 rcu_read_unlock();
1194 kfree_skb(skb);
1195 return -EINVAL;
1196 }
1197
1198 igmpmsg_netlink_event(mrt, skb);
1199
1200 /* Deliver to mrouted */
1201 ret = sock_queue_rcv_skb(mroute_sk, skb);
1202 rcu_read_unlock();
1203 if (ret < 0) {
1204 net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
1205 kfree_skb(skb);
1206 }
1207
1208 return ret;
1209 }
1210
1211 /* Queue a packet for resolution. It gets locked cache entry! */
1212 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
1213 struct sk_buff *skb, struct net_device *dev)
1214 {
1215 const struct iphdr *iph = ip_hdr(skb);
1216 struct mfc_cache *c;
1217 bool found = false;
1218 int err;
1219
1220 spin_lock_bh(&mfc_unres_lock);
1221 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
1222 if (c->mfc_mcastgrp == iph->daddr &&
1223 c->mfc_origin == iph->saddr) {
1224 found = true;
1225 break;
1226 }
1227 }
1228
1229 if (!found) {
1230 /* Create a new entry if allowable */
1231 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1232 (c = ipmr_cache_alloc_unres()) == NULL) {
1233 spin_unlock_bh(&mfc_unres_lock);
1234
1235 kfree_skb(skb);
1236 return -ENOBUFS;
1237 }
1238
1239 /* Fill in the new cache entry */
1240 c->mfc_parent = -1;
1241 c->mfc_origin = iph->saddr;
1242 c->mfc_mcastgrp = iph->daddr;
1243
1244 /* Reflect first query at mrouted. */
1245 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
1246 if (err < 0) {
1247 /* If the report failed throw the cache entry
1248 out - Brad Parker
1249 */
1250 spin_unlock_bh(&mfc_unres_lock);
1251
1252 ipmr_cache_free(c);
1253 kfree_skb(skb);
1254 return err;
1255 }
1256
1257 atomic_inc(&mrt->cache_resolve_queue_len);
1258 list_add(&c->list, &mrt->mfc_unres_queue);
1259 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1260
1261 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1262 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1263 }
1264
1265 /* See if we can append the packet */
1266 if (c->mfc_un.unres.unresolved.qlen > 3) {
1267 kfree_skb(skb);
1268 err = -ENOBUFS;
1269 } else {
1270 if (dev) {
1271 skb->dev = dev;
1272 skb->skb_iif = dev->ifindex;
1273 }
1274 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1275 err = 0;
1276 }
1277
1278 spin_unlock_bh(&mfc_unres_lock);
1279 return err;
1280 }
1281
1282 /* MFC cache manipulation by user space mroute daemon */
1283
1284 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
1285 {
1286 struct net *net = read_pnet(&mrt->net);
1287 struct mfc_cache *c;
1288
1289 /* The entries are added/deleted only under RTNL */
1290 rcu_read_lock();
1291 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
1292 mfc->mfcc_mcastgrp.s_addr, parent);
1293 rcu_read_unlock();
1294 if (!c)
1295 return -ENOENT;
1296 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
1297 list_del_rcu(&c->list);
1298 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
1299 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1300 ipmr_cache_put(c);
1301
1302 return 0;
1303 }
1304
1305 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1306 struct mfcctl *mfc, int mrtsock, int parent)
1307 {
1308 struct mfc_cache *uc, *c;
1309 bool found;
1310 int ret;
1311
1312 if (mfc->mfcc_parent >= MAXVIFS)
1313 return -ENFILE;
1314
1315 /* The entries are added/deleted only under RTNL */
1316 rcu_read_lock();
1317 c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
1318 mfc->mfcc_mcastgrp.s_addr, parent);
1319 rcu_read_unlock();
1320 if (c) {
1321 write_lock_bh(&mrt_lock);
1322 c->mfc_parent = mfc->mfcc_parent;
1323 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1324 if (!mrtsock)
1325 c->mfc_flags |= MFC_STATIC;
1326 write_unlock_bh(&mrt_lock);
1327 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
1328 mrt->id);
1329 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1330 return 0;
1331 }
1332
1333 if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
1334 !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1335 return -EINVAL;
1336
1337 c = ipmr_cache_alloc();
1338 if (!c)
1339 return -ENOMEM;
1340
1341 c->mfc_origin = mfc->mfcc_origin.s_addr;
1342 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1343 c->mfc_parent = mfc->mfcc_parent;
1344 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1345 if (!mrtsock)
1346 c->mfc_flags |= MFC_STATIC;
1347
1348 ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
1349 ipmr_rht_params);
1350 if (ret) {
1351 pr_err("ipmr: rhtable insert error %d\n", ret);
1352 ipmr_cache_free(c);
1353 return ret;
1354 }
1355 list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
1356 /* Check to see if we resolved a queued list. If so we
1357 * need to send on the frames and tidy up.
1358 */
1359 found = false;
1360 spin_lock_bh(&mfc_unres_lock);
1361 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1362 if (uc->mfc_origin == c->mfc_origin &&
1363 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1364 list_del(&uc->list);
1365 atomic_dec(&mrt->cache_resolve_queue_len);
1366 found = true;
1367 break;
1368 }
1369 }
1370 if (list_empty(&mrt->mfc_unres_queue))
1371 del_timer(&mrt->ipmr_expire_timer);
1372 spin_unlock_bh(&mfc_unres_lock);
1373
1374 if (found) {
1375 ipmr_cache_resolve(net, mrt, uc, c);
1376 ipmr_cache_free(uc);
1377 }
1378 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
1379 mroute_netlink_event(mrt, c, RTM_NEWROUTE);
1380 return 0;
1381 }
1382
1383 /* Close the multicast socket, and clear the vif tables etc */
1384 static void mroute_clean_tables(struct mr_table *mrt, bool all)
1385 {
1386 struct net *net = read_pnet(&mrt->net);
1387 struct mfc_cache *c, *tmp;
1388 LIST_HEAD(list);
1389 int i;
1390
1391 /* Shut down all active vif entries */
1392 for (i = 0; i < mrt->maxvif; i++) {
1393 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1394 continue;
1395 vif_delete(mrt, i, 0, &list);
1396 }
1397 unregister_netdevice_many(&list);
1398
1399 /* Wipe the cache */
1400 list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1401 if (!all && (c->mfc_flags & MFC_STATIC))
1402 continue;
1403 rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
1404 list_del_rcu(&c->list);
1405 call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
1406 mrt->id);
1407 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1408 ipmr_cache_put(c);
1409 }
1410
1411 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1412 spin_lock_bh(&mfc_unres_lock);
1413 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1414 list_del(&c->list);
1415 mroute_netlink_event(mrt, c, RTM_DELROUTE);
1416 ipmr_destroy_unres(mrt, c);
1417 }
1418 spin_unlock_bh(&mfc_unres_lock);
1419 }
1420 }
1421
1422 /* called from ip_ra_control(), before an RCU grace period,
1423 * we don't need to call synchronize_rcu() here
1424 */
1425 static void mrtsock_destruct(struct sock *sk)
1426 {
1427 struct net *net = sock_net(sk);
1428 struct mr_table *mrt;
1429
1430 ASSERT_RTNL();
1431 ipmr_for_each_table(mrt, net) {
1432 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1433 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1434 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
1435 NETCONFA_MC_FORWARDING,
1436 NETCONFA_IFINDEX_ALL,
1437 net->ipv4.devconf_all);
1438 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1439 mroute_clean_tables(mrt, false);
1440 }
1441 }
1442 }
1443
1444 /* Socket options and virtual interface manipulation. The whole
1445 * virtual interface system is a complete heap, but unfortunately
1446 * that's how BSD mrouted happens to think. Maybe one day with a proper
1447 * MOSPF/PIM router set up we can clean this up.
1448 */
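/* Typical user-space sequence (illustrative sketch only, addresses made up):
 *
 *   int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *   int one = 1;
 *   setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *   struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *   vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *   setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));   // upstream VIF 0
 *   vc.vifc_vifi = 1;
 *   vc.vifc_lcl_addr.s_addr = inet_addr("198.51.100.1");
 *   setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));   // downstream VIF 1
 *
 *   struct mfcctl mc = { .mfcc_parent = 0 };                   // arrives on VIF 0
 *   mc.mfcc_origin.s_addr   = inet_addr("192.0.2.10");
 *   mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.1.1");
 *   mc.mfcc_ttls[1] = 1;                                       // forward to VIF 1
 *   setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */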
1449
1450 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
1451 unsigned int optlen)
1452 {
1453 struct net *net = sock_net(sk);
1454 int val, ret = 0, parent = 0;
1455 struct mr_table *mrt;
1456 struct vifctl vif;
1457 struct mfcctl mfc;
1458 u32 uval;
1459
1460 /* There's one exception to the lock - MRT_DONE which needs to unlock */
1461 rtnl_lock();
1462 if (sk->sk_type != SOCK_RAW ||
1463 inet_sk(sk)->inet_num != IPPROTO_IGMP) {
1464 ret = -EOPNOTSUPP;
1465 goto out_unlock;
1466 }
1467
1468 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1469 if (!mrt) {
1470 ret = -ENOENT;
1471 goto out_unlock;
1472 }
1473 if (optname != MRT_INIT) {
1474 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1475 !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
1476 ret = -EACCES;
1477 goto out_unlock;
1478 }
1479 }
1480
1481 switch (optname) {
1482 case MRT_INIT:
1483 if (optlen != sizeof(int)) {
1484 ret = -EINVAL;
1485 break;
1486 }
1487 if (rtnl_dereference(mrt->mroute_sk)) {
1488 ret = -EADDRINUSE;
1489 break;
1490 }
1491
1492 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1493 if (ret == 0) {
1494 rcu_assign_pointer(mrt->mroute_sk, sk);
1495 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1496 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
1497 NETCONFA_MC_FORWARDING,
1498 NETCONFA_IFINDEX_ALL,
1499 net->ipv4.devconf_all);
1500 }
1501 break;
1502 case MRT_DONE:
1503 if (sk != rcu_access_pointer(mrt->mroute_sk)) {
1504 ret = -EACCES;
1505 } else {
1506 ret = ip_ra_control(sk, 0, NULL);
1507 goto out_unlock;
1508 }
1509 break;
1510 case MRT_ADD_VIF:
1511 case MRT_DEL_VIF:
1512 if (optlen != sizeof(vif)) {
1513 ret = -EINVAL;
1514 break;
1515 }
1516 if (copy_from_user(&vif, optval, sizeof(vif))) {
1517 ret = -EFAULT;
1518 break;
1519 }
1520 if (vif.vifc_vifi >= MAXVIFS) {
1521 ret = -ENFILE;
1522 break;
1523 }
1524 if (optname == MRT_ADD_VIF) {
1525 ret = vif_add(net, mrt, &vif,
1526 sk == rtnl_dereference(mrt->mroute_sk));
1527 } else {
1528 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1529 }
1530 break;
1531 /* Manipulate the forwarding caches. These live
1532 * in a sort of kernel/user symbiosis.
1533 */
1534 case MRT_ADD_MFC:
1535 case MRT_DEL_MFC:
1536 parent = -1;
1537 /* fall through */
1538 case MRT_ADD_MFC_PROXY:
1539 case MRT_DEL_MFC_PROXY:
1540 if (optlen != sizeof(mfc)) {
1541 ret = -EINVAL;
1542 break;
1543 }
1544 if (copy_from_user(&mfc, optval, sizeof(mfc))) {
1545 ret = -EFAULT;
1546 break;
1547 }
1548 if (parent == 0)
1549 parent = mfc.mfcc_parent;
1550 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
1551 ret = ipmr_mfc_delete(mrt, &mfc, parent);
1552 else
1553 ret = ipmr_mfc_add(net, mrt, &mfc,
1554 sk == rtnl_dereference(mrt->mroute_sk),
1555 parent);
1556 break;
1557 /* Control PIM assert. */
1558 case MRT_ASSERT:
1559 if (optlen != sizeof(val)) {
1560 ret = -EINVAL;
1561 break;
1562 }
1563 if (get_user(val, (int __user *)optval)) {
1564 ret = -EFAULT;
1565 break;
1566 }
1567 mrt->mroute_do_assert = val;
1568 break;
1569 case MRT_PIM:
1570 if (!ipmr_pimsm_enabled()) {
1571 ret = -ENOPROTOOPT;
1572 break;
1573 }
1574 if (optlen != sizeof(val)) {
1575 ret = -EINVAL;
1576 break;
1577 }
1578 if (get_user(val, (int __user *)optval)) {
1579 ret = -EFAULT;
1580 break;
1581 }
1582
1583 val = !!val;
1584 if (val != mrt->mroute_do_pim) {
1585 mrt->mroute_do_pim = val;
1586 mrt->mroute_do_assert = val;
1587 }
1588 break;
1589 case MRT_TABLE:
1590 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
1591 ret = -ENOPROTOOPT;
1592 break;
1593 }
1594 if (optlen != sizeof(uval)) {
1595 ret = -EINVAL;
1596 break;
1597 }
1598 if (get_user(uval, (u32 __user *)optval)) {
1599 ret = -EFAULT;
1600 break;
1601 }
1602
1603 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1604 ret = -EBUSY;
1605 } else {
1606 mrt = ipmr_new_table(net, uval);
1607 if (IS_ERR(mrt))
1608 ret = PTR_ERR(mrt);
1609 else
1610 raw_sk(sk)->ipmr_table = uval;
1611 }
1612 break;
1613 /* Spurious command, or MRT_VERSION which you cannot set. */
1614 default:
1615 ret = -ENOPROTOOPT;
1616 }
1617 out_unlock:
1618 rtnl_unlock();
1619 return ret;
1620 }
1621
1622 /* Getsock opt support for the multicast routing system. */
1623 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1624 {
1625 int olr;
1626 int val;
1627 struct net *net = sock_net(sk);
1628 struct mr_table *mrt;
1629
1630 if (sk->sk_type != SOCK_RAW ||
1631 inet_sk(sk)->inet_num != IPPROTO_IGMP)
1632 return -EOPNOTSUPP;
1633
1634 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1635 if (!mrt)
1636 return -ENOENT;
1637
1638 switch (optname) {
1639 case MRT_VERSION:
1640 val = 0x0305;
1641 break;
1642 case MRT_PIM:
1643 if (!ipmr_pimsm_enabled())
1644 return -ENOPROTOOPT;
1645 val = mrt->mroute_do_pim;
1646 break;
1647 case MRT_ASSERT:
1648 val = mrt->mroute_do_assert;
1649 break;
1650 default:
1651 return -ENOPROTOOPT;
1652 }
1653
1654 if (get_user(olr, optlen))
1655 return -EFAULT;
1656 olr = min_t(unsigned int, olr, sizeof(int));
1657 if (olr < 0)
1658 return -EINVAL;
1659 if (put_user(olr, optlen))
1660 return -EFAULT;
1661 if (copy_to_user(optval, &val, olr))
1662 return -EFAULT;
1663 return 0;
1664 }
1665
1666 /* The IP multicast ioctl support routines. */
1667 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1668 {
1669 struct sioc_sg_req sr;
1670 struct sioc_vif_req vr;
1671 struct vif_device *vif;
1672 struct mfc_cache *c;
1673 struct net *net = sock_net(sk);
1674 struct mr_table *mrt;
1675
1676 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1677 if (!mrt)
1678 return -ENOENT;
1679
1680 switch (cmd) {
1681 case SIOCGETVIFCNT:
1682 if (copy_from_user(&vr, arg, sizeof(vr)))
1683 return -EFAULT;
1684 if (vr.vifi >= mrt->maxvif)
1685 return -EINVAL;
1686 vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
1687 read_lock(&mrt_lock);
1688 vif = &mrt->vif_table[vr.vifi];
1689 if (VIF_EXISTS(mrt, vr.vifi)) {
1690 vr.icount = vif->pkt_in;
1691 vr.ocount = vif->pkt_out;
1692 vr.ibytes = vif->bytes_in;
1693 vr.obytes = vif->bytes_out;
1694 read_unlock(&mrt_lock);
1695
1696 if (copy_to_user(arg, &vr, sizeof(vr)))
1697 return -EFAULT;
1698 return 0;
1699 }
1700 read_unlock(&mrt_lock);
1701 return -EADDRNOTAVAIL;
1702 case SIOCGETSGCNT:
1703 if (copy_from_user(&sr, arg, sizeof(sr)))
1704 return -EFAULT;
1705
1706 rcu_read_lock();
1707 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1708 if (c) {
1709 sr.pktcnt = c->mfc_un.res.pkt;
1710 sr.bytecnt = c->mfc_un.res.bytes;
1711 sr.wrong_if = c->mfc_un.res.wrong_if;
1712 rcu_read_unlock();
1713
1714 if (copy_to_user(arg, &sr, sizeof(sr)))
1715 return -EFAULT;
1716 return 0;
1717 }
1718 rcu_read_unlock();
1719 return -EADDRNOTAVAIL;
1720 default:
1721 return -ENOIOCTLCMD;
1722 }
1723 }
1724
1725 #ifdef CONFIG_COMPAT
1726 struct compat_sioc_sg_req {
1727 struct in_addr src;
1728 struct in_addr grp;
1729 compat_ulong_t pktcnt;
1730 compat_ulong_t bytecnt;
1731 compat_ulong_t wrong_if;
1732 };
1733
1734 struct compat_sioc_vif_req {
1735 vifi_t vifi; /* Which iface */
1736 compat_ulong_t icount;
1737 compat_ulong_t ocount;
1738 compat_ulong_t ibytes;
1739 compat_ulong_t obytes;
1740 };
1741
1742 int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1743 {
1744 struct compat_sioc_sg_req sr;
1745 struct compat_sioc_vif_req vr;
1746 struct vif_device *vif;
1747 struct mfc_cache *c;
1748 struct net *net = sock_net(sk);
1749 struct mr_table *mrt;
1750
1751 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1752 if (!mrt)
1753 return -ENOENT;
1754
1755 switch (cmd) {
1756 case SIOCGETVIFCNT:
1757 if (copy_from_user(&vr, arg, sizeof(vr)))
1758 return -EFAULT;
1759 if (vr.vifi >= mrt->maxvif)
1760 return -EINVAL;
1761 vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
1762 read_lock(&mrt_lock);
1763 vif = &mrt->vif_table[vr.vifi];
1764 if (VIF_EXISTS(mrt, vr.vifi)) {
1765 vr.icount = vif->pkt_in;
1766 vr.ocount = vif->pkt_out;
1767 vr.ibytes = vif->bytes_in;
1768 vr.obytes = vif->bytes_out;
1769 read_unlock(&mrt_lock);
1770
1771 if (copy_to_user(arg, &vr, sizeof(vr)))
1772 return -EFAULT;
1773 return 0;
1774 }
1775 read_unlock(&mrt_lock);
1776 return -EADDRNOTAVAIL;
1777 case SIOCGETSGCNT:
1778 if (copy_from_user(&sr, arg, sizeof(sr)))
1779 return -EFAULT;
1780
1781 rcu_read_lock();
1782 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1783 if (c) {
1784 sr.pktcnt = c->mfc_un.res.pkt;
1785 sr.bytecnt = c->mfc_un.res.bytes;
1786 sr.wrong_if = c->mfc_un.res.wrong_if;
1787 rcu_read_unlock();
1788
1789 if (copy_to_user(arg, &sr, sizeof(sr)))
1790 return -EFAULT;
1791 return 0;
1792 }
1793 rcu_read_unlock();
1794 return -EADDRNOTAVAIL;
1795 default:
1796 return -ENOIOCTLCMD;
1797 }
1798 }
1799 #endif
1800
1801 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1802 {
1803 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1804 struct net *net = dev_net(dev);
1805 struct mr_table *mrt;
1806 struct vif_device *v;
1807 int ct;
1808
1809 if (event != NETDEV_UNREGISTER)
1810 return NOTIFY_DONE;
1811
1812 ipmr_for_each_table(mrt, net) {
1813 v = &mrt->vif_table[0];
1814 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1815 if (v->dev == dev)
1816 vif_delete(mrt, ct, 1, NULL);
1817 }
1818 }
1819 return NOTIFY_DONE;
1820 }
1821
1822 static struct notifier_block ip_mr_notifier = {
1823 .notifier_call = ipmr_device_event,
1824 };
1825
1826 /* Encapsulate a packet by attaching a valid IPIP header to it.
1827 * This avoids tunnel drivers and other mess and gives us the speed so
1828 * important for multicast video.
1829 */
1830 static void ip_encap(struct net *net, struct sk_buff *skb,
1831 __be32 saddr, __be32 daddr)
1832 {
1833 struct iphdr *iph;
1834 const struct iphdr *old_iph = ip_hdr(skb);
1835
1836 skb_push(skb, sizeof(struct iphdr));
1837 skb->transport_header = skb->network_header;
1838 skb_reset_network_header(skb);
1839 iph = ip_hdr(skb);
1840
1841 iph->version = 4;
1842 iph->tos = old_iph->tos;
1843 iph->ttl = old_iph->ttl;
1844 iph->frag_off = 0;
1845 iph->daddr = daddr;
1846 iph->saddr = saddr;
1847 iph->protocol = IPPROTO_IPIP;
1848 iph->ihl = 5;
1849 iph->tot_len = htons(skb->len);
1850 ip_select_ident(net, skb, NULL);
1851 ip_send_check(iph);
1852
1853 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1854 nf_reset(skb);
1855 }
1856
1857 static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
1858 struct sk_buff *skb)
1859 {
1860 struct ip_options *opt = &(IPCB(skb)->opt);
1861
1862 IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
1863 IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
1864
1865 if (unlikely(opt->optlen))
1866 ip_forward_options(skb);
1867
1868 return dst_output(net, sk, skb);
1869 }
1870
1871 #ifdef CONFIG_NET_SWITCHDEV
1872 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1873 int in_vifi, int out_vifi)
1874 {
1875 struct vif_device *out_vif = &mrt->vif_table[out_vifi];
1876 struct vif_device *in_vif = &mrt->vif_table[in_vifi];
1877
1878 if (!skb->offload_mr_fwd_mark)
1879 return false;
1880 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
1881 return false;
1882 return netdev_phys_item_id_same(&out_vif->dev_parent_id,
1883 &in_vif->dev_parent_id);
1884 }
1885 #else
1886 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1887 int in_vifi, int out_vifi)
1888 {
1889 return false;
1890 }
1891 #endif
1892
1893 /* Processing handlers for ipmr_forward */
1894
1895 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1896 int in_vifi, struct sk_buff *skb,
1897 struct mfc_cache *c, int vifi)
1898 {
1899 const struct iphdr *iph = ip_hdr(skb);
1900 struct vif_device *vif = &mrt->vif_table[vifi];
1901 struct net_device *dev;
1902 struct rtable *rt;
1903 struct flowi4 fl4;
1904 int encap = 0;
1905
1906 if (!vif->dev)
1907 goto out_free;
1908
1909 if (vif->flags & VIFF_REGISTER) {
1910 vif->pkt_out++;
1911 vif->bytes_out += skb->len;
1912 vif->dev->stats.tx_bytes += skb->len;
1913 vif->dev->stats.tx_packets++;
1914 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1915 goto out_free;
1916 }
1917
1918 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
1919 goto out_free;
1920
1921 if (vif->flags & VIFF_TUNNEL) {
1922 rt = ip_route_output_ports(net, &fl4, NULL,
1923 vif->remote, vif->local,
1924 0, 0,
1925 IPPROTO_IPIP,
1926 RT_TOS(iph->tos), vif->link);
1927 if (IS_ERR(rt))
1928 goto out_free;
1929 encap = sizeof(struct iphdr);
1930 } else {
1931 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
1932 0, 0,
1933 IPPROTO_IPIP,
1934 RT_TOS(iph->tos), vif->link);
1935 if (IS_ERR(rt))
1936 goto out_free;
1937 }
1938
1939 dev = rt->dst.dev;
1940
1941 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1942 /* Do not fragment multicasts. Alas, IPv4 does not
1943 * allow us to send ICMP here, so the packets will
1944 * disappear into a blackhole.
1945 */
1946 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
1947 ip_rt_put(rt);
1948 goto out_free;
1949 }
1950
1951 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1952
1953 if (skb_cow(skb, encap)) {
1954 ip_rt_put(rt);
1955 goto out_free;
1956 }
1957
1958 vif->pkt_out++;
1959 vif->bytes_out += skb->len;
1960
1961 skb_dst_drop(skb);
1962 skb_dst_set(skb, &rt->dst);
1963 ip_decrease_ttl(ip_hdr(skb));
1964
1965 /* FIXME: forward and output firewalls used to be called here.
1966 * What do we do with netfilter? -- RR
1967 */
1968 if (vif->flags & VIFF_TUNNEL) {
1969 ip_encap(net, skb, vif->local, vif->remote);
1970 /* FIXME: extra output firewall step used to be here. --RR */
1971 vif->dev->stats.tx_packets++;
1972 vif->dev->stats.tx_bytes += skb->len;
1973 }
1974
1975 IPCB(skb)->flags |= IPSKB_FORWARDED;
1976
1977 /* RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1978 * not only before forwarding, but also after forwarding on all output
1979 * interfaces. Clearly, if the mrouter runs a multicast
1980 * program, that program should receive packets regardless of which
1981 * interface it joined on.
1982 * If we do not do this, the program will have to join on all
1983 * interfaces. On the other hand, a multihomed host (or router, but
1984 * not mrouter) cannot join on more than one interface - that would
1985 * result in receiving duplicate packets.
1986 */
1987 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
1988 net, NULL, skb, skb->dev, dev,
1989 ipmr_forward_finish);
1990 return;
1991
1992 out_free:
1993 kfree_skb(skb);
1994 }
1995
1996 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1997 {
1998 int ct;
1999
2000 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
2001 if (mrt->vif_table[ct].dev == dev)
2002 break;
2003 }
2004 return ct;
2005 }
2006
2007 /* "local" means that we should preserve one skb (for local delivery) */
2008 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
2009 struct net_device *dev, struct sk_buff *skb,
2010 struct mfc_cache *cache, int local)
2011 {
2012 int true_vifi = ipmr_find_vif(mrt, dev);
2013 int psend = -1;
2014 int vif, ct;
2015
2016 vif = cache->mfc_parent;
2017 cache->mfc_un.res.pkt++;
2018 cache->mfc_un.res.bytes += skb->len;
2019 cache->mfc_un.res.lastuse = jiffies;
2020
2021 if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
2022 struct mfc_cache *cache_proxy;
2023
2024 /* For an (*,G) entry, we only check that the incoming
2025 * interface is part of the static tree.
2026 */
2027 cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
2028 if (cache_proxy &&
2029 cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2030 goto forward;
2031 }
2032
2033 /* Wrong interface: drop packet and (maybe) send PIM assert. */
2034 if (mrt->vif_table[vif].dev != dev) {
2035 if (rt_is_output_route(skb_rtable(skb))) {
2036 /* It is our own packet, looped back.
2037 * Very complicated situation...
2038 *
2039 * The best workaround until the routing daemons are
2040 * fixed is not to redistribute a packet if it was
2041 * sent through the wrong interface. This means that
2042 * multicast applications WILL NOT work for
2043 * (S,G) entries whose default multicast route points
2044 * to the wrong oif. In any case, it is not a good
2045 * idea to run multicast applications on a router.
2046 */
2047 goto dont_forward;
2048 }
2049
2050 cache->mfc_un.res.wrong_if++;
2051
2052 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2053 /* pimsm uses asserts when switching from RPT to SPT,
2054 * so we cannot check that the packet arrived on an oif.
2055 * It is bad, but otherwise we would need to move a pretty
2056 * large chunk of pimd into the kernel. Ough... --ANK
2057 */
2058 (mrt->mroute_do_pim ||
2059 cache->mfc_un.res.ttls[true_vifi] < 255) &&
2060 time_after(jiffies,
2061 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2062 cache->mfc_un.res.last_assert = jiffies;
2063 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
2064 }
2065 goto dont_forward;
2066 }
2067
2068 forward:
2069 mrt->vif_table[vif].pkt_in++;
2070 mrt->vif_table[vif].bytes_in += skb->len;
2071
2072 /* Forward the frame */
2073 if (cache->mfc_origin == htonl(INADDR_ANY) &&
2074 cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
2075 if (true_vifi >= 0 &&
2076 true_vifi != cache->mfc_parent &&
2077 ip_hdr(skb)->ttl >
2078 cache->mfc_un.res.ttls[cache->mfc_parent]) {
2079 /* It's an (*,*) entry and the packet is not coming from
2080 * the upstream: forward the packet to the upstream
2081 * only.
2082 */
2083 psend = cache->mfc_parent;
2084 goto last_forward;
2085 }
2086 goto dont_forward;
2087 }
2088 for (ct = cache->mfc_un.res.maxvif - 1;
2089 ct >= cache->mfc_un.res.minvif; ct--) {
2090 /* For (*,G) entry, don't forward to the incoming interface */
2091 if ((cache->mfc_origin != htonl(INADDR_ANY) ||
2092 ct != true_vifi) &&
2093 ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
2094 if (psend != -1) {
2095 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2096
2097 if (skb2)
2098 ipmr_queue_xmit(net, mrt, true_vifi,
2099 skb2, cache, psend);
2100 }
2101 psend = ct;
2102 }
2103 }
2104 last_forward:
2105 if (psend != -1) {
2106 if (local) {
2107 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2108
2109 if (skb2)
2110 ipmr_queue_xmit(net, mrt, true_vifi, skb2,
2111 cache, psend);
2112 } else {
2113 ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
2114 return;
2115 }
2116 }
2117
2118 dont_forward:
2119 if (!local)
2120 kfree_skb(skb);
2121 }
2122
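/* Build a flowi4 key from the skb's IP header and resolve the matching
 * mr_table through the multicast policy rules; returns ERR_PTR() on a
 * failed lookup.
 */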
2123 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
2124 {
2125 struct rtable *rt = skb_rtable(skb);
2126 struct iphdr *iph = ip_hdr(skb);
2127 struct flowi4 fl4 = {
2128 .daddr = iph->daddr,
2129 .saddr = iph->saddr,
2130 .flowi4_tos = RT_TOS(iph->tos),
2131 .flowi4_oif = (rt_is_output_route(rt) ?
2132 skb->dev->ifindex : 0),
2133 .flowi4_iif = (rt_is_output_route(rt) ?
2134 LOOPBACK_IFINDEX :
2135 skb->dev->ifindex),
2136 .flowi4_mark = skb->mark,
2137 };
2138 struct mr_table *mrt;
2139 int err;
2140
2141 err = ipmr_fib_lookup(net, &fl4, &mrt);
2142 if (err)
2143 return ERR_PTR(err);
2144 return mrt;
2145 }
2146
2147 /* Multicast packets for forwarding arrive here
2148 * Called with rcu_read_lock();
2149 */
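/* This is the dst input handler for multicast routes when multicast
 * forwarding is enabled on the incoming device (set up from
 * ip_route_input_mc() in route.c).
 */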
2150 int ip_mr_input(struct sk_buff *skb)
2151 {
2152 struct mfc_cache *cache;
2153 struct net *net = dev_net(skb->dev);
2154 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
2155 struct mr_table *mrt;
2156 struct net_device *dev;
2157
2158 /* skb->dev passed in is the loX master dev for vrfs.
2159 * As there are no vifs associated with loopback devices,
2160 * get the proper interface that does have a vif associated with it.
2161 */
2162 dev = skb->dev;
2163 if (netif_is_l3_master(skb->dev)) {
2164 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2165 if (!dev) {
2166 kfree_skb(skb);
2167 return -ENODEV;
2168 }
2169 }
2170
2171 /* Packet is looped back after forwarding; it should not be
2172 * forwarded a second time, but it can still be delivered locally.
2173 */
2174 if (IPCB(skb)->flags & IPSKB_FORWARDED)
2175 goto dont_forward;
2176
2177 mrt = ipmr_rt_fib_lookup(net, skb);
2178 if (IS_ERR(mrt)) {
2179 kfree_skb(skb);
2180 return PTR_ERR(mrt);
2181 }
2182 if (!local) {
2183 if (IPCB(skb)->opt.router_alert) {
2184 if (ip_call_ra_chain(skb))
2185 return 0;
2186 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
2187 /* IGMPv1 (and broken IGMPv2 implementations such as
2188 * Cisco IOS <= 11.2(8)) do not put the router alert
2189 * option in IGMP packets destined to routable
2190 * groups. It is very bad, because it means
2191 * that we can forward NO IGMP messages.
2192 */
2193 struct sock *mroute_sk;
2194
2195 mroute_sk = rcu_dereference(mrt->mroute_sk);
2196 if (mroute_sk) {
2197 nf_reset(skb);
2198 raw_rcv(mroute_sk, skb);
2199 return 0;
2200 }
2201 }
2202 }
2203
2204 /* already under rcu_read_lock() */
2205 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
2206 if (!cache) {
2207 int vif = ipmr_find_vif(mrt, dev);
2208
2209 if (vif >= 0)
2210 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
2211 vif);
2212 }
2213
2214 /* No usable cache entry */
2215 if (!cache) {
2216 int vif;
2217
2218 if (local) {
2219 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2220 ip_local_deliver(skb);
2221 if (!skb2)
2222 return -ENOBUFS;
2223 skb = skb2;
2224 }
2225
2226 read_lock(&mrt_lock);
2227 vif = ipmr_find_vif(mrt, dev);
2228 if (vif >= 0) {
2229 int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
2230 read_unlock(&mrt_lock);
2231
2232 return err2;
2233 }
2234 read_unlock(&mrt_lock);
2235 kfree_skb(skb);
2236 return -ENODEV;
2237 }
2238
2239 read_lock(&mrt_lock);
2240 ip_mr_forward(net, mrt, dev, skb, cache, local);
2241 read_unlock(&mrt_lock);
2242
2243 if (local)
2244 return ip_local_deliver(skb);
2245
2246 return 0;
2247
2248 dont_forward:
2249 if (local)
2250 return ip_local_deliver(skb);
2251 kfree_skb(skb);
2252 return 0;
2253 }
2254
2255 #ifdef CONFIG_IP_PIMSM_V1
2256 /* Handle IGMP messages of PIMv1 */
2257 int pim_rcv_v1(struct sk_buff *skb)
2258 {
2259 struct igmphdr *pim;
2260 struct net *net = dev_net(skb->dev);
2261 struct mr_table *mrt;
2262
2263 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
2264 goto drop;
2265
2266 pim = igmp_hdr(skb);
2267
2268 mrt = ipmr_rt_fib_lookup(net, skb);
2269 if (IS_ERR(mrt))
2270 goto drop;
2271 if (!mrt->mroute_do_pim ||
2272 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
2273 goto drop;
2274
2275 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
2276 drop:
2277 kfree_skb(skb);
2278 }
2279 return 0;
2280 }
2281 #endif
2282
2283 #ifdef CONFIG_IP_PIMSM_V2
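/* Handle PIMv2 REGISTER messages: validate the type and flags, accept a
 * checksum computed over either the PIM header alone or the whole
 * packet, then hand the inner packet to the register VIF via __pim_rcv().
 */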
2284 static int pim_rcv(struct sk_buff *skb)
2285 {
2286 struct pimreghdr *pim;
2287 struct net *net = dev_net(skb->dev);
2288 struct mr_table *mrt;
2289
2290 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
2291 goto drop;
2292
2293 pim = (struct pimreghdr *)skb_transport_header(skb);
2294 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
2295 (pim->flags & PIM_NULL_REGISTER) ||
2296 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
2297 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
2298 goto drop;
2299
2300 mrt = ipmr_rt_fib_lookup(net, skb);
2301 if (IS_ERR(mrt))
2302 goto drop;
2303 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
2304 drop:
2305 kfree_skb(skb);
2306 }
2307 return 0;
2308 }
2309 #endif
2310
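/* Fill the per-route attributes for one MFC entry: RTA_IIF, the
 * RTA_MULTIPATH nexthop nest (one rtnexthop per forwarding VIF, with the
 * TTL threshold in rtnh_hops), RTA_MFC_STATS and RTA_EXPIRES.
 * Returns -ENOENT for unresolved entries, -EMSGSIZE when the skb is
 * full, and 1 on success.
 */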
2311 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2312 struct mfc_cache *c, struct rtmsg *rtm)
2313 {
2314 struct rta_mfc_stats mfcs;
2315 struct nlattr *mp_attr;
2316 struct rtnexthop *nhp;
2317 unsigned long lastuse;
2318 int ct;
2319
2320 /* If cache is unresolved, don't try to fill in IIF and OIF */
2321 if (c->mfc_parent >= MAXVIFS) {
2322 rtm->rtm_flags |= RTNH_F_UNRESOLVED;
2323 return -ENOENT;
2324 }
2325
2326 if (VIF_EXISTS(mrt, c->mfc_parent) &&
2327 nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
2328 return -EMSGSIZE;
2329
2330 if (c->mfc_flags & MFC_OFFLOAD)
2331 rtm->rtm_flags |= RTNH_F_OFFLOAD;
2332
2333 if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
2334 return -EMSGSIZE;
2335
2336 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2337 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2338 if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
2339 nla_nest_cancel(skb, mp_attr);
2340 return -EMSGSIZE;
2341 }
2342
2343 nhp->rtnh_flags = 0;
2344 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2345 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
2346 nhp->rtnh_len = sizeof(*nhp);
2347 }
2348 }
2349
2350 nla_nest_end(skb, mp_attr);
2351
2352 lastuse = READ_ONCE(c->mfc_un.res.lastuse);
2353 lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
2354
2355 mfcs.mfcs_packets = c->mfc_un.res.pkt;
2356 mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2357 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2358 if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
2359 nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
2360 RTA_PAD))
2361 return -EMSGSIZE;
2362
2363 rtm->rtm_type = RTN_MULTICAST;
2364 return 1;
2365 }
2366
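/* Used when an RTM_GETROUTE reply describes a multicast destination
 * (called from the IPv4 route fill path in route.c); reports the
 * resolved (S,G) entry, or queues the request as an unresolved cache
 * entry when no entry exists yet.
 */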
2367 int ipmr_get_route(struct net *net, struct sk_buff *skb,
2368 __be32 saddr, __be32 daddr,
2369 struct rtmsg *rtm, u32 portid)
2370 {
2371 struct mfc_cache *cache;
2372 struct mr_table *mrt;
2373 int err;
2374
2375 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2376 if (!mrt)
2377 return -ENOENT;
2378
2379 rcu_read_lock();
2380 cache = ipmr_cache_find(mrt, saddr, daddr);
2381 if (!cache && skb->dev) {
2382 int vif = ipmr_find_vif(mrt, skb->dev);
2383
2384 if (vif >= 0)
2385 cache = ipmr_cache_find_any(mrt, daddr, vif);
2386 }
2387 if (!cache) {
2388 struct sk_buff *skb2;
2389 struct iphdr *iph;
2390 struct net_device *dev;
2391 int vif = -1;
2392
2393 dev = skb->dev;
2394 read_lock(&mrt_lock);
2395 if (dev)
2396 vif = ipmr_find_vif(mrt, dev);
2397 if (vif < 0) {
2398 read_unlock(&mrt_lock);
2399 rcu_read_unlock();
2400 return -ENODEV;
2401 }
2402 skb2 = skb_clone(skb, GFP_ATOMIC);
2403 if (!skb2) {
2404 read_unlock(&mrt_lock);
2405 rcu_read_unlock();
2406 return -ENOMEM;
2407 }
2408
2409 NETLINK_CB(skb2).portid = portid;
2410 skb_push(skb2, sizeof(struct iphdr));
2411 skb_reset_network_header(skb2);
2412 iph = ip_hdr(skb2);
2413 iph->ihl = sizeof(struct iphdr) >> 2;
2414 iph->saddr = saddr;
2415 iph->daddr = daddr;
2416 iph->version = 0;
2417 err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
2418 read_unlock(&mrt_lock);
2419 rcu_read_unlock();
2420 return err;
2421 }
2422
2423 read_lock(&mrt_lock);
2424 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
2425 read_unlock(&mrt_lock);
2426 rcu_read_unlock();
2427 return err;
2428 }
2429
2430 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2431 u32 portid, u32 seq, struct mfc_cache *c, int cmd,
2432 int flags)
2433 {
2434 struct nlmsghdr *nlh;
2435 struct rtmsg *rtm;
2436 int err;
2437
2438 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2439 if (!nlh)
2440 return -EMSGSIZE;
2441
2442 rtm = nlmsg_data(nlh);
2443 rtm->rtm_family = RTNL_FAMILY_IPMR;
2444 rtm->rtm_dst_len = 32;
2445 rtm->rtm_src_len = 32;
2446 rtm->rtm_tos = 0;
2447 rtm->rtm_table = mrt->id;
2448 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2449 goto nla_put_failure;
2450 rtm->rtm_type = RTN_MULTICAST;
2451 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2452 if (c->mfc_flags & MFC_STATIC)
2453 rtm->rtm_protocol = RTPROT_STATIC;
2454 else
2455 rtm->rtm_protocol = RTPROT_MROUTED;
2456 rtm->rtm_flags = 0;
2457
2458 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
2459 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
2460 goto nla_put_failure;
2461 err = __ipmr_fill_mroute(mrt, skb, c, rtm);
2462 /* do not break the dump if cache is unresolved */
2463 if (err < 0 && err != -ENOENT)
2464 goto nla_put_failure;
2465
2466 nlmsg_end(skb, nlh);
2467 return 0;
2468
2469 nla_put_failure:
2470 nlmsg_cancel(skb, nlh);
2471 return -EMSGSIZE;
2472 }
2473
2474 static size_t mroute_msgsize(bool unresolved, int maxvif)
2475 {
2476 size_t len =
2477 NLMSG_ALIGN(sizeof(struct rtmsg))
2478 + nla_total_size(4) /* RTA_TABLE */
2479 + nla_total_size(4) /* RTA_SRC */
2480 + nla_total_size(4) /* RTA_DST */
2481 ;
2482
2483 if (!unresolved)
2484 len = len
2485 + nla_total_size(4) /* RTA_IIF */
2486 + nla_total_size(0) /* RTA_MULTIPATH */
2487 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2488 /* RTA_MFC_STATS */
2489 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2490 ;
2491
2492 return len;
2493 }
2494
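/* Notify RTNLGRP_IPV4_MROUTE listeners about a change to an MFC entry
 * (cmd is RTM_NEWROUTE or RTM_DELROUTE).
 */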
2495 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
2496 int cmd)
2497 {
2498 struct net *net = read_pnet(&mrt->net);
2499 struct sk_buff *skb;
2500 int err = -ENOBUFS;
2501
2502 skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
2503 GFP_ATOMIC);
2504 if (!skb)
2505 goto errout;
2506
2507 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2508 if (err < 0)
2509 goto errout;
2510
2511 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
2512 return;
2513
2514 errout:
2515 kfree_skb(skb);
2516 if (err < 0)
2517 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
2518 }
2519
2520 static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
2521 {
2522 size_t len =
2523 NLMSG_ALIGN(sizeof(struct rtgenmsg))
2524 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */
2525 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */
2526 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */
2527 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */
2528 /* IPMRA_CREPORT_PKT */
2529 + nla_total_size(payloadlen)
2530 ;
2531
2532 return len;
2533 }
2534
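/* Mirror an mrouted-style upcall - the struct igmpmsg header plus the
 * triggering packet - to RTNLGRP_IPV4_MROUTE_R listeners as an
 * RTM_NEWCACHEREPORT message.
 */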
2535 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2536 {
2537 struct net *net = read_pnet(&mrt->net);
2538 struct nlmsghdr *nlh;
2539 struct rtgenmsg *rtgenm;
2540 struct igmpmsg *msg;
2541 struct sk_buff *skb;
2542 struct nlattr *nla;
2543 int payloadlen;
2544
2545 payloadlen = pkt->len - sizeof(struct igmpmsg);
2546 msg = (struct igmpmsg *)skb_network_header(pkt);
2547
2548 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2549 if (!skb)
2550 goto errout;
2551
2552 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2553 sizeof(struct rtgenmsg), 0);
2554 if (!nlh)
2555 goto errout;
2556 rtgenm = nlmsg_data(nlh);
2557 rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
2558 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
2559 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) ||
2560 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
2561 msg->im_src.s_addr) ||
2562 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
2563 msg->im_dst.s_addr))
2564 goto nla_put_failure;
2565
2566 nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
2567 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
2568 nla_data(nla), payloadlen))
2569 goto nla_put_failure;
2570
2571 nlmsg_end(skb, nlh);
2572
2573 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
2574 return;
2575
2576 nla_put_failure:
2577 nlmsg_cancel(skb, nlh);
2578 errout:
2579 kfree_skb(skb);
2580 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
2581 }
2582
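/* Doit handler for RTM_GETROUTE with family RTNL_FAMILY_IPMR: look up a
 * single cache entry by RTA_SRC/RTA_DST (and optionally RTA_TABLE) and
 * unicast the answer back to the requester. iproute2's "ip mroute"
 * typically uses the dump path below instead.
 */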
2583 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2584 struct netlink_ext_ack *extack)
2585 {
2586 struct net *net = sock_net(in_skb->sk);
2587 struct nlattr *tb[RTA_MAX + 1];
2588 struct sk_buff *skb = NULL;
2589 struct mfc_cache *cache;
2590 struct mr_table *mrt;
2591 struct rtmsg *rtm;
2592 __be32 src, grp;
2593 u32 tableid;
2594 int err;
2595
2596 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
2597 rtm_ipv4_policy, extack);
2598 if (err < 0)
2599 goto errout;
2600
2601 rtm = nlmsg_data(nlh);
2602
2603 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2604 grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
2605 tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
2606
2607 mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
2608 if (!mrt) {
2609 err = -ENOENT;
2610 goto errout_free;
2611 }
2612
2613 /* entries are added/deleted only under RTNL */
2614 rcu_read_lock();
2615 cache = ipmr_cache_find(mrt, src, grp);
2616 rcu_read_unlock();
2617 if (!cache) {
2618 err = -ENOENT;
2619 goto errout_free;
2620 }
2621
2622 skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
2623 if (!skb) {
2624 err = -ENOBUFS;
2625 goto errout_free;
2626 }
2627
2628 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2629 nlh->nlmsg_seq, cache,
2630 RTM_NEWROUTE, 0);
2631 if (err < 0)
2632 goto errout_free;
2633
2634 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2635
2636 errout:
2637 return err;
2638
2639 errout_free:
2640 kfree_skb(skb);
2641 goto errout;
2642 }
2643
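/* Dumpit handler: walk every mr_table, emitting resolved entries under
 * RCU and unresolved entries under mfc_unres_lock, and use cb->args[] to
 * resume across partial dumps.
 */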
2644 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2645 {
2646 struct net *net = sock_net(skb->sk);
2647 struct mr_table *mrt;
2648 struct mfc_cache *mfc;
2649 unsigned int t = 0, s_t;
2650 unsigned int e = 0, s_e;
2651
2652 s_t = cb->args[0];
2653 s_e = cb->args[1];
2654
2655 rcu_read_lock();
2656 ipmr_for_each_table(mrt, net) {
2657 if (t < s_t)
2658 goto next_table;
2659 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
2660 if (e < s_e)
2661 goto next_entry;
2662 if (ipmr_fill_mroute(mrt, skb,
2663 NETLINK_CB(cb->skb).portid,
2664 cb->nlh->nlmsg_seq,
2665 mfc, RTM_NEWROUTE,
2666 NLM_F_MULTI) < 0)
2667 goto done;
2668 next_entry:
2669 e++;
2670 }
2671 e = 0;
2672 s_e = 0;
2673
2674 spin_lock_bh(&mfc_unres_lock);
2675 list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
2676 if (e < s_e)
2677 goto next_entry2;
2678 if (ipmr_fill_mroute(mrt, skb,
2679 NETLINK_CB(cb->skb).portid,
2680 cb->nlh->nlmsg_seq,
2681 mfc, RTM_NEWROUTE,
2682 NLM_F_MULTI) < 0) {
2683 spin_unlock_bh(&mfc_unres_lock);
2684 goto done;
2685 }
2686 next_entry2:
2687 e++;
2688 }
2689 spin_unlock_bh(&mfc_unres_lock);
2690 e = 0;
2691 s_e = 0;
2692 next_table:
2693 t++;
2694 }
2695 done:
2696 rcu_read_unlock();
2697
2698 cb->args[1] = e;
2699 cb->args[0] = t;
2700
2701 return skb->len;
2702 }
2703
2704 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
2705 [RTA_SRC] = { .type = NLA_U32 },
2706 [RTA_DST] = { .type = NLA_U32 },
2707 [RTA_IIF] = { .type = NLA_U32 },
2708 [RTA_TABLE] = { .type = NLA_U32 },
2709 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2710 };
2711
2712 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
2713 {
2714 switch (rtm_protocol) {
2715 case RTPROT_STATIC:
2716 case RTPROT_MROUTED:
2717 return true;
2718 }
2719 return false;
2720 }
2721
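/* Translate an RTA_MULTIPATH nexthop list into mfcc_ttls[]: rtnh_hops
 * carries the TTL threshold and the VIF index is taken from the
 * nexthop's position in the list.
 */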
2722 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
2723 {
2724 struct rtnexthop *rtnh = nla_data(nla);
2725 int remaining = nla_len(nla), vifi = 0;
2726
2727 while (rtnh_ok(rtnh, remaining)) {
2728 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
2729 if (++vifi == MAXVIFS)
2730 break;
2731 rtnh = rtnh_next(rtnh, &remaining);
2732 }
2733
2734 return remaining > 0 ? -EINVAL : vifi;
2735 }
2736
2737 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
2738 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
2739 struct mfcctl *mfcc, int *mrtsock,
2740 struct mr_table **mrtret,
2741 struct netlink_ext_ack *extack)
2742 {
2743 struct net_device *dev = NULL;
2744 u32 tblid = RT_TABLE_DEFAULT;
2745 struct mr_table *mrt;
2746 struct nlattr *attr;
2747 struct rtmsg *rtm;
2748 int ret, rem;
2749
2750 ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy,
2751 extack);
2752 if (ret < 0)
2753 goto out;
2754 rtm = nlmsg_data(nlh);
2755
2756 ret = -EINVAL;
2757 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
2758 rtm->rtm_type != RTN_MULTICAST ||
2759 rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
2760 !ipmr_rtm_validate_proto(rtm->rtm_protocol))
2761 goto out;
2762
2763 memset(mfcc, 0, sizeof(*mfcc));
2764 mfcc->mfcc_parent = -1;
2765 ret = 0;
2766 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
2767 switch (nla_type(attr)) {
2768 case RTA_SRC:
2769 mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
2770 break;
2771 case RTA_DST:
2772 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
2773 break;
2774 case RTA_IIF:
2775 dev = __dev_get_by_index(net, nla_get_u32(attr));
2776 if (!dev) {
2777 ret = -ENODEV;
2778 goto out;
2779 }
2780 break;
2781 case RTA_MULTIPATH:
2782 if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
2783 ret = -EINVAL;
2784 goto out;
2785 }
2786 break;
2787 case RTA_PREFSRC:
2788 ret = 1;
2789 break;
2790 case RTA_TABLE:
2791 tblid = nla_get_u32(attr);
2792 break;
2793 }
2794 }
2795 mrt = ipmr_get_table(net, tblid);
2796 if (!mrt) {
2797 ret = -ENOENT;
2798 goto out;
2799 }
2800 *mrtret = mrt;
2801 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
2802 if (dev)
2803 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
2804
2805 out:
2806 return ret;
2807 }
2808
2809 /* takes care of both newroute and delroute */
2810 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
2811 struct netlink_ext_ack *extack)
2812 {
2813 struct net *net = sock_net(skb->sk);
2814 int ret, mrtsock, parent;
2815 struct mr_table *tbl;
2816 struct mfcctl mfcc;
2817
2818 mrtsock = 0;
2819 tbl = NULL;
2820 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
2821 if (ret < 0)
2822 return ret;
2823
2824 parent = ret ? mfcc.mfcc_parent : -1;
2825 if (nlh->nlmsg_type == RTM_NEWROUTE)
2826 return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
2827 else
2828 return ipmr_mfc_delete(tbl, &mfcc, parent);
2829 }
2830
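/* Emit the per-table attributes (id, unresolved-queue length, register
 * VIF, assert and PIM flags) used by the RTM_GETLINK dump below.
 */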
2831 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
2832 {
2833 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
2834
2835 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
2836 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
2837 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM,
2838 mrt->mroute_reg_vif_num) ||
2839 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
2840 mrt->mroute_do_assert) ||
2841 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim))
2842 return false;
2843
2844 return true;
2845 }
2846
2847 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
2848 {
2849 struct nlattr *vif_nest;
2850 struct vif_device *vif;
2851
2852 /* if the VIF doesn't exist just continue */
2853 if (!VIF_EXISTS(mrt, vifid))
2854 return true;
2855
2856 vif = &mrt->vif_table[vifid];
2857 vif_nest = nla_nest_start(skb, IPMRA_VIF);
2858 if (!vif_nest)
2859 return false;
2860 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
2861 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
2862 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
2863 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
2864 IPMRA_VIFA_PAD) ||
2865 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out,
2866 IPMRA_VIFA_PAD) ||
2867 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in,
2868 IPMRA_VIFA_PAD) ||
2869 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out,
2870 IPMRA_VIFA_PAD) ||
2871 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) ||
2872 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) {
2873 nla_nest_cancel(skb, vif_nest);
2874 return false;
2875 }
2876 nla_nest_end(skb, vif_nest);
2877
2878 return true;
2879 }
2880
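/* RTM_GETLINK dump for RTNL_FAMILY_IPMR: one RTM_NEWLINK message per
 * mr_table, carrying the table attributes and an IPMRA_TABLE_VIFS nest
 * with one entry per VIF.
 */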
2881 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
2882 {
2883 struct net *net = sock_net(skb->sk);
2884 struct nlmsghdr *nlh = NULL;
2885 unsigned int t = 0, s_t;
2886 unsigned int e = 0, s_e;
2887 struct mr_table *mrt;
2888
2889 s_t = cb->args[0];
2890 s_e = cb->args[1];
2891
2892 ipmr_for_each_table(mrt, net) {
2893 struct nlattr *vifs, *af;
2894 struct ifinfomsg *hdr;
2895 u32 i;
2896
2897 if (t < s_t)
2898 goto skip_table;
2899 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
2900 cb->nlh->nlmsg_seq, RTM_NEWLINK,
2901 sizeof(*hdr), NLM_F_MULTI);
2902 if (!nlh)
2903 break;
2904
2905 hdr = nlmsg_data(nlh);
2906 memset(hdr, 0, sizeof(*hdr));
2907 hdr->ifi_family = RTNL_FAMILY_IPMR;
2908
2909 af = nla_nest_start(skb, IFLA_AF_SPEC);
2910 if (!af) {
2911 nlmsg_cancel(skb, nlh);
2912 goto out;
2913 }
2914
2915 if (!ipmr_fill_table(mrt, skb)) {
2916 nlmsg_cancel(skb, nlh);
2917 goto out;
2918 }
2919
2920 vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS);
2921 if (!vifs) {
2922 nla_nest_end(skb, af);
2923 nlmsg_end(skb, nlh);
2924 goto out;
2925 }
2926 for (i = 0; i < mrt->maxvif; i++) {
2927 if (e < s_e)
2928 goto skip_entry;
2929 if (!ipmr_fill_vif(mrt, i, skb)) {
2930 nla_nest_end(skb, vifs);
2931 nla_nest_end(skb, af);
2932 nlmsg_end(skb, nlh);
2933 goto out;
2934 }
2935 skip_entry:
2936 e++;
2937 }
2938 s_e = 0;
2939 e = 0;
2940 nla_nest_end(skb, vifs);
2941 nla_nest_end(skb, af);
2942 nlmsg_end(skb, nlh);
2943 skip_table:
2944 t++;
2945 }
2946
2947 out:
2948 cb->args[1] = e;
2949 cb->args[0] = t;
2950
2951 return skb->len;
2952 }
2953
2954 #ifdef CONFIG_PROC_FS
2955 /* The /proc interfaces to multicast routing:
2956 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
2957 */
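/* A purely illustrative example of /proc/net/ip_mr_vif output (the
 * format string lives in ipmr_vif_seq_show() below; the values here are
 * made up):
 *
 * Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *  0 eth0          123456     789     65432      98 00000 C0A80001 00000000
 *  1 pimreg             0       0         0       0 00004 00000000 00000000
 */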
2958 struct ipmr_vif_iter {
2959 struct seq_net_private p;
2960 struct mr_table *mrt;
2961 int ct;
2962 };
2963
2964 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2965 struct ipmr_vif_iter *iter,
2966 loff_t pos)
2967 {
2968 struct mr_table *mrt = iter->mrt;
2969
2970 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2971 if (!VIF_EXISTS(mrt, iter->ct))
2972 continue;
2973 if (pos-- == 0)
2974 return &mrt->vif_table[iter->ct];
2975 }
2976 return NULL;
2977 }
2978
2979 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2980 __acquires(mrt_lock)
2981 {
2982 struct ipmr_vif_iter *iter = seq->private;
2983 struct net *net = seq_file_net(seq);
2984 struct mr_table *mrt;
2985
2986 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2987 if (!mrt)
2988 return ERR_PTR(-ENOENT);
2989
2990 iter->mrt = mrt;
2991
2992 read_lock(&mrt_lock);
2993 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
2994 : SEQ_START_TOKEN;
2995 }
2996
2997 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2998 {
2999 struct ipmr_vif_iter *iter = seq->private;
3000 struct net *net = seq_file_net(seq);
3001 struct mr_table *mrt = iter->mrt;
3002
3003 ++*pos;
3004 if (v == SEQ_START_TOKEN)
3005 return ipmr_vif_seq_idx(net, iter, 0);
3006
3007 while (++iter->ct < mrt->maxvif) {
3008 if (!VIF_EXISTS(mrt, iter->ct))
3009 continue;
3010 return &mrt->vif_table[iter->ct];
3011 }
3012 return NULL;
3013 }
3014
3015 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
3016 __releases(mrt_lock)
3017 {
3018 read_unlock(&mrt_lock);
3019 }
3020
3021 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
3022 {
3023 struct ipmr_vif_iter *iter = seq->private;
3024 struct mr_table *mrt = iter->mrt;
3025
3026 if (v == SEQ_START_TOKEN) {
3027 seq_puts(seq,
3028 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
3029 } else {
3030 const struct vif_device *vif = v;
3031 const char *name = vif->dev ? vif->dev->name : "none";
3032
3033 seq_printf(seq,
3034 "%2zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
3035 vif - mrt->vif_table,
3036 name, vif->bytes_in, vif->pkt_in,
3037 vif->bytes_out, vif->pkt_out,
3038 vif->flags, vif->local, vif->remote);
3039 }
3040 return 0;
3041 }
3042
3043 static const struct seq_operations ipmr_vif_seq_ops = {
3044 .start = ipmr_vif_seq_start,
3045 .next = ipmr_vif_seq_next,
3046 .stop = ipmr_vif_seq_stop,
3047 .show = ipmr_vif_seq_show,
3048 };
3049
3050 static int ipmr_vif_open(struct inode *inode, struct file *file)
3051 {
3052 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
3053 sizeof(struct ipmr_vif_iter));
3054 }
3055
3056 static const struct file_operations ipmr_vif_fops = {
3057 .owner = THIS_MODULE,
3058 .open = ipmr_vif_open,
3059 .read = seq_read,
3060 .llseek = seq_lseek,
3061 .release = seq_release_net,
3062 };
3063
3064 struct ipmr_mfc_iter {
3065 struct seq_net_private p;
3066 struct mr_table *mrt;
3067 struct list_head *cache;
3068 };
3069
3070 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
3071 struct ipmr_mfc_iter *it, loff_t pos)
3072 {
3073 struct mr_table *mrt = it->mrt;
3074 struct mfc_cache *mfc;
3075
3076 rcu_read_lock();
3077 it->cache = &mrt->mfc_cache_list;
3078 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
3079 if (pos-- == 0)
3080 return mfc;
3081 rcu_read_unlock();
3082
3083 spin_lock_bh(&mfc_unres_lock);
3084 it->cache = &mrt->mfc_unres_queue;
3085 list_for_each_entry(mfc, it->cache, list)
3086 if (pos-- == 0)
3087 return mfc;
3088 spin_unlock_bh(&mfc_unres_lock);
3089
3090 it->cache = NULL;
3091 return NULL;
3092 }
3093
3094
3095 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
3096 {
3097 struct ipmr_mfc_iter *it = seq->private;
3098 struct net *net = seq_file_net(seq);
3099 struct mr_table *mrt;
3100
3101 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
3102 if (!mrt)
3103 return ERR_PTR(-ENOENT);
3104
3105 it->mrt = mrt;
3106 it->cache = NULL;
3107 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
3108 : SEQ_START_TOKEN;
3109 }
3110
3111 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3112 {
3113 struct ipmr_mfc_iter *it = seq->private;
3114 struct net *net = seq_file_net(seq);
3115 struct mr_table *mrt = it->mrt;
3116 struct mfc_cache *mfc = v;
3117
3118 ++*pos;
3119
3120 if (v == SEQ_START_TOKEN)
3121 return ipmr_mfc_seq_idx(net, seq->private, 0);
3122
3123 if (mfc->list.next != it->cache)
3124 return list_entry(mfc->list.next, struct mfc_cache, list);
3125
3126 if (it->cache == &mrt->mfc_unres_queue)
3127 goto end_of_list;
3128
3129 /* exhausted the resolved cache list, show unresolved entries */
3130 rcu_read_unlock();
3131 it->cache = &mrt->mfc_unres_queue;
3132
3133 spin_lock_bh(&mfc_unres_lock);
3134 if (!list_empty(it->cache))
3135 return list_first_entry(it->cache, struct mfc_cache, list);
3136
3137 end_of_list:
3138 spin_unlock_bh(&mfc_unres_lock);
3139 it->cache = NULL;
3140
3141 return NULL;
3142 }
3143
3144 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
3145 {
3146 struct ipmr_mfc_iter *it = seq->private;
3147 struct mr_table *mrt = it->mrt;
3148
3149 if (it->cache == &mrt->mfc_unres_queue)
3150 spin_unlock_bh(&mfc_unres_lock);
3151 else if (it->cache == &mrt->mfc_cache_list)
3152 rcu_read_unlock();
3153 }
3154
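/* An illustrative /proc/net/ip_mr_cache line (format as printed below;
 * the values are made up): group and origin in hex, the parent VIF,
 * packet/byte/wrong-interface counters, then "oif:ttl" pairs, e.g.
 *
 * Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 * E1000001 C0A80001 0         12     1234        0  1:1    2:1
 */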
3155 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
3156 {
3157 int n;
3158
3159 if (v == SEQ_START_TOKEN) {
3160 seq_puts(seq,
3161 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
3162 } else {
3163 const struct mfc_cache *mfc = v;
3164 const struct ipmr_mfc_iter *it = seq->private;
3165 const struct mr_table *mrt = it->mrt;
3166
3167 seq_printf(seq, "%08X %08X %-3hd",
3168 (__force u32) mfc->mfc_mcastgrp,
3169 (__force u32) mfc->mfc_origin,
3170 mfc->mfc_parent);
3171
3172 if (it->cache != &mrt->mfc_unres_queue) {
3173 seq_printf(seq, " %8lu %8lu %8lu",
3174 mfc->mfc_un.res.pkt,
3175 mfc->mfc_un.res.bytes,
3176 mfc->mfc_un.res.wrong_if);
3177 for (n = mfc->mfc_un.res.minvif;
3178 n < mfc->mfc_un.res.maxvif; n++) {
3179 if (VIF_EXISTS(mrt, n) &&
3180 mfc->mfc_un.res.ttls[n] < 255)
3181 seq_printf(seq,
3182 " %2d:%-3d",
3183 n, mfc->mfc_un.res.ttls[n]);
3184 }
3185 } else {
3186 /* unresolved mfc_caches don't contain
3187 * pkt, bytes and wrong_if values
3188 */
3189 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
3190 }
3191 seq_putc(seq, '\n');
3192 }
3193 return 0;
3194 }
3195
3196 static const struct seq_operations ipmr_mfc_seq_ops = {
3197 .start = ipmr_mfc_seq_start,
3198 .next = ipmr_mfc_seq_next,
3199 .stop = ipmr_mfc_seq_stop,
3200 .show = ipmr_mfc_seq_show,
3201 };
3202
3203 static int ipmr_mfc_open(struct inode *inode, struct file *file)
3204 {
3205 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
3206 sizeof(struct ipmr_mfc_iter));
3207 }
3208
3209 static const struct file_operations ipmr_mfc_fops = {
3210 .owner = THIS_MODULE,
3211 .open = ipmr_mfc_open,
3212 .read = seq_read,
3213 .llseek = seq_lseek,
3214 .release = seq_release_net,
3215 };
3216 #endif
3217
3218 #ifdef CONFIG_IP_PIMSM_V2
3219 static const struct net_protocol pim_protocol = {
3220 .handler = pim_rcv,
3221 .netns_ok = 1,
3222 };
3223 #endif
3224
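/* FIB notifier support: the sequence number below lets a listener's dump
 * detect that it raced with a table change and must be restarted.
 */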
3225 static unsigned int ipmr_seq_read(struct net *net)
3226 {
3227 ASSERT_RTNL();
3228
3229 return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
3230 }
3231
3232 static int ipmr_dump(struct net *net, struct notifier_block *nb)
3233 {
3234 struct mr_table *mrt;
3235 int err;
3236
3237 err = ipmr_rules_dump(net, nb);
3238 if (err)
3239 return err;
3240
3241 ipmr_for_each_table(mrt, net) {
3242 struct vif_device *v = &mrt->vif_table[0];
3243 struct mfc_cache *mfc;
3244 int vifi;
3245
3246 /* Notify on table VIF entries */
3247 read_lock(&mrt_lock);
3248 for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
3249 if (!v->dev)
3250 continue;
3251
3252 call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
3253 v, vifi, mrt->id);
3254 }
3255 read_unlock(&mrt_lock);
3256
3257 /* Notify on table MFC entries */
3258 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
3259 call_ipmr_mfc_entry_notifier(nb, net,
3260 FIB_EVENT_ENTRY_ADD, mfc,
3261 mrt->id);
3262 }
3263
3264 return 0;
3265 }
3266
3267 static const struct fib_notifier_ops ipmr_notifier_ops_template = {
3268 .family = RTNL_FAMILY_IPMR,
3269 .fib_seq_read = ipmr_seq_read,
3270 .fib_dump = ipmr_dump,
3271 .owner = THIS_MODULE,
3272 };
3273
3274 static int __net_init ipmr_notifier_init(struct net *net)
3275 {
3276 struct fib_notifier_ops *ops;
3277
3278 net->ipv4.ipmr_seq = 0;
3279
3280 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
3281 if (IS_ERR(ops))
3282 return PTR_ERR(ops);
3283 net->ipv4.ipmr_notifier_ops = ops;
3284
3285 return 0;
3286 }
3287
3288 static void __net_exit ipmr_notifier_exit(struct net *net)
3289 {
3290 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
3291 net->ipv4.ipmr_notifier_ops = NULL;
3292 }
3293
3294 /* Setup for IP multicast routing */
3295 static int __net_init ipmr_net_init(struct net *net)
3296 {
3297 int err;
3298
3299 err = ipmr_notifier_init(net);
3300 if (err)
3301 goto ipmr_notifier_fail;
3302
3303 err = ipmr_rules_init(net);
3304 if (err < 0)
3305 goto ipmr_rules_fail;
3306
3307 #ifdef CONFIG_PROC_FS
3308 err = -ENOMEM;
3309 if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops))
3310 goto proc_vif_fail;
3311 if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops))
3312 goto proc_cache_fail;
3313 #endif
3314 return 0;
3315
3316 #ifdef CONFIG_PROC_FS
3317 proc_cache_fail:
3318 remove_proc_entry("ip_mr_vif", net->proc_net);
3319 proc_vif_fail:
3320 ipmr_rules_exit(net);
3321 #endif
3322 ipmr_rules_fail:
3323 ipmr_notifier_exit(net);
3324 ipmr_notifier_fail:
3325 return err;
3326 }
3327
3328 static void __net_exit ipmr_net_exit(struct net *net)
3329 {
3330 #ifdef CONFIG_PROC_FS
3331 remove_proc_entry("ip_mr_cache", net->proc_net);
3332 remove_proc_entry("ip_mr_vif", net->proc_net);
3333 #endif
3334 ipmr_notifier_exit(net);
3335 ipmr_rules_exit(net);
3336 }
3337
3338 static struct pernet_operations ipmr_net_ops = {
3339 .init = ipmr_net_init,
3340 .exit = ipmr_net_exit,
3341 };
3342
3343 int __init ip_mr_init(void)
3344 {
3345 int err;
3346
3347 mrt_cachep = kmem_cache_create("ip_mrt_cache",
3348 sizeof(struct mfc_cache),
3349 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
3350 NULL);
3351
3352 err = register_pernet_subsys(&ipmr_net_ops);
3353 if (err)
3354 goto reg_pernet_fail;
3355
3356 err = register_netdevice_notifier(&ip_mr_notifier);
3357 if (err)
3358 goto reg_notif_fail;
3359 #ifdef CONFIG_IP_PIMSM_V2
3360 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
3361 pr_err("%s: can't add PIM protocol\n", __func__);
3362 err = -EAGAIN;
3363 goto add_proto_fail;
3364 }
3365 #endif
3366 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
3367 ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
3368 rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
3369 ipmr_rtm_route, NULL, 0);
3370 rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
3371 ipmr_rtm_route, NULL, 0);
3372
3373 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
3374 NULL, ipmr_rtm_dumplink, 0);
3375 return 0;
3376
3377 #ifdef CONFIG_IP_PIMSM_V2
3378 add_proto_fail:
3379 unregister_netdevice_notifier(&ip_mr_notifier);
3380 #endif
3381 reg_notif_fail:
3382 unregister_pernet_subsys(&ipmr_net_ops);
3383 reg_pernet_fail:
3384 kmem_cache_destroy(mrt_cachep);
3385 return err;
3386 }