/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>

#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>

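/* Per-table multicast routing state.  With
 * CONFIG_IPV6_MROUTE_MULTIPLE_TABLES each table id gets its own
 * instance; otherwise only the default table exists.
 */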
struct mr6_table {
	struct list_head	list;
	possible_net_t		net;
	u32			id;
	struct sock		*mroute6_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc6_unres_queue;
	struct list_head	mfc6_cache_array[MFC6_LINES];
	struct mif_device	vif6_table[MAXMIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	bool			mroute_do_assert;
	bool			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;
#endif
};

struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr6_table	*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   Thus the data path is entirely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt);
static void ipmr_expire_process(unsigned long arg);

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

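/* Resolve the mr6_table for a flow via the IP6MR fib rules. */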
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};

static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	rtnl_unlock();
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
}
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
#endif

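/* Return the table with the given id, creating it on first use. */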
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}

static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt);
	kfree(mrt);
}

#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

/*
 *	The /proc interfaces to multicast routing:
 *	/proc/ip6_mr_cache, /proc/ip6_mr_vif
 */

struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	struct mr6_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}

static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mrt->mfc6_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IPV6_PIMSM_V2

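/* Handle an incoming PIMv2 Register message: validate the header and
 * checksum, then decapsulate the inner IPv6 packet and re-inject it
 * through the pim6reg device.
 */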
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
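	/* The Register checksum may be computed over the PIM header only
	 * or over the entire packet (implementations differ), so accept
	 * either form.
	 */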
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to a multicast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
drop:
	kfree_skb(skb);
	return 0;
}

static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};

/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
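	/* leave room for the outer IPv6 header plus the 8-byte
	 * PIM Register header */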
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

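	/* Shrink maxvif down to the highest remaining in-use vif. */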
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}

static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}


/* Timer handler for the unresolved queue. */

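/* Runs with mfc_unres_lock held; re-arms the timer while unresolved
 * entries remain.
 */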
static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */
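/* A ttl of 255 in ttls[] marks a mif that is not part of the oif set. */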

static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (MIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			return c;
	}
	return NULL;
}

/* Look for a (*,*,oif) entry */
static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
						      mifi_t mifi)
{
	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
		    (c->mfc_un.res.ttls[mifi] < 255))
			return c;

	return NULL;
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	int line = MFC6_HASH(mcastgrp, &in6addr_any);
	struct mfc6_cache *c, *proxy;

	if (ipv6_addr_any(mcastgrp))
		goto skip;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
			if (c->mfc_un.res.ttls[mifi] < 255)
				return c;

			/* It's ok if the mifi is part of the static tree */
			proxy = ip6mr_cache_find_any_parent(mrt,
							    c->mf6c_parent);
			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
				return c;
		}

skip:
	return ip6mr_cache_find_any_parent(mrt, mifi);
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}

/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */

		skb_put(skb, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb);
		skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

		/*
		 *	Add our header
		 */
		skb_put(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);

		msg->im6_mbz = 0;
		msg->im6_msgtype = assert;
		msg->im6_mif = mifi;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (!mrt->mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution. It gets a locked cache entry!
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

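		/* Bound the unresolved queue: at most 10 entries per table. */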
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == c->mf6c_parent)) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}

static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct mif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif6_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, &list);
		}
	}
	unregister_netdevice_many(&list);

	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};

/*
 *	Setup for IP multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};

int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}

static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;

	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == mfc->mf6cc_parent)) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr6_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
			mif6_delete(mrt, i, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (c->mfc_flags & MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

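/* Register sk as the mroute socket for this table (MRT6_INIT). */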
static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	}
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			inet6_netconf_notify_devconf(net,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return mrt->mroute6_sk;
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
			return -EOPNOTSUPP;
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
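		/* fall through */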
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk == mrt->mroute6_sk, parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (activating PIM also activates asserts)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == mrt->mroute6_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}

/*
 *	Getsockopt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
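/* 32-bit layouts of the SIOCGETSGCNT_IN6 / SIOCGETMIFCNT_IN6
 * request structures
 */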
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static inline int ip6mr_forward2_finish(struct sk_buff *skb)
{
	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
			 IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if an mrouter runs a multicasting
	 * program, it should receive packets regardless of which interface
	 * the program has joined on; otherwise the program would have to
	 * join on all interfaces. On the other hand, a multihomed host (or
	 * router, but not mrouter) cannot join on more than one interface -
	 * that would result in receiving duplicate packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}

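/* Map a net_device to its mif index; returns -1 if the device has no mif. */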
2070 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2071 {
2072 int ct;
2073
2074 for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2075 if (mrt->vif6_table[ct].dev == dev)
2076 break;
2077 }
2078 return ct;
2079 }
2080
2081 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2082 struct sk_buff *skb, struct mfc6_cache *cache)
2083 {
2084 int psend = -1;
2085 int vif, ct;
2086 int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2087
2088 vif = cache->mf6c_parent;
2089 cache->mfc_un.res.pkt++;
2090 cache->mfc_un.res.bytes += skb->len;
2091
2092 if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2093 struct mfc6_cache *cache_proxy;
2094
2095 /* For an (*,G) entry, we only check that the incoming
2096 * interface is part of the static tree.
2097 */
2098 cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2099 if (cache_proxy &&
2100 cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2101 goto forward;
2102 }
2103
2104 /*
2105 * Wrong interface: drop packet and (maybe) send PIM assert.
2106 */
2107 if (mrt->vif6_table[vif].dev != skb->dev) {
2108 cache->mfc_un.res.wrong_if++;
2109
2110 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2111 /* pimsm uses asserts, when switching from RPT to SPT,
2112 so that we cannot check that packet arrived on an oif.
2113 It is bad, but otherwise we would need to move pretty
2114 large chunk of pimd to kernel. Ough... --ANK
2115 */
2116 (mrt->mroute_do_pim ||
2117 cache->mfc_un.res.ttls[true_vifi] < 255) &&
2118 time_after(jiffies,
2119 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2120 cache->mfc_un.res.last_assert = jiffies;
2121 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2122 }
2123 goto dont_forward;
2124 }
2125
2126 forward:
2127 mrt->vif6_table[vif].pkt_in++;
2128 mrt->vif6_table[vif].bytes_in += skb->len;
2129
2130 /*
2131 * Forward the frame
2132 */
2133 if (ipv6_addr_any(&cache->mf6c_origin) &&
2134 ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2135 if (true_vifi >= 0 &&
2136 true_vifi != cache->mf6c_parent &&
2137 ipv6_hdr(skb)->hop_limit >
2138 cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2139 /* It's an (*,*) entry and the packet is not coming from
2140 * the upstream: forward the packet to the upstream
2141 * only.
2142 */
2143 psend = cache->mf6c_parent;
2144 goto last_forward;
2145 }
2146 goto dont_forward;
2147 }
2148 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2149 /* For (*,G) entry, don't forward to the incoming interface */
2150 if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2151 ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2152 if (psend != -1) {
2153 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2154 if (skb2)
2155 ip6mr_forward2(net, mrt, skb2, cache, psend);
2156 }
2157 psend = ct;
2158 }
2159 }
2160 last_forward:
2161 if (psend != -1) {
2162 ip6mr_forward2(net, mrt, skb, cache, psend);
2163 return;
2164 }
2165
2166 dont_forward:
2167 kfree_skb(skb);
2168 }
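/*
 * Userspace sketch (illustration only, kept out of the build by the
 * "#if 0" guard): the MRT6MSG_WRONGMIF upcall generated above arrives
 * on the raw ICMPv6 socket the daemon passed to MRT6_INIT, as a
 * struct mrt6msg with im6_mbz == 0.  handle_assert() is hypothetical.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/mroute6.h>

extern void handle_assert(const struct mrt6msg *msg);	/* hypothetical */

static void drain_upcalls(int mrt_sock)
{
	char buf[8192];
	ssize_t len;

	while ((len = recv(mrt_sock, buf, sizeof(buf), 0)) > 0) {
		struct mrt6msg *msg = (struct mrt6msg *)buf;

		if (len >= (ssize_t)sizeof(*msg) && msg->im6_mbz == 0 &&
		    msg->im6_msgtype == MRT6MSG_WRONGMIF)
			handle_assert(msg);
	}
}
#endif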
2169
2170
2171 /*
2172 * Multicast packets for forwarding arrive here
2173 */
2174
2175 int ip6_mr_input(struct sk_buff *skb)
2176 {
2177 struct mfc6_cache *cache;
2178 struct net *net = dev_net(skb->dev);
2179 struct mr6_table *mrt;
2180 struct flowi6 fl6 = {
2181 .flowi6_iif = skb->dev->ifindex,
2182 .flowi6_mark = skb->mark,
2183 };
2184 int err;
2185
2186 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2187 if (err < 0) {
2188 kfree_skb(skb);
2189 return err;
2190 }
2191
2192 read_lock(&mrt_lock);
2193 cache = ip6mr_cache_find(mrt,
2194 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2195 if (!cache) {
2196 int vif = ip6mr_find_vif(mrt, skb->dev);
2197
2198 if (vif >= 0)
2199 cache = ip6mr_cache_find_any(mrt,
2200 &ipv6_hdr(skb)->daddr,
2201 vif);
2202 }
2203
2204 /*
2205 * No usable cache entry
2206 */
2207 if (!cache) {
2208 int vif;
2209
2210 vif = ip6mr_find_vif(mrt, skb->dev);
2211 if (vif >= 0) {
2212 int err = ip6mr_cache_unresolved(mrt, vif, skb);
2213 read_unlock(&mrt_lock);
2214
2215 return err;
2216 }
2217 read_unlock(&mrt_lock);
2218 kfree_skb(skb);
2219 return -ENODEV;
2220 }
2221
2222 ip6_mr_forward(net, mrt, skb, cache);
2223
2224 read_unlock(&mrt_lock);
2225
2226 return 0;
2227 }
2228
2229
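/* Fill the attribute portion of a route message for one cache entry:
 * RTA_IIF for the parent vif, an RTA_MULTIPATH nest holding one
 * rtnexthop per forwarding oif (rtnh_hops carries the TTL threshold),
 * and RTA_MFC_STATS.  Returns -ENOENT for unresolved entries so that
 * dumps can skip them gracefully.
 */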
2230 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2231 struct mfc6_cache *c, struct rtmsg *rtm)
2232 {
2233 int ct;
2234 struct rtnexthop *nhp;
2235 struct nlattr *mp_attr;
2236 struct rta_mfc_stats mfcs;
2237
2238 /* If cache is unresolved, don't try to parse IIF and OIF */
2239 if (c->mf6c_parent >= MAXMIFS)
2240 return -ENOENT;
2241
2242 if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2243 nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2244 return -EMSGSIZE;
2245 mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2246 if (!mp_attr)
2247 return -EMSGSIZE;
2248
2249 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2250 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2251 nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2252 if (!nhp) {
2253 nla_nest_cancel(skb, mp_attr);
2254 return -EMSGSIZE;
2255 }
2256
2257 nhp->rtnh_flags = 0;
2258 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2259 nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2260 nhp->rtnh_len = sizeof(*nhp);
2261 }
2262 }
2263
2264 nla_nest_end(skb, mp_attr);
2265
2266 mfcs.mfcs_packets = c->mfc_un.res.pkt;
2267 mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2268 mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2269 if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2270 return -EMSGSIZE;
2271
2272 rtm->rtm_type = RTN_MULTICAST;
2273 return 1;
2274 }
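/*
 * Userspace sketch (illustration only, kept out of the build by the
 * "#if 0" guard): walking the RTA_MULTIPATH attribute built above,
 * given the RTA_MULTIPATH rtattr "mp" from a received route message.
 */
#if 0
#include <stdio.h>
#include <linux/rtnetlink.h>

static void print_oifs(struct rtattr *mp)
{
	struct rtnexthop *nh = RTA_DATA(mp);
	int len = RTA_PAYLOAD(mp);

	while (RTNH_OK(nh, len)) {
		printf("oif %d ttl-threshold %d\n",
		       nh->rtnh_ifindex, nh->rtnh_hops);
		len -= RTNH_ALIGN(nh->rtnh_len);
		nh = RTNH_NEXT(nh);
	}
}
#endif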
2275
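/* RTM_GETROUTE handler for multicast destinations: fill in the MFC
 * entry matching the skb's route, or, if none is cached and nowait is
 * not set, queue a dummy header-only skb as an unresolved cache entry
 * for the routing daemon to resolve (nowait callers get -EAGAIN).
 */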
2276 int ip6mr_get_route(struct net *net,
2277 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2278 {
2279 int err;
2280 struct mr6_table *mrt;
2281 struct mfc6_cache *cache;
2282 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2283
2284 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2285 if (!mrt)
2286 return -ENOENT;
2287
2288 read_lock(&mrt_lock);
2289 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2290 if (!cache && skb->dev) {
2291 int vif = ip6mr_find_vif(mrt, skb->dev);
2292
2293 if (vif >= 0)
2294 cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2295 vif);
2296 }
2297
2298 if (!cache) {
2299 struct sk_buff *skb2;
2300 struct ipv6hdr *iph;
2301 struct net_device *dev;
2302 int vif;
2303
2304 if (nowait) {
2305 read_unlock(&mrt_lock);
2306 return -EAGAIN;
2307 }
2308
2309 dev = skb->dev;
2310 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2311 read_unlock(&mrt_lock);
2312 return -ENODEV;
2313 }
2314
2315 /* XXX: is queueing a dummy header-only skb really correct? */
2316 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2317 if (!skb2) {
2318 read_unlock(&mrt_lock);
2319 return -ENOMEM;
2320 }
2321
2322 skb_reset_transport_header(skb2);
2323
2324 skb_put(skb2, sizeof(struct ipv6hdr));
2325 skb_reset_network_header(skb2);
2326
2327 iph = ipv6_hdr(skb2);
2328 iph->version = 0;
2329 iph->priority = 0;
2330 iph->flow_lbl[0] = 0;
2331 iph->flow_lbl[1] = 0;
2332 iph->flow_lbl[2] = 0;
2333 iph->payload_len = 0;
2334 iph->nexthdr = IPPROTO_NONE;
2335 iph->hop_limit = 0;
2336 iph->saddr = rt->rt6i_src.addr;
2337 iph->daddr = rt->rt6i_dst.addr;
2338
2339 err = ip6mr_cache_unresolved(mrt, vif, skb2);
2340 read_unlock(&mrt_lock);
2341
2342 return err;
2343 }
2344
2345 if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
2346 cache->mfc_flags |= MFC_NOTIFY;
2347
2348 err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2349 read_unlock(&mrt_lock);
2350 return err;
2351 }
2352
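/* Build a complete netlink message (nlmsghdr + rtmsg + attributes)
 * around __ip6mr_fill_mroute() for notifications and dumps.
 */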
2353 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2354 u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2355 int flags)
2356 {
2357 struct nlmsghdr *nlh;
2358 struct rtmsg *rtm;
2359 int err;
2360
2361 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2362 if (!nlh)
2363 return -EMSGSIZE;
2364
2365 rtm = nlmsg_data(nlh);
2366 rtm->rtm_family = RTNL_FAMILY_IP6MR;
2367 rtm->rtm_dst_len = 128;
2368 rtm->rtm_src_len = 128;
2369 rtm->rtm_tos = 0;
2370 rtm->rtm_table = mrt->id;
2371 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2372 goto nla_put_failure;
2373 rtm->rtm_type = RTN_MULTICAST;
2374 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2375 if (c->mfc_flags & MFC_STATIC)
2376 rtm->rtm_protocol = RTPROT_STATIC;
2377 else
2378 rtm->rtm_protocol = RTPROT_MROUTED;
2379 rtm->rtm_flags = 0;
2380
2381 if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2382 nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2383 goto nla_put_failure;
2384 err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2385 /* do not break the dump if cache is unresolved */
2386 if (err < 0 && err != -ENOENT)
2387 goto nla_put_failure;
2388
2389 nlmsg_end(skb, nlh);
2390 return 0;
2391
2392 nla_put_failure:
2393 nlmsg_cancel(skb, nlh);
2394 return -EMSGSIZE;
2395 }
2396
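/* Worst-case netlink payload for one MFC entry; unresolved entries
 * carry no oif list and no stats.
 */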
2397 static int mr6_msgsize(bool unresolved, int maxvif)
2398 {
2399 size_t len =
2400 NLMSG_ALIGN(sizeof(struct rtmsg))
2401 + nla_total_size(4) /* RTA_TABLE */
2402 + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */
2403 + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */
2404 ;
2405
2406 if (!unresolved)
2407 len = len
2408 + nla_total_size(4) /* RTA_IIF */
2409 + nla_total_size(0) /* RTA_MULTIPATH */
2410 + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2411 /* RTA_MFC_STATS */
2412 + nla_total_size(sizeof(struct rta_mfc_stats))
2413 ;
2414
2415 return len;
2416 }
2417
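/* Send an rtnetlink notification about @mfc to the RTNLGRP_IPV6_MROUTE
 * group; if allocation or fill fails, report the error to the group
 * via rtnl_set_sk_err() instead.
 */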
2418 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2419 int cmd)
2420 {
2421 struct net *net = read_pnet(&mrt->net);
2422 struct sk_buff *skb;
2423 int err = -ENOBUFS;
2424
2425 skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2426 GFP_ATOMIC);
2427 if (!skb)
2428 goto errout;
2429
2430 err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2431 if (err < 0)
2432 goto errout;
2433
2434 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2435 return;
2436
2437 errout:
2438 kfree_skb(skb);
2439 if (err < 0)
2440 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2441 }
2442
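/* Dump all MFC entries, resolved and unresolved, across all tables.
 * The current (table, hash line, entry) position is kept in cb->args[]
 * so an interrupted dump can resume where it left off.
 */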
2443 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2444 {
2445 struct net *net = sock_net(skb->sk);
2446 struct mr6_table *mrt;
2447 struct mfc6_cache *mfc;
2448 unsigned int t = 0, s_t;
2449 unsigned int h = 0, s_h;
2450 unsigned int e = 0, s_e;
2451
2452 s_t = cb->args[0];
2453 s_h = cb->args[1];
2454 s_e = cb->args[2];
2455
2456 read_lock(&mrt_lock);
2457 ip6mr_for_each_table(mrt, net) {
2458 if (t < s_t)
2459 goto next_table;
2460 if (t > s_t)
2461 s_h = 0;
2462 for (h = s_h; h < MFC6_LINES; h++) {
2463 list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2464 if (e < s_e)
2465 goto next_entry;
2466 if (ip6mr_fill_mroute(mrt, skb,
2467 NETLINK_CB(cb->skb).portid,
2468 cb->nlh->nlmsg_seq,
2469 mfc, RTM_NEWROUTE,
2470 NLM_F_MULTI) < 0)
2471 goto done;
2472 next_entry:
2473 e++;
2474 }
2475 e = s_e = 0;
2476 }
2477 spin_lock_bh(&mfc_unres_lock);
2478 list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2479 if (e < s_e)
2480 goto next_entry2;
2481 if (ip6mr_fill_mroute(mrt, skb,
2482 NETLINK_CB(cb->skb).portid,
2483 cb->nlh->nlmsg_seq,
2484 mfc, RTM_NEWROUTE,
2485 NLM_F_MULTI) < 0) {
2486 spin_unlock_bh(&mfc_unres_lock);
2487 goto done;
2488 }
2489 next_entry2:
2490 e++;
2491 }
2492 spin_unlock_bh(&mfc_unres_lock);
2493 e = s_e = 0;
2494 s_h = 0;
2495 next_table:
2496 t++;
2497 }
2498 done:
2499 read_unlock(&mrt_lock);
2500
2501 cb->args[2] = e;
2502 cb->args[1] = h;
2503 cb->args[0] = t;
2504
2505 return skb->len;
2506 }
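/*
 * Userspace sketch (illustration only, kept out of the build by the
 * "#if 0" guard): requesting the dump served by ip6mr_rtm_dumproute()
 * above.  A real client would loop over recv() until NLMSG_DONE and
 * check for errors.
 */
#if 0
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/rtnetlink.h>

static void request_mfc_dump(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
	} req;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rtm));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.rtm.rtm_family = RTNL_FAMILY_IP6MR;

	send(fd, &req, req.nlh.nlmsg_len, 0);
	/* ... read RTM_NEWROUTE multipart replies here ... */
	close(fd);
}
#endif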