/*
 * Linux NET3: GRE over IP protocol decoder.
 *
 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it requires maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; since cpu migration is forbidden once we enter the first
   ndo_xmit(), we force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but they would
   really kill the network. The IP hop limit plays the role of
   "t->recursion" in this case, if we copy it from the packet being
   encapsulated to the upper header. It is a very good solution, but
   it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea turned out to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all other routers (at least in my neighbourhood)
     return only 8 bytes of payload. That is the end of it.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short: ttl is
   not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the packets being encapsulated
   have DF set. But it is not our problem! Nobody could accuse us,
   we did all that we could. Even if it is your gated that injected
   the fatal route to the network, even if it was you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
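
/* A worked illustration of the DF-based loop damping described above
 * (editorial note, not from the original author): suppose a GRE tunnel
 * is accidentally routed over itself. Each pass adds tunnel->hlen plus
 * a 20-byte outer IP header, and because DF is forced, each pass also
 * shrinks the usable tunnel MTU by the same amount. After a bounded
 * number of iterations the tunnel MTU sinks below the IPv4 minimum of
 * 68 bytes, transmission fails, and the loop starves instead of the
 * traffic growing exponentially.
 */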

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. It makes it impossible to maintain even soft
	   state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder, rfc1812 was written by a Cisco employee;
	   why the hell do these idiots break the standards established
	   by themselves???
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return;

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}
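
/* Editorial note: t->err_count and t->err_time recorded above are not
 * acted on here; if memory serves, they are consumed on the transmit
 * side (ip_tunnel_xmit() in net/ipv4/ip_tunnel.c), which uses them to
 * rate-limit dst_link_failure() notifications for the tunnel within
 * IPTUNNEL_ERR_TIMEO of a reported error.
 */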

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. It makes it impossible to maintain even
	 * soft state for keyed GRE tunnels with checksums enabled.
	 * Tell them "thank you".
	 *
	 * Well, I wonder, rfc1812 was written by a Cisco employee;
	 * why the hell do these idiots break the standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;
	bool csum_err = false;

	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
			     iph->ihl * 4) < 0) {
		if (!csum_err)		/* ignore csum errors. */
			return;
	}

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
			      IPPROTO_GRE, 0);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	struct erspanhdr *ershdr;
	const struct iphdr *iph;
	__be32 index;
	int len;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	if (unlikely(!pskb_may_pull(skb, len)))
		return PACKET_REJECT;

	iph = ip_hdr(skb);
	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);

	/* The original GRE header does not have a key field;
	 * use the ERSPAN 10-bit session ID as the key.
	 */
	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
	index = ershdr->md.index;
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb,
					   gre_hdr_len + sizeof(*ershdr),
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			if (!md) {
				dst_release((struct dst_entry *)tun_dst);
				return PACKET_REJECT;
			}

			md->index = index;
			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		} else {
			tunnel->index = ntohl(index);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

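/* Editorial note on the return convention shared with erspan_rcv():
 * PACKET_RCVD means the skb was consumed (delivered or dropped),
 * PACKET_REJECT asks the caller to generate an ICMP error, and
 * PACKET_NEXT means no tunnel matched, so ipgre_rcv() may retry the
 * lookup against another tunnel table.
 */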
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}

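/* Flow-based ("collect metadata") transmit path: instead of the device's
 * own parms, each skb carries its tunnel endpoints, key and flags in
 * skb_tunnel_info(), typically set by an external controller such as
 * openvswitch or an "ip link ... external" device. (Editorial summary
 * of the code below.)
 */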
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_rt;

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;

	/* ERSPAN has a fixed 8-byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;

	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

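/* tos_to_cos() keeps the top three bits of the DSCP, i.e. the class
 * selector. A worked example (editorial, not from the original source):
 * tos 0xb8 is DSCP 46 (0xb8 >> 2, Expedited Forwarding), and
 * 46 >> 3 == 5, so EF traffic is mirrored with CoS 5.
 */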
static inline u8 tos_to_cos(u8 tos)
{
	u8 dscp, cos;

	dscp = tos >> 2;
	cos = dscp >> 3;
	return cos;
}

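/* ERSPAN type II header layout assumed by the masks and offsets used
 * below (editorial sketch; the definitions live in net/erspan.h):
 *
 *  0                   1                   2                   3
 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |  Ver  |          VLAN         | COS | En|T|    Session ID     |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |      Reserved         |                  Index                |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 */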
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate)
{
	struct iphdr *iphdr = ip_hdr(skb);
	struct ethhdr *eth = eth_hdr(skb);
	enum erspan_encap_type enc_type;
	struct erspanhdr *ershdr;
	struct qtag_prefix {
		__be16 eth_type;
		__be16 tci;
	} *qp;
	u16 vlan_tci = 0;

	enc_type = ERSPAN_ENCAP_NOVLAN;

	/* If the mirrored packet has a vlan tag, extract the tci and
	 * preserve the vlan header in the mirrored frame.
	 */
	if (eth->h_proto == htons(ETH_P_8021Q)) {
		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
		vlan_tci = ntohs(qp->tci);
		enc_type = ERSPAN_ENCAP_INFRAME;
	}

	skb_push(skb, sizeof(*ershdr));
	ershdr = (struct erspanhdr *)skb->data;
	memset(ershdr, 0, sizeof(*ershdr));

	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
				 (ERSPAN_VERSION << VER_OFFSET));
	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
			   (enc_type << EN_OFFSET & EN_MASK) |
			   ((truncate << T_OFFSET) & T_MASK));
	ershdr->md.index = htonl(index & INDEX_MASK);
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int len;

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	dev->needed_headroom = dev->needed_headroom + len;
	if (set_mtu)
		dev->mtu = max_t(int, dev->mtu - len, 68);

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		} else {
			dev->features &= ~NETIF_F_GSO_SOFTWARE;
			dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		}
		dev->features |= NETIF_F_LLTX;
	} else {
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
	}
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->parms.i_flags = p.i_flags;
		t->parms.o_flags = p.o_flags;

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;

	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone,
   play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
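
/* Editorial note: on modern systems the ifconfig lines above map to
 * iproute2 roughly as follows (untested sketch, placeholders kept):
 *
 *   ip link set Universe up
 *   ip addr add fe80::<Your_real_addr>/10 dev Universe
 *   ip addr add fec0:6666:6666::<Your_real_addr>/96 dev Universe
 */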
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops = &ipgre_netdev_ops;
	dev->type = ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

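/* Editorial note on the header-length accounting below: the base GRE
 * header is 4 bytes, and gre_calc_hlen() adds 4 bytes for each of the
 * TUNNEL_CSUM, TUNNEL_KEY and TUNNEL_SEQ output flags, so tun_hlen
 * ranges from 4 to 16 bytes before any UDP-style encapsulation
 * (tunnel->encap_hlen) is added on top.
 */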
static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 * an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags = IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len = 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* ERSPAN should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits wide. Since we reuse
	 * the 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_INDEX]) {
		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);

		if (t->index & ~INDEX_MASK)
			return -EINVAL;
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen;

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       sizeof(struct erspanhdr);
	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
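	/* Editorial note: max_mtu == 0 disables the core upper bound in
	 * dev_set_mtu(), letting callers such as openvswitch raise the
	 * MTU up to IP_MAX_MTU (see gretap_fb_dev_create() below).
	 */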
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = 0;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
		ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	if (t->index)
		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
			goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
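
/* Usage sketch (editorial; the device name is hypothetical): an
 * in-kernel caller such as openvswitch can create a flow-based gretap
 * device with:
 *
 *	struct net_device *dev;
 *
 *	dev = gretap_fb_dev_create(net, "gretap64", NET_NAME_USER);
 *	if (IS_ERR(dev))
 *		return PTR_ERR(dev);
 *
 * Per-packet tunnel endpoints are then supplied via dst_metadata
 * rather than via device parameters.
 */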

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");