]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - net/ipv4/ip_gre.c
geneve: add dst caching support
[mirror_ubuntu-focal-kernel.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
afd46503
JP
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
4fc268d2 15#include <linux/capability.h>
1da177e4
LT
16#include <linux/module.h>
17#include <linux/types.h>
1da177e4 18#include <linux/kernel.h>
5a0e3ad6 19#include <linux/slab.h>
1da177e4
LT
20#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
2e15ea39 27#include <linux/if_vlan.h>
1da177e4
LT
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
e1a80002 33#include <linux/etherdevice.h>
46f25dff 34#include <linux/if_ether.h>
1da177e4
LT
35
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
c5441932 40#include <net/ip_tunnels.h>
1da177e4
LT
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
59a4c759
PE
46#include <net/net_namespace.h>
47#include <net/netns/generic.h>
c19e654d 48#include <net/rtnetlink.h>
00959ade 49#include <net/gre.h>
2e15ea39 50#include <net/dst_metadata.h>
1da177e4 51
dfd56b8b 52#if IS_ENABLED(CONFIG_IPV6)
1da177e4
LT
53#include <net/ipv6.h>
54#include <net/ip6_fib.h>
55#include <net/ip6_route.h>
56#endif
57
58/*
59 Problems & solutions
60 --------------------
61
62 1. The most important issue is detecting local dead loops.
63 They would cause complete host lockup in transmit, which
64 would be "resolved" by stack overflow or, if queueing is enabled,
65 with infinite looping in net_bh.
66
67 We cannot track such dead loops during route installation,
68 it is infeasible task. The most general solutions would be
69 to keep skb->encapsulation counter (sort of local ttl),
6d0722a2 70 and silently drop packet when it expires. It is a good
bff52857 71 solution, but it supposes maintaining new variable in ALL
1da177e4
LT
72 skb, even if no tunneling is used.
73
6d0722a2
ED
74 Current solution: xmit_recursion breaks dead loops. This is a percpu
75 counter, since when we enter the first ndo_xmit(), cpu migration is
76 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
1da177e4
LT
77
78 2. Networking dead loops would not kill routers, but would really
79 kill network. IP hop limit plays role of "t->recursion" in this case,
80 if we copy it from packet being encapsulated to upper header.
81 It is very good solution, but it introduces two problems:
82
83 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
84 do not work over tunnels.
85 - traceroute does not work. I planned to relay ICMP from tunnel,
86 so that this problem would be solved and traceroute output
87 would be even more informative. This idea turned out to be wrong:
88 only Linux complies to rfc1812 now (yes, guys, Linux is the only
89 true router now :-)), all routers (at least, in neighbourhood of mine)
90 return only 8 bytes of payload. It is the end.
91
92 Hence, if we want that OSPF worked or traceroute said something reasonable,
93 we should search for another solution.
94
95 One of them is to parse packet trying to detect inner encapsulation
96 made by our node. It is difficult or even impossible, especially,
bff52857 97 taking into account fragmentation. To be short, ttl is not a solution at all.
1da177e4
LT
98
99 Current solution: The solution was UNEXPECTEDLY SIMPLE.
100 We force DF flag on tunnels with preconfigured hop limit,
101 that is ALL. :-) Well, it does not remove the problem completely,
102 but exponential growth of network traffic is changed to linear
103 (branches, that exceed pmtu are pruned) and tunnel mtu
bff52857 104 rapidly degrades to value <68, where looping stops.
1da177e4
LT
105 Yes, it is not good if there exists a router in the loop,
106 which does not force DF, even when encapsulating packets have DF set.
107 But it is not our problem! Nobody could accuse us, we made
108 all that we could make. Even if it is your gated who injected
109 fatal route to network, even if it were you who configured
110 fatal static route: you are innocent. :-)
111
1da177e4
LT
112 Alexey Kuznetsov.
113 */
114
eccc1bb8 115static bool log_ecn_error = true;
116module_param(log_ecn_error, bool, 0644);
117MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
118
c19e654d 119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4 120static int ipgre_tunnel_init(struct net_device *dev);
eb8ce741 121
f99189b1 122static int ipgre_net_id __read_mostly;
c5441932 123static int gre_tap_net_id __read_mostly;
1da177e4 124
9f57c67c
PS
125static int ip_gre_calc_hlen(__be16 o_flags)
126{
127 int addend = 4;
128
129 if (o_flags & TUNNEL_CSUM)
130 addend += 4;
131 if (o_flags & TUNNEL_KEY)
132 addend += 4;
133 if (o_flags & TUNNEL_SEQ)
134 addend += 4;
135 return addend;
136}
137
138static __be16 gre_flags_to_tnl_flags(__be16 flags)
139{
140 __be16 tflags = 0;
141
142 if (flags & GRE_CSUM)
143 tflags |= TUNNEL_CSUM;
144 if (flags & GRE_ROUTING)
145 tflags |= TUNNEL_ROUTING;
146 if (flags & GRE_KEY)
147 tflags |= TUNNEL_KEY;
148 if (flags & GRE_SEQ)
149 tflags |= TUNNEL_SEQ;
150 if (flags & GRE_STRICT)
151 tflags |= TUNNEL_STRICT;
152 if (flags & GRE_REC)
153 tflags |= TUNNEL_REC;
154 if (flags & GRE_VERSION)
155 tflags |= TUNNEL_VERSION;
156
157 return tflags;
158}
159
160static __be16 tnl_flags_to_gre_flags(__be16 tflags)
161{
162 __be16 flags = 0;
163
164 if (tflags & TUNNEL_CSUM)
165 flags |= GRE_CSUM;
166 if (tflags & TUNNEL_ROUTING)
167 flags |= GRE_ROUTING;
168 if (tflags & TUNNEL_KEY)
169 flags |= GRE_KEY;
170 if (tflags & TUNNEL_SEQ)
171 flags |= GRE_SEQ;
172 if (tflags & TUNNEL_STRICT)
173 flags |= GRE_STRICT;
174 if (tflags & TUNNEL_REC)
175 flags |= GRE_REC;
176 if (tflags & TUNNEL_VERSION)
177 flags |= GRE_VERSION;
178
179 return flags;
180}
181
182static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
183 bool *csum_err)
184{
185 const struct gre_base_hdr *greh;
186 __be32 *options;
187 int hdr_len;
188
189 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
190 return -EINVAL;
191
192 greh = (struct gre_base_hdr *)skb_transport_header(skb);
193 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
194 return -EINVAL;
195
196 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
197 hdr_len = ip_gre_calc_hlen(tpi->flags);
198
199 if (!pskb_may_pull(skb, hdr_len))
200 return -EINVAL;
201
202 greh = (struct gre_base_hdr *)skb_transport_header(skb);
203 tpi->proto = greh->protocol;
204
205 options = (__be32 *)(greh + 1);
206 if (greh->flags & GRE_CSUM) {
207 if (skb_checksum_simple_validate(skb)) {
208 *csum_err = true;
209 return -EINVAL;
210 }
211
212 skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
213 null_compute_pseudo);
214 options++;
215 }
216
217 if (greh->flags & GRE_KEY) {
218 tpi->key = *options;
219 options++;
220 } else {
221 tpi->key = 0;
222 }
223 if (unlikely(greh->flags & GRE_SEQ)) {
224 tpi->seq = *options;
225 options++;
226 } else {
227 tpi->seq = 0;
228 }
229 /* WCCP version 1 and 2 protocol decoding.
230 * - Change protocol to IP
231 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
232 */
233 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
234 tpi->proto = htons(ETH_P_IP);
235 if ((*(u8 *)options & 0xF0) != 0x40) {
236 hdr_len += 4;
237 if (!pskb_may_pull(skb, hdr_len))
238 return -EINVAL;
239 }
240 }
241 return iptunnel_pull_header(skb, hdr_len, tpi->proto);
242}
243
244static void ipgre_err(struct sk_buff *skb, u32 info,
245 const struct tnl_ptk_info *tpi)
1da177e4 246{
1da177e4 247
c5441932
PS
248 /* All the routers (except for Linux) return only
249 8 bytes of packet payload. It means, that precise relaying of
250 ICMP in the real Internet is absolutely infeasible.
1da177e4 251
c5441932
PS
252 Moreover, Cisco "wise men" put GRE key to the third word
253 in GRE header. It makes impossible maintaining even soft
254 state for keyed GRE tunnels with enabled checksum. Tell
255 them "thank you".
1da177e4 256
c5441932
PS
257 Well, I wonder, rfc1812 was written by Cisco employee,
258 what the hell these idiots break standards established
259 by themselves???
260 */
261 struct net *net = dev_net(skb->dev);
262 struct ip_tunnel_net *itn;
96f5a846 263 const struct iphdr *iph;
88c7664f
ACM
264 const int type = icmp_hdr(skb)->type;
265 const int code = icmp_hdr(skb)->code;
1da177e4 266 struct ip_tunnel *t;
1da177e4 267
1da177e4
LT
268 switch (type) {
269 default:
270 case ICMP_PARAMETERPROB:
9f57c67c 271 return;
1da177e4
LT
272
273 case ICMP_DEST_UNREACH:
274 switch (code) {
275 case ICMP_SR_FAILED:
276 case ICMP_PORT_UNREACH:
277 /* Impossible event. */
9f57c67c 278 return;
1da177e4
LT
279 default:
280 /* All others are translated to HOST_UNREACH.
281 rfc2003 contains "deep thoughts" about NET_UNREACH,
282 I believe they are just ether pollution. --ANK
283 */
284 break;
285 }
286 break;
9f57c67c 287
1da177e4
LT
288 case ICMP_TIME_EXCEEDED:
289 if (code != ICMP_EXC_TTL)
9f57c67c 290 return;
1da177e4 291 break;
55be7a9c
DM
292
293 case ICMP_REDIRECT:
294 break;
1da177e4
LT
295 }
296
bda7bb46 297 if (tpi->proto == htons(ETH_P_TEB))
c5441932
PS
298 itn = net_generic(net, gre_tap_net_id);
299 else
300 itn = net_generic(net, ipgre_net_id);
301
c0c0c50f 302 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
bda7bb46
PS
303 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
304 iph->daddr, iph->saddr, tpi->key);
d2083287 305
51456b29 306 if (!t)
9f57c67c 307 return;
36393395 308
36393395 309 if (t->parms.iph.daddr == 0 ||
f97c1e0c 310 ipv4_is_multicast(t->parms.iph.daddr))
9f57c67c 311 return;
1da177e4
LT
312
313 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
9f57c67c 314 return;
1da177e4 315
da6185d8 316 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
317 t->err_count++;
318 else
319 t->err_count = 1;
320 t->err_time = jiffies;
9f57c67c
PS
321}
322
323static void gre_err(struct sk_buff *skb, u32 info)
324{
325 /* All the routers (except for Linux) return only
326 * 8 bytes of packet payload. It means, that precise relaying of
327 * ICMP in the real Internet is absolutely infeasible.
328 *
329 * Moreover, Cisco "wise men" put GRE key to the third word
330 * in GRE header. It makes impossible maintaining even soft
331 * state for keyed
332 * GRE tunnels with enabled checksum. Tell them "thank you".
333 *
334 * Well, I wonder, rfc1812 was written by Cisco employee,
335 * what the hell these idiots break standards established
336 * by themselves???
337 */
338
339 const int type = icmp_hdr(skb)->type;
340 const int code = icmp_hdr(skb)->code;
341 struct tnl_ptk_info tpi;
342 bool csum_err = false;
343
344 if (parse_gre_header(skb, &tpi, &csum_err)) {
345 if (!csum_err) /* ignore csum errors. */
346 return;
347 }
348
349 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
350 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
351 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
352 return;
353 }
354 if (type == ICMP_REDIRECT) {
355 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
356 IPPROTO_GRE, 0);
357 return;
358 }
359
360 ipgre_err(skb, info, &tpi);
1da177e4
LT
361}
362
2e15ea39
PS
363static __be64 key_to_tunnel_id(__be32 key)
364{
365#ifdef __BIG_ENDIAN
366 return (__force __be64)((__force u32)key);
367#else
368 return (__force __be64)((__force u64)key << 32);
369#endif
370}
371
372/* Returns the least-significant 32 bits of a __be64. */
373static __be32 tunnel_id_to_key(__be64 x)
374{
375#ifdef __BIG_ENDIAN
376 return (__force __be32)x;
377#else
378 return (__force __be32)((__force u64)x >> 32);
379#endif
380}
381
bda7bb46 382static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
1da177e4 383{
c5441932 384 struct net *net = dev_net(skb->dev);
2e15ea39 385 struct metadata_dst *tun_dst = NULL;
c5441932 386 struct ip_tunnel_net *itn;
b71d1d42 387 const struct iphdr *iph;
1da177e4 388 struct ip_tunnel *tunnel;
1da177e4 389
bda7bb46 390 if (tpi->proto == htons(ETH_P_TEB))
c5441932
PS
391 itn = net_generic(net, gre_tap_net_id);
392 else
393 itn = net_generic(net, ipgre_net_id);
1da177e4 394
c5441932 395 iph = ip_hdr(skb);
bda7bb46
PS
396 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
397 iph->saddr, iph->daddr, tpi->key);
e1a80002 398
d2083287 399 if (tunnel) {
0e3da5bb 400 skb_pop_mac_header(skb);
2e15ea39 401 if (tunnel->collect_md) {
c29a70d2
PS
402 __be16 flags;
403 __be64 tun_id;
2e15ea39 404
c29a70d2
PS
405 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
406 tun_id = key_to_tunnel_id(tpi->key);
407 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
2e15ea39
PS
408 if (!tun_dst)
409 return PACKET_REJECT;
2e15ea39
PS
410 }
411
412 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
bda7bb46 413 return PACKET_RCVD;
1da177e4 414 }
bda7bb46 415 return PACKET_REJECT;
1da177e4
LT
416}
417
9f57c67c
PS
418static int gre_rcv(struct sk_buff *skb)
419{
420 struct tnl_ptk_info tpi;
421 bool csum_err = false;
422
423#ifdef CONFIG_NET_IPGRE_BROADCAST
424 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
425 /* Looped back packet, drop it! */
426 if (rt_is_output_route(skb_rtable(skb)))
427 goto drop;
428 }
429#endif
430
431 if (parse_gre_header(skb, &tpi, &csum_err) < 0)
432 goto drop;
433
434 if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
435 return 0;
436
437 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
438drop:
439 kfree_skb(skb);
440 return 0;
441}
442
53936107
EC
443static __sum16 gre_checksum(struct sk_buff *skb)
444{
445 __wsum csum;
446
447 if (skb->ip_summed == CHECKSUM_PARTIAL)
448 csum = lco_csum(skb);
449 else
450 csum = skb_checksum(skb, 0, skb->len, 0);
451 return csum_fold(csum);
452}
453
2e15ea39
PS
454static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
455 __be16 proto, __be32 key, __be32 seq)
456{
457 struct gre_base_hdr *greh;
458
459 skb_push(skb, hdr_len);
460
461 skb_reset_transport_header(skb);
462 greh = (struct gre_base_hdr *)skb->data;
463 greh->flags = tnl_flags_to_gre_flags(flags);
464 greh->protocol = proto;
465
466 if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
467 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
468
469 if (flags & TUNNEL_SEQ) {
470 *ptr = seq;
471 ptr--;
472 }
473 if (flags & TUNNEL_KEY) {
474 *ptr = key;
475 ptr--;
476 }
477 if (flags & TUNNEL_CSUM &&
478 !(skb_shinfo(skb)->gso_type &
479 (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
480 *ptr = 0;
53936107 481 *(__sum16 *)ptr = gre_checksum(skb);
2e15ea39
PS
482 }
483 }
484}
485
c5441932
PS
486static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
487 const struct iphdr *tnl_params,
488 __be16 proto)
489{
490 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 491
c5441932
PS
492 if (tunnel->parms.o_flags & TUNNEL_SEQ)
493 tunnel->o_seqno++;
1da177e4 494
c5441932 495 /* Push GRE header. */
2e15ea39
PS
496 build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
497 proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
54bc9bac 498
2e15ea39 499 skb_set_inner_protocol(skb, proto);
bf3d6a8f 500 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
c5441932 501}
1da177e4 502
b2acd1dc
PS
503static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
504 bool csum)
505{
6fa79666 506 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
b2acd1dc
PS
507}
508
fc4099f1
PS
509static struct rtable *gre_get_rt(struct sk_buff *skb,
510 struct net_device *dev,
511 struct flowi4 *fl,
512 const struct ip_tunnel_key *key)
513{
514 struct net *net = dev_net(dev);
515
516 memset(fl, 0, sizeof(*fl));
517 fl->daddr = key->u.ipv4.dst;
518 fl->saddr = key->u.ipv4.src;
519 fl->flowi4_tos = RT_TOS(key->tos);
520 fl->flowi4_mark = skb->mark;
521 fl->flowi4_proto = IPPROTO_GRE;
522
523 return ip_route_output_key(net, fl);
524}
525
2e15ea39
PS
526static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
527{
528 struct ip_tunnel_info *tun_info;
2e15ea39
PS
529 const struct ip_tunnel_key *key;
530 struct flowi4 fl;
531 struct rtable *rt;
532 int min_headroom;
533 int tunnel_hlen;
534 __be16 df, flags;
535 int err;
536
61adedf3 537 tun_info = skb_tunnel_info(skb);
7f9562a1
JB
538 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
539 ip_tunnel_info_af(tun_info) != AF_INET))
2e15ea39
PS
540 goto err_free_skb;
541
542 key = &tun_info->key;
fc4099f1 543 rt = gre_get_rt(skb, dev, &fl, key);
2e15ea39
PS
544 if (IS_ERR(rt))
545 goto err_free_skb;
546
547 tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
548
549 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
550 + tunnel_hlen + sizeof(struct iphdr);
551 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
552 int head_delta = SKB_DATA_ALIGN(min_headroom -
553 skb_headroom(skb) +
554 16);
555 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
556 0, GFP_ATOMIC);
557 if (unlikely(err))
558 goto err_free_rt;
559 }
560
561 /* Push Tunnel header. */
562 skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
563 if (IS_ERR(skb)) {
564 skb = NULL;
565 goto err_free_rt;
566 }
567
568 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
569 build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
570 tunnel_id_to_key(tun_info->key.tun_id), 0);
571
572 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
039f5062
PS
573
574 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
575 key->tos, key->ttl, df, false);
2e15ea39
PS
576 return;
577
578err_free_rt:
579 ip_rt_put(rt);
580err_free_skb:
581 kfree_skb(skb);
582 dev->stats.tx_dropped++;
583}
584
fc4099f1
PS
585static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
586{
587 struct ip_tunnel_info *info = skb_tunnel_info(skb);
588 struct rtable *rt;
589 struct flowi4 fl4;
590
591 if (ip_tunnel_info_af(info) != AF_INET)
592 return -EINVAL;
593
594 rt = gre_get_rt(skb, dev, &fl4, &info->key);
595 if (IS_ERR(rt))
596 return PTR_ERR(rt);
597
598 ip_rt_put(rt);
599 info->key.u.ipv4.src = fl4.saddr;
600 return 0;
601}
602
c5441932
PS
603static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
604 struct net_device *dev)
605{
606 struct ip_tunnel *tunnel = netdev_priv(dev);
607 const struct iphdr *tnl_params;
1da177e4 608
2e15ea39
PS
609 if (tunnel->collect_md) {
610 gre_fb_xmit(skb, dev);
611 return NETDEV_TX_OK;
612 }
613
c5441932
PS
614 if (dev->header_ops) {
615 /* Need space for new headers */
616 if (skb_cow_head(skb, dev->needed_headroom -
2bac7cb3 617 (tunnel->hlen + sizeof(struct iphdr))))
c5441932 618 goto free_skb;
1da177e4 619
c5441932 620 tnl_params = (const struct iphdr *)skb->data;
1da177e4 621
c5441932
PS
622 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
623 * to gre header.
624 */
625 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
8a0033a9 626 skb_reset_mac_header(skb);
c5441932
PS
627 } else {
628 if (skb_cow_head(skb, dev->needed_headroom))
629 goto free_skb;
1da177e4 630
c5441932 631 tnl_params = &tunnel->parms.iph;
1da177e4
LT
632 }
633
8a0033a9
TT
634 skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
635 if (IS_ERR(skb))
636 goto out;
637
c5441932 638 __gre_xmit(skb, dev, tnl_params, skb->protocol);
6ed10654 639 return NETDEV_TX_OK;
1da177e4 640
c5441932 641free_skb:
3acfa1e7 642 kfree_skb(skb);
c5441932
PS
643out:
644 dev->stats.tx_dropped++;
6ed10654 645 return NETDEV_TX_OK;
1da177e4
LT
646}
647
c5441932
PS
648static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
649 struct net_device *dev)
ee34c1eb 650{
c5441932 651 struct ip_tunnel *tunnel = netdev_priv(dev);
ee34c1eb 652
2e15ea39
PS
653 if (tunnel->collect_md) {
654 gre_fb_xmit(skb, dev);
655 return NETDEV_TX_OK;
656 }
657
45f2e997 658 skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
c5441932
PS
659 if (IS_ERR(skb))
660 goto out;
ee34c1eb 661
c5441932
PS
662 if (skb_cow_head(skb, dev->needed_headroom))
663 goto free_skb;
42aa9162 664
c5441932 665 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
c5441932 666 return NETDEV_TX_OK;
ee34c1eb 667
c5441932 668free_skb:
3acfa1e7 669 kfree_skb(skb);
c5441932
PS
670out:
671 dev->stats.tx_dropped++;
672 return NETDEV_TX_OK;
ee34c1eb
MS
673}
674
c5441932
PS
675static int ipgre_tunnel_ioctl(struct net_device *dev,
676 struct ifreq *ifr, int cmd)
1da177e4 677{
4565e991 678 int err;
1da177e4 679 struct ip_tunnel_parm p;
1da177e4 680
c5441932
PS
681 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
682 return -EFAULT;
6c734fb8
CW
683 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
684 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
685 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
686 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
687 return -EINVAL;
1da177e4 688 }
c5441932
PS
689 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
690 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1da177e4 691
c5441932
PS
692 err = ip_tunnel_ioctl(dev, &p, cmd);
693 if (err)
694 return err;
1da177e4 695
c5441932
PS
696 p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
697 p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
698
699 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
700 return -EFAULT;
1da177e4
LT
701 return 0;
702}
703
1da177e4
LT
704/* Nice toy. Unfortunately, useless in real life :-)
705 It allows to construct virtual multiprotocol broadcast "LAN"
706 over the Internet, provided multicast routing is tuned.
707
708
709 I have no idea whether this bicycle was invented before me,
710 so that I had to set ARPHRD_IPGRE to a random value.
711 I have an impression, that Cisco could make something similar,
712 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 713
1da177e4
LT
714 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
715 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
716
717 ping -t 255 224.66.66.66
718
719 If nobody answers, mbone does not work.
720
721 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
722 ip addr add 10.66.66.<somewhat>/24 dev Universe
723 ifconfig Universe up
724 ifconfig Universe add fe80::<Your_real_addr>/10
725 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
726 ftp 10.66.66.66
727 ...
728 ftp fec0:6666:6666::193.233.7.65
729 ...
1da177e4 730 */
3b04ddde
SH
731static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
732 unsigned short type,
1507850b 733 const void *daddr, const void *saddr, unsigned int len)
1da177e4 734{
2941a486 735 struct ip_tunnel *t = netdev_priv(dev);
c5441932
PS
736 struct iphdr *iph;
737 struct gre_base_hdr *greh;
1da177e4 738
c5441932
PS
739 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
740 greh = (struct gre_base_hdr *)(iph+1);
741 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
742 greh->protocol = htons(type);
1da177e4 743
c5441932 744 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
e905a9ed 745
c5441932 746 /* Set the source hardware address. */
1da177e4
LT
747 if (saddr)
748 memcpy(&iph->saddr, saddr, 4);
6d55cb91 749 if (daddr)
1da177e4 750 memcpy(&iph->daddr, daddr, 4);
6d55cb91 751 if (iph->daddr)
77a482bd 752 return t->hlen + sizeof(*iph);
e905a9ed 753
c5441932 754 return -(t->hlen + sizeof(*iph));
1da177e4
LT
755}
756
6a5f44d7
TT
757static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
758{
b71d1d42 759 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
760 memcpy(haddr, &iph->saddr, 4);
761 return 4;
762}
763
3b04ddde
SH
764static const struct header_ops ipgre_header_ops = {
765 .create = ipgre_header,
6a5f44d7 766 .parse = ipgre_header_parse,
3b04ddde
SH
767};
768
6a5f44d7 769#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
770static int ipgre_open(struct net_device *dev)
771{
2941a486 772 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 773
f97c1e0c 774 if (ipv4_is_multicast(t->parms.iph.daddr)) {
cbb1e85f
DM
775 struct flowi4 fl4;
776 struct rtable *rt;
777
b57708ad 778 rt = ip_route_output_gre(t->net, &fl4,
cbb1e85f
DM
779 t->parms.iph.daddr,
780 t->parms.iph.saddr,
781 t->parms.o_key,
782 RT_TOS(t->parms.iph.tos),
783 t->parms.link);
b23dd4fe 784 if (IS_ERR(rt))
1da177e4 785 return -EADDRNOTAVAIL;
d8d1f30b 786 dev = rt->dst.dev;
1da177e4 787 ip_rt_put(rt);
51456b29 788 if (!__in_dev_get_rtnl(dev))
1da177e4
LT
789 return -EADDRNOTAVAIL;
790 t->mlink = dev->ifindex;
e5ed6399 791 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
792 }
793 return 0;
794}
795
796static int ipgre_close(struct net_device *dev)
797{
2941a486 798 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 799
f97c1e0c 800 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 801 struct in_device *in_dev;
b57708ad 802 in_dev = inetdev_by_index(t->net, t->mlink);
8723e1b4 803 if (in_dev)
1da177e4 804 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1da177e4
LT
805 }
806 return 0;
807}
1da177e4
LT
808#endif
809
b8c26a33
SH
810static const struct net_device_ops ipgre_netdev_ops = {
811 .ndo_init = ipgre_tunnel_init,
c5441932 812 .ndo_uninit = ip_tunnel_uninit,
b8c26a33
SH
813#ifdef CONFIG_NET_IPGRE_BROADCAST
814 .ndo_open = ipgre_open,
815 .ndo_stop = ipgre_close,
816#endif
c5441932 817 .ndo_start_xmit = ipgre_xmit,
b8c26a33 818 .ndo_do_ioctl = ipgre_tunnel_ioctl,
c5441932
PS
819 .ndo_change_mtu = ip_tunnel_change_mtu,
820 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 821 .ndo_get_iflink = ip_tunnel_get_iflink,
b8c26a33
SH
822};
823
6b78f16e
ED
/* Feature bits that __gre_tunnel_init() sets on every GRE device. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
828
1da177e4
LT
829static void ipgre_tunnel_setup(struct net_device *dev)
830{
b8c26a33 831 dev->netdev_ops = &ipgre_netdev_ops;
5a455275 832 dev->type = ARPHRD_IPGRE;
c5441932
PS
833 ip_tunnel_setup(dev, ipgre_net_id);
834}
1da177e4 835
c5441932
PS
836static void __gre_tunnel_init(struct net_device *dev)
837{
838 struct ip_tunnel *tunnel;
4565e991 839 int t_hlen;
c5441932
PS
840
841 tunnel = netdev_priv(dev);
4565e991 842 tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
c5441932
PS
843 tunnel->parms.iph.protocol = IPPROTO_GRE;
844
4565e991
TH
845 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
846
847 t_hlen = tunnel->hlen + sizeof(struct iphdr);
848
849 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
850 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
6b78f16e 851
b57708ad 852 dev->features |= GRE_FEATURES;
6b78f16e 853 dev->hw_features |= GRE_FEATURES;
c5441932
PS
854
855 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
856 /* TCP offload with GRE SEQ is not supported. */
857 dev->features |= NETIF_F_GSO_SOFTWARE;
858 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
859 /* Can use a lockless transmit, unless we generate
860 * output sequences
861 */
862 dev->features |= NETIF_F_LLTX;
863 }
1da177e4
LT
864}
865
866static int ipgre_tunnel_init(struct net_device *dev)
867{
c5441932
PS
868 struct ip_tunnel *tunnel = netdev_priv(dev);
869 struct iphdr *iph = &tunnel->parms.iph;
1da177e4 870
c5441932 871 __gre_tunnel_init(dev);
1da177e4 872
c5441932
PS
873 memcpy(dev->dev_addr, &iph->saddr, 4);
874 memcpy(dev->broadcast, &iph->daddr, 4);
1da177e4 875
c5441932 876 dev->flags = IFF_NOARP;
02875878 877 netif_keep_dst(dev);
c5441932 878 dev->addr_len = 4;
1da177e4 879
1da177e4 880 if (iph->daddr) {
1da177e4 881#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 882 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
883 if (!iph->saddr)
884 return -EINVAL;
885 dev->flags = IFF_BROADCAST;
3b04ddde 886 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
887 }
888#endif
ee34c1eb 889 } else
6a5f44d7 890 dev->header_ops = &ipgre_header_ops;
1da177e4 891
c5441932 892 return ip_tunnel_init(dev);
1da177e4
LT
893}
894
9f57c67c
PS
895static const struct gre_protocol ipgre_protocol = {
896 .handler = gre_rcv,
897 .err_handler = gre_err,
1da177e4
LT
898};
899
2c8c1e72 900static int __net_init ipgre_init_net(struct net *net)
59a4c759 901{
c5441932 902 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
59a4c759
PE
903}
904
2c8c1e72 905static void __net_exit ipgre_exit_net(struct net *net)
59a4c759 906{
c5441932 907 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
6c742e71 908 ip_tunnel_delete_net(itn, &ipgre_link_ops);
59a4c759
PE
909}
910
911static struct pernet_operations ipgre_net_ops = {
912 .init = ipgre_init_net,
913 .exit = ipgre_exit_net,
cfb8fbf2 914 .id = &ipgre_net_id,
c5441932 915 .size = sizeof(struct ip_tunnel_net),
59a4c759 916};
1da177e4 917
c19e654d
HX
918static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
919{
920 __be16 flags;
921
922 if (!data)
923 return 0;
924
925 flags = 0;
926 if (data[IFLA_GRE_IFLAGS])
927 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
928 if (data[IFLA_GRE_OFLAGS])
929 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
930 if (flags & (GRE_VERSION|GRE_ROUTING))
931 return -EINVAL;
932
933 return 0;
934}
935
e1a80002
HX
936static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
937{
938 __be32 daddr;
939
940 if (tb[IFLA_ADDRESS]) {
941 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
942 return -EINVAL;
943 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
944 return -EADDRNOTAVAIL;
945 }
946
947 if (!data)
948 goto out;
949
950 if (data[IFLA_GRE_REMOTE]) {
951 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
952 if (!daddr)
953 return -EINVAL;
954 }
955
956out:
957 return ipgre_tunnel_validate(tb, data);
958}
959
2e15ea39
PS
960static void ipgre_netlink_parms(struct net_device *dev,
961 struct nlattr *data[],
962 struct nlattr *tb[],
963 struct ip_tunnel_parm *parms)
c19e654d 964{
7bb82d92 965 memset(parms, 0, sizeof(*parms));
c19e654d
HX
966
967 parms->iph.protocol = IPPROTO_GRE;
968
969 if (!data)
970 return;
971
972 if (data[IFLA_GRE_LINK])
973 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
974
975 if (data[IFLA_GRE_IFLAGS])
c5441932 976 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
c19e654d
HX
977
978 if (data[IFLA_GRE_OFLAGS])
c5441932 979 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
c19e654d
HX
980
981 if (data[IFLA_GRE_IKEY])
982 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
983
984 if (data[IFLA_GRE_OKEY])
985 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
986
987 if (data[IFLA_GRE_LOCAL])
67b61f6c 988 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
c19e654d
HX
989
990 if (data[IFLA_GRE_REMOTE])
67b61f6c 991 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
c19e654d
HX
992
993 if (data[IFLA_GRE_TTL])
994 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
995
996 if (data[IFLA_GRE_TOS])
997 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
998
999 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1000 parms->iph.frag_off = htons(IP_DF);
2e15ea39
PS
1001
1002 if (data[IFLA_GRE_COLLECT_METADATA]) {
1003 struct ip_tunnel *t = netdev_priv(dev);
1004
1005 t->collect_md = true;
1006 }
c19e654d
HX
1007}
1008
4565e991
TH
1009/* This function returns true when ENCAP attributes are present in the nl msg */
1010static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1011 struct ip_tunnel_encap *ipencap)
1012{
1013 bool ret = false;
1014
1015 memset(ipencap, 0, sizeof(*ipencap));
1016
1017 if (!data)
1018 return ret;
1019
1020 if (data[IFLA_GRE_ENCAP_TYPE]) {
1021 ret = true;
1022 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1023 }
1024
1025 if (data[IFLA_GRE_ENCAP_FLAGS]) {
1026 ret = true;
1027 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1028 }
1029
1030 if (data[IFLA_GRE_ENCAP_SPORT]) {
1031 ret = true;
3e97fa70 1032 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
4565e991
TH
1033 }
1034
1035 if (data[IFLA_GRE_ENCAP_DPORT]) {
1036 ret = true;
3e97fa70 1037 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
4565e991
TH
1038 }
1039
1040 return ret;
1041}
1042
c5441932 1043static int gre_tap_init(struct net_device *dev)
e1a80002 1044{
c5441932 1045 __gre_tunnel_init(dev);
bec94d43 1046 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
e1a80002 1047
c5441932 1048 return ip_tunnel_init(dev);
e1a80002
HX
1049}
1050
c5441932
PS
1051static const struct net_device_ops gre_tap_netdev_ops = {
1052 .ndo_init = gre_tap_init,
1053 .ndo_uninit = ip_tunnel_uninit,
1054 .ndo_start_xmit = gre_tap_xmit,
b8c26a33
SH
1055 .ndo_set_mac_address = eth_mac_addr,
1056 .ndo_validate_addr = eth_validate_addr,
c5441932
PS
1057 .ndo_change_mtu = ip_tunnel_change_mtu,
1058 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 1059 .ndo_get_iflink = ip_tunnel_get_iflink,
fc4099f1 1060 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
b8c26a33
SH
1061};
1062
e1a80002
HX
1063static void ipgre_tap_setup(struct net_device *dev)
1064{
e1a80002 1065 ether_setup(dev);
c5441932 1066 dev->netdev_ops = &gre_tap_netdev_ops;
f8c1b7ce 1067 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
c5441932 1068 ip_tunnel_setup(dev, gre_tap_net_id);
e1a80002
HX
1069}
1070
c5441932
PS
1071static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1072 struct nlattr *tb[], struct nlattr *data[])
c19e654d 1073{
c5441932 1074 struct ip_tunnel_parm p;
4565e991
TH
1075 struct ip_tunnel_encap ipencap;
1076
1077 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1078 struct ip_tunnel *t = netdev_priv(dev);
1079 int err = ip_tunnel_encap_setup(t, &ipencap);
1080
1081 if (err < 0)
1082 return err;
1083 }
c19e654d 1084
2e15ea39 1085 ipgre_netlink_parms(dev, data, tb, &p);
c5441932 1086 return ip_tunnel_newlink(dev, tb, &p);
c19e654d
HX
1087}
1088
1089static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1090 struct nlattr *data[])
1091{
c19e654d 1092 struct ip_tunnel_parm p;
4565e991
TH
1093 struct ip_tunnel_encap ipencap;
1094
1095 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1096 struct ip_tunnel *t = netdev_priv(dev);
1097 int err = ip_tunnel_encap_setup(t, &ipencap);
1098
1099 if (err < 0)
1100 return err;
1101 }
c19e654d 1102
2e15ea39 1103 ipgre_netlink_parms(dev, data, tb, &p);
c5441932 1104 return ip_tunnel_changelink(dev, tb, &p);
c19e654d
HX
1105}
1106
/*
 * rtnl_link_ops get_size handler: sum of the netlink attribute sizes for
 * every IFLA_GRE_* attribute listed below, one entry per attribute with
 * its payload length in bytes.  @dev is not consulted.
 */
static size_t ipgre_get_size(const struct net_device *dev)
{
	int size = 0;

	size += nla_total_size(4);	/* IFLA_GRE_LINK */
	size += nla_total_size(2);	/* IFLA_GRE_IFLAGS */
	size += nla_total_size(2);	/* IFLA_GRE_OFLAGS */
	size += nla_total_size(4);	/* IFLA_GRE_IKEY */
	size += nla_total_size(4);	/* IFLA_GRE_OKEY */
	size += nla_total_size(4);	/* IFLA_GRE_LOCAL */
	size += nla_total_size(4);	/* IFLA_GRE_REMOTE */
	size += nla_total_size(1);	/* IFLA_GRE_TTL */
	size += nla_total_size(1);	/* IFLA_GRE_TOS */
	size += nla_total_size(1);	/* IFLA_GRE_PMTUDISC */
	size += nla_total_size(2);	/* IFLA_GRE_ENCAP_TYPE */
	size += nla_total_size(2);	/* IFLA_GRE_ENCAP_FLAGS */
	size += nla_total_size(2);	/* IFLA_GRE_ENCAP_SPORT */
	size += nla_total_size(2);	/* IFLA_GRE_ENCAP_DPORT */
	size += nla_total_size(0);	/* IFLA_GRE_COLLECT_METADATA */

	return size;
}
1142
1143static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1144{
1145 struct ip_tunnel *t = netdev_priv(dev);
1146 struct ip_tunnel_parm *p = &t->parms;
1147
f3756b79 1148 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
c5441932
PS
1149 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1150 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
f3756b79
DM
1151 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1152 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
930345ea
JB
1153 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1154 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
f3756b79
DM
1155 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1156 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1157 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1158 !!(p->iph.frag_off & htons(IP_DF))))
1159 goto nla_put_failure;
4565e991
TH
1160
1161 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1162 t->encap.type) ||
3e97fa70
SD
1163 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1164 t->encap.sport) ||
1165 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1166 t->encap.dport) ||
4565e991 1167 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
e1b2cb65 1168 t->encap.flags))
4565e991
TH
1169 goto nla_put_failure;
1170
2e15ea39
PS
1171 if (t->collect_md) {
1172 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1173 goto nla_put_failure;
1174 }
1175
c19e654d
HX
1176 return 0;
1177
1178nla_put_failure:
1179 return -EMSGSIZE;
1180}
1181
1182static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1183 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1184 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1185 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1186 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1187 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1188 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1189 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1190 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1191 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1192 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
4565e991
TH
1193 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1194 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1195 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1196 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
2e15ea39 1197 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
c19e654d
HX
1198};
1199
1200static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1201 .kind = "gre",
1202 .maxtype = IFLA_GRE_MAX,
1203 .policy = ipgre_policy,
1204 .priv_size = sizeof(struct ip_tunnel),
1205 .setup = ipgre_tunnel_setup,
1206 .validate = ipgre_tunnel_validate,
1207 .newlink = ipgre_newlink,
1208 .changelink = ipgre_changelink,
c5441932 1209 .dellink = ip_tunnel_dellink,
c19e654d
HX
1210 .get_size = ipgre_get_size,
1211 .fill_info = ipgre_fill_info,
1728d4fa 1212 .get_link_net = ip_tunnel_get_link_net,
c19e654d
HX
1213};
1214
e1a80002
HX
1215static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1216 .kind = "gretap",
1217 .maxtype = IFLA_GRE_MAX,
1218 .policy = ipgre_policy,
1219 .priv_size = sizeof(struct ip_tunnel),
1220 .setup = ipgre_tap_setup,
1221 .validate = ipgre_tap_validate,
1222 .newlink = ipgre_newlink,
1223 .changelink = ipgre_changelink,
c5441932 1224 .dellink = ip_tunnel_dellink,
e1a80002
HX
1225 .get_size = ipgre_get_size,
1226 .fill_info = ipgre_fill_info,
1728d4fa 1227 .get_link_net = ip_tunnel_get_link_net,
e1a80002
HX
1228};
1229
b2acd1dc
PS
1230struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1231 u8 name_assign_type)
1232{
1233 struct nlattr *tb[IFLA_MAX + 1];
1234 struct net_device *dev;
1235 struct ip_tunnel *t;
1236 int err;
1237
1238 memset(&tb, 0, sizeof(tb));
1239
1240 dev = rtnl_create_link(net, name, name_assign_type,
1241 &ipgre_tap_ops, tb);
1242 if (IS_ERR(dev))
1243 return dev;
1244
1245 /* Configure flow based GRE device. */
1246 t = netdev_priv(dev);
1247 t->collect_md = true;
1248
1249 err = ipgre_newlink(net, dev, tb, NULL);
1250 if (err < 0)
1251 goto out;
1252 return dev;
1253out:
1254 free_netdev(dev);
1255 return ERR_PTR(err);
1256}
1257EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1258
c5441932
PS
1259static int __net_init ipgre_tap_init_net(struct net *net)
1260{
2e15ea39 1261 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
c5441932
PS
1262}
1263
1264static void __net_exit ipgre_tap_exit_net(struct net *net)
1265{
1266 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
6c742e71 1267 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
c5441932
PS
1268}
1269
1270static struct pernet_operations ipgre_tap_net_ops = {
1271 .init = ipgre_tap_init_net,
1272 .exit = ipgre_tap_exit_net,
1273 .id = &gre_tap_net_id,
1274 .size = sizeof(struct ip_tunnel_net),
1275};
1da177e4
LT
1276
1277static int __init ipgre_init(void)
1278{
1279 int err;
1280
058bd4d2 1281 pr_info("GRE over IPv4 tunneling driver\n");
1da177e4 1282
cfb8fbf2 1283 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1284 if (err < 0)
c2892f02
AD
1285 return err;
1286
c5441932
PS
1287 err = register_pernet_device(&ipgre_tap_net_ops);
1288 if (err < 0)
1289 goto pnet_tap_faied;
1290
9f57c67c 1291 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1292 if (err < 0) {
058bd4d2 1293 pr_info("%s: can't add protocol\n", __func__);
c2892f02
AD
1294 goto add_proto_failed;
1295 }
7daa0004 1296
c19e654d
HX
1297 err = rtnl_link_register(&ipgre_link_ops);
1298 if (err < 0)
1299 goto rtnl_link_failed;
1300
e1a80002
HX
1301 err = rtnl_link_register(&ipgre_tap_ops);
1302 if (err < 0)
1303 goto tap_ops_failed;
1304
c5441932 1305 return 0;
c19e654d 1306
e1a80002
HX
1307tap_ops_failed:
1308 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1309rtnl_link_failed:
9f57c67c 1310 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1311add_proto_failed:
c5441932
PS
1312 unregister_pernet_device(&ipgre_tap_net_ops);
1313pnet_tap_faied:
c2892f02 1314 unregister_pernet_device(&ipgre_net_ops);
c5441932 1315 return err;
1da177e4
LT
1316}
1317
db44575f 1318static void __exit ipgre_fini(void)
1da177e4 1319{
e1a80002 1320 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1321 rtnl_link_unregister(&ipgre_link_ops);
9f57c67c 1322 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c5441932 1323 unregister_pernet_device(&ipgre_tap_net_ops);
c2892f02 1324 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1325}
1326
1327module_init(ipgre_init);
1328module_exit(ipgre_fini);
1329MODULE_LICENSE("GPL");
4d74f8ba
PM
1330MODULE_ALIAS_RTNL_LINK("gre");
1331MODULE_ALIAS_RTNL_LINK("gretap");
8909c9ad 1332MODULE_ALIAS_NETDEV("gre0");
c5441932 1333MODULE_ALIAS_NETDEV("gretap0");