]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv4/ip_gre.c
Merge remote-tracking branches 'asoc/topic/sgtl5000', 'asoc/topic/simple', 'asoc...
[mirror_ubuntu-zesty-kernel.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
afd46503
JP
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
4fc268d2 15#include <linux/capability.h>
1da177e4
LT
16#include <linux/module.h>
17#include <linux/types.h>
1da177e4 18#include <linux/kernel.h>
5a0e3ad6 19#include <linux/slab.h>
1da177e4
LT
20#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
2e15ea39 27#include <linux/if_vlan.h>
1da177e4
LT
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
e1a80002 33#include <linux/etherdevice.h>
46f25dff 34#include <linux/if_ether.h>
1da177e4
LT
35
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
c5441932 40#include <net/ip_tunnels.h>
1da177e4
LT
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
59a4c759
PE
46#include <net/net_namespace.h>
47#include <net/netns/generic.h>
c19e654d 48#include <net/rtnetlink.h>
00959ade 49#include <net/gre.h>
2e15ea39 50#include <net/dst_metadata.h>
1da177e4 51
1da177e4
LT
52/*
53 Problems & solutions
54 --------------------
55
56 1. The most important issue is detecting local dead loops.
57 They would cause complete host lockup in transmit, which
58 would be "resolved" by stack overflow or, if queueing is enabled,
59 with infinite looping in net_bh.
60
61 We cannot track such dead loops during route installation,
62 it is infeasible task. The most general solutions would be
63 to keep skb->encapsulation counter (sort of local ttl),
6d0722a2 64 and silently drop packet when it expires. It is a good
bff52857 65 solution, but it supposes maintaining new variable in ALL
1da177e4
LT
66 skb, even if no tunneling is used.
67
6d0722a2
ED
68 Current solution: xmit_recursion breaks dead loops. This is a percpu
69 counter, since when we enter the first ndo_xmit(), cpu migration is
70 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
1da177e4
LT
71
72 2. Networking dead loops would not kill routers, but would really
73 kill network. IP hop limit plays role of "t->recursion" in this case,
74 if we copy it from packet being encapsulated to upper header.
75 It is very good solution, but it introduces two problems:
76
77 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
78 do not work over tunnels.
79 - traceroute does not work. I planned to relay ICMP from tunnel,
80 so that this problem would be solved and traceroute output
81 would be even more informative. This idea appeared to be wrong:
82 only Linux complies to rfc1812 now (yes, guys, Linux is the only
83 true router now :-)), all routers (at least, in neighbourhood of mine)
84 return only 8 bytes of payload. It is the end.
85
86 Hence, if we want that OSPF worked or traceroute said something reasonable,
87 we should search for another solution.
88
89 One of them is to parse packet trying to detect inner encapsulation
90 made by our node. It is difficult or even impossible, especially,
bff52857 91 taking into account fragmentation. To be short, ttl is not a solution at all.
1da177e4
LT
92
93 Current solution: The solution was UNEXPECTEDLY SIMPLE.
94 We force DF flag on tunnels with preconfigured hop limit,
95 that is ALL. :-) Well, it does not remove the problem completely,
96 but exponential growth of network traffic is changed to linear
97 (branches, that exceed pmtu are pruned) and tunnel mtu
bff52857 98 rapidly degrades to value <68, where looping stops.
1da177e4
LT
99 Yes, it is not good if there exists a router in the loop,
100 which does not force DF, even when encapsulating packets have DF set.
101 But it is not our problem! Nobody could accuse us, we made
102 all that we could make. Even if it is your gated who injected
103 fatal route to network, even if it were you who configured
104 fatal static route: you are innocent. :-)
105
1da177e4
LT
106 Alexey Kuznetsov.
107 */
108
eccc1bb8 109static bool log_ecn_error = true;
110module_param(log_ecn_error, bool, 0644);
111MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
112
c19e654d 113static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4 114static int ipgre_tunnel_init(struct net_device *dev);
eb8ce741 115
f99189b1 116static int ipgre_net_id __read_mostly;
c5441932 117static int gre_tap_net_id __read_mostly;
1da177e4 118
9f57c67c
PS
119static void ipgre_err(struct sk_buff *skb, u32 info,
120 const struct tnl_ptk_info *tpi)
1da177e4 121{
1da177e4 122
c5441932
PS
123 /* All the routers (except for Linux) return only
124 8 bytes of packet payload. It means, that precise relaying of
125 ICMP in the real Internet is absolutely infeasible.
1da177e4 126
c5441932
PS
127 Moreover, Cisco "wise men" put GRE key to the third word
128 in GRE header. It makes impossible maintaining even soft
129 state for keyed GRE tunnels with enabled checksum. Tell
130 them "thank you".
1da177e4 131
c5441932
PS
132 Well, I wonder, rfc1812 was written by Cisco employee,
133 what the hell these idiots break standards established
134 by themselves???
135 */
136 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn;
96f5a846 138 const struct iphdr *iph;
88c7664f
ACM
139 const int type = icmp_hdr(skb)->type;
140 const int code = icmp_hdr(skb)->code;
20e1954f 141 unsigned int data_len = 0;
1da177e4 142 struct ip_tunnel *t;
1da177e4 143
1da177e4
LT
144 switch (type) {
145 default:
146 case ICMP_PARAMETERPROB:
9f57c67c 147 return;
1da177e4
LT
148
149 case ICMP_DEST_UNREACH:
150 switch (code) {
151 case ICMP_SR_FAILED:
152 case ICMP_PORT_UNREACH:
153 /* Impossible event. */
9f57c67c 154 return;
1da177e4
LT
155 default:
156 /* All others are translated to HOST_UNREACH.
157 rfc2003 contains "deep thoughts" about NET_UNREACH,
158 I believe they are just ether pollution. --ANK
159 */
160 break;
161 }
162 break;
9f57c67c 163
1da177e4
LT
164 case ICMP_TIME_EXCEEDED:
165 if (code != ICMP_EXC_TTL)
9f57c67c 166 return;
20e1954f 167 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
1da177e4 168 break;
55be7a9c
DM
169
170 case ICMP_REDIRECT:
171 break;
1da177e4
LT
172 }
173
bda7bb46 174 if (tpi->proto == htons(ETH_P_TEB))
c5441932
PS
175 itn = net_generic(net, gre_tap_net_id);
176 else
177 itn = net_generic(net, ipgre_net_id);
178
c0c0c50f 179 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
bda7bb46
PS
180 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
181 iph->daddr, iph->saddr, tpi->key);
d2083287 182
51456b29 183 if (!t)
9f57c67c 184 return;
36393395 185
9b8c6d7b
ED
186#if IS_ENABLED(CONFIG_IPV6)
187 if (tpi->proto == htons(ETH_P_IPV6) &&
20e1954f
ED
188 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
189 type, data_len))
9b8c6d7b
ED
190 return;
191#endif
192
36393395 193 if (t->parms.iph.daddr == 0 ||
f97c1e0c 194 ipv4_is_multicast(t->parms.iph.daddr))
9f57c67c 195 return;
1da177e4
LT
196
197 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
9f57c67c 198 return;
1da177e4 199
da6185d8 200 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
201 t->err_count++;
202 else
203 t->err_count = 1;
204 t->err_time = jiffies;
9f57c67c
PS
205}
206
207static void gre_err(struct sk_buff *skb, u32 info)
208{
209 /* All the routers (except for Linux) return only
210 * 8 bytes of packet payload. It means, that precise relaying of
211 * ICMP in the real Internet is absolutely infeasible.
212 *
213 * Moreover, Cisco "wise men" put GRE key to the third word
214 * in GRE header. It makes impossible maintaining even soft
215 * state for keyed
216 * GRE tunnels with enabled checksum. Tell them "thank you".
217 *
218 * Well, I wonder, rfc1812 was written by Cisco employee,
219 * what the hell these idiots break standards established
220 * by themselves???
221 */
222
e582615a 223 const struct iphdr *iph = (struct iphdr *)skb->data;
9f57c67c
PS
224 const int type = icmp_hdr(skb)->type;
225 const int code = icmp_hdr(skb)->code;
226 struct tnl_ptk_info tpi;
227 bool csum_err = false;
228
e582615a
ED
229 if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
230 iph->ihl * 4) < 0) {
9f57c67c
PS
231 if (!csum_err) /* ignore csum errors. */
232 return;
233 }
234
235 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
236 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
237 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
238 return;
239 }
240 if (type == ICMP_REDIRECT) {
241 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
242 IPPROTO_GRE, 0);
243 return;
244 }
245
246 ipgre_err(skb, info, &tpi);
1da177e4
LT
247}
248
2e15ea39
PS
249static __be64 key_to_tunnel_id(__be32 key)
250{
251#ifdef __BIG_ENDIAN
252 return (__force __be64)((__force u32)key);
253#else
254 return (__force __be64)((__force u64)key << 32);
255#endif
256}
257
258/* Returns the least-significant 32 bits of a __be64. */
259static __be32 tunnel_id_to_key(__be64 x)
260{
261#ifdef __BIG_ENDIAN
262 return (__force __be32)x;
263#else
264 return (__force __be32)((__force u64)x >> 32);
265#endif
266}
267
125372fa
JB
268static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
269 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
1da177e4 270{
2e15ea39 271 struct metadata_dst *tun_dst = NULL;
b71d1d42 272 const struct iphdr *iph;
1da177e4 273 struct ip_tunnel *tunnel;
1da177e4 274
c5441932 275 iph = ip_hdr(skb);
bda7bb46
PS
276 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
277 iph->saddr, iph->daddr, tpi->key);
e1a80002 278
d2083287 279 if (tunnel) {
125372fa
JB
280 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
281 raw_proto, false) < 0)
244a797b
JB
282 goto drop;
283
e271c7b4
JB
284 if (tunnel->dev->type != ARPHRD_NONE)
285 skb_pop_mac_header(skb);
286 else
287 skb_reset_mac_header(skb);
2e15ea39 288 if (tunnel->collect_md) {
c29a70d2
PS
289 __be16 flags;
290 __be64 tun_id;
2e15ea39 291
c29a70d2
PS
292 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
293 tun_id = key_to_tunnel_id(tpi->key);
294 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
2e15ea39
PS
295 if (!tun_dst)
296 return PACKET_REJECT;
2e15ea39
PS
297 }
298
299 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
bda7bb46 300 return PACKET_RCVD;
1da177e4 301 }
125372fa 302 return PACKET_NEXT;
244a797b
JB
303
304drop:
305 kfree_skb(skb);
306 return PACKET_RCVD;
1da177e4
LT
307}
308
125372fa
JB
309static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
310 int hdr_len)
311{
312 struct net *net = dev_net(skb->dev);
313 struct ip_tunnel_net *itn;
314 int res;
315
316 if (tpi->proto == htons(ETH_P_TEB))
317 itn = net_generic(net, gre_tap_net_id);
318 else
319 itn = net_generic(net, ipgre_net_id);
320
321 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
322 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
323 /* ipgre tunnels in collect metadata mode should receive
324 * also ETH_P_TEB traffic.
325 */
326 itn = net_generic(net, ipgre_net_id);
327 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
328 }
329 return res;
330}
331
9f57c67c
PS
332static int gre_rcv(struct sk_buff *skb)
333{
334 struct tnl_ptk_info tpi;
335 bool csum_err = false;
95f5c64c 336 int hdr_len;
9f57c67c
PS
337
338#ifdef CONFIG_NET_IPGRE_BROADCAST
339 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
340 /* Looped back packet, drop it! */
341 if (rt_is_output_route(skb_rtable(skb)))
342 goto drop;
343 }
344#endif
345
e582615a 346 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
f132ae7c 347 if (hdr_len < 0)
95f5c64c
TH
348 goto drop;
349
244a797b 350 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
9f57c67c
PS
351 return 0;
352
353 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
354drop:
355 kfree_skb(skb);
356 return 0;
357}
358
c5441932
PS
359static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
360 const struct iphdr *tnl_params,
361 __be16 proto)
362{
363 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 364
c5441932
PS
365 if (tunnel->parms.o_flags & TUNNEL_SEQ)
366 tunnel->o_seqno++;
1da177e4 367
c5441932 368 /* Push GRE header. */
182a352d
TH
369 gre_build_header(skb, tunnel->tun_hlen,
370 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
371 htonl(tunnel->o_seqno));
54bc9bac 372
bf3d6a8f 373 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
c5441932 374}
1da177e4 375
aed069df 376static int gre_handle_offloads(struct sk_buff *skb, bool csum)
b2acd1dc 377{
6fa79666 378 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
b2acd1dc
PS
379}
380
fc4099f1
PS
381static struct rtable *gre_get_rt(struct sk_buff *skb,
382 struct net_device *dev,
383 struct flowi4 *fl,
384 const struct ip_tunnel_key *key)
385{
386 struct net *net = dev_net(dev);
387
388 memset(fl, 0, sizeof(*fl));
389 fl->daddr = key->u.ipv4.dst;
390 fl->saddr = key->u.ipv4.src;
391 fl->flowi4_tos = RT_TOS(key->tos);
392 fl->flowi4_mark = skb->mark;
393 fl->flowi4_proto = IPPROTO_GRE;
394
395 return ip_route_output_key(net, fl);
396}
397
2090714e
JB
398static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
399 __be16 proto)
2e15ea39
PS
400{
401 struct ip_tunnel_info *tun_info;
2e15ea39 402 const struct ip_tunnel_key *key;
db3c6139 403 struct rtable *rt = NULL;
2e15ea39 404 struct flowi4 fl;
2e15ea39
PS
405 int min_headroom;
406 int tunnel_hlen;
407 __be16 df, flags;
db3c6139 408 bool use_cache;
2e15ea39
PS
409 int err;
410
61adedf3 411 tun_info = skb_tunnel_info(skb);
7f9562a1
JB
412 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
413 ip_tunnel_info_af(tun_info) != AF_INET))
2e15ea39
PS
414 goto err_free_skb;
415
416 key = &tun_info->key;
db3c6139
DB
417 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
418 if (use_cache)
419 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
3c1cb4d2
PA
420 if (!rt) {
421 rt = gre_get_rt(skb, dev, &fl, key);
422 if (IS_ERR(rt))
423 goto err_free_skb;
db3c6139 424 if (use_cache)
3c1cb4d2
PA
425 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
426 fl.saddr);
427 }
2e15ea39 428
95f5c64c 429 tunnel_hlen = gre_calc_hlen(key->tun_flags);
2e15ea39
PS
430
431 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
432 + tunnel_hlen + sizeof(struct iphdr);
433 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
434 int head_delta = SKB_DATA_ALIGN(min_headroom -
435 skb_headroom(skb) +
436 16);
437 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
438 0, GFP_ATOMIC);
439 if (unlikely(err))
440 goto err_free_rt;
441 }
442
443 /* Push Tunnel header. */
aed069df 444 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
2e15ea39 445 goto err_free_rt;
2e15ea39
PS
446
447 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
cba65321 448 gre_build_header(skb, tunnel_hlen, flags, proto,
182a352d 449 tunnel_id_to_key(tun_info->key.tun_id), 0);
2e15ea39
PS
450
451 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
039f5062
PS
452
453 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
454 key->tos, key->ttl, df, false);
2e15ea39
PS
455 return;
456
457err_free_rt:
458 ip_rt_put(rt);
459err_free_skb:
460 kfree_skb(skb);
461 dev->stats.tx_dropped++;
462}
463
fc4099f1
PS
464static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
465{
466 struct ip_tunnel_info *info = skb_tunnel_info(skb);
467 struct rtable *rt;
468 struct flowi4 fl4;
469
470 if (ip_tunnel_info_af(info) != AF_INET)
471 return -EINVAL;
472
473 rt = gre_get_rt(skb, dev, &fl4, &info->key);
474 if (IS_ERR(rt))
475 return PTR_ERR(rt);
476
477 ip_rt_put(rt);
478 info->key.u.ipv4.src = fl4.saddr;
479 return 0;
480}
481
c5441932
PS
482static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
483 struct net_device *dev)
484{
485 struct ip_tunnel *tunnel = netdev_priv(dev);
486 const struct iphdr *tnl_params;
1da177e4 487
2e15ea39 488 if (tunnel->collect_md) {
2090714e 489 gre_fb_xmit(skb, dev, skb->protocol);
2e15ea39
PS
490 return NETDEV_TX_OK;
491 }
492
c5441932
PS
493 if (dev->header_ops) {
494 /* Need space for new headers */
495 if (skb_cow_head(skb, dev->needed_headroom -
2bac7cb3 496 (tunnel->hlen + sizeof(struct iphdr))))
c5441932 497 goto free_skb;
1da177e4 498
c5441932 499 tnl_params = (const struct iphdr *)skb->data;
1da177e4 500
c5441932
PS
501 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
502 * to gre header.
503 */
504 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
8a0033a9 505 skb_reset_mac_header(skb);
c5441932
PS
506 } else {
507 if (skb_cow_head(skb, dev->needed_headroom))
508 goto free_skb;
1da177e4 509
c5441932 510 tnl_params = &tunnel->parms.iph;
1da177e4
LT
511 }
512
aed069df
AD
513 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
514 goto free_skb;
8a0033a9 515
c5441932 516 __gre_xmit(skb, dev, tnl_params, skb->protocol);
6ed10654 517 return NETDEV_TX_OK;
1da177e4 518
c5441932 519free_skb:
3acfa1e7 520 kfree_skb(skb);
c5441932 521 dev->stats.tx_dropped++;
6ed10654 522 return NETDEV_TX_OK;
1da177e4
LT
523}
524
c5441932
PS
525static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
526 struct net_device *dev)
ee34c1eb 527{
c5441932 528 struct ip_tunnel *tunnel = netdev_priv(dev);
ee34c1eb 529
2e15ea39 530 if (tunnel->collect_md) {
2090714e 531 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
2e15ea39
PS
532 return NETDEV_TX_OK;
533 }
534
aed069df
AD
535 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
536 goto free_skb;
ee34c1eb 537
c5441932
PS
538 if (skb_cow_head(skb, dev->needed_headroom))
539 goto free_skb;
42aa9162 540
c5441932 541 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
c5441932 542 return NETDEV_TX_OK;
ee34c1eb 543
c5441932 544free_skb:
3acfa1e7 545 kfree_skb(skb);
c5441932
PS
546 dev->stats.tx_dropped++;
547 return NETDEV_TX_OK;
ee34c1eb
MS
548}
549
c5441932
PS
550static int ipgre_tunnel_ioctl(struct net_device *dev,
551 struct ifreq *ifr, int cmd)
1da177e4 552{
4565e991 553 int err;
1da177e4 554 struct ip_tunnel_parm p;
1da177e4 555
c5441932
PS
556 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
557 return -EFAULT;
6c734fb8
CW
558 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
559 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
560 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
561 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
562 return -EINVAL;
1da177e4 563 }
c5441932
PS
564 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
565 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1da177e4 566
c5441932
PS
567 err = ip_tunnel_ioctl(dev, &p, cmd);
568 if (err)
569 return err;
1da177e4 570
95f5c64c
TH
571 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
572 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
c5441932
PS
573
574 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
575 return -EFAULT;
1da177e4
LT
576 return 0;
577}
578
1da177e4
LT
579/* Nice toy. Unfortunately, useless in real life :-)
580 It allows to construct virtual multiprotocol broadcast "LAN"
581 over the Internet, provided multicast routing is tuned.
582
583
584 I have no idea was this bicycle invented before me,
585 so that I had to set ARPHRD_IPGRE to a random value.
586 I have an impression, that Cisco could make something similar,
587 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 588
1da177e4
LT
589 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
590 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
591
592 ping -t 255 224.66.66.66
593
594 If nobody answers, mbone does not work.
595
596 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
597 ip addr add 10.66.66.<somewhat>/24 dev Universe
598 ifconfig Universe up
599 ifconfig Universe add fe80::<Your_real_addr>/10
600 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
601 ftp 10.66.66.66
602 ...
603 ftp fec0:6666:6666::193.233.7.65
604 ...
1da177e4 605 */
3b04ddde
SH
606static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
607 unsigned short type,
1507850b 608 const void *daddr, const void *saddr, unsigned int len)
1da177e4 609{
2941a486 610 struct ip_tunnel *t = netdev_priv(dev);
c5441932
PS
611 struct iphdr *iph;
612 struct gre_base_hdr *greh;
1da177e4 613
c5441932
PS
614 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
615 greh = (struct gre_base_hdr *)(iph+1);
95f5c64c 616 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
c5441932 617 greh->protocol = htons(type);
1da177e4 618
c5441932 619 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
e905a9ed 620
c5441932 621 /* Set the source hardware address. */
1da177e4
LT
622 if (saddr)
623 memcpy(&iph->saddr, saddr, 4);
6d55cb91 624 if (daddr)
1da177e4 625 memcpy(&iph->daddr, daddr, 4);
6d55cb91 626 if (iph->daddr)
77a482bd 627 return t->hlen + sizeof(*iph);
e905a9ed 628
c5441932 629 return -(t->hlen + sizeof(*iph));
1da177e4
LT
630}
631
6a5f44d7
TT
632static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
633{
b71d1d42 634 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
635 memcpy(haddr, &iph->saddr, 4);
636 return 4;
637}
638
3b04ddde
SH
639static const struct header_ops ipgre_header_ops = {
640 .create = ipgre_header,
6a5f44d7 641 .parse = ipgre_header_parse,
3b04ddde
SH
642};
643
6a5f44d7 644#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
645static int ipgre_open(struct net_device *dev)
646{
2941a486 647 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 648
f97c1e0c 649 if (ipv4_is_multicast(t->parms.iph.daddr)) {
cbb1e85f
DM
650 struct flowi4 fl4;
651 struct rtable *rt;
652
b57708ad 653 rt = ip_route_output_gre(t->net, &fl4,
cbb1e85f
DM
654 t->parms.iph.daddr,
655 t->parms.iph.saddr,
656 t->parms.o_key,
657 RT_TOS(t->parms.iph.tos),
658 t->parms.link);
b23dd4fe 659 if (IS_ERR(rt))
1da177e4 660 return -EADDRNOTAVAIL;
d8d1f30b 661 dev = rt->dst.dev;
1da177e4 662 ip_rt_put(rt);
51456b29 663 if (!__in_dev_get_rtnl(dev))
1da177e4
LT
664 return -EADDRNOTAVAIL;
665 t->mlink = dev->ifindex;
e5ed6399 666 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
667 }
668 return 0;
669}
670
671static int ipgre_close(struct net_device *dev)
672{
2941a486 673 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 674
f97c1e0c 675 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 676 struct in_device *in_dev;
b57708ad 677 in_dev = inetdev_by_index(t->net, t->mlink);
8723e1b4 678 if (in_dev)
1da177e4 679 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1da177e4
LT
680 }
681 return 0;
682}
1da177e4
LT
683#endif
684
b8c26a33
SH
685static const struct net_device_ops ipgre_netdev_ops = {
686 .ndo_init = ipgre_tunnel_init,
c5441932 687 .ndo_uninit = ip_tunnel_uninit,
b8c26a33
SH
688#ifdef CONFIG_NET_IPGRE_BROADCAST
689 .ndo_open = ipgre_open,
690 .ndo_stop = ipgre_close,
691#endif
c5441932 692 .ndo_start_xmit = ipgre_xmit,
b8c26a33 693 .ndo_do_ioctl = ipgre_tunnel_ioctl,
c5441932
PS
694 .ndo_change_mtu = ip_tunnel_change_mtu,
695 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 696 .ndo_get_iflink = ip_tunnel_get_iflink,
b8c26a33
SH
697};
698
6b78f16e
ED
/* Feature bits set on every GRE device by __gre_tunnel_init(). */
#define GRE_FEATURES	(NETIF_F_HW_CSUM |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_SG)
703
1da177e4
LT
704static void ipgre_tunnel_setup(struct net_device *dev)
705{
b8c26a33 706 dev->netdev_ops = &ipgre_netdev_ops;
5a455275 707 dev->type = ARPHRD_IPGRE;
c5441932
PS
708 ip_tunnel_setup(dev, ipgre_net_id);
709}
1da177e4 710
c5441932
PS
711static void __gre_tunnel_init(struct net_device *dev)
712{
713 struct ip_tunnel *tunnel;
4565e991 714 int t_hlen;
c5441932
PS
715
716 tunnel = netdev_priv(dev);
95f5c64c 717 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
c5441932
PS
718 tunnel->parms.iph.protocol = IPPROTO_GRE;
719
4565e991
TH
720 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
721
722 t_hlen = tunnel->hlen + sizeof(struct iphdr);
723
724 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
725 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
6b78f16e 726
b57708ad 727 dev->features |= GRE_FEATURES;
6b78f16e 728 dev->hw_features |= GRE_FEATURES;
c5441932
PS
729
730 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
a0ca153f
AD
731 /* TCP offload with GRE SEQ is not supported, nor
732 * can we support 2 levels of outer headers requiring
733 * an update.
734 */
735 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
736 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
737 dev->features |= NETIF_F_GSO_SOFTWARE;
738 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
739 }
740
c5441932
PS
741 /* Can use a lockless transmit, unless we generate
742 * output sequences
743 */
744 dev->features |= NETIF_F_LLTX;
745 }
1da177e4
LT
746}
747
748static int ipgre_tunnel_init(struct net_device *dev)
749{
c5441932
PS
750 struct ip_tunnel *tunnel = netdev_priv(dev);
751 struct iphdr *iph = &tunnel->parms.iph;
1da177e4 752
c5441932 753 __gre_tunnel_init(dev);
1da177e4 754
c5441932
PS
755 memcpy(dev->dev_addr, &iph->saddr, 4);
756 memcpy(dev->broadcast, &iph->daddr, 4);
1da177e4 757
c5441932 758 dev->flags = IFF_NOARP;
02875878 759 netif_keep_dst(dev);
c5441932 760 dev->addr_len = 4;
1da177e4 761
a64b04d8 762 if (iph->daddr && !tunnel->collect_md) {
1da177e4 763#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 764 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
765 if (!iph->saddr)
766 return -EINVAL;
767 dev->flags = IFF_BROADCAST;
3b04ddde 768 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
769 }
770#endif
a64b04d8 771 } else if (!tunnel->collect_md) {
6a5f44d7 772 dev->header_ops = &ipgre_header_ops;
a64b04d8 773 }
1da177e4 774
c5441932 775 return ip_tunnel_init(dev);
1da177e4
LT
776}
777
9f57c67c
PS
778static const struct gre_protocol ipgre_protocol = {
779 .handler = gre_rcv,
780 .err_handler = gre_err,
1da177e4
LT
781};
782
2c8c1e72 783static int __net_init ipgre_init_net(struct net *net)
59a4c759 784{
c5441932 785 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
59a4c759
PE
786}
787
2c8c1e72 788static void __net_exit ipgre_exit_net(struct net *net)
59a4c759 789{
c5441932 790 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
6c742e71 791 ip_tunnel_delete_net(itn, &ipgre_link_ops);
59a4c759
PE
792}
793
794static struct pernet_operations ipgre_net_ops = {
795 .init = ipgre_init_net,
796 .exit = ipgre_exit_net,
cfb8fbf2 797 .id = &ipgre_net_id,
c5441932 798 .size = sizeof(struct ip_tunnel_net),
59a4c759 799};
1da177e4 800
c19e654d
HX
801static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
802{
803 __be16 flags;
804
805 if (!data)
806 return 0;
807
808 flags = 0;
809 if (data[IFLA_GRE_IFLAGS])
810 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
811 if (data[IFLA_GRE_OFLAGS])
812 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
813 if (flags & (GRE_VERSION|GRE_ROUTING))
814 return -EINVAL;
815
946b636f
JB
816 if (data[IFLA_GRE_COLLECT_METADATA] &&
817 data[IFLA_GRE_ENCAP_TYPE] &&
818 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
819 return -EINVAL;
820
c19e654d
HX
821 return 0;
822}
823
e1a80002
HX
824static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
825{
826 __be32 daddr;
827
828 if (tb[IFLA_ADDRESS]) {
829 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
830 return -EINVAL;
831 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
832 return -EADDRNOTAVAIL;
833 }
834
835 if (!data)
836 goto out;
837
838 if (data[IFLA_GRE_REMOTE]) {
839 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
840 if (!daddr)
841 return -EINVAL;
842 }
843
844out:
845 return ipgre_tunnel_validate(tb, data);
846}
847
22a59be8 848static int ipgre_netlink_parms(struct net_device *dev,
2e15ea39
PS
849 struct nlattr *data[],
850 struct nlattr *tb[],
851 struct ip_tunnel_parm *parms)
c19e654d 852{
22a59be8
PP
853 struct ip_tunnel *t = netdev_priv(dev);
854
7bb82d92 855 memset(parms, 0, sizeof(*parms));
c19e654d
HX
856
857 parms->iph.protocol = IPPROTO_GRE;
858
859 if (!data)
22a59be8 860 return 0;
c19e654d
HX
861
862 if (data[IFLA_GRE_LINK])
863 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
864
865 if (data[IFLA_GRE_IFLAGS])
c5441932 866 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
c19e654d
HX
867
868 if (data[IFLA_GRE_OFLAGS])
c5441932 869 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
c19e654d
HX
870
871 if (data[IFLA_GRE_IKEY])
872 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
873
874 if (data[IFLA_GRE_OKEY])
875 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
876
877 if (data[IFLA_GRE_LOCAL])
67b61f6c 878 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
c19e654d
HX
879
880 if (data[IFLA_GRE_REMOTE])
67b61f6c 881 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
c19e654d
HX
882
883 if (data[IFLA_GRE_TTL])
884 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
885
886 if (data[IFLA_GRE_TOS])
887 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
888
22a59be8
PP
889 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
890 if (t->ignore_df)
891 return -EINVAL;
c19e654d 892 parms->iph.frag_off = htons(IP_DF);
22a59be8 893 }
2e15ea39
PS
894
895 if (data[IFLA_GRE_COLLECT_METADATA]) {
2e15ea39 896 t->collect_md = true;
e271c7b4
JB
897 if (dev->type == ARPHRD_IPGRE)
898 dev->type = ARPHRD_NONE;
2e15ea39 899 }
22a59be8
PP
900
901 if (data[IFLA_GRE_IGNORE_DF]) {
902 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
903 && (parms->iph.frag_off & htons(IP_DF)))
904 return -EINVAL;
905 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
906 }
907
908 return 0;
c19e654d
HX
909}
910
4565e991
TH
911/* This function returns true when ENCAP attributes are present in the nl msg */
912static bool ipgre_netlink_encap_parms(struct nlattr *data[],
913 struct ip_tunnel_encap *ipencap)
914{
915 bool ret = false;
916
917 memset(ipencap, 0, sizeof(*ipencap));
918
919 if (!data)
920 return ret;
921
922 if (data[IFLA_GRE_ENCAP_TYPE]) {
923 ret = true;
924 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
925 }
926
927 if (data[IFLA_GRE_ENCAP_FLAGS]) {
928 ret = true;
929 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
930 }
931
932 if (data[IFLA_GRE_ENCAP_SPORT]) {
933 ret = true;
3e97fa70 934 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
4565e991
TH
935 }
936
937 if (data[IFLA_GRE_ENCAP_DPORT]) {
938 ret = true;
3e97fa70 939 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
4565e991
TH
940 }
941
942 return ret;
943}
944
c5441932 945static int gre_tap_init(struct net_device *dev)
e1a80002 946{
c5441932 947 __gre_tunnel_init(dev);
bec94d43 948 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
e1a80002 949
c5441932 950 return ip_tunnel_init(dev);
e1a80002
HX
951}
952
c5441932
PS
953static const struct net_device_ops gre_tap_netdev_ops = {
954 .ndo_init = gre_tap_init,
955 .ndo_uninit = ip_tunnel_uninit,
956 .ndo_start_xmit = gre_tap_xmit,
b8c26a33
SH
957 .ndo_set_mac_address = eth_mac_addr,
958 .ndo_validate_addr = eth_validate_addr,
c5441932
PS
959 .ndo_change_mtu = ip_tunnel_change_mtu,
960 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 961 .ndo_get_iflink = ip_tunnel_get_iflink,
fc4099f1 962 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
b8c26a33
SH
963};
964
e1a80002
HX
965static void ipgre_tap_setup(struct net_device *dev)
966{
e1a80002 967 ether_setup(dev);
d13b161c
JB
968 dev->netdev_ops = &gre_tap_netdev_ops;
969 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
970 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
c5441932 971 ip_tunnel_setup(dev, gre_tap_net_id);
e1a80002
HX
972}
973
c5441932
PS
974static int ipgre_newlink(struct net *src_net, struct net_device *dev,
975 struct nlattr *tb[], struct nlattr *data[])
c19e654d 976{
c5441932 977 struct ip_tunnel_parm p;
4565e991 978 struct ip_tunnel_encap ipencap;
22a59be8 979 int err;
4565e991
TH
980
981 if (ipgre_netlink_encap_parms(data, &ipencap)) {
982 struct ip_tunnel *t = netdev_priv(dev);
22a59be8 983 err = ip_tunnel_encap_setup(t, &ipencap);
4565e991
TH
984
985 if (err < 0)
986 return err;
987 }
c19e654d 988
22a59be8
PP
989 err = ipgre_netlink_parms(dev, data, tb, &p);
990 if (err < 0)
991 return err;
c5441932 992 return ip_tunnel_newlink(dev, tb, &p);
c19e654d
HX
993}
994
995static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
996 struct nlattr *data[])
997{
c19e654d 998 struct ip_tunnel_parm p;
4565e991 999 struct ip_tunnel_encap ipencap;
22a59be8 1000 int err;
4565e991
TH
1001
1002 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1003 struct ip_tunnel *t = netdev_priv(dev);
22a59be8 1004 err = ip_tunnel_encap_setup(t, &ipencap);
4565e991
TH
1005
1006 if (err < 0)
1007 return err;
1008 }
c19e654d 1009
22a59be8
PP
1010 err = ipgre_netlink_parms(dev, data, tb, &p);
1011 if (err < 0)
1012 return err;
c5441932 1013 return ip_tunnel_changelink(dev, tb, &p);
c19e654d
HX
1014}
1015
1016static size_t ipgre_get_size(const struct net_device *dev)
1017{
1018 return
1019 /* IFLA_GRE_LINK */
1020 nla_total_size(4) +
1021 /* IFLA_GRE_IFLAGS */
1022 nla_total_size(2) +
1023 /* IFLA_GRE_OFLAGS */
1024 nla_total_size(2) +
1025 /* IFLA_GRE_IKEY */
1026 nla_total_size(4) +
1027 /* IFLA_GRE_OKEY */
1028 nla_total_size(4) +
1029 /* IFLA_GRE_LOCAL */
1030 nla_total_size(4) +
1031 /* IFLA_GRE_REMOTE */
1032 nla_total_size(4) +
1033 /* IFLA_GRE_TTL */
1034 nla_total_size(1) +
1035 /* IFLA_GRE_TOS */
1036 nla_total_size(1) +
1037 /* IFLA_GRE_PMTUDISC */
1038 nla_total_size(1) +
4565e991
TH
1039 /* IFLA_GRE_ENCAP_TYPE */
1040 nla_total_size(2) +
1041 /* IFLA_GRE_ENCAP_FLAGS */
1042 nla_total_size(2) +
1043 /* IFLA_GRE_ENCAP_SPORT */
1044 nla_total_size(2) +
1045 /* IFLA_GRE_ENCAP_DPORT */
1046 nla_total_size(2) +
2e15ea39
PS
1047 /* IFLA_GRE_COLLECT_METADATA */
1048 nla_total_size(0) +
22a59be8
PP
1049 /* IFLA_GRE_IGNORE_DF */
1050 nla_total_size(1) +
c19e654d
HX
1051 0;
1052}
1053
1054static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1055{
1056 struct ip_tunnel *t = netdev_priv(dev);
1057 struct ip_tunnel_parm *p = &t->parms;
1058
f3756b79 1059 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
95f5c64c
TH
1060 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1061 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1062 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1063 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
f3756b79
DM
1064 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1065 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
930345ea
JB
1066 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1067 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
f3756b79
DM
1068 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1069 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1070 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1071 !!(p->iph.frag_off & htons(IP_DF))))
1072 goto nla_put_failure;
4565e991
TH
1073
1074 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1075 t->encap.type) ||
3e97fa70
SD
1076 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1077 t->encap.sport) ||
1078 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1079 t->encap.dport) ||
4565e991 1080 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
e1b2cb65 1081 t->encap.flags))
4565e991
TH
1082 goto nla_put_failure;
1083
22a59be8
PP
1084 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1085 goto nla_put_failure;
1086
2e15ea39
PS
1087 if (t->collect_md) {
1088 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1089 goto nla_put_failure;
1090 }
1091
c19e654d
HX
1092 return 0;
1093
1094nla_put_failure:
1095 return -EMSGSIZE;
1096}
1097
1098static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1099 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1100 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1101 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1102 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1103 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1104 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1105 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1106 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1107 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1108 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
4565e991
TH
1109 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1110 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1111 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1112 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
2e15ea39 1113 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
22a59be8 1114 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
c19e654d
HX
1115};
1116
1117static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1118 .kind = "gre",
1119 .maxtype = IFLA_GRE_MAX,
1120 .policy = ipgre_policy,
1121 .priv_size = sizeof(struct ip_tunnel),
1122 .setup = ipgre_tunnel_setup,
1123 .validate = ipgre_tunnel_validate,
1124 .newlink = ipgre_newlink,
1125 .changelink = ipgre_changelink,
c5441932 1126 .dellink = ip_tunnel_dellink,
c19e654d
HX
1127 .get_size = ipgre_get_size,
1128 .fill_info = ipgre_fill_info,
1728d4fa 1129 .get_link_net = ip_tunnel_get_link_net,
c19e654d
HX
1130};
1131
e1a80002
HX
1132static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1133 .kind = "gretap",
1134 .maxtype = IFLA_GRE_MAX,
1135 .policy = ipgre_policy,
1136 .priv_size = sizeof(struct ip_tunnel),
1137 .setup = ipgre_tap_setup,
1138 .validate = ipgre_tap_validate,
1139 .newlink = ipgre_newlink,
1140 .changelink = ipgre_changelink,
c5441932 1141 .dellink = ip_tunnel_dellink,
e1a80002
HX
1142 .get_size = ipgre_get_size,
1143 .fill_info = ipgre_fill_info,
1728d4fa 1144 .get_link_net = ip_tunnel_get_link_net,
e1a80002
HX
1145};
1146
b2acd1dc
PS
1147struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1148 u8 name_assign_type)
1149{
1150 struct nlattr *tb[IFLA_MAX + 1];
1151 struct net_device *dev;
106da663 1152 LIST_HEAD(list_kill);
b2acd1dc
PS
1153 struct ip_tunnel *t;
1154 int err;
1155
1156 memset(&tb, 0, sizeof(tb));
1157
1158 dev = rtnl_create_link(net, name, name_assign_type,
1159 &ipgre_tap_ops, tb);
1160 if (IS_ERR(dev))
1161 return dev;
1162
1163 /* Configure flow based GRE device. */
1164 t = netdev_priv(dev);
1165 t->collect_md = true;
1166
1167 err = ipgre_newlink(net, dev, tb, NULL);
106da663
ND
1168 if (err < 0) {
1169 free_netdev(dev);
1170 return ERR_PTR(err);
1171 }
7e059158
DW
1172
1173 /* openvswitch users expect packet sizes to be unrestricted,
1174 * so set the largest MTU we can.
1175 */
1176 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1177 if (err)
1178 goto out;
1179
da6f1da8
ND
1180 err = rtnl_configure_link(dev, NULL);
1181 if (err < 0)
1182 goto out;
1183
b2acd1dc
PS
1184 return dev;
1185out:
106da663
ND
1186 ip_tunnel_dellink(dev, &list_kill);
1187 unregister_netdevice_many(&list_kill);
b2acd1dc
PS
1188 return ERR_PTR(err);
1189}
1190EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1191
c5441932
PS
1192static int __net_init ipgre_tap_init_net(struct net *net)
1193{
2e15ea39 1194 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
c5441932
PS
1195}
1196
1197static void __net_exit ipgre_tap_exit_net(struct net *net)
1198{
1199 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
6c742e71 1200 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
c5441932
PS
1201}
1202
1203static struct pernet_operations ipgre_tap_net_ops = {
1204 .init = ipgre_tap_init_net,
1205 .exit = ipgre_tap_exit_net,
1206 .id = &gre_tap_net_id,
1207 .size = sizeof(struct ip_tunnel_net),
1208};
1da177e4
LT
1209
1210static int __init ipgre_init(void)
1211{
1212 int err;
1213
058bd4d2 1214 pr_info("GRE over IPv4 tunneling driver\n");
1da177e4 1215
cfb8fbf2 1216 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1217 if (err < 0)
c2892f02
AD
1218 return err;
1219
c5441932
PS
1220 err = register_pernet_device(&ipgre_tap_net_ops);
1221 if (err < 0)
1222 goto pnet_tap_faied;
1223
9f57c67c 1224 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1225 if (err < 0) {
058bd4d2 1226 pr_info("%s: can't add protocol\n", __func__);
c2892f02
AD
1227 goto add_proto_failed;
1228 }
7daa0004 1229
c19e654d
HX
1230 err = rtnl_link_register(&ipgre_link_ops);
1231 if (err < 0)
1232 goto rtnl_link_failed;
1233
e1a80002
HX
1234 err = rtnl_link_register(&ipgre_tap_ops);
1235 if (err < 0)
1236 goto tap_ops_failed;
1237
c5441932 1238 return 0;
c19e654d 1239
e1a80002
HX
1240tap_ops_failed:
1241 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1242rtnl_link_failed:
9f57c67c 1243 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1244add_proto_failed:
c5441932
PS
1245 unregister_pernet_device(&ipgre_tap_net_ops);
1246pnet_tap_faied:
c2892f02 1247 unregister_pernet_device(&ipgre_net_ops);
c5441932 1248 return err;
1da177e4
LT
1249}
1250
db44575f 1251static void __exit ipgre_fini(void)
1da177e4 1252{
e1a80002 1253 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1254 rtnl_link_unregister(&ipgre_link_ops);
9f57c67c 1255 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c5441932 1256 unregister_pernet_device(&ipgre_tap_net_ops);
c2892f02 1257 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1258}
1259
1260module_init(ipgre_init);
1261module_exit(ipgre_fini);
1262MODULE_LICENSE("GPL");
4d74f8ba
PM
1263MODULE_ALIAS_RTNL_LINK("gre");
1264MODULE_ALIAS_RTNL_LINK("gretap");
8909c9ad 1265MODULE_ALIAS_NETDEV("gre0");
c5441932 1266MODULE_ALIAS_NETDEV("gretap0");