]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - net/ipv4/ip_gre.c
Merge tag 'nfc-next-4.7-1' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo...
[mirror_ubuntu-hirsute-kernel.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
afd46503
JP
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
4fc268d2 15#include <linux/capability.h>
1da177e4
LT
16#include <linux/module.h>
17#include <linux/types.h>
1da177e4 18#include <linux/kernel.h>
5a0e3ad6 19#include <linux/slab.h>
1da177e4
LT
20#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
2e15ea39 27#include <linux/if_vlan.h>
1da177e4
LT
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
e1a80002 33#include <linux/etherdevice.h>
46f25dff 34#include <linux/if_ether.h>
1da177e4
LT
35
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
c5441932 40#include <net/ip_tunnels.h>
1da177e4
LT
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
59a4c759
PE
46#include <net/net_namespace.h>
47#include <net/netns/generic.h>
c19e654d 48#include <net/rtnetlink.h>
00959ade 49#include <net/gre.h>
2e15ea39 50#include <net/dst_metadata.h>
1da177e4 51
dfd56b8b 52#if IS_ENABLED(CONFIG_IPV6)
1da177e4
LT
53#include <net/ipv6.h>
54#include <net/ip6_fib.h>
55#include <net/ip6_route.h>
56#endif
57
58/*
59 Problems & solutions
60 --------------------
61
62 1. The most important issue is detecting local dead loops.
63 They would cause complete host lockup in transmit, which
64 would be "resolved" by stack overflow or, if queueing is enabled,
65 with infinite looping in net_bh.
66
67 We cannot track such dead loops during route installation,
68 it is infeasible task. The most general solutions would be
69 to keep skb->encapsulation counter (sort of local ttl),
6d0722a2 70 and silently drop packet when it expires. It is a good
bff52857 71 solution, but it supposes maintaining new variable in ALL
1da177e4
LT
72 skb, even if no tunneling is used.
73
6d0722a2
ED
74 Current solution: xmit_recursion breaks dead loops. This is a percpu
75 counter, since when we enter the first ndo_xmit(), cpu migration is
76 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
1da177e4
LT
77
78 2. Networking dead loops would not kill routers, but would really
79 kill network. IP hop limit plays role of "t->recursion" in this case,
80 if we copy it from packet being encapsulated to upper header.
81 It is very good solution, but it introduces two problems:
82
83 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
84 do not work over tunnels.
85 - traceroute does not work. I planned to relay ICMP from tunnel,
86 so that this problem would be solved and traceroute output
87 would be even more informative. This idea appeared to be wrong:
88 only Linux complies to rfc1812 now (yes, guys, Linux is the only
89 true router now :-)), all routers (at least, in neighbourhood of mine)
90 return only 8 bytes of payload. It is the end.
91
92 Hence, if we want that OSPF worked or traceroute said something reasonable,
93 we should search for another solution.
94
95 One of them is to parse packet trying to detect inner encapsulation
96 made by our node. It is difficult or even impossible, especially,
bff52857 97 taking into account fragmentation. To be short, ttl is not a solution at all.
1da177e4
LT
98
99 Current solution: The solution was UNEXPECTEDLY SIMPLE.
100 We force DF flag on tunnels with preconfigured hop limit,
101 that is ALL. :-) Well, it does not remove the problem completely,
102 but exponential growth of network traffic is changed to linear
103 (branches, that exceed pmtu are pruned) and tunnel mtu
bff52857 104 rapidly degrades to value <68, where looping stops.
1da177e4
LT
105 Yes, it is not good if there exists a router in the loop,
106 which does not force DF, even when encapsulating packets have DF set.
107 But it is not our problem! Nobody could accuse us, we made
108 all that we could make. Even if it is your gated who injected
109 fatal route to network, even if it were you who configured
110 fatal static route: you are innocent. :-)
111
1da177e4
LT
112 Alexey Kuznetsov.
113 */
114
/* Module parameter (mode 0644); the receive path in this file hands its
 * current value to ip_tunnel_rcv().
 */
eccc1bb8 115static bool log_ecn_error = true;
116module_param(log_ecn_error, bool, 0644);
117MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
118
/* Forward declarations: both are referenced before their definitions. */
c19e654d 119static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4 120static int ipgre_tunnel_init(struct net_device *dev);
eb8ce741 121
/* Ids passed to net_generic() to find the per-namespace tunnel tables:
 * one for plain GRE devices, one for gretap (ETH_P_TEB) devices.
 */
f99189b1 122static int ipgre_net_id __read_mostly;
c5441932 123static int gre_tap_net_id __read_mostly;
1da177e4 124
9f57c67c
PS
125static void ipgre_err(struct sk_buff *skb, u32 info,
126 const struct tnl_ptk_info *tpi)
1da177e4 127{
1da177e4 128
c5441932
PS
129 /* All the routers (except for Linux) return only
130 8 bytes of packet payload. It means, that precise relaying of
131 ICMP in the real Internet is absolutely infeasible.
1da177e4 132
c5441932
PS
133 Moreover, Cisco "wise men" put GRE key to the third word
134 in GRE header. It makes impossible maintaining even soft
135 state for keyed GRE tunnels with enabled checksum. Tell
136 them "thank you".
1da177e4 137
c5441932
PS
138 Well, I wonder, rfc1812 was written by Cisco employee,
139 what the hell these idiots break standards established
140 by themselves???
141 */
142 struct net *net = dev_net(skb->dev);
143 struct ip_tunnel_net *itn;
96f5a846 144 const struct iphdr *iph;
88c7664f
ACM
145 const int type = icmp_hdr(skb)->type;
146 const int code = icmp_hdr(skb)->code;
1da177e4 147 struct ip_tunnel *t;
1da177e4 148
1da177e4
LT
149 switch (type) {
150 default:
151 case ICMP_PARAMETERPROB:
9f57c67c 152 return;
1da177e4
LT
153
154 case ICMP_DEST_UNREACH:
155 switch (code) {
156 case ICMP_SR_FAILED:
157 case ICMP_PORT_UNREACH:
158 /* Impossible event. */
9f57c67c 159 return;
1da177e4
LT
160 default:
161 /* All others are translated to HOST_UNREACH.
162 rfc2003 contains "deep thoughts" about NET_UNREACH,
163 I believe they are just ether pollution. --ANK
164 */
165 break;
166 }
167 break;
9f57c67c 168
1da177e4
LT
169 case ICMP_TIME_EXCEEDED:
170 if (code != ICMP_EXC_TTL)
9f57c67c 171 return;
1da177e4 172 break;
55be7a9c
DM
173
174 case ICMP_REDIRECT:
175 break;
1da177e4
LT
176 }
177
bda7bb46 178 if (tpi->proto == htons(ETH_P_TEB))
c5441932
PS
179 itn = net_generic(net, gre_tap_net_id);
180 else
181 itn = net_generic(net, ipgre_net_id);
182
c0c0c50f 183 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
bda7bb46
PS
184 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
185 iph->daddr, iph->saddr, tpi->key);
d2083287 186
51456b29 187 if (!t)
9f57c67c 188 return;
36393395 189
36393395 190 if (t->parms.iph.daddr == 0 ||
f97c1e0c 191 ipv4_is_multicast(t->parms.iph.daddr))
9f57c67c 192 return;
1da177e4
LT
193
194 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
9f57c67c 195 return;
1da177e4 196
da6185d8 197 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
198 t->err_count++;
199 else
200 t->err_count = 1;
201 t->err_time = jiffies;
9f57c67c
PS
202}
203
204static void gre_err(struct sk_buff *skb, u32 info)
205{
206 /* All the routers (except for Linux) return only
207 * 8 bytes of packet payload. It means, that precise relaying of
208 * ICMP in the real Internet is absolutely infeasible.
209 *
210 * Moreover, Cisco "wise men" put GRE key to the third word
211 * in GRE header. It makes impossible maintaining even soft
212 * state for keyed
213 * GRE tunnels with enabled checksum. Tell them "thank you".
214 *
215 * Well, I wonder, rfc1812 was written by Cisco employee,
216 * what the hell these idiots break standards established
217 * by themselves???
218 */
219
220 const int type = icmp_hdr(skb)->type;
221 const int code = icmp_hdr(skb)->code;
222 struct tnl_ptk_info tpi;
223 bool csum_err = false;
224
f132ae7c 225 if (gre_parse_header(skb, &tpi, &csum_err) < 0) {
9f57c67c
PS
226 if (!csum_err) /* ignore csum errors. */
227 return;
228 }
229
230 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
231 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
232 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
233 return;
234 }
235 if (type == ICMP_REDIRECT) {
236 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
237 IPPROTO_GRE, 0);
238 return;
239 }
240
241 ipgre_err(skb, info, &tpi);
1da177e4
LT
242}
243
2e15ea39
PS
244static __be64 key_to_tunnel_id(__be32 key)
245{
246#ifdef __BIG_ENDIAN
247 return (__force __be64)((__force u32)key);
248#else
249 return (__force __be64)((__force u64)key << 32);
250#endif
251}
252
253/* Returns the least-significant 32 bits of a __be64. */
254static __be32 tunnel_id_to_key(__be64 x)
255{
256#ifdef __BIG_ENDIAN
257 return (__force __be32)x;
258#else
259 return (__force __be32)((__force u64)x >> 32);
260#endif
261}
262
125372fa
JB
263static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
264 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
1da177e4 265{
2e15ea39 266 struct metadata_dst *tun_dst = NULL;
b71d1d42 267 const struct iphdr *iph;
1da177e4 268 struct ip_tunnel *tunnel;
1da177e4 269
c5441932 270 iph = ip_hdr(skb);
bda7bb46
PS
271 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
272 iph->saddr, iph->daddr, tpi->key);
e1a80002 273
d2083287 274 if (tunnel) {
125372fa
JB
275 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
276 raw_proto, false) < 0)
244a797b
JB
277 goto drop;
278
0e3da5bb 279 skb_pop_mac_header(skb);
2e15ea39 280 if (tunnel->collect_md) {
c29a70d2
PS
281 __be16 flags;
282 __be64 tun_id;
2e15ea39 283
c29a70d2
PS
284 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
285 tun_id = key_to_tunnel_id(tpi->key);
286 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
2e15ea39
PS
287 if (!tun_dst)
288 return PACKET_REJECT;
2e15ea39
PS
289 }
290
291 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
bda7bb46 292 return PACKET_RCVD;
1da177e4 293 }
125372fa 294 return PACKET_NEXT;
244a797b
JB
295
296drop:
297 kfree_skb(skb);
298 return PACKET_RCVD;
1da177e4
LT
299}
300
125372fa
JB
301static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
302 int hdr_len)
303{
304 struct net *net = dev_net(skb->dev);
305 struct ip_tunnel_net *itn;
306 int res;
307
308 if (tpi->proto == htons(ETH_P_TEB))
309 itn = net_generic(net, gre_tap_net_id);
310 else
311 itn = net_generic(net, ipgre_net_id);
312
313 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
314 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
315 /* ipgre tunnels in collect metadata mode should receive
316 * also ETH_P_TEB traffic.
317 */
318 itn = net_generic(net, ipgre_net_id);
319 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
320 }
321 return res;
322}
323
9f57c67c
PS
324static int gre_rcv(struct sk_buff *skb)
325{
326 struct tnl_ptk_info tpi;
327 bool csum_err = false;
95f5c64c 328 int hdr_len;
9f57c67c
PS
329
330#ifdef CONFIG_NET_IPGRE_BROADCAST
331 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
332 /* Looped back packet, drop it! */
333 if (rt_is_output_route(skb_rtable(skb)))
334 goto drop;
335 }
336#endif
337
f132ae7c
JB
338 hdr_len = gre_parse_header(skb, &tpi, &csum_err);
339 if (hdr_len < 0)
95f5c64c
TH
340 goto drop;
341
244a797b 342 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
9f57c67c
PS
343 return 0;
344
345 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
346drop:
347 kfree_skb(skb);
348 return 0;
349}
350
c5441932
PS
351static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
352 const struct iphdr *tnl_params,
353 __be16 proto)
354{
355 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 356
c5441932
PS
357 if (tunnel->parms.o_flags & TUNNEL_SEQ)
358 tunnel->o_seqno++;
1da177e4 359
c5441932 360 /* Push GRE header. */
182a352d
TH
361 gre_build_header(skb, tunnel->tun_hlen,
362 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
363 htonl(tunnel->o_seqno));
54bc9bac 364
2e15ea39 365 skb_set_inner_protocol(skb, proto);
bf3d6a8f 366 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
c5441932 367}
1da177e4 368
aed069df 369static int gre_handle_offloads(struct sk_buff *skb, bool csum)
b2acd1dc 370{
6fa79666 371 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
b2acd1dc
PS
372}
373
fc4099f1
PS
374static struct rtable *gre_get_rt(struct sk_buff *skb,
375 struct net_device *dev,
376 struct flowi4 *fl,
377 const struct ip_tunnel_key *key)
378{
379 struct net *net = dev_net(dev);
380
381 memset(fl, 0, sizeof(*fl));
382 fl->daddr = key->u.ipv4.dst;
383 fl->saddr = key->u.ipv4.src;
384 fl->flowi4_tos = RT_TOS(key->tos);
385 fl->flowi4_mark = skb->mark;
386 fl->flowi4_proto = IPPROTO_GRE;
387
388 return ip_route_output_key(net, fl);
389}
390
2090714e
JB
391static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
392 __be16 proto)
2e15ea39
PS
393{
394 struct ip_tunnel_info *tun_info;
2e15ea39 395 const struct ip_tunnel_key *key;
db3c6139 396 struct rtable *rt = NULL;
2e15ea39 397 struct flowi4 fl;
2e15ea39
PS
398 int min_headroom;
399 int tunnel_hlen;
400 __be16 df, flags;
db3c6139 401 bool use_cache;
2e15ea39
PS
402 int err;
403
61adedf3 404 tun_info = skb_tunnel_info(skb);
7f9562a1
JB
405 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
406 ip_tunnel_info_af(tun_info) != AF_INET))
2e15ea39
PS
407 goto err_free_skb;
408
409 key = &tun_info->key;
db3c6139
DB
410 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
411 if (use_cache)
412 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
3c1cb4d2
PA
413 if (!rt) {
414 rt = gre_get_rt(skb, dev, &fl, key);
415 if (IS_ERR(rt))
416 goto err_free_skb;
db3c6139 417 if (use_cache)
3c1cb4d2
PA
418 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
419 fl.saddr);
420 }
2e15ea39 421
95f5c64c 422 tunnel_hlen = gre_calc_hlen(key->tun_flags);
2e15ea39
PS
423
424 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
425 + tunnel_hlen + sizeof(struct iphdr);
426 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
427 int head_delta = SKB_DATA_ALIGN(min_headroom -
428 skb_headroom(skb) +
429 16);
430 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
431 0, GFP_ATOMIC);
432 if (unlikely(err))
433 goto err_free_rt;
434 }
435
436 /* Push Tunnel header. */
aed069df 437 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
2e15ea39 438 goto err_free_rt;
2e15ea39
PS
439
440 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
cba65321 441 gre_build_header(skb, tunnel_hlen, flags, proto,
182a352d 442 tunnel_id_to_key(tun_info->key.tun_id), 0);
2e15ea39
PS
443
444 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
039f5062
PS
445
446 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
447 key->tos, key->ttl, df, false);
2e15ea39
PS
448 return;
449
450err_free_rt:
451 ip_rt_put(rt);
452err_free_skb:
453 kfree_skb(skb);
454 dev->stats.tx_dropped++;
455}
456
fc4099f1
PS
457static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
458{
459 struct ip_tunnel_info *info = skb_tunnel_info(skb);
460 struct rtable *rt;
461 struct flowi4 fl4;
462
463 if (ip_tunnel_info_af(info) != AF_INET)
464 return -EINVAL;
465
466 rt = gre_get_rt(skb, dev, &fl4, &info->key);
467 if (IS_ERR(rt))
468 return PTR_ERR(rt);
469
470 ip_rt_put(rt);
471 info->key.u.ipv4.src = fl4.saddr;
472 return 0;
473}
474
c5441932
PS
475static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
476 struct net_device *dev)
477{
478 struct ip_tunnel *tunnel = netdev_priv(dev);
479 const struct iphdr *tnl_params;
1da177e4 480
2e15ea39 481 if (tunnel->collect_md) {
2090714e 482 gre_fb_xmit(skb, dev, skb->protocol);
2e15ea39
PS
483 return NETDEV_TX_OK;
484 }
485
c5441932
PS
486 if (dev->header_ops) {
487 /* Need space for new headers */
488 if (skb_cow_head(skb, dev->needed_headroom -
2bac7cb3 489 (tunnel->hlen + sizeof(struct iphdr))))
c5441932 490 goto free_skb;
1da177e4 491
c5441932 492 tnl_params = (const struct iphdr *)skb->data;
1da177e4 493
c5441932
PS
494 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
495 * to gre header.
496 */
497 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
8a0033a9 498 skb_reset_mac_header(skb);
c5441932
PS
499 } else {
500 if (skb_cow_head(skb, dev->needed_headroom))
501 goto free_skb;
1da177e4 502
c5441932 503 tnl_params = &tunnel->parms.iph;
1da177e4
LT
504 }
505
aed069df
AD
506 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
507 goto free_skb;
8a0033a9 508
c5441932 509 __gre_xmit(skb, dev, tnl_params, skb->protocol);
6ed10654 510 return NETDEV_TX_OK;
1da177e4 511
c5441932 512free_skb:
3acfa1e7 513 kfree_skb(skb);
c5441932 514 dev->stats.tx_dropped++;
6ed10654 515 return NETDEV_TX_OK;
1da177e4
LT
516}
517
c5441932
PS
518static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
519 struct net_device *dev)
ee34c1eb 520{
c5441932 521 struct ip_tunnel *tunnel = netdev_priv(dev);
ee34c1eb 522
2e15ea39 523 if (tunnel->collect_md) {
2090714e 524 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
2e15ea39
PS
525 return NETDEV_TX_OK;
526 }
527
aed069df
AD
528 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
529 goto free_skb;
ee34c1eb 530
c5441932
PS
531 if (skb_cow_head(skb, dev->needed_headroom))
532 goto free_skb;
42aa9162 533
c5441932 534 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
c5441932 535 return NETDEV_TX_OK;
ee34c1eb 536
c5441932 537free_skb:
3acfa1e7 538 kfree_skb(skb);
c5441932
PS
539 dev->stats.tx_dropped++;
540 return NETDEV_TX_OK;
ee34c1eb
MS
541}
542
c5441932
PS
543static int ipgre_tunnel_ioctl(struct net_device *dev,
544 struct ifreq *ifr, int cmd)
1da177e4 545{
4565e991 546 int err;
1da177e4 547 struct ip_tunnel_parm p;
1da177e4 548
c5441932
PS
549 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
550 return -EFAULT;
6c734fb8
CW
551 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
552 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
553 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
554 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
555 return -EINVAL;
1da177e4 556 }
c5441932
PS
557 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
558 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1da177e4 559
c5441932
PS
560 err = ip_tunnel_ioctl(dev, &p, cmd);
561 if (err)
562 return err;
1da177e4 563
95f5c64c
TH
564 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
565 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
c5441932
PS
566
567 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
568 return -EFAULT;
1da177e4
LT
569 return 0;
570}
571
1da177e4
LT
572/* Nice toy. Unfortunately, useless in real life :-)
573 It allows to construct virtual multiprotocol broadcast "LAN"
574 over the Internet, provided multicast routing is tuned.
575
576
577 I have no idea was this bicycle invented before me,
578 so that I had to set ARPHRD_IPGRE to a random value.
579 I have an impression, that Cisco could make something similar,
580 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 581
1da177e4
LT
582 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
583 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
584
585 ping -t 255 224.66.66.66
586
587 If nobody answers, mbone does not work.
588
589 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
590 ip addr add 10.66.66.<somewhat>/24 dev Universe
591 ifconfig Universe up
592 ifconfig Universe add fe80::<Your_real_addr>/10
593 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
594 ftp 10.66.66.66
595 ...
596 ftp fec0:6666:6666::193.233.7.65
597 ...
1da177e4 598 */
3b04ddde
SH
599static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
600 unsigned short type,
1507850b 601 const void *daddr, const void *saddr, unsigned int len)
1da177e4 602{
2941a486 603 struct ip_tunnel *t = netdev_priv(dev);
c5441932
PS
604 struct iphdr *iph;
605 struct gre_base_hdr *greh;
1da177e4 606
c5441932
PS
607 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
608 greh = (struct gre_base_hdr *)(iph+1);
95f5c64c 609 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
c5441932 610 greh->protocol = htons(type);
1da177e4 611
c5441932 612 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
e905a9ed 613
c5441932 614 /* Set the source hardware address. */
1da177e4
LT
615 if (saddr)
616 memcpy(&iph->saddr, saddr, 4);
6d55cb91 617 if (daddr)
1da177e4 618 memcpy(&iph->daddr, daddr, 4);
6d55cb91 619 if (iph->daddr)
77a482bd 620 return t->hlen + sizeof(*iph);
e905a9ed 621
c5441932 622 return -(t->hlen + sizeof(*iph));
1da177e4
LT
623}
624
6a5f44d7
TT
625static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
626{
b71d1d42 627 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
628 memcpy(haddr, &iph->saddr, 4);
629 return 4;
630}
631
3b04ddde
SH
632static const struct header_ops ipgre_header_ops = {
633 .create = ipgre_header,
6a5f44d7 634 .parse = ipgre_header_parse,
3b04ddde
SH
635};
636
6a5f44d7 637#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
638static int ipgre_open(struct net_device *dev)
639{
2941a486 640 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 641
f97c1e0c 642 if (ipv4_is_multicast(t->parms.iph.daddr)) {
cbb1e85f
DM
643 struct flowi4 fl4;
644 struct rtable *rt;
645
b57708ad 646 rt = ip_route_output_gre(t->net, &fl4,
cbb1e85f
DM
647 t->parms.iph.daddr,
648 t->parms.iph.saddr,
649 t->parms.o_key,
650 RT_TOS(t->parms.iph.tos),
651 t->parms.link);
b23dd4fe 652 if (IS_ERR(rt))
1da177e4 653 return -EADDRNOTAVAIL;
d8d1f30b 654 dev = rt->dst.dev;
1da177e4 655 ip_rt_put(rt);
51456b29 656 if (!__in_dev_get_rtnl(dev))
1da177e4
LT
657 return -EADDRNOTAVAIL;
658 t->mlink = dev->ifindex;
e5ed6399 659 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
660 }
661 return 0;
662}
663
664static int ipgre_close(struct net_device *dev)
665{
2941a486 666 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 667
f97c1e0c 668 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 669 struct in_device *in_dev;
b57708ad 670 in_dev = inetdev_by_index(t->net, t->mlink);
8723e1b4 671 if (in_dev)
1da177e4 672 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1da177e4
LT
673 }
674 return 0;
675}
1da177e4
LT
676#endif
677
b8c26a33
SH
678static const struct net_device_ops ipgre_netdev_ops = {
679 .ndo_init = ipgre_tunnel_init,
c5441932 680 .ndo_uninit = ip_tunnel_uninit,
b8c26a33
SH
681#ifdef CONFIG_NET_IPGRE_BROADCAST
682 .ndo_open = ipgre_open,
683 .ndo_stop = ipgre_close,
684#endif
c5441932 685 .ndo_start_xmit = ipgre_xmit,
b8c26a33 686 .ndo_do_ioctl = ipgre_tunnel_ioctl,
c5441932
PS
687 .ndo_change_mtu = ip_tunnel_change_mtu,
688 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 689 .ndo_get_iflink = ip_tunnel_get_iflink,
b8c26a33
SH
690};
691
6b78f16e
ED
/* Feature bits __gre_tunnel_init() sets on every GRE device. */
#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)
696
1da177e4
LT
697static void ipgre_tunnel_setup(struct net_device *dev)
698{
b8c26a33 699 dev->netdev_ops = &ipgre_netdev_ops;
5a455275 700 dev->type = ARPHRD_IPGRE;
c5441932
PS
701 ip_tunnel_setup(dev, ipgre_net_id);
702}
1da177e4 703
c5441932
PS
704static void __gre_tunnel_init(struct net_device *dev)
705{
706 struct ip_tunnel *tunnel;
4565e991 707 int t_hlen;
c5441932
PS
708
709 tunnel = netdev_priv(dev);
95f5c64c 710 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
c5441932
PS
711 tunnel->parms.iph.protocol = IPPROTO_GRE;
712
4565e991
TH
713 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
714
715 t_hlen = tunnel->hlen + sizeof(struct iphdr);
716
717 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
718 dev->mtu = ETH_DATA_LEN - t_hlen - 4;
6b78f16e 719
b57708ad 720 dev->features |= GRE_FEATURES;
6b78f16e 721 dev->hw_features |= GRE_FEATURES;
c5441932
PS
722
723 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
a0ca153f
AD
724 /* TCP offload with GRE SEQ is not supported, nor
725 * can we support 2 levels of outer headers requiring
726 * an update.
727 */
728 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
729 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
730 dev->features |= NETIF_F_GSO_SOFTWARE;
731 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
732 }
733
c5441932
PS
734 /* Can use a lockless transmit, unless we generate
735 * output sequences
736 */
737 dev->features |= NETIF_F_LLTX;
738 }
1da177e4
LT
739}
740
741static int ipgre_tunnel_init(struct net_device *dev)
742{
c5441932
PS
743 struct ip_tunnel *tunnel = netdev_priv(dev);
744 struct iphdr *iph = &tunnel->parms.iph;
1da177e4 745
c5441932 746 __gre_tunnel_init(dev);
1da177e4 747
c5441932
PS
748 memcpy(dev->dev_addr, &iph->saddr, 4);
749 memcpy(dev->broadcast, &iph->daddr, 4);
1da177e4 750
c5441932 751 dev->flags = IFF_NOARP;
02875878 752 netif_keep_dst(dev);
c5441932 753 dev->addr_len = 4;
1da177e4 754
a64b04d8 755 if (iph->daddr && !tunnel->collect_md) {
1da177e4 756#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 757 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
758 if (!iph->saddr)
759 return -EINVAL;
760 dev->flags = IFF_BROADCAST;
3b04ddde 761 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
762 }
763#endif
a64b04d8 764 } else if (!tunnel->collect_md) {
6a5f44d7 765 dev->header_ops = &ipgre_header_ops;
a64b04d8 766 }
1da177e4 767
c5441932 768 return ip_tunnel_init(dev);
1da177e4
LT
769}
770
9f57c67c
PS
771static const struct gre_protocol ipgre_protocol = {
772 .handler = gre_rcv,
773 .err_handler = gre_err,
1da177e4
LT
774};
775
2c8c1e72 776static int __net_init ipgre_init_net(struct net *net)
59a4c759 777{
c5441932 778 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
59a4c759
PE
779}
780
2c8c1e72 781static void __net_exit ipgre_exit_net(struct net *net)
59a4c759 782{
c5441932 783 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
6c742e71 784 ip_tunnel_delete_net(itn, &ipgre_link_ops);
59a4c759
PE
785}
786
787static struct pernet_operations ipgre_net_ops = {
788 .init = ipgre_init_net,
789 .exit = ipgre_exit_net,
cfb8fbf2 790 .id = &ipgre_net_id,
c5441932 791 .size = sizeof(struct ip_tunnel_net),
59a4c759 792};
1da177e4 793
c19e654d
HX
794static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
795{
796 __be16 flags;
797
798 if (!data)
799 return 0;
800
801 flags = 0;
802 if (data[IFLA_GRE_IFLAGS])
803 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
804 if (data[IFLA_GRE_OFLAGS])
805 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
806 if (flags & (GRE_VERSION|GRE_ROUTING))
807 return -EINVAL;
808
946b636f
JB
809 if (data[IFLA_GRE_COLLECT_METADATA] &&
810 data[IFLA_GRE_ENCAP_TYPE] &&
811 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
812 return -EINVAL;
813
c19e654d
HX
814 return 0;
815}
816
e1a80002
HX
817static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
818{
819 __be32 daddr;
820
821 if (tb[IFLA_ADDRESS]) {
822 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
823 return -EINVAL;
824 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
825 return -EADDRNOTAVAIL;
826 }
827
828 if (!data)
829 goto out;
830
831 if (data[IFLA_GRE_REMOTE]) {
832 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
833 if (!daddr)
834 return -EINVAL;
835 }
836
837out:
838 return ipgre_tunnel_validate(tb, data);
839}
840
2e15ea39
PS
841static void ipgre_netlink_parms(struct net_device *dev,
842 struct nlattr *data[],
843 struct nlattr *tb[],
844 struct ip_tunnel_parm *parms)
c19e654d 845{
7bb82d92 846 memset(parms, 0, sizeof(*parms));
c19e654d
HX
847
848 parms->iph.protocol = IPPROTO_GRE;
849
850 if (!data)
851 return;
852
853 if (data[IFLA_GRE_LINK])
854 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
855
856 if (data[IFLA_GRE_IFLAGS])
c5441932 857 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
c19e654d
HX
858
859 if (data[IFLA_GRE_OFLAGS])
c5441932 860 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
c19e654d
HX
861
862 if (data[IFLA_GRE_IKEY])
863 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
864
865 if (data[IFLA_GRE_OKEY])
866 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
867
868 if (data[IFLA_GRE_LOCAL])
67b61f6c 869 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
c19e654d
HX
870
871 if (data[IFLA_GRE_REMOTE])
67b61f6c 872 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
c19e654d
HX
873
874 if (data[IFLA_GRE_TTL])
875 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
876
877 if (data[IFLA_GRE_TOS])
878 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
879
880 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
881 parms->iph.frag_off = htons(IP_DF);
2e15ea39
PS
882
883 if (data[IFLA_GRE_COLLECT_METADATA]) {
884 struct ip_tunnel *t = netdev_priv(dev);
885
886 t->collect_md = true;
887 }
c19e654d
HX
888}
889
4565e991
TH
890/* This function returns true when ENCAP attributes are present in the nl msg */
891static bool ipgre_netlink_encap_parms(struct nlattr *data[],
892 struct ip_tunnel_encap *ipencap)
893{
894 bool ret = false;
895
896 memset(ipencap, 0, sizeof(*ipencap));
897
898 if (!data)
899 return ret;
900
901 if (data[IFLA_GRE_ENCAP_TYPE]) {
902 ret = true;
903 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
904 }
905
906 if (data[IFLA_GRE_ENCAP_FLAGS]) {
907 ret = true;
908 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
909 }
910
911 if (data[IFLA_GRE_ENCAP_SPORT]) {
912 ret = true;
3e97fa70 913 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
4565e991
TH
914 }
915
916 if (data[IFLA_GRE_ENCAP_DPORT]) {
917 ret = true;
3e97fa70 918 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
4565e991
TH
919 }
920
921 return ret;
922}
923
c5441932 924static int gre_tap_init(struct net_device *dev)
e1a80002 925{
c5441932 926 __gre_tunnel_init(dev);
bec94d43 927 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
e1a80002 928
c5441932 929 return ip_tunnel_init(dev);
e1a80002
HX
930}
931
c5441932
PS
932static const struct net_device_ops gre_tap_netdev_ops = {
933 .ndo_init = gre_tap_init,
934 .ndo_uninit = ip_tunnel_uninit,
935 .ndo_start_xmit = gre_tap_xmit,
b8c26a33
SH
936 .ndo_set_mac_address = eth_mac_addr,
937 .ndo_validate_addr = eth_validate_addr,
c5441932
PS
938 .ndo_change_mtu = ip_tunnel_change_mtu,
939 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 940 .ndo_get_iflink = ip_tunnel_get_iflink,
fc4099f1 941 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
b8c26a33
SH
942};
943
e1a80002
HX
944static void ipgre_tap_setup(struct net_device *dev)
945{
e1a80002 946 ether_setup(dev);
d13b161c
JB
947 dev->netdev_ops = &gre_tap_netdev_ops;
948 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
949 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
c5441932 950 ip_tunnel_setup(dev, gre_tap_net_id);
e1a80002
HX
951}
952
c5441932
PS
953static int ipgre_newlink(struct net *src_net, struct net_device *dev,
954 struct nlattr *tb[], struct nlattr *data[])
c19e654d 955{
c5441932 956 struct ip_tunnel_parm p;
4565e991
TH
957 struct ip_tunnel_encap ipencap;
958
959 if (ipgre_netlink_encap_parms(data, &ipencap)) {
960 struct ip_tunnel *t = netdev_priv(dev);
961 int err = ip_tunnel_encap_setup(t, &ipencap);
962
963 if (err < 0)
964 return err;
965 }
c19e654d 966
2e15ea39 967 ipgre_netlink_parms(dev, data, tb, &p);
c5441932 968 return ip_tunnel_newlink(dev, tb, &p);
c19e654d
HX
969}
970
971static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
972 struct nlattr *data[])
973{
c19e654d 974 struct ip_tunnel_parm p;
4565e991
TH
975 struct ip_tunnel_encap ipencap;
976
977 if (ipgre_netlink_encap_parms(data, &ipencap)) {
978 struct ip_tunnel *t = netdev_priv(dev);
979 int err = ip_tunnel_encap_setup(t, &ipencap);
980
981 if (err < 0)
982 return err;
983 }
c19e654d 984
2e15ea39 985 ipgre_netlink_parms(dev, data, tb, &p);
c5441932 986 return ip_tunnel_changelink(dev, tb, &p);
c19e654d
HX
987}
988
989static size_t ipgre_get_size(const struct net_device *dev)
990{
991 return
992 /* IFLA_GRE_LINK */
993 nla_total_size(4) +
994 /* IFLA_GRE_IFLAGS */
995 nla_total_size(2) +
996 /* IFLA_GRE_OFLAGS */
997 nla_total_size(2) +
998 /* IFLA_GRE_IKEY */
999 nla_total_size(4) +
1000 /* IFLA_GRE_OKEY */
1001 nla_total_size(4) +
1002 /* IFLA_GRE_LOCAL */
1003 nla_total_size(4) +
1004 /* IFLA_GRE_REMOTE */
1005 nla_total_size(4) +
1006 /* IFLA_GRE_TTL */
1007 nla_total_size(1) +
1008 /* IFLA_GRE_TOS */
1009 nla_total_size(1) +
1010 /* IFLA_GRE_PMTUDISC */
1011 nla_total_size(1) +
4565e991
TH
1012 /* IFLA_GRE_ENCAP_TYPE */
1013 nla_total_size(2) +
1014 /* IFLA_GRE_ENCAP_FLAGS */
1015 nla_total_size(2) +
1016 /* IFLA_GRE_ENCAP_SPORT */
1017 nla_total_size(2) +
1018 /* IFLA_GRE_ENCAP_DPORT */
1019 nla_total_size(2) +
2e15ea39
PS
1020 /* IFLA_GRE_COLLECT_METADATA */
1021 nla_total_size(0) +
c19e654d
HX
1022 0;
1023}
1024
1025static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1026{
1027 struct ip_tunnel *t = netdev_priv(dev);
1028 struct ip_tunnel_parm *p = &t->parms;
1029
f3756b79 1030 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
95f5c64c
TH
1031 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1032 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1033 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1034 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
f3756b79
DM
1035 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1036 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
930345ea
JB
1037 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1038 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
f3756b79
DM
1039 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1040 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1041 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1042 !!(p->iph.frag_off & htons(IP_DF))))
1043 goto nla_put_failure;
4565e991
TH
1044
1045 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1046 t->encap.type) ||
3e97fa70
SD
1047 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1048 t->encap.sport) ||
1049 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1050 t->encap.dport) ||
4565e991 1051 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
e1b2cb65 1052 t->encap.flags))
4565e991
TH
1053 goto nla_put_failure;
1054
2e15ea39
PS
1055 if (t->collect_md) {
1056 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1057 goto nla_put_failure;
1058 }
1059
c19e654d
HX
1060 return 0;
1061
1062nla_put_failure:
1063 return -EMSGSIZE;
1064}
1065
1066static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1067 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1068 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1069 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1070 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1071 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
1072 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1073 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
1074 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1075 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1076 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
4565e991
TH
1077 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1078 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1079 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1080 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
2e15ea39 1081 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
c19e654d
HX
1082};
1083
1084static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1085 .kind = "gre",
1086 .maxtype = IFLA_GRE_MAX,
1087 .policy = ipgre_policy,
1088 .priv_size = sizeof(struct ip_tunnel),
1089 .setup = ipgre_tunnel_setup,
1090 .validate = ipgre_tunnel_validate,
1091 .newlink = ipgre_newlink,
1092 .changelink = ipgre_changelink,
c5441932 1093 .dellink = ip_tunnel_dellink,
c19e654d
HX
1094 .get_size = ipgre_get_size,
1095 .fill_info = ipgre_fill_info,
1728d4fa 1096 .get_link_net = ip_tunnel_get_link_net,
c19e654d
HX
1097};
1098
e1a80002
HX
1099static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1100 .kind = "gretap",
1101 .maxtype = IFLA_GRE_MAX,
1102 .policy = ipgre_policy,
1103 .priv_size = sizeof(struct ip_tunnel),
1104 .setup = ipgre_tap_setup,
1105 .validate = ipgre_tap_validate,
1106 .newlink = ipgre_newlink,
1107 .changelink = ipgre_changelink,
c5441932 1108 .dellink = ip_tunnel_dellink,
e1a80002
HX
1109 .get_size = ipgre_get_size,
1110 .fill_info = ipgre_fill_info,
1728d4fa 1111 .get_link_net = ip_tunnel_get_link_net,
e1a80002
HX
1112};
1113
b2acd1dc
PS
1114struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1115 u8 name_assign_type)
1116{
1117 struct nlattr *tb[IFLA_MAX + 1];
1118 struct net_device *dev;
1119 struct ip_tunnel *t;
1120 int err;
1121
1122 memset(&tb, 0, sizeof(tb));
1123
1124 dev = rtnl_create_link(net, name, name_assign_type,
1125 &ipgre_tap_ops, tb);
1126 if (IS_ERR(dev))
1127 return dev;
1128
1129 /* Configure flow based GRE device. */
1130 t = netdev_priv(dev);
1131 t->collect_md = true;
1132
1133 err = ipgre_newlink(net, dev, tb, NULL);
1134 if (err < 0)
1135 goto out;
7e059158
DW
1136
1137 /* openvswitch users expect packet sizes to be unrestricted,
1138 * so set the largest MTU we can.
1139 */
1140 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1141 if (err)
1142 goto out;
1143
b2acd1dc
PS
1144 return dev;
1145out:
1146 free_netdev(dev);
1147 return ERR_PTR(err);
1148}
1149EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1150
c5441932
PS
1151static int __net_init ipgre_tap_init_net(struct net *net)
1152{
2e15ea39 1153 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
c5441932
PS
1154}
1155
1156static void __net_exit ipgre_tap_exit_net(struct net *net)
1157{
1158 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
6c742e71 1159 ip_tunnel_delete_net(itn, &ipgre_tap_ops);
c5441932
PS
1160}
1161
1162static struct pernet_operations ipgre_tap_net_ops = {
1163 .init = ipgre_tap_init_net,
1164 .exit = ipgre_tap_exit_net,
1165 .id = &gre_tap_net_id,
1166 .size = sizeof(struct ip_tunnel_net),
1167};
1da177e4
LT
1168
1169static int __init ipgre_init(void)
1170{
1171 int err;
1172
058bd4d2 1173 pr_info("GRE over IPv4 tunneling driver\n");
1da177e4 1174
cfb8fbf2 1175 err = register_pernet_device(&ipgre_net_ops);
59a4c759 1176 if (err < 0)
c2892f02
AD
1177 return err;
1178
c5441932
PS
1179 err = register_pernet_device(&ipgre_tap_net_ops);
1180 if (err < 0)
1181 goto pnet_tap_faied;
1182
9f57c67c 1183 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1184 if (err < 0) {
058bd4d2 1185 pr_info("%s: can't add protocol\n", __func__);
c2892f02
AD
1186 goto add_proto_failed;
1187 }
7daa0004 1188
c19e654d
HX
1189 err = rtnl_link_register(&ipgre_link_ops);
1190 if (err < 0)
1191 goto rtnl_link_failed;
1192
e1a80002
HX
1193 err = rtnl_link_register(&ipgre_tap_ops);
1194 if (err < 0)
1195 goto tap_ops_failed;
1196
c5441932 1197 return 0;
c19e654d 1198
e1a80002
HX
1199tap_ops_failed:
1200 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1201rtnl_link_failed:
9f57c67c 1202 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1203add_proto_failed:
c5441932
PS
1204 unregister_pernet_device(&ipgre_tap_net_ops);
1205pnet_tap_faied:
c2892f02 1206 unregister_pernet_device(&ipgre_net_ops);
c5441932 1207 return err;
1da177e4
LT
1208}
1209
db44575f 1210static void __exit ipgre_fini(void)
1da177e4 1211{
e1a80002 1212 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1213 rtnl_link_unregister(&ipgre_link_ops);
9f57c67c 1214 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c5441932 1215 unregister_pernet_device(&ipgre_tap_net_ops);
c2892f02 1216 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1217}
1218
1219module_init(ipgre_init);
1220module_exit(ipgre_fini);
1221MODULE_LICENSE("GPL");
4d74f8ba
PM
1222MODULE_ALIAS_RTNL_LINK("gre");
1223MODULE_ALIAS_RTNL_LINK("gretap");
8909c9ad 1224MODULE_ALIAS_NETDEV("gre0");
c5441932 1225MODULE_ALIAS_NETDEV("gretap0");