/*
 *	Linux NET3: GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#ifndef USE_UPSTREAM_TUNNEL
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/kconfig.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/netdev_features.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

#include "gso.h"
#include "vport-netdev.h"

static int gre_tap_net_id __read_mostly;
static int ipgre_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index,
				bool truncate, bool is_ipv4);

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static bool ip_gre_loaded = false;

#define ip_gre_calc_hlen rpl_ip_gre_calc_hlen
static int ip_gre_calc_hlen(__be16 o_flags)
{
	int addend = 4;

	if (o_flags & TUNNEL_CSUM)
		addend += 4;
	if (o_flags & TUNNEL_KEY)
		addend += 4;
	if (o_flags & TUNNEL_SEQ)
		addend += 4;
	return addend;
}
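
/* Worked example (illustrative): a tunnel configured with
 * TUNNEL_KEY | TUNNEL_SEQ yields 4 (base header) + 4 (key) + 4 (seq)
 * = 12 bytes from ip_gre_calc_hlen(); enabling TUNNEL_CSUM as well
 * gives the RFC 2890 maximum of 16 bytes.
 */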

/* Returns the least-significant 32 bits of a __be64. */
static __be32 tunnel_id_to_key(__be64 x)
{
#ifdef __BIG_ENDIAN
	return (__force __be32)x;
#else
	return (__force __be32)((__force u64)x >> 32);
#endif
}
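
/* Illustrative note: both branches above select the low-order 32 bits
 * of the 64-bit tunnel ID while preserving network byte order. For
 * example, an ID of 1 (0x0000000000000001 on the wire) maps to the
 * __be32 key 1; the upper 32 bits are discarded.
 */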

static struct dst_ops md_dst_ops = {
	.family = AF_UNSPEC,
};

#ifndef DST_METADATA
#define DST_METADATA 0x0080
#endif

static void rpl__metadata_dst_init(struct metadata_dst *md_dst,
				   enum metadata_type type, u8 optslen)
{
	struct dst_entry *dst;

	dst = &md_dst->dst;
	dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
		 DST_METADATA | DST_NOCOUNT);

#if 0
	/* unused in OVS */
	dst->input = dst_md_discard;
	dst->output = dst_md_discard_out;
#endif
	memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
	md_dst->type = type;
}
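
/* Note (sketch): the memset above zeroes everything that follows the
 * embedded struct dst_entry -- the tunnel info and any trailing option
 * bytes -- so a freshly initialized metadata_dst starts with empty,
 * all-zero options.
 */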

static struct metadata_dst *erspan_rpl_metadata_dst_alloc(u8 optslen,
							  enum metadata_type type,
							  gfp_t flags)
{
	struct metadata_dst *md_dst;

	md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
	if (!md_dst)
		return NULL;

	rpl__metadata_dst_init(md_dst, type, optslen);

	return md_dst;
}

static inline struct metadata_dst *rpl_tun_rx_dst(int md_size)
{
	struct metadata_dst *tun_dst;

	tun_dst = erspan_rpl_metadata_dst_alloc(md_size, METADATA_IP_TUNNEL,
						GFP_ATOMIC);
	if (!tun_dst)
		return NULL;

	tun_dst->u.tun_info.options_len = 0;
	tun_dst->u.tun_info.mode = 0;
	return tun_dst;
}

static inline struct metadata_dst *rpl__ip_tun_set_dst(__be32 saddr,
						       __be32 daddr,
						       __u8 tos, __u8 ttl,
						       __be16 tp_dst,
						       __be16 flags,
						       __be64 tunnel_id,
						       int md_size)
{
	struct metadata_dst *tun_dst;

	tun_dst = rpl_tun_rx_dst(md_size);
	if (!tun_dst)
		return NULL;

	ip_tunnel_key_init(&tun_dst->u.tun_info.key,
			   saddr, daddr, tos, ttl,
			   0, 0, tp_dst, tunnel_id, flags);
	return tun_dst;
}

static inline struct metadata_dst *rpl_ip_tun_rx_dst(struct sk_buff *skb,
						     __be16 flags,
						     __be64 tunnel_id,
						     int md_size)
{
	const struct iphdr *iph = ip_hdr(skb);

	return rpl__ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
				   0, flags, tunnel_id, md_size);
}

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	struct erspan_metadata *pkt_md;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	/* Check the base header length. */
	if (unlikely(!pskb_may_pull(skb, len)))
		return PACKET_REJECT;

	iph = ip_hdr(skb);
	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
	ver = ershdr->ver;

	/* The original GRE header does not have a key field, so use
	 * the 10-bit ERSPAN session ID as the key.
	 */
	tpi->key = cpu_to_be32(get_session_id(ershdr));
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		len = gre_hdr_len + erspan_hdr_len(ver);
		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		ershdr = (struct erspan_base_hdr *)skb->data;
		pkt_md = (struct erspan_metadata *)(ershdr + 1);

		if (__iptunnel_pull_header(skb,
					   len,
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = rpl_ip_tun_rx_dst(skb, flags, tun_id,
						    sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
					    ERSPAN_V2_MDSIZE);

			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		}

		skb_reset_mac_header(skb);
		ovs_ip_tunnel_rcv(tunnel->dev, skb, tun_dst);
		kfree(tun_dst);
		return PACKET_RCVD;
	}
drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}
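
/* On-the-wire layout handled above (sketch; exact sizes come from
 * <net/erspan.h>):
 *
 *   outer IP | GRE (4-byte base + 4-byte seq) | ERSPAN base hdr | md
 *
 * The first pskb_may_pull() only guarantees the ERSPAN base header is
 * linear; once the version is known, the second pull extends that by
 * the version-specific metadata (ERSPAN_V1_MDSIZE or ERSPAN_V2_MDSIZE)
 * before the headers are stripped.
 */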

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst tun_dst;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			ovs_ip_tun_rx_dst(&tun_dst, skb, flags, tun_id, 0);
		}

		ovs_ip_tunnel_rcv(tunnel->dev, skb, &tun_dst);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect-metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct tnl_ptk_info tpi;

	tpi.flags = tunnel->parms.o_flags;
	tpi.proto = proto;
	tpi.key = tunnel->parms.o_key;
	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;
	tpi.seq = htonl(tunnel->o_seqno);

	/* Push GRE header. */
	gre_build_header(skb, &tpi, tunnel->hlen);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *unused_tpi)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto drop;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;
drop:
	kfree_skb(skb);
	return 0;
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
#include "gso.h"
/* gre_handle_offloads() has a different return type on older kernels. */
static void gre_nop_fix(struct sk_buff *skb) { }

static void gre_csum_fix(struct sk_buff *skb)
{
	struct gre_base_hdr *greh;
	__be32 *options;
	int gre_offset = skb_transport_offset(skb);

	greh = (struct gre_base_hdr *)skb_transport_header(skb);
	options = ((__be32 *)greh + 1);

	*options = 0;
	*(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
						     skb->len - gre_offset, 0));
}
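
/* Sketch of the fixup above: gre_csum_fix() runs once per GSO segment.
 * It zeroes the checksum word (the first optional 32-bit word after the
 * GRE base header) and then folds a checksum computed over the GRE
 * header and everything after it, matching the RFC 2890 checksum
 * definition.
 */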

#define gre_handle_offloads rpl_gre_handle_offloads
static int rpl_gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
{
	int type = gre_csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE;
	gso_fix_segment_t fix_segment;

	if (gre_csum)
		fix_segment = gre_csum_fix;
	else
		fix_segment = gre_nop_fix;

	return ovs_iptunnel_handle_offloads(skb, type, fix_segment);
}
#else
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum,
					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
#endif

static bool is_gre_gso(struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type &
	       (SKB_GSO_GRE | SKB_GSO_GRE_CSUM);
}

static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
			 __be16 proto, __be32 key, __be32 seq)
{
	struct gre_base_hdr *greh;

	skb_push(skb, hdr_len);

	skb_reset_transport_header(skb);
	greh = (struct gre_base_hdr *)skb->data;
	greh->flags = tnl_flags_to_gre_flags(flags);
	greh->protocol = proto;

	if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);

		if (flags & TUNNEL_SEQ) {
			*ptr = seq;
			ptr--;
		}
		if (flags & TUNNEL_KEY) {
			*ptr = key;
			ptr--;
		}
		if (flags & TUNNEL_CSUM && !is_gre_gso(skb)) {
			*ptr = 0;
			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
								 skb->len, 0));
		}
	}
	ovs_skb_set_inner_protocol(skb, proto);
}
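
/* Worked example (illustrative): with flags = TUNNEL_CSUM | TUNNEL_KEY
 * | TUNNEL_SEQ and hdr_len = 16, ptr starts at the last 32-bit word,
 * so the backwards walk above stores seq at offset 12, key at offset
 * 8, and leaves the checksum in the first optional word at offset 4 --
 * the field order mandated by RFC 2890 (checksum, key, sequence).
 */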

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
		       + tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}
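
/* Headroom sketch (assuming a plain Ethernet underlay with no extra
 * dst header): min_headroom is LL_RESERVED_SPACE(eth) + 0 +
 * tunnel_hlen + 20 bytes of outer IPv4 header; pskb_expand_head() then
 * rounds any shortfall up (plus 16 bytes of slack) so the later header
 * pushes cannot underflow the skb.
 */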

netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct flowi4 fl;
	struct rtable *rt;
	int min_headroom;
	int tunnel_hlen;
	__be16 df, flags;
	int err;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;

	rt = gre_get_rt(skb, dev, &fl, key);
	if (IS_ERR(rt))
		goto err_free_skb;

	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
		       + tunnel_hlen + sizeof(struct iphdr)
		       + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}

	if (skb_vlan_tag_present(skb)) {
		skb = __vlan_hwaccel_push_inside(skb);
		if (unlikely(!skb)) {
			err = -ENOMEM;
			goto err_free_rt;
		}
	}

	/* Push the tunnel header. */
	err = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
	if (err)
		goto err_free_rt;

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
		     tunnel_id_to_key(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return NETDEV_TX_OK;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
EXPORT_SYMBOL(rpl_gre_fb_xmit);

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	struct tnl_ptk_info tpi;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	int version;
	__be16 df;
	int nhoff;
	int thoff;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;

	/* ERSPAN uses a fixed 8-byte GRE header. */
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IP) &&
	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
		truncate = true;

	thoff = skb_transport_header(skb) - skb_mac_header(skb);
	if (skb->protocol == htons(ETH_P_IPV6) &&
	    (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
		truncate = true;

	if (version == 1) {
		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
				    ntohl(md->u.index), truncate, true);
		tpi.hdr_len = ERSPAN_V1_MDSIZE;
		tpi.proto = htons(ETH_P_ERSPAN);
	} else if (version == 2) {
		erspan_build_header_v2(skb,
				       ntohl(tunnel_id_to_key32(key->tun_id)),
				       md->u.md2.dir,
				       get_hwid(&md->u.md2),
				       truncate, true);
		tpi.hdr_len = ERSPAN_V2_MDSIZE;
		tpi.proto = htons(ETH_P_ERSPAN2);
	} else {
		goto err_free_rt;
	}

	tpi.flags = TUNNEL_SEQ;
	tpi.key = tunnel_id_to_key32(key->tun_id);
	tpi.seq = htonl(tunnel->o_seqno++);

	gre_build_header(skb, &tpi, 8);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}
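
/* Truncation rule used above (sketch): the mirrored frame is marked
 * truncated either when it exceeds dev->mtu + dev->hard_header_len and
 * is trimmed here, or when the inner IPv4/IPv6 length fields claim
 * more payload than the skb actually carries, i.e. the frame was
 * already cut before it reached us; the flag is then encoded into the
 * ERSPAN header by erspan_build_header{,_v2}().
 */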

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM |	\
			 NETIF_F_NETNS_LOCAL)

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor can we
		 * support two levels of outer headers requiring an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences.
		 */
		dev->features |= NETIF_F_LLTX;
	}
}
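
/* Feature selection above, summarized: a tunnel with TUNNEL_SEQ gets
 * neither software GSO nor lockless transmit, since sequence numbers
 * force serialized updates; without TUNNEL_SEQ, GSO is offered unless
 * checksumming is combined with an extra encapsulation layer, and
 * NETIF_F_LLTX is always set.
 */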

static int __gre_rcv(struct sk_buff *skb)
{
	return gre_rcv(skb, NULL);
}

void __gre_err(struct sk_buff *skb, u32 info)
{
	pr_warn("%s: GRE receive error\n", __func__);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = __gre_rcv,
	.err_handler = __gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
#else
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
#endif
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	return 0;
}

#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
#else
static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
#endif
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
	return ipgre_tunnel_validate(tb, data, NULL);
#else
	return ipgre_tunnel_validate(tb, data);
#endif
}

enum {
#ifndef HAVE_IFLA_GRE_ENCAP_DPORT
	IFLA_GRE_ENCAP_TYPE = IFLA_GRE_FLAGS + 1,
	IFLA_GRE_ENCAP_FLAGS,
	IFLA_GRE_ENCAP_SPORT,
	IFLA_GRE_ENCAP_DPORT,
#endif
#ifndef HAVE_IFLA_GRE_COLLECT_METADATA
	IFLA_GRE_COLLECT_METADATA = IFLA_GRE_ENCAP_DPORT + 1,
#endif
#ifndef HAVE_IFLA_GRE_IGNORE_DF
	IFLA_GRE_IGNORE_DF = IFLA_GRE_COLLECT_METADATA + 1,
#endif
#ifndef HAVE_IFLA_GRE_FWMARK
	IFLA_GRE_FWMARK = IFLA_GRE_IGNORE_DF + 1,
#endif
#ifndef HAVE_IFLA_GRE_ERSPAN_INDEX
	IFLA_GRE_ERSPAN_INDEX = IFLA_GRE_FWMARK + 1,
#endif
#ifndef HAVE_IFLA_GRE_ERSPAN_HWID
	IFLA_GRE_ERSPAN_VER = IFLA_GRE_ERSPAN_INDEX + 1,
	IFLA_GRE_ERSPAN_DIR,
	IFLA_GRE_ERSPAN_HWID,
#endif
};

#define RPL_IFLA_GRE_MAX	(IFLA_GRE_ERSPAN_HWID + 1)
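
/* Compat note (illustrative): each #ifndef above extends the kernel's
 * own IFLA_GRE_* enum past the last attribute that kernel defines. On
 * a kernel that has the encap attributes but not collect-metadata, for
 * example, IFLA_GRE_COLLECT_METADATA is assigned
 * IFLA_GRE_ENCAP_DPORT + 1 and the later values follow on from it.
 */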

#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
#else
static int erspan_validate(struct nlattr *tb[], struct nlattr *data[])
#endif
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
	ret = ipgre_tap_validate(tb, data, NULL);
#else
	ret = ipgre_tap_validate(tb, data);
#endif
	if (ret)
		return ret;

	/* ERSPAN should carry only the GRE sequence and key flags. */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits wide. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_ERSPAN_INDEX]) {
		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);

		if (t->index & ~INDEX_MASK)
			return -EINVAL;
	}

	return 0;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

	return ip_tunnel_init(dev);
}

static netdev_tx_t gre_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* Drop all packets coming from the networking stack: the OVS CB
	 * is not initialized for these packets.
	 */
	dev_kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header. */
	if (tunnel->erspan_ver == 1)
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
	else if (tunnel->erspan_ver == 2)
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);
	else
		goto free_skb;

	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t __erspan_fb_xmit(struct sk_buff *skb)
{
	erspan_fb_xmit(skb, skb->dev, skb->protocol);
	return NETDEV_TX_OK;
}

int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}
EXPORT_SYMBOL_GPL(ovs_gre_fill_metadata_dst);

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen;

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);
	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
	dev->mtu = ETH_DATA_LEN - t_hlen - 4;
	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = (struct iphdr *)__skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}
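
/* Per the header_ops->create convention (sketch): a positive return is
 * the number of header bytes pushed; the negative value returned when
 * the destination is still unknown reports the same length while
 * signalling that the header is not yet complete.
 */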

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags = IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len = 4;

	if (!tunnel->collect_md)
		dev->header_ops = &ipgre_header_ops;

	return ip_tunnel_init(dev);
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for the new headers. */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull the skb since ip_tunnel_xmit() needs skb->data
		 * pointing to the GRE header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= rpl_ip_tunnel_uninit,
	.ndo_start_xmit		= ipgre_xmit,
#ifdef HAVE_RHEL7_MAX_MTU
	.ndo_size		= sizeof(struct net_device_ops),
	.extended.ndo_change_mtu = ip_tunnel_change_mtu,
#else
	.ndo_change_mtu		= ip_tunnel_change_mtu,
#endif
	.ndo_get_stats64	= ip_tunnel_get_stats64,
#ifdef HAVE_GET_LINK_NET
	.ndo_get_iflink		= ip_tunnel_get_iflink,
#endif
};

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= rpl_ip_tunnel_uninit,
	.ndo_start_xmit		= gre_dev_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
#ifdef HAVE_RHEL7_MAX_MTU
	.ndo_size		= sizeof(struct net_device_ops),
	.extended.ndo_change_mtu = ip_tunnel_change_mtu,
#else
	.ndo_change_mtu		= ip_tunnel_change_mtu,
#endif
	.ndo_get_stats64	= ip_tunnel_get_stats64,
#ifdef HAVE_NDO_GET_IFLINK
	.ndo_get_iflink		= rpl_ip_tunnel_get_iflink,
#endif
#ifdef HAVE_NDO_FILL_METADATA_DST
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
#endif
};

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= rpl_ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
#ifdef HAVE_RHEL7_MAX_MTU
	.ndo_size		= sizeof(struct net_device_ops),
	.extended.ndo_change_mtu = ip_tunnel_change_mtu,
#else
	.ndo_change_mtu		= ip_tunnel_change_mtu,
#endif
	.ndo_get_stats64	= ip_tunnel_get_stats64,
#ifdef HAVE_NDO_GET_IFLINK
	.ndo_get_iflink		= rpl_ip_tunnel_get_iflink,
#endif
#ifdef HAVE_NDO_FILL_METADATA_DST
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
#endif
};

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops = &ipgre_netdev_ops;
	dev->type = ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
#ifdef HAVE_NET_DEVICE_MAX_MTU
	dev->max_mtu = 0;
#endif
	dev->netdev_ops = &gre_tap_netdev_ops;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static void erspan_setup(struct net_device *dev)
{
	eth_hw_addr_random(dev);
	ether_setup(dev);
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
}

#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
#else
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[])
#endif
{
	struct ip_tunnel_parm p;

	ipgre_netlink_parms(dev, data, tb, &p);
	return ip_tunnel_newlink(dev, tb, &p);
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
		goto nla_put_failure;

	if (t->erspan_ver == 1) {
		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
			goto nla_put_failure;
	} else if (t->erspan_ver == 2) {
		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
			goto nla_put_failure;
		if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[RPL_IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= RPL_IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
#ifdef HAVE_GET_LINK_NET
	.get_link_net	= ip_tunnel_get_link_net,
#endif
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "ovs_gretap",
	.maxtype	= RPL_IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
#ifdef HAVE_GET_LINK_NET
	.get_link_net	= ip_tunnel_get_link_net,
#endif
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= RPL_IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
#ifdef HAVE_GET_LINK_NET
	.get_link_net	= ip_tunnel_get_link_net,
#endif
};

struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name,
					    u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, (char *)name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	t = netdev_priv(dev);
	t->collect_md = true;
	/* Configure the flow-based GRE device. */
#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
	err = ipgre_newlink(net, dev, tb, NULL, NULL);
#else
	err = ipgre_newlink(net, dev, tb, NULL);
#endif
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(rpl_gretap_fb_dev_create);
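
/* Usage sketch (hypothetical caller): the vport layer creates the
 * flow-based device under rtnl_lock and brings it up, e.g.:
 *
 *	dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
 *	if (!IS_ERR(dev))
 *		err = dev_change_flags(dev, dev->flags | IFF_UP);
 *
 * which is the pattern ipgre_tnl_create() follows further down.
 */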

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, NULL);
}

static void __net_exit erspan_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);

	ip_tunnel_delete_net(itn, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit = erspan_exit_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static struct net_device *erspan_fb_dev_create(struct net *net,
					       const char *name,
					       u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, (char *)name, name_assign_type,
			       &erspan_link_ops, tb);
	if (IS_ERR(dev))
		return dev;

	t = netdev_priv(dev);
	t->collect_md = true;
	/* Configure the flow-based GRE device. */
#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
	err = ipgre_newlink(net, dev, tb, NULL, NULL);
#else
	err = ipgre_newlink(net, dev, tb, NULL);
#endif
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}

static struct vport_ops ovs_erspan_vport_ops;

static struct vport *erspan_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct net_device *dev;
	struct vport *vport;
	int err;

	vport = ovs_vport_alloc(0, &ovs_erspan_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	rtnl_lock();
	dev = erspan_fb_dev_create(net, parms->name, NET_NAME_USER);
	if (IS_ERR(dev)) {
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_CAST(dev);
	}

	err = dev_change_flags(dev, dev->flags | IFF_UP);
	if (err < 0) {
		rtnl_delete_link(dev);
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_PTR(err);
	}

	rtnl_unlock();
	return vport;
}

static struct vport *erspan_create(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = erspan_tnl_create(parms);
	if (IS_ERR(vport))
		return vport;

	return ovs_netdev_link(vport, parms->name);
}

static struct vport_ops ovs_erspan_vport_ops = {
	.type		= OVS_VPORT_TYPE_ERSPAN,
	.create		= erspan_create,
	.send		= __erspan_fb_xmit,
#ifndef USE_UPSTREAM_TUNNEL
	.fill_metadata_dst = gre_fill_metadata_dst,
#endif
	.destroy	= ovs_netdev_tunnel_destroy,
};

static struct vport_ops ovs_ipgre_vport_ops;

static struct vport *ipgre_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct net_device *dev;
	struct vport *vport;
	int err;

	vport = ovs_vport_alloc(0, &ovs_ipgre_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	rtnl_lock();
	dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
	if (IS_ERR(dev)) {
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_CAST(dev);
	}

	err = dev_change_flags(dev, dev->flags | IFF_UP);
	if (err < 0) {
		rtnl_delete_link(dev);
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_PTR(err);
	}

	rtnl_unlock();
	return vport;
}

static struct vport *ipgre_create(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = ipgre_tnl_create(parms);
	if (IS_ERR(vport))
		return vport;

	return ovs_netdev_link(vport, parms->name);
}

static struct vport_ops ovs_ipgre_vport_ops = {
	.type		= OVS_VPORT_TYPE_GRE,
	.create		= ipgre_create,
	.send		= gre_fb_xmit,
#ifndef USE_UPSTREAM_TUNNEL
	.fill_metadata_dst = gre_fill_metadata_dst,
#endif
	.destroy	= ovs_netdev_tunnel_destroy,
};

int rpl_ipgre_init(void)
{
	int err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0) {
		if (err == -EEXIST)
			goto ip_gre_loaded;
		else
			goto pnet_tap_failed;
	}

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0) {
		if (err == -EEXIST)
			goto ip_gre_loaded;
		else
			goto pnet_erspan_failed;
	}

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0) {
		if (err == -EEXIST)
			goto ip_gre_loaded;
		else
			goto pnet_ipgre_failed;
	}

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		if (err == -EBUSY)
			goto ip_gre_loaded;
		else
			goto add_proto_failed;
	}

	pr_info("GRE over IPv4 tunneling driver\n");
	ovs_vport_ops_register(&ovs_ipgre_vport_ops);
	ovs_vport_ops_register(&ovs_erspan_vport_ops);
	return 0;

ip_gre_loaded:
	/* Since GRE allows only a single receiver to be registered,
	 * we skip the receive hook here so that only GRE transmit
	 * works; see:
	 *
	 * commit 9f57c67c379d88a10e8ad676426fee5ae7341b14
	 * Author: Pravin B Shelar <pshelar@nicira.com>
	 * Date:   Fri Aug 7 23:51:52 2015 -0700
	 *     gre: Remove support for sharing GRE protocol hook
	 *
	 * The OVS GRE receive part is disabled in this case.
	 */
	pr_info("GRE TX only over IPv4 tunneling driver\n");
	ip_gre_loaded = true;
	ovs_vport_ops_register(&ovs_ipgre_vport_ops);
	ovs_vport_ops_register(&ovs_erspan_vport_ops);
	return 0;

add_proto_failed:
	unregister_pernet_device(&ipgre_net_ops);
pnet_ipgre_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	pr_err("Error while initializing GRE %d\n", err);
	return err;
}

void rpl_ipgre_fini(void)
{
	ovs_vport_ops_unregister(&ovs_erspan_vport_ops);
	ovs_vport_ops_unregister(&ovs_ipgre_vport_ops);

	if (!ip_gre_loaded) {
		gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
		unregister_pernet_device(&ipgre_net_ops);
		unregister_pernet_device(&erspan_net_ops);
		unregister_pernet_device(&ipgre_tap_net_ops);
	}
}

#endif