]> git.proxmox.com Git - ovs.git/blame - datapath/linux/compat/ip_gre.c
rhel: Enable ERSPAN features for RHEL 7.x
[ovs.git] / datapath / linux / compat / ip_gre.c
CommitLineData
e23775f2
PS
1/*
2 * Linux NET3: GRE over IP protocol decoder.
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
b66081f3 15#ifndef USE_UPSTREAM_TUNNEL
e23775f2
PS
16#include <linux/capability.h>
17#include <linux/module.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/kconfig.h>
21#include <linux/slab.h>
22#include <asm/uaccess.h>
23#include <linux/skbuff.h>
24#include <linux/netdevice.h>
25#include <linux/netdev_features.h>
26#include <linux/in.h>
27#include <linux/tcp.h>
28#include <linux/udp.h>
29#include <linux/if_arp.h>
30#include <linux/mroute.h>
31#include <linux/if_vlan.h>
32#include <linux/init.h>
33#include <linux/in6.h>
34#include <linux/inetdevice.h>
35#include <linux/igmp.h>
36#include <linux/netfilter_ipv4.h>
37#include <linux/etherdevice.h>
38#include <linux/if_ether.h>
39
40#include <net/sock.h>
41#include <net/ip.h>
42#include <net/icmp.h>
43#include <net/protocol.h>
44#include <net/ip_tunnels.h>
45#include <net/arp.h>
46#include <net/checksum.h>
47#include <net/dsfield.h>
48#include <net/inet_ecn.h>
49#include <net/xfrm.h>
50#include <net/net_namespace.h>
51#include <net/netns/generic.h>
52#include <net/rtnetlink.h>
53#include <net/gre.h>
54#include <net/dst_metadata.h>
8e53509c 55#include <net/erspan.h>
e23775f2 56
e23775f2
PS
57#if IS_ENABLED(CONFIG_IPV6)
58#include <net/ipv6.h>
59#include <net/ip6_fib.h>
60#include <net/ip6_route.h>
61#endif
62
63#include "gso.h"
64#include "vport-netdev.h"
65
66static int gre_tap_net_id __read_mostly;
8e53509c
WT
67static int ipgre_net_id __read_mostly;
68static unsigned int erspan_net_id __read_mostly;
29a29457
WT
69static void erspan_build_header(struct sk_buff *skb,
70 __be32 id, u32 index,
71 bool truncate, bool is_ipv4);
8e53509c
WT
72
73static struct rtnl_link_ops ipgre_link_ops __read_mostly;
e23775f2
PS
74
75#define ip_gre_calc_hlen rpl_ip_gre_calc_hlen
76static int ip_gre_calc_hlen(__be16 o_flags)
77{
78 int addend = 4;
79
80 if (o_flags & TUNNEL_CSUM)
81 addend += 4;
82 if (o_flags & TUNNEL_KEY)
83 addend += 4;
84 if (o_flags & TUNNEL_SEQ)
85 addend += 4;
86 return addend;
87}
88
8e53509c
WT
/* Returns the least-significant 32 bits of a __be64.
 * On big-endian the low 32 bits of the stored value are the last four
 * bytes, so a plain truncating cast suffices; on little-endian the last
 * four bytes of the big-endian value sit in the high half, hence the
 * shift.  Mirrors upstream tunnel_id_to_key32().
 */
static __be32 tunnel_id_to_key(__be64 x)
{
#ifdef __BIG_ENDIAN
	return (__force __be32)x;
#else
	return (__force __be32)((__force u64)x >> 32);
#endif
}
e23775f2 98
8e53509c
WT
/* Called with rcu_read_lock and BH disabled.
 * ICMP errors for GRE tunnels are not handled by this compat layer;
 * always reject so the stack's default error processing applies.
 */
static int gre_err(struct sk_buff *skb, u32 info,
		   const struct tnl_ptk_info *tpi)
{
	return PACKET_REJECT;
}
105
8e53509c
WT
/* Minimal dst_ops for metadata dsts; AF_UNSPEC because these entries
 * never participate in routing decisions.
 */
static struct dst_ops md_dst_ops = {
	.family = AF_UNSPEC,
};

/* Older kernels may lack the DST_METADATA flag definition. */
#ifndef DST_METADATA
#define DST_METADATA 0x0080
#endif
113
/* Backported __metadata_dst_init(): initialize the embedded dst_entry of
 * @md_dst and zero everything that follows it (tunnel info plus @optslen
 * option bytes), then record the metadata @type.
 */
static void rpl__metadata_dst_init(struct metadata_dst *md_dst,
				   enum metadata_type type, u8 optslen)

{
	struct dst_entry *dst;

	dst = &md_dst->dst;
	dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
		 DST_METADATA | DST_NOCOUNT);

#if 0
	/* unused in OVS */
	dst->input = dst_md_discard;
	dst->output = dst_md_discard_out;
#endif
	/* Zero from just past the dst_entry to the end of the allocation:
	 * sizeof(*md_dst) + optslen total bytes minus the dst itself.
	 */
	memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
	md_dst->type = type;
}
132
8e53509c
WT
133static struct metadata_dst *erspan_rpl_metadata_dst_alloc(u8 optslen, enum metadata_type type,
134 gfp_t flags)
e23775f2 135{
8e53509c
WT
136 struct metadata_dst *md_dst;
137
138 md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
139 if (!md_dst)
140 return NULL;
141
142 rpl__metadata_dst_init(md_dst, type, optslen);
143
144 return md_dst;
145}
146static inline struct metadata_dst *rpl_tun_rx_dst(int md_size)
147{
148 struct metadata_dst *tun_dst;
149
150 tun_dst = erspan_rpl_metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC);
151 if (!tun_dst)
152 return NULL;
153
154 tun_dst->u.tun_info.options_len = 0;
155 tun_dst->u.tun_info.mode = 0;
156 return tun_dst;
157}
158static inline struct metadata_dst *rpl__ip_tun_set_dst(__be32 saddr,
159 __be32 daddr,
160 __u8 tos, __u8 ttl,
161 __be16 tp_dst,
162 __be16 flags,
163 __be64 tunnel_id,
164 int md_size)
165{
166 struct metadata_dst *tun_dst;
167
168 tun_dst = rpl_tun_rx_dst(md_size);
169 if (!tun_dst)
170 return NULL;
171
172 ip_tunnel_key_init(&tun_dst->u.tun_info.key,
173 saddr, daddr, tos, ttl,
174 0, 0, tp_dst, tunnel_id, flags);
175 return tun_dst;
e23775f2
PS
176}
177
8e53509c
WT
178static inline struct metadata_dst *rpl_ip_tun_rx_dst(struct sk_buff *skb,
179 __be16 flags,
180 __be64 tunnel_id,
181 int md_size)
182{
183 const struct iphdr *iph = ip_hdr(skb);
184
185 return rpl__ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
186 0, flags, tunnel_id, md_size);
187}
188
/* Receive one ERSPAN-over-GRE packet.  Validates the ERSPAN base header,
 * looks up the tunnel by outer addresses (ERSPAN reuses the 10-bit
 * session id as the GRE key), strips the GRE+ERSPAN headers and, for
 * collect_md tunnels, attaches the ERSPAN metadata before handing the
 * packet to OVS.  Returns PACKET_RCVD (skb consumed) or PACKET_REJECT
 * (caller keeps ownership of skb).
 */
static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	struct erspan_metadata *pkt_md;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	/* Check based hdr len */
	if (unlikely(!pskb_may_pull(skb, len)))
		return PACKET_REJECT;

	iph = ip_hdr(skb);
	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
	ver = ershdr->ver;

	/* The original GRE header does not have key field,
	 * Use ERSPAN 10-bit session ID as key.
	 */
	tpi->key = cpu_to_be32(get_session_id(ershdr));
	/* OVS doesn't set tunnel key - so don't bother with it */
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags,
				  iph->saddr, iph->daddr, 0);

	if (tunnel) {
		/* Re-pull for the version-specific metadata. */
		len = gre_hdr_len + erspan_hdr_len(ver);
		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		/* NOTE(review): ershdr is recomputed without the
		 * gre_hdr_len offset here, unlike the first computation
		 * above — verify pkt_md really points at the ERSPAN
		 * metadata for the non-demux (gre_hdr_len != 0) path.
		 */
		ershdr = (struct erspan_base_hdr *)skb->data;
		pkt_md = (struct erspan_metadata *)(ershdr + 1);

		/* NOTE(review): pkt_md is read after the pull below;
		 * pskb pulls only advance skb->data so the bytes remain
		 * valid, but upstream later moved the read after the
		 * pull for safety — confirm against current upstream.
		 */
		if (__iptunnel_pull_header(skb,
					   len,
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = rpl_ip_tun_rx_dst(skb, flags, tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			/* Copy the version-specific metadata (index for
			 * v1, md2 for v2) into the tunnel options.
			 */
			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
					    ERSPAN_V2_MDSIZE);

			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		}

		skb_reset_mac_header(skb);
		/* ovs_ip_tunnel_rcv() copies the metadata; free our
		 * temporary dst afterwards (kfree(NULL) is a no-op).
		 */
		ovs_ip_tunnel_rcv(tunnel->dev, skb, tun_dst);
		kfree(tun_dst);
		return PACKET_RCVD;
	}
drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}
271
272
273static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
274 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
275{
276 struct metadata_dst tun_dst;
277 const struct iphdr *iph;
278 struct ip_tunnel *tunnel;
279
280 iph = ip_hdr(skb);
281 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
282 iph->saddr, iph->daddr, tpi->key);
283
284 if (tunnel) {
285 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
286 raw_proto, false) < 0)
287 goto drop;
288
289 if (tunnel->dev->type != ARPHRD_NONE)
290 skb_pop_mac_header(skb);
291 else
292 skb_reset_mac_header(skb);
293 if (tunnel->collect_md) {
294 __be16 flags;
295 __be64 tun_id;
296
297 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
298 tun_id = key32_to_tunnel_id(tpi->key);
299 ovs_ip_tun_rx_dst(&tun_dst, skb, flags, tun_id, 0);
e23775f2
PS
300 }
301
302 ovs_ip_tunnel_rcv(tunnel->dev, skb, &tun_dst);
303 return PACKET_RCVD;
304 }
8e53509c
WT
305 return PACKET_NEXT;
306
307drop:
308 kfree_skb(skb);
309 return PACKET_RCVD;
310}
311
312
313static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
314 int hdr_len)
315{
316 struct net *net = dev_net(skb->dev);
317 struct ip_tunnel_net *itn;
318 int res;
319
320 if (tpi->proto == htons(ETH_P_TEB))
321 itn = net_generic(net, gre_tap_net_id);
322 else
323 itn = net_generic(net, ipgre_net_id);
324
325 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
326 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
327 /* ipgre tunnels in collect metadata mode should receive
328 * also ETH_P_TEB traffic.
329 */
330 itn = net_generic(net, ipgre_net_id);
331 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
332 }
333 return res;
e23775f2
PS
334}
335
8e53509c
WT
/* Build the GRE header from the tunnel's configured output flags/key,
 * bump the output sequence number when TUNNEL_SEQ is set, and pass the
 * packet to the generic IP tunnel transmit path.
 */
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct tnl_ptk_info tpi;

	tpi.flags = tunnel->parms.o_flags;
	tpi.proto = proto;
	tpi.key = tunnel->parms.o_key;
	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;
	tpi.seq = htonl(tunnel->o_seqno);

	/* Push GRE header. */
	gre_build_header(skb, &tpi, tunnel->hlen);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
355
/* GRE protocol receive hook.  Two variants: when the kernel's GRE demux
 * does not parse the header for us, parse it here; otherwise the demux
 * already pulled the GRE header and supplies the parsed tpi (hdr_len 0).
 * ERSPAN packets are diverted to erspan_rcv(); everything else goes to
 * ipgre_rcv().  Always returns 0; the skb is freed on any reject path.
 */
#ifndef HAVE_DEMUX_PARSE_GRE_HEADER
static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *unused_tpi)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto drop;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;
drop:

	kfree_skb(skb);
	return 0;
}
#else
static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *__tpi)
{
	/* Local copy: erspan_rcv() mutates tpi (key/flags). */
	struct tnl_ptk_info tpi = *__tpi;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, 0) == PACKET_RCVD)
			return 0;
		goto drop;
	}

	if (ipgre_rcv(skb, &tpi, 0) == PACKET_RCVD)
		return 0;
drop:

	kfree_skb(skb);
	return 0;
}
#endif
e23775f2 401
cf5789aa
PS
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
/* gre_handle_offloads() has a different return type on older kernels, so
 * provide a replacement built on ovs_iptunnel_handle_offloads() with
 * per-segment fixup callbacks.
 */
static void gre_nop_fix(struct sk_buff *skb) { }

/* Recompute the GRE checksum of a software-segmented skb: the checksum
 * field is the first 32-bit word after the base header.
 */
static void gre_csum_fix(struct sk_buff *skb)
{
	struct gre_base_hdr *greh;
	__be32 *options;
	int gre_offset = skb_transport_offset(skb);

	greh = (struct gre_base_hdr *)skb_transport_header(skb);
	options = ((__be32 *)greh + 1);

	*options = 0;
	*(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
						     skb->len - gre_offset, 0));
}

static bool is_gre_gso(struct sk_buff *skb)
{
	return skb_is_gso(skb);
}

#define gre_handle_offloads rpl_gre_handle_offloads
static int rpl_gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
{
	int type = gre_csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE;
	gso_fix_segment_t fix_segment;

	if (gre_csum)
		fix_segment = gre_csum_fix;
	else
		fix_segment = gre_nop_fix;

	return ovs_iptunnel_handle_offloads(skb, type, fix_segment);
}
#else

static bool is_gre_gso(struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type &
	       (SKB_GSO_GRE | SKB_GSO_GRE_CSUM);
}

/* Newer kernels: delegate to the native helper, but refuse GSO on an
 * already-encapsulated skb (two levels of outer headers unsupported).
 */
static int rpl_gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
{
	if (skb_is_gso(skb) && skb_is_encapsulated(skb))
		return -ENOSYS;

#undef gre_handle_offloads
	return gre_handle_offloads(skb, gre_csum);
}
#endif
455
/* Push a GRE header of @hdr_len bytes onto @skb and fill it in.  The
 * optional fields are written back-to-front: @ptr starts at the last
 * 32-bit word of the header (seq, if present, occupies it) and walks
 * toward the base header, so the resulting layout is csum, key, seq in
 * RFC order.  The checksum is deferred to GSO for GSO skbs.
 */
static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
			 __be16 proto, __be32 key, __be32 seq)
{
	struct gre_base_hdr *greh;

	skb_push(skb, hdr_len);

	skb_reset_transport_header(skb);
	greh = (struct gre_base_hdr *)skb->data;
	greh->flags = tnl_flags_to_gre_flags(flags);
	greh->protocol = proto;

	if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);

		if (flags & TUNNEL_SEQ) {
			*ptr = seq;
			ptr--;
		}
		if (flags & TUNNEL_KEY) {
			*ptr = key;
			ptr--;
		}
		/* Checksum covers the GRE header and payload; for GSO
		 * skbs it is filled per-segment by the offload fixup.
		 */
		if (flags & TUNNEL_CSUM && !is_gre_gso(skb)) {
			*ptr = 0;
			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
								 skb->len, 0));
		}
	}
	ovs_skb_set_inner_protocol(skb, proto);
}
487
aad7cb91
PS
488static struct rtable *gre_get_rt(struct sk_buff *skb,
489 struct net_device *dev,
490 struct flowi4 *fl,
491 const struct ip_tunnel_key *key)
492{
493 struct net *net = dev_net(dev);
494
495 memset(fl, 0, sizeof(*fl));
496 fl->daddr = key->u.ipv4.dst;
497 fl->saddr = key->u.ipv4.src;
498 fl->flowi4_tos = RT_TOS(key->tos);
499 fl->flowi4_mark = skb->mark;
500 fl->flowi4_proto = IPPROTO_GRE;
501
502 return ip_route_output_key(net, fl);
503}
e23775f2 504
d8433b88
WT
/* Common flow-based transmit preparation: resolve (or fetch from the
 * per-tunnel-info dst cache) the output route and make sure the skb has
 * enough headroom for the tunnel headers.  On failure the skb is freed,
 * tx_dropped is bumped and NULL is returned; on success the caller owns
 * the returned route.
 */
static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	/* Outer link-layer + outer IP + tunnel headers must fit. */
	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}
552
e23775f2
PS
/* Flow-based (lightweight / collect_md) GRE transmit.  Routes by the
 * per-packet tunnel metadata, ensures headroom, handles GSO offload,
 * builds the GRE header and emits the packet.  Always returns
 * NETDEV_TX_OK; on error the skb is freed and tx_dropped is bumped.
 */
netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct flowi4 fl;
	struct rtable *rt;
	int min_headroom;
	int tunnel_hlen;
	__be16 df, flags;
	int err;

	/* Only packets carrying IPv4 TX tunnel metadata can be sent. */
	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;

	rt = gre_get_rt(skb, dev, &fl, key);
	if (IS_ERR(rt))
		goto err_free_skb;

	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);

	/* Account for possible VLAN tag re-insertion below. */
	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr)
			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}

	skb = vlan_hwaccel_push_inside(skb);
	if (unlikely(!skb)) {
		err = -ENOMEM;
		goto err_free_rt;
	}

	/* Push Tunnel header. */
	err = rpl_gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
	if (err)
		goto err_free_rt;

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
		     tunnel_id_to_key(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return NETDEV_TX_OK;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
EXPORT_SYMBOL(rpl_gre_fb_xmit);
619
8e53509c
WT
620static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
621 __be16 proto)
622{
623 struct ip_tunnel *tunnel = netdev_priv(dev);
624 struct ip_tunnel_info *tun_info;
625 const struct ip_tunnel_key *key;
626 struct erspan_metadata *md;
627 struct rtable *rt = NULL;
628 struct tnl_ptk_info tpi;
629 bool truncate = false;
630 struct flowi4 fl;
631 int tunnel_hlen;
632 int version;
633 __be16 df;
c7d95a7c 634 int nhoff;
f1bc8ecd 635 int thoff;
8e53509c
WT
636
637 tun_info = skb_tunnel_info(skb);
638 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
639 ip_tunnel_info_af(tun_info) != AF_INET))
640 goto err_free_skb;
641
642 key = &tun_info->key;
643 md = ip_tunnel_info_opts(tun_info);
644 if (!md)
645 goto err_free_rt;
646
647 /* ERSPAN has fixed 8 byte GRE header */
648 version = md->version;
649 tunnel_hlen = 8 + erspan_hdr_len(version);
650
651 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
652 if (!rt)
653 return;
654
655 if (gre_handle_offloads(skb, false))
656 goto err_free_rt;
657
658 if (skb->len > dev->mtu + dev->hard_header_len) {
659 pskb_trim(skb, dev->mtu + dev->hard_header_len);
660 truncate = true;
661 }
662
c7d95a7c
WT
663 nhoff = skb_network_header(skb) - skb_mac_header(skb);
664 if (skb->protocol == htons(ETH_P_IP) &&
665 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
666 truncate = true;
667
f1bc8ecd
WT
668 thoff = skb_transport_header(skb) - skb_mac_header(skb);
669 if (skb->protocol == htons(ETH_P_IPV6) &&
670 (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
671 truncate = true;
672
8e53509c
WT
673 if (version == 1) {
674 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
675 ntohl(md->u.index), truncate, true);
676 tpi.hdr_len = ERSPAN_V1_MDSIZE;
677 tpi.proto = htons(ETH_P_ERSPAN);
678 } else if (version == 2) {
679 erspan_build_header_v2(skb,
680 ntohl(tunnel_id_to_key32(key->tun_id)),
681 md->u.md2.dir,
682 get_hwid(&md->u.md2),
683 truncate, true);
684 tpi.hdr_len = ERSPAN_V2_MDSIZE;
685 tpi.proto = htons(ETH_P_ERSPAN2);
686 } else {
687 goto err_free_rt;
688 }
689
690 tpi.flags = TUNNEL_SEQ;
691 tpi.key = tunnel_id_to_key32(key->tun_id);
692 tpi.seq = htonl(tunnel->o_seqno++);
693
694 gre_build_header(skb, &tpi, 8);
695
696 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
697
698 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
699 key->tos, key->ttl, df, false);
700 return;
701
702err_free_rt:
703 ip_rt_put(rt);
704err_free_skb:
705 kfree_skb(skb);
706 dev->stats.tx_dropped++;
707}
708
e23775f2
PS
/* Feature flags shared by all GRE-family devices created here. */
#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM |	\
			 NETIF_F_NETNS_LOCAL)

/* Common initialization for ipgre/gretap devices: compute header sizes
 * from the configured output flags and advertise offload features that
 * the configuration permits.
 */
static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	/* NOTE(review): t_hlen is computed but not used in this compat
	 * variant (upstream uses it for needed_headroom/mtu) — confirm
	 * intentional.
	 */
	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 * an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}
748
e23775f2
PS
/* Registration record for the GRE demux: our receive and error hooks. */
static struct gre_cisco_protocol ipgre_protocol = {
	.handler        = gre_rcv,
	.err_handler    = gre_err,
	.priority       = 1,
};
754
8e53509c
WT
/* Per-netns setup/teardown of the ipgre tunnel table. */
static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
773
e23775f2
PS
774static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
775{
776 __be16 flags;
777
778 if (!data)
779 return 0;
780
781 flags = 0;
782 if (data[IFLA_GRE_IFLAGS])
783 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
784 if (data[IFLA_GRE_OFLAGS])
785 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
786 if (flags & (GRE_VERSION|GRE_ROUTING))
787 return -EINVAL;
788
789 return 0;
790}
791
792static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
793{
794 __be32 daddr;
795
796 if (tb[IFLA_ADDRESS]) {
797 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
798 return -EINVAL;
799 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
800 return -EADDRNOTAVAIL;
801 }
802
803 if (!data)
804 goto out;
805
806 if (data[IFLA_GRE_REMOTE]) {
807 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
808 if (!daddr)
809 return -EINVAL;
810 }
811
812out:
813 return ipgre_tunnel_validate(tb, data);
814}
815
/* Compat definitions for IFLA_GRE_* attributes missing from older kernel
 * headers.  Each group continues numbering from the newest attribute the
 * running kernel's headers do provide, so the values match upstream.
 */
enum {
#ifndef HAVE_IFLA_GRE_ENCAP_DPORT
	IFLA_GRE_ENCAP_TYPE = IFLA_GRE_FLAGS + 1,
	IFLA_GRE_ENCAP_FLAGS,
	IFLA_GRE_ENCAP_SPORT,
	IFLA_GRE_ENCAP_DPORT,
#endif
#ifndef HAVE_IFLA_GRE_COLLECT_METADATA
	IFLA_GRE_COLLECT_METADATA = IFLA_GRE_ENCAP_DPORT + 1,
#endif
#ifndef HAVE_IFLA_GRE_IGNORE_DF
	IFLA_GRE_IGNORE_DF = IFLA_GRE_COLLECT_METADATA + 1,
#endif
#ifndef HAVE_IFLA_GRE_FWMARK
	IFLA_GRE_FWMARK = IFLA_GRE_IGNORE_DF + 1,
#endif
#ifndef HAVE_IFLA_GRE_ERSPAN_INDEX
	IFLA_GRE_ERSPAN_INDEX = IFLA_GRE_FWMARK + 1,
#endif
#ifndef HAVE_IFLA_GRE_ERSPAN_HWID
	IFLA_GRE_ERSPAN_VER = IFLA_GRE_ERSPAN_INDEX + 1,
	IFLA_GRE_ERSPAN_DIR,
	IFLA_GRE_ERSPAN_HWID,
#endif
};

/* Highest GRE attribute we know about, for netlink policy sizing. */
#define RPL_IFLA_GRE_MAX	(IFLA_GRE_ERSPAN_HWID + 1)
844static int erspan_validate(struct nlattr *tb[], struct nlattr *data[])
845{
846 __be16 flags = 0;
847 int ret;
848
849 if (!data)
850 return 0;
851
852 ret = ipgre_tap_validate(tb, data);
853 if (ret)
854 return ret;
855
856 /* ERSPAN should only have GRE sequence and key flag */
5bff8644
WT
857 if (data[IFLA_GRE_OFLAGS])
858 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
859 if (data[IFLA_GRE_IFLAGS])
860 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
861 if (!data[IFLA_GRE_COLLECT_METADATA] &&
862 flags != (GRE_SEQ | GRE_KEY))
8e53509c
WT
863 return -EINVAL;
864
865 /* ERSPAN Session ID only has 10-bit. Since we reuse
866 * 32-bit key field as ID, check it's range.
867 */
8e53509c
WT
868 if (data[IFLA_GRE_OKEY] &&
869 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
870 return -EINVAL;
871
872 return 0;
873}
874
/* Translate netlink attributes into ip_tunnel_parm and per-tunnel state.
 * Also flips the device type to ARPHRD_NONE for collect-metadata tunnels
 * and rejects mutually-exclusive PMTU-discovery / ignore-DF settings.
 * Returns 0 or -EINVAL.
 */
static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	/* PMTU discovery defaults to on; it conflicts with ignore_df. */
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		/* Metadata tunnels have no link-layer header. */
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	/* ERSPAN v1 index must fit the 20-bit field. */
	if (data[IFLA_GRE_ERSPAN_INDEX]) {
		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);

		if (t->index & ~INDEX_MASK)
			return -EINVAL;
	}

	return 0;
}
944
945static int gre_tap_init(struct net_device *dev)
946{
947 __gre_tunnel_init(dev);
948 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
949
950 return ip_tunnel_init(dev);
951}
952
953static netdev_tx_t gre_dev_xmit(struct sk_buff *skb, struct net_device *dev)
954{
955 /* Drop All packets coming from networking stack. OVS-CB is
956 * not initialized for these packets.
957 */
958
959 dev_kfree_skb(skb);
960 dev->stats.tx_dropped++;
961 return NETDEV_TX_OK;
962}
963
8e53509c
WT
/* ndo_start_xmit for erspan devices.  collect_md tunnels take the
 * flow-based path; otherwise build the ERSPAN header from the device's
 * static configuration and transmit via __gre_xmit().
 */
static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	/* NOTE(review): any erspan_ver other than 1 falls into the v2
	 * builder here — confirm erspan_ver is validated elsewhere.
	 */
	if (tunnel->erspan_ver == 1)
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
	else
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);

	/* The session id is carried in the ERSPAN header, not as a GRE
	 * key field.
	 */
	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
1005
1006static netdev_tx_t __erspan_fb_xmit(struct sk_buff *skb)
1007{
1008 erspan_fb_xmit(skb, skb->dev, skb->protocol);
1009 return NETDEV_TX_OK;
1010}
1011
aad7cb91
PS
1012int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
1013{
1014 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1015 struct rtable *rt;
1016 struct flowi4 fl4;
1017
1018 if (ip_tunnel_info_af(info) != AF_INET)
1019 return -EINVAL;
1020
1021 rt = gre_get_rt(skb, dev, &fl4, &info->key);
1022 if (IS_ERR(rt))
1023 return PTR_ERR(rt);
1024
1025 ip_rt_put(rt);
1026 info->key.u.ipv4.src = fl4.saddr;
1027 return 0;
1028}
1029EXPORT_SYMBOL_GPL(ovs_gre_fill_metadata_dst);
1030
8e53509c
WT
/* ndo_init for erspan devices: header sizes include the fixed 8-byte
 * GRE header (seq + key) plus the version-specific ERSPAN header.
 */
static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen;

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);
	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	/* +4 leaves room for a possible VLAN tag. */
	dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
	dev->mtu = ETH_DATA_LEN - t_hlen - 4;
	dev->features |= GRE_FEATURES;
	dev->hw_features |= GRE_FEATURES;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}
1051
/* header_ops->create for ipgre devices: push the outer IP + GRE headers
 * from the tunnel's configured parameters, overriding the addresses with
 * @saddr/@daddr when given.  Returns the header length when the
 * destination is known, or its negation when it must be filled in later
 * (standard header_ops convention).
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = (struct iphdr *)__skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph+1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}
1077
/* header_ops->parse: the "hardware address" of an ipgre device is the
 * 4-byte outer IPv4 source address.
 */
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
1089
1090static int ipgre_tunnel_init(struct net_device *dev)
1091{
1092 struct ip_tunnel *tunnel = netdev_priv(dev);
1093 struct iphdr *iph = &tunnel->parms.iph;
1094
1095 __gre_tunnel_init(dev);
1096
1097 memcpy(dev->dev_addr, &iph->saddr, 4);
1098 memcpy(dev->broadcast, &iph->daddr, 4);
1099
1100 dev->flags = IFF_NOARP;
1101 netif_keep_dst(dev);
1102 dev->addr_len = 4;
1103
1104 if (!tunnel->collect_md) {
1105 dev->header_ops = &ipgre_header_ops;
1106 }
1107
1108 return ip_tunnel_init(dev);
1109}
1110
/* ndo_start_xmit for ipgre devices.  collect_md tunnels take the
 * flow-based path; devices with header_ops already carry the outer
 * IP+GRE headers (pushed by ipgre_header()), which are used as the
 * template and then pulled before transmission.
 */
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
1153
1154static const struct net_device_ops ipgre_netdev_ops = {
1155 .ndo_init = ipgre_tunnel_init,
1156 .ndo_uninit = rpl_ip_tunnel_uninit,
1157 .ndo_start_xmit = ipgre_xmit,
1158 .ndo_change_mtu = ip_tunnel_change_mtu,
1159 .ndo_get_stats64 = ip_tunnel_get_stats64,
1160#ifdef HAVE_GET_LINK_NET
1161 .ndo_get_iflink = ip_tunnel_get_iflink,
1162#endif
1163};
1164
e23775f2
PS
/* net_device_ops for the flow-based gretap (L2 GRE) device.  On RHEL
 * 7.5+ kernels ndo_change_mtu lives in net_device_ops_extended, hence
 * the HAVE_RHEL7_MAX_MTU guard.
 */
static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= rpl_ip_tunnel_uninit,
	.ndo_start_xmit		= gre_dev_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
#ifdef HAVE_RHEL7_MAX_MTU
	.extended.ndo_change_mtu = ip_tunnel_change_mtu,
#else
	.ndo_change_mtu		= ip_tunnel_change_mtu,
#endif
	.ndo_get_stats64	= rpl_ip_tunnel_get_stats64,
#ifdef HAVE_NDO_GET_IFLINK
	.ndo_get_iflink		= rpl_ip_tunnel_get_iflink,
#endif
#ifdef HAVE_NDO_FILL_METADATA_DST
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
#endif
};
1184
8e53509c
WT
1185static const struct net_device_ops erspan_netdev_ops = {
1186 .ndo_init = erspan_tunnel_init,
1187 .ndo_uninit = rpl_ip_tunnel_uninit,
1188 .ndo_start_xmit = erspan_xmit,
1189 .ndo_set_mac_address = eth_mac_addr,
1190 .ndo_validate_addr = eth_validate_addr,
1191 .ndo_change_mtu = ip_tunnel_change_mtu,
1192 .ndo_get_stats64 = rpl_ip_tunnel_get_stats64,
1193#ifdef HAVE_NDO_GET_IFLINK
1194 .ndo_get_iflink = rpl_ip_tunnel_get_iflink,
1195#endif
1196#ifdef HAVE_NDO_FILL_METADATA_DST
1197 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
1198#endif
1199};
1200
/* rtnl_link_ops .setup for the classic "gre" link type. */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops = &ipgre_netdev_ops;
	dev->type = ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}
1207
e23775f2
PS
/* rtnl_link_ops .setup for the "ovs_gretap" link type (Ethernet over
 * GRE).
 */
static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
#ifdef HAVE_NET_DEVICE_MAX_MTU
	/* NOTE(review): presumably 0 disables the core's upper-bound MTU
	 * check so OVS can raise the MTU freely — confirm.
	 */
	dev->max_mtu = 0;
#endif
	dev->netdev_ops = &gre_tap_netdev_ops;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}
1218
8e53509c
WT
/* rtnl_link_ops .setup for the "erspan" link type. */
static void erspan_setup(struct net_device *dev)
{
	/* Assign a random MAC before the generic Ethernet setup. */
	eth_hw_addr_random(dev);
	ether_setup(dev);
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
}
1228
e23775f2
PS
1229static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1230 struct nlattr *tb[], struct nlattr *data[])
e23775f2
PS
1231{
1232 struct ip_tunnel_parm p;
1233 int err;
1234
1235 ipgre_netlink_parms(dev, data, tb, &p);
1236 err = ip_tunnel_newlink(dev, tb, &p);
1237 return err;
1238
1239}
1240
/* Upper bound on the netlink payload needed to dump one GRE/ERSPAN
 * link; one nla_total_size() term per attribute ipgre_fill_info() may
 * emit.
 */
static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}
1284
/* Dump the tunnel configuration as netlink attributes for an rtnl
 * link query.  Returns 0 on success, -EMSGSIZE if the skb ran out of
 * room.
 */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
		goto nla_put_failure;

	/* ERSPAN v1 carries an index; v2 carries direction and hwid. */
	if (t->erspan_ver == 1) {
		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
			goto nla_put_failure;
	} else if (t->erspan_ver == 2) {
		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
			goto nla_put_failure;
		if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
1321
8e53509c 1322static const struct nla_policy ipgre_policy[RPL_IFLA_GRE_MAX + 1] = {
e23775f2
PS
1323 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1324 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1325 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1326 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1327 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1328 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1329 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1330 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1331 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1332 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
8e53509c 1333 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
e4120b10
WT
1334 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
1335 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
1336 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
8e53509c
WT
1337};
1338
/* rtnl_link_ops for the classic "gre" link type. */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= RPL_IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
#ifdef HAVE_GET_LINK_NET
	.get_link_net	= ip_tunnel_get_link_net,
#endif
};
1354
/* rtnl_link_ops for the OVS gretap link type; "ovs_" prefix avoids
 * clashing with the in-tree "gretap" kind.
 */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "ovs_gretap",
	.maxtype	= RPL_IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
#ifdef HAVE_GET_LINK_NET
	.get_link_net	= ip_tunnel_get_link_net,
#endif
};
1370
8e53509c
WT
/* rtnl_link_ops for the compat "erspan" link type. */
static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= RPL_IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
#ifdef HAVE_GET_LINK_NET
	.get_link_net	= ip_tunnel_get_link_net,
#endif
};
1386
e23775f2
PS
/* Create a flow-based (collect_md) gretap device for the OVS gre
 * vport.  Callers in this file hold rtnl_lock around this.  Returns
 * the device or an ERR_PTR(); note the two distinct failure paths:
 * before ipgre_newlink() succeeds a plain free_netdev() is enough,
 * afterwards the device is registered and must go through
 * ip_tunnel_dellink()/unregister_netdevice_many().
 */
struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, (char *)name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	t = netdev_priv(dev);
	t->collect_md = true;
	/* Configure flow based GRE device. */
	err = ipgre_newlink(net, dev, tb, NULL);
	if (err < 0) {
		/* Not yet registered: plain free is sufficient. */
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	return dev;
out:
	/* Registered: tear down through the tunnel dellink path. */
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(rpl_gretap_fb_dev_create);
1426
8e53509c
WT
/* Per-netns init for erspan tunnels.
 * NOTE(review): devname is NULL here so no fallback device is
 * created, unlike ipgre_tap_init_net() which passes "gretap0" —
 * confirm this asymmetry is intended.
 */
static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, NULL);
}
1432
/* Per-netns teardown for erspan tunnels. */
static void __net_exit erspan_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);

	ip_tunnel_delete_net(itn, &erspan_link_ops);
}
1439
/* pernet registration glue for the erspan tunnel type. */
static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit = erspan_exit_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
1446
e23775f2
PS
/* Per-netns init for gretap tunnels; "gretap0" is the fallback
 * device name.
 */
static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
1451
/* Per-netns teardown for gretap tunnels. */
static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}
1458
/* pernet registration glue for the gretap tunnel type. */
static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
1465
8e53509c
WT
/* Create a flow-based (collect_md) erspan device, mirroring
 * rpl_gretap_fb_dev_create().  Callers hold rtnl_lock.  Returns the
 * device or an ERR_PTR(); before ipgre_newlink() succeeds a plain
 * free_netdev() suffices, afterwards teardown goes through
 * ip_tunnel_dellink().
 */
static struct net_device *erspan_fb_dev_create(struct net *net,
					       const char *name,
					       u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, (char *)name, name_assign_type,
			       &erspan_link_ops, tb);
	if (IS_ERR(dev))
		return dev;

	t = netdev_priv(dev);
	t->collect_md = true;
	/* Configure flow based GRE device. */
	err = ipgre_newlink(net, dev, tb, NULL);
	if (err < 0) {
		/* Not yet registered: plain free is sufficient. */
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	return dev;
out:
	/* Registered: tear down through the tunnel dellink path. */
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
1505
1506static struct vport_ops ovs_erspan_vport_ops;
1507
/* Allocate an erspan vport and its backing collect_md net_device,
 * bringing the device up under rtnl_lock.  Returns the vport or an
 * ERR_PTR; on failure the vport is freed and the device (if created)
 * is deleted via rtnl_delete_link().
 */
static struct vport *erspan_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct net_device *dev;
	struct vport *vport;
	int err;

	/* priv_size 0: all state lives in the net_device. */
	vport = ovs_vport_alloc(0, &ovs_erspan_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	rtnl_lock();
	dev = erspan_fb_dev_create(net, parms->name, NET_NAME_USER);
	if (IS_ERR(dev)) {
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_CAST(dev);
	}

	err = dev_change_flags(dev, dev->flags | IFF_UP);
	if (err < 0) {
		/* Delete the link before dropping rtnl_lock. */
		rtnl_delete_link(dev);
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_PTR(err);
	}

	rtnl_unlock();
	return vport;
}
1538
1539static struct vport *erspan_create(const struct vport_parms *parms)
1540{
1541 struct vport *vport;
1542
1543 vport = erspan_tnl_create(parms);
1544 if (IS_ERR(vport))
1545 return vport;
1546
1547 return ovs_netdev_link(vport, parms->name);
1548}
1549
#ifndef OVS_VPORT_TYPE_ERSPAN
/* Until integration is done... */
#define OVS_VPORT_TYPE_ERSPAN 107 /* ERSPAN tunnel. */
#endif
/* OVS vport operations for the ERSPAN tunnel type. */
static struct vport_ops ovs_erspan_vport_ops = {
	.type		= OVS_VPORT_TYPE_ERSPAN,
	.create		= erspan_create,
	.send		= __erspan_fb_xmit,
#ifndef USE_UPSTREAM_TUNNEL
	.fill_metadata_dst = gre_fill_metadata_dst,
#endif
	.destroy	= ovs_netdev_tunnel_destroy,
};
1563
1564static struct vport_ops ovs_ipgre_vport_ops;
1565
/* Allocate a gre vport and its backing collect_md gretap net_device,
 * bringing the device up under rtnl_lock.  Returns the vport or an
 * ERR_PTR; on failure the vport is freed and the device (if created)
 * is deleted via rtnl_delete_link().
 */
static struct vport *ipgre_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct net_device *dev;
	struct vport *vport;
	int err;

	/* priv_size 0: all state lives in the net_device. */
	vport = ovs_vport_alloc(0, &ovs_ipgre_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	rtnl_lock();
	dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
	if (IS_ERR(dev)) {
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_CAST(dev);
	}

	err = dev_change_flags(dev, dev->flags | IFF_UP);
	if (err < 0) {
		/* Delete the link before dropping rtnl_lock. */
		rtnl_delete_link(dev);
		rtnl_unlock();
		ovs_vport_free(vport);
		return ERR_PTR(err);
	}

	rtnl_unlock();
	return vport;
}
1596
1597static struct vport *ipgre_create(const struct vport_parms *parms)
1598{
1599 struct vport *vport;
1600
1601 vport = ipgre_tnl_create(parms);
1602 if (IS_ERR(vport))
1603 return vport;
1604
1605 return ovs_netdev_link(vport, parms->name);
1606}
1607
/* OVS vport operations for the GRE tunnel type. */
static struct vport_ops ovs_ipgre_vport_ops = {
	.type		= OVS_VPORT_TYPE_GRE,
	.create		= ipgre_create,
	.send		= gre_fb_xmit,
#ifndef USE_UPSTREAM_TUNNEL
	.fill_metadata_dst = gre_fill_metadata_dst,
#endif
	.destroy	= ovs_netdev_tunnel_destroy,
};
1617
e23775f2
PS
/* Module init for the compat GRE/ERSPAN support: register per-netns
 * state for gretap, erspan and classic gre, hook the GRE protocol
 * demux, then expose the OVS vport types.  Returns 0 or a negative
 * errno; on failure everything registered so far is unwound.
 */
int rpl_ipgre_init(void)
{
	int err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		goto pnet_ipgre_failed;

	err = gre_cisco_register(&ipgre_protocol);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	pr_info("GRE over IPv4 tunneling driver\n");

	/* NOTE(review): return values of ovs_vport_ops_register() are
	 * ignored here — confirm registration cannot fail in practice.
	 */
	ovs_vport_ops_register(&ovs_ipgre_vport_ops);
	ovs_vport_ops_register(&ovs_erspan_vport_ops);
	return 0;

	/* Unwind in reverse order of registration. */
add_proto_failed:
	unregister_pernet_device(&ipgre_net_ops);
pnet_ipgre_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	pr_err("Error while initializing GRE %d\n", err);
	return err;
}
1656
/* Module teardown: undo rpl_ipgre_init() in exact reverse order. */
void rpl_ipgre_fini(void)
{
	ovs_vport_ops_unregister(&ovs_erspan_vport_ops);
	ovs_vport_ops_unregister(&ovs_ipgre_vport_ops);
	gre_cisco_unregister(&ipgre_protocol);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
	unregister_pernet_device(&ipgre_tap_net_ops);
}
1666
1667#endif