2 * GENEVE: Generic Network Virtualization Encapsulation
4 * Copyright (c) 2015 Red Hat, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/netdevice.h>
16 #include <linux/etherdevice.h>
17 #include <linux/hash.h>
18 #include <linux/if_link.h>
19 #include <linux/if_vlan.h>
21 #include <net/addrconf.h>
22 #include <net/dst_cache.h>
23 #include <net/dst_metadata.h>
24 #include <net/net_namespace.h>
25 #include <net/netns/generic.h>
26 #include <net/rtnetlink.h>
27 #include <net/geneve.h>
28 #include <net/protocol.h>
29 #include <net/udp_tunnel.h>
30 #include <net/ip6_route.h>
31 #if IS_ENABLED(CONFIG_IPV6)
33 #include <net/addrconf.h>
34 #include <net/ip6_tunnel.h>
35 #include <net/ip6_checksum.h>
40 #include "vport-netdev.h"
43 #ifndef USE_UPSTREAM_TUNNEL
45 #define GENEVE_NETDEV_VER "0.6"
47 #define GENEVE_UDP_PORT 6081
49 #define GENEVE_N_VID (1u << 24)
50 #define GENEVE_VID_MASK (GENEVE_N_VID - 1)
52 #define VNI_HASH_BITS 10
53 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
56 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
58 /* per-network namespace private data for this module */
60 struct list_head geneve_list
;
61 struct list_head sock_list
;
64 static int geneve_net_id
;
67 struct sockaddr_in sin
;
68 struct sockaddr_in6 sin6
;
72 static union geneve_addr geneve_remote_unspec
= { .sa
.sa_family
= AF_UNSPEC
, };
74 /* Pseudo network device */
76 struct hlist_node hlist
; /* vni hash table */
77 struct net
*net
; /* netns for packet i/o */
78 struct net_device
*dev
; /* netdev for geneve tunnel */
79 struct geneve_sock __rcu
*sock4
; /* IPv4 socket used for geneve tunnel */
80 #if IS_ENABLED(CONFIG_IPV6)
81 struct geneve_sock __rcu
*sock6
; /* IPv6 socket used for geneve tunnel */
83 u8 vni
[3]; /* virtual network ID for tunnel */
84 u8 ttl
; /* TTL override */
85 u8 tos
; /* TOS override */
86 union geneve_addr remote
; /* IP address for link partner */
87 struct list_head next
; /* geneve's per namespace list */
88 __be32 label
; /* IPv6 flowlabel override */
92 struct dst_cache dst_cache
;
95 /* Geneve device flags */
96 #define GENEVE_F_UDP_ZERO_CSUM_TX BIT(0)
97 #define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1)
98 #define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2)
102 struct list_head list
;
106 struct hlist_head vni_list
[VNI_HASH_SIZE
];
108 #ifdef HAVE_UDP_OFFLOAD
109 struct udp_offload udp_offloads
;
113 static inline __u32
geneve_net_vni_hash(u8 vni
[3])
117 vnid
= (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
118 return hash_32(vnid
, VNI_HASH_BITS
);
121 static __be64
vni_to_tunnel_id(const __u8
*vni
)
124 return (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
126 return (__force __be64
)(((__force u64
)vni
[0] << 40) |
127 ((__force u64
)vni
[1] << 48) |
128 ((__force u64
)vni
[2] << 56));
132 static sa_family_t
geneve_get_sk_family(struct geneve_sock
*gs
)
134 return gs
->sock
->sk
->sk_family
;
137 static struct geneve_dev
*geneve_lookup(struct geneve_sock
*gs
,
138 __be32 addr
, u8 vni
[])
140 struct hlist_head
*vni_list_head
;
141 struct geneve_dev
*geneve
;
144 /* Find the device for this VNI */
145 hash
= geneve_net_vni_hash(vni
);
146 vni_list_head
= &gs
->vni_list
[hash
];
147 hlist_for_each_entry_rcu(geneve
, vni_list_head
, hlist
) {
148 if (!memcmp(vni
, geneve
->vni
, sizeof(geneve
->vni
)) &&
149 addr
== geneve
->remote
.sin
.sin_addr
.s_addr
)
155 #if IS_ENABLED(CONFIG_IPV6)
156 static struct geneve_dev
*geneve6_lookup(struct geneve_sock
*gs
,
157 struct in6_addr addr6
, u8 vni
[])
159 struct hlist_head
*vni_list_head
;
160 struct geneve_dev
*geneve
;
163 /* Find the device for this VNI */
164 hash
= geneve_net_vni_hash(vni
);
165 vni_list_head
= &gs
->vni_list
[hash
];
166 hlist_for_each_entry_rcu(geneve
, vni_list_head
, hlist
) {
167 if (!memcmp(vni
, geneve
->vni
, sizeof(geneve
->vni
)) &&
168 ipv6_addr_equal(&addr6
, &geneve
->remote
.sin6
.sin6_addr
))
175 static inline struct genevehdr
*geneve_hdr(const struct sk_buff
*skb
)
177 return (struct genevehdr
*)(udp_hdr(skb
) + 1);
180 static struct geneve_dev
*geneve_lookup_skb(struct geneve_sock
*gs
,
185 static u8 zero_vni
[3];
186 #if IS_ENABLED(CONFIG_IPV6)
187 static struct in6_addr zero_addr6
;
190 if (geneve_get_sk_family(gs
) == AF_INET
) {
193 iph
= ip_hdr(skb
); /* outer IP header... */
195 if (gs
->collect_md
) {
199 vni
= geneve_hdr(skb
)->vni
;
203 return geneve_lookup(gs
, addr
, vni
);
204 #if IS_ENABLED(CONFIG_IPV6)
205 } else if (geneve_get_sk_family(gs
) == AF_INET6
) {
206 struct ipv6hdr
*ip6h
;
207 struct in6_addr addr6
;
209 ip6h
= ipv6_hdr(skb
); /* outer IPv6 header... */
211 if (gs
->collect_md
) {
215 vni
= geneve_hdr(skb
)->vni
;
219 return geneve6_lookup(gs
, addr6
, vni
);
225 /* geneve receive/decap routine */
226 static void geneve_rx(struct geneve_dev
*geneve
, struct geneve_sock
*gs
,
229 struct genevehdr
*gnvh
= geneve_hdr(skb
);
230 struct metadata_dst
*tun_dst
= NULL
;
231 struct pcpu_sw_netstats
*stats
;
235 struct metadata_dst dst
;
236 char buf
[sizeof(struct metadata_dst
) + 256];
239 if (ip_tunnel_collect_metadata() || gs
->collect_md
) {
242 flags
= TUNNEL_KEY
| TUNNEL_GENEVE_OPT
|
243 (gnvh
->oam
? TUNNEL_OAM
: 0) |
244 (gnvh
->critical
? TUNNEL_CRIT_OPT
: 0);
247 ovs_udp_tun_rx_dst(tun_dst
,
248 skb
, geneve_get_sk_family(gs
), flags
,
249 vni_to_tunnel_id(gnvh
->vni
),
253 /* Update tunnel dst according to Geneve options. */
254 ip_tunnel_info_opts_set(&tun_dst
->u
.tun_info
,
255 gnvh
->options
, gnvh
->opt_len
* 4,
258 /* Drop packets w/ critical options,
259 * since we don't support any...
265 skb_reset_mac_header(skb
);
266 skb
->protocol
= eth_type_trans(skb
, geneve
->dev
);
267 skb_postpull_rcsum(skb
, eth_hdr(skb
), ETH_HLEN
);
270 ovs_skb_dst_set(skb
, &tun_dst
->dst
);
272 /* Ignore packet loops (and multicast echo) */
273 if (ether_addr_equal(eth_hdr(skb
)->h_source
, geneve
->dev
->dev_addr
))
276 oiph
= skb_network_header(skb
);
277 skb_reset_network_header(skb
);
279 if (geneve_get_sk_family(gs
) == AF_INET
)
280 err
= IP_ECN_decapsulate(oiph
, skb
);
281 #if IS_ENABLED(CONFIG_IPV6)
283 err
= IP6_ECN_decapsulate(oiph
, skb
);
285 if (unlikely(err
> 1)) {
286 ++geneve
->dev
->stats
.rx_frame_errors
;
287 ++geneve
->dev
->stats
.rx_errors
;
291 stats
= this_cpu_ptr(geneve
->dev
->tstats
);
292 u64_stats_update_begin(&stats
->syncp
);
294 stats
->rx_bytes
+= skb
->len
;
295 u64_stats_update_end(&stats
->syncp
);
297 netdev_port_receive(skb
, skb_tunnel_info(skb
));
300 /* Consume bad packet */
304 /* Setup stats when device is created */
305 static int geneve_init(struct net_device
*dev
)
307 struct geneve_dev
*geneve
= netdev_priv(dev
);
310 dev
->tstats
= netdev_alloc_pcpu_stats(struct pcpu_sw_netstats
);
314 err
= dst_cache_init(&geneve
->dst_cache
, GFP_KERNEL
);
316 free_percpu(dev
->tstats
);
323 static void geneve_uninit(struct net_device
*dev
)
325 struct geneve_dev
*geneve
= netdev_priv(dev
);
327 dst_cache_destroy(&geneve
->dst_cache
);
328 free_percpu(dev
->tstats
);
331 /* Callback from net/ipv4/udp.c to receive packets */
332 static int geneve_udp_encap_recv(struct sock
*sk
, struct sk_buff
*skb
)
334 struct genevehdr
*geneveh
;
335 struct geneve_dev
*geneve
;
336 struct geneve_sock
*gs
;
339 /* Need Geneve and inner Ethernet header to be present */
340 if (unlikely(!pskb_may_pull(skb
, GENEVE_BASE_HLEN
)))
343 /* Return packets with reserved bits set */
344 geneveh
= geneve_hdr(skb
);
345 if (unlikely(geneveh
->ver
!= GENEVE_VER
))
348 if (unlikely(geneveh
->proto_type
!= htons(ETH_P_TEB
)))
351 gs
= rcu_dereference_sk_user_data(sk
);
355 #if IS_ENABLED(CONFIG_IPV6)
356 #ifdef OVS_CHECK_UDP_TUNNEL_ZERO_CSUM
357 if (geneve_get_sk_family(gs
) == AF_INET6
&&
358 !udp_hdr(skb
)->check
&&
359 !(gs
->flags
& GENEVE_F_UDP_ZERO_CSUM6_RX
)) {
360 udp6_csum_zero_error(skb
);
365 geneve
= geneve_lookup_skb(gs
, skb
);
369 opts_len
= geneveh
->opt_len
* 4;
370 if (iptunnel_pull_header(skb
, GENEVE_BASE_HLEN
+ opts_len
,
372 !net_eq(geneve
->net
, dev_net(geneve
->dev
))))
375 geneve_rx(geneve
, gs
, skb
);
379 /* Consume bad packet */
384 static struct socket
*geneve_create_sock(struct net
*net
, bool ipv6
,
385 __be16 port
, u32 flags
)
388 struct udp_port_cfg udp_conf
;
391 memset(&udp_conf
, 0, sizeof(udp_conf
));
394 udp_conf
.family
= AF_INET6
;
395 udp_conf
.ipv6_v6only
= 1;
396 udp_conf
.use_udp6_rx_checksums
=
397 !(flags
& GENEVE_F_UDP_ZERO_CSUM6_RX
);
399 udp_conf
.family
= AF_INET
;
400 udp_conf
.local_ip
.s_addr
= htonl(INADDR_ANY
);
403 udp_conf
.local_udp_port
= port
;
405 /* Open UDP socket */
406 err
= udp_sock_create(net
, &udp_conf
, &sock
);
413 static void geneve_notify_add_rx_port(struct geneve_sock
*gs
)
415 struct net_device
*dev
;
416 struct sock
*sk
= gs
->sock
->sk
;
417 struct net
*net
= sock_net(sk
);
418 sa_family_t sa_family
= geneve_get_sk_family(gs
);
421 if (sa_family
== AF_INET
) {
422 err
= udp_add_offload(sock_net(sk
), &gs
->udp_offloads
);
424 pr_warn("geneve: udp_add_offload failed with status %d\n",
429 for_each_netdev_rcu(net
, dev
) {
430 #ifdef HAVE_NDO_ADD_GENEVE_PORT
431 __be16 port
= inet_sk(sk
)->inet_sport
;
433 if (dev
->netdev_ops
->ndo_add_geneve_port
)
434 dev
->netdev_ops
->ndo_add_geneve_port(dev
, sa_family
,
436 #elif defined(HAVE_NDO_UDP_TUNNEL_ADD)
437 struct udp_tunnel_info ti
;
438 ti
.type
= UDP_TUNNEL_TYPE_GENEVE
;
439 ti
.sa_family
= sa_family
;
440 ti
.port
= inet_sk(sk
)->inet_sport
;
442 if (dev
->netdev_ops
->ndo_udp_tunnel_add
)
443 dev
->netdev_ops
->ndo_udp_tunnel_add(dev
, &ti
);
449 static void geneve_notify_del_rx_port(struct geneve_sock
*gs
)
451 struct net_device
*dev
;
452 struct sock
*sk
= gs
->sock
->sk
;
453 struct net
*net
= sock_net(sk
);
454 sa_family_t sa_family
= geneve_get_sk_family(gs
);
457 for_each_netdev_rcu(net
, dev
) {
458 #ifdef HAVE_NDO_ADD_GENEVE_PORT
459 __be16 port
= inet_sk(sk
)->inet_sport
;
461 if (dev
->netdev_ops
->ndo_del_geneve_port
)
462 dev
->netdev_ops
->ndo_del_geneve_port(dev
, sa_family
,
464 #elif defined(HAVE_NDO_UDP_TUNNEL_ADD)
465 struct udp_tunnel_info ti
;
466 ti
.type
= UDP_TUNNEL_TYPE_GENEVE
;
467 ti
.port
= inet_sk(sk
)->inet_sport
;
468 ti
.sa_family
= sa_family
;
470 if (dev
->netdev_ops
->ndo_udp_tunnel_del
)
471 dev
->netdev_ops
->ndo_udp_tunnel_del(dev
, &ti
);
477 if (sa_family
== AF_INET
)
478 udp_del_offload(&gs
->udp_offloads
);
481 #if defined(HAVE_UDP_OFFLOAD) || \
482 defined(HAVE_UDP_TUNNEL_SOCK_CFG_GRO_RECEIVE)
484 static int geneve_hlen(struct genevehdr
*gh
)
486 return sizeof(*gh
) + gh
->opt_len
* 4;
489 #ifndef HAVE_UDP_OFFLOAD_ARG_UOFF
490 static struct sk_buff
**geneve_gro_receive(struct sk_buff
**head
,
493 static struct sk_buff
**geneve_gro_receive(struct sk_buff
**head
,
495 struct udp_offload
*uoff
)
498 struct sk_buff
*p
, **pp
= NULL
;
499 struct genevehdr
*gh
, *gh2
;
500 unsigned int hlen
, gh_len
, off_gnv
;
501 const struct packet_offload
*ptype
;
505 off_gnv
= skb_gro_offset(skb
);
506 hlen
= off_gnv
+ sizeof(*gh
);
507 gh
= skb_gro_header_fast(skb
, off_gnv
);
508 if (skb_gro_header_hard(skb
, hlen
)) {
509 gh
= skb_gro_header_slow(skb
, hlen
, off_gnv
);
514 if (gh
->ver
!= GENEVE_VER
|| gh
->oam
)
516 gh_len
= geneve_hlen(gh
);
518 hlen
= off_gnv
+ gh_len
;
519 if (skb_gro_header_hard(skb
, hlen
)) {
520 gh
= skb_gro_header_slow(skb
, hlen
, off_gnv
);
525 for (p
= *head
; p
; p
= p
->next
) {
526 if (!NAPI_GRO_CB(p
)->same_flow
)
529 gh2
= (struct genevehdr
*)(p
->data
+ off_gnv
);
530 if (gh
->opt_len
!= gh2
->opt_len
||
531 memcmp(gh
, gh2
, gh_len
)) {
532 NAPI_GRO_CB(p
)->same_flow
= 0;
537 type
= gh
->proto_type
;
540 ptype
= gro_find_receive_by_type(type
);
544 skb_gro_pull(skb
, gh_len
);
545 skb_gro_postpull_rcsum(skb
, gh
, gh_len
);
546 pp
= ptype
->callbacks
.gro_receive(head
, skb
);
552 NAPI_GRO_CB(skb
)->flush
|= flush
;
557 #ifndef HAVE_UDP_OFFLOAD_ARG_UOFF
558 static int geneve_gro_complete(struct sk_buff
*skb
, int nhoff
)
560 static int geneve_gro_complete(struct sk_buff
*skb
, int nhoff
,
561 struct udp_offload
*uoff
)
564 struct genevehdr
*gh
;
565 struct packet_offload
*ptype
;
570 udp_tunnel_gro_complete(skb
, nhoff
);
572 gh
= (struct genevehdr
*)(skb
->data
+ nhoff
);
573 gh_len
= geneve_hlen(gh
);
574 type
= gh
->proto_type
;
577 ptype
= gro_find_complete_by_type(type
);
579 err
= ptype
->callbacks
.gro_complete(skb
, nhoff
+ gh_len
);
583 skb_set_inner_mac_header(skb
, nhoff
+ gh_len
);
588 /* Create new listen socket if needed */
589 static struct geneve_sock
*geneve_socket_create(struct net
*net
, __be16 port
,
590 bool ipv6
, u32 flags
)
592 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
593 struct geneve_sock
*gs
;
595 struct udp_tunnel_sock_cfg tunnel_cfg
;
598 gs
= kzalloc(sizeof(*gs
), GFP_KERNEL
);
600 return ERR_PTR(-ENOMEM
);
602 sock
= geneve_create_sock(net
, ipv6
, port
, flags
);
605 return ERR_CAST(sock
);
610 for (h
= 0; h
< VNI_HASH_SIZE
; ++h
)
611 INIT_HLIST_HEAD(&gs
->vni_list
[h
]);
613 /* Initialize the geneve udp offloads structure */
614 #ifdef HAVE_UDP_OFFLOAD
615 gs
->udp_offloads
.port
= port
;
616 gs
->udp_offloads
.callbacks
.gro_receive
= geneve_gro_receive
;
617 gs
->udp_offloads
.callbacks
.gro_complete
= geneve_gro_complete
;
620 geneve_notify_add_rx_port(gs
);
621 /* Mark socket as an encapsulation socket */
622 memset(&tunnel_cfg
, 0, sizeof(tunnel_cfg
));
623 tunnel_cfg
.sk_user_data
= gs
;
624 tunnel_cfg
.encap_type
= 1;
625 #ifdef HAVE_UDP_TUNNEL_SOCK_CFG_GRO_RECEIVE
626 tunnel_cfg
.gro_receive
= geneve_gro_receive
;
627 tunnel_cfg
.gro_complete
= geneve_gro_complete
;
629 tunnel_cfg
.encap_rcv
= geneve_udp_encap_recv
;
630 tunnel_cfg
.encap_destroy
= NULL
;
631 setup_udp_tunnel_sock(net
, sock
, &tunnel_cfg
);
632 list_add(&gs
->list
, &gn
->sock_list
);
636 static void __geneve_sock_release(struct geneve_sock
*gs
)
638 if (!gs
|| --gs
->refcnt
)
642 geneve_notify_del_rx_port(gs
);
643 udp_tunnel_sock_release(gs
->sock
);
647 static void geneve_sock_release(struct geneve_dev
*geneve
)
649 struct geneve_sock
*gs4
= rtnl_dereference(geneve
->sock4
);
650 #if IS_ENABLED(CONFIG_IPV6)
651 struct geneve_sock
*gs6
= rtnl_dereference(geneve
->sock6
);
653 rcu_assign_pointer(geneve
->sock6
, NULL
);
656 rcu_assign_pointer(geneve
->sock4
, NULL
);
659 __geneve_sock_release(gs4
);
660 #if IS_ENABLED(CONFIG_IPV6)
661 __geneve_sock_release(gs6
);
665 static struct geneve_sock
*geneve_find_sock(struct geneve_net
*gn
,
669 struct geneve_sock
*gs
;
671 list_for_each_entry(gs
, &gn
->sock_list
, list
) {
672 if (inet_sk(gs
->sock
->sk
)->inet_sport
== dst_port
&&
673 geneve_get_sk_family(gs
) == family
) {
680 static int geneve_sock_add(struct geneve_dev
*geneve
, bool ipv6
)
682 struct net
*net
= geneve
->net
;
683 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
684 struct geneve_sock
*gs
;
687 gs
= geneve_find_sock(gn
, ipv6
? AF_INET6
: AF_INET
, geneve
->dst_port
);
693 gs
= geneve_socket_create(net
, geneve
->dst_port
, ipv6
, geneve
->flags
);
698 gs
->collect_md
= geneve
->collect_md
;
699 gs
->flags
= geneve
->flags
;
700 #if IS_ENABLED(CONFIG_IPV6)
702 rcu_assign_pointer(geneve
->sock6
, gs
);
705 rcu_assign_pointer(geneve
->sock4
, gs
);
707 hash
= geneve_net_vni_hash(geneve
->vni
);
708 hlist_add_head_rcu(&geneve
->hlist
, &gs
->vni_list
[hash
]);
712 static int geneve_open(struct net_device
*dev
)
714 struct geneve_dev
*geneve
= netdev_priv(dev
);
715 bool ipv6
= geneve
->remote
.sa
.sa_family
== AF_INET6
;
716 bool metadata
= geneve
->collect_md
;
719 #if IS_ENABLED(CONFIG_IPV6)
720 if (ipv6
|| metadata
)
721 ret
= geneve_sock_add(geneve
, true);
724 if (!ret
&& (!ipv6
|| metadata
))
725 ret
= geneve_sock_add(geneve
, false);
727 geneve_sock_release(geneve
);
732 static int geneve_stop(struct net_device
*dev
)
734 struct geneve_dev
*geneve
= netdev_priv(dev
);
736 if (!hlist_unhashed(&geneve
->hlist
))
737 hlist_del_rcu(&geneve
->hlist
);
738 geneve_sock_release(geneve
);
742 static void geneve_build_header(struct genevehdr
*geneveh
,
743 __be16 tun_flags
, u8 vni
[3],
744 u8 options_len
, u8
*options
)
746 geneveh
->ver
= GENEVE_VER
;
747 geneveh
->opt_len
= options_len
/ 4;
748 geneveh
->oam
= !!(tun_flags
& TUNNEL_OAM
);
749 geneveh
->critical
= !!(tun_flags
& TUNNEL_CRIT_OPT
);
751 memcpy(geneveh
->vni
, vni
, 3);
752 geneveh
->proto_type
= htons(ETH_P_TEB
);
755 memcpy(geneveh
->options
, options
, options_len
);
758 static int push_vlan_tag(struct sk_buff
*skb
)
760 if (skb_vlan_tag_present(skb
)) {
761 __be16 vlan_proto
= skb
->vlan_proto
;
764 err
= __vlan_insert_tag(skb
, skb
->vlan_proto
,
765 skb_vlan_tag_get(skb
));
770 skb
->protocol
= vlan_proto
;
775 static int geneve_build_skb(struct rtable
*rt
, struct sk_buff
*skb
,
776 __be16 tun_flags
, u8 vni
[3], u8 opt_len
, u8
*opt
,
777 u32 flags
, bool xnet
)
779 struct genevehdr
*gnvh
;
782 bool udp_sum
= !(flags
& GENEVE_F_UDP_ZERO_CSUM_TX
);
784 skb_scrub_packet(skb
, xnet
);
786 min_headroom
= LL_RESERVED_SPACE(rt
->dst
.dev
) + rt
->dst
.header_len
787 + GENEVE_BASE_HLEN
+ opt_len
+ sizeof(struct iphdr
)
788 + (skb_vlan_tag_present(skb
) ? VLAN_HLEN
: 0);
790 err
= skb_cow_head(skb
, min_headroom
);
794 err
= push_vlan_tag(skb
);
798 err
= udp_tunnel_handle_offloads(skb
, udp_sum
);
802 gnvh
= (struct genevehdr
*)__skb_push(skb
, sizeof(*gnvh
) + opt_len
);
803 geneve_build_header(gnvh
, tun_flags
, vni
, opt_len
, opt
);
805 ovs_skb_set_inner_protocol(skb
, htons(ETH_P_TEB
));
813 #if IS_ENABLED(CONFIG_IPV6)
814 static int geneve6_build_skb(struct dst_entry
*dst
, struct sk_buff
*skb
,
815 __be16 tun_flags
, u8 vni
[3], u8 opt_len
, u8
*opt
,
816 u32 flags
, bool xnet
)
818 struct genevehdr
*gnvh
;
821 bool udp_sum
= !(flags
& GENEVE_F_UDP_ZERO_CSUM6_TX
);
823 skb_scrub_packet(skb
, xnet
);
825 min_headroom
= LL_RESERVED_SPACE(dst
->dev
) + dst
->header_len
826 + GENEVE_BASE_HLEN
+ opt_len
+ sizeof(struct ipv6hdr
)
827 + (skb_vlan_tag_present(skb
) ? VLAN_HLEN
: 0);
829 err
= skb_cow_head(skb
, min_headroom
);
833 err
= push_vlan_tag(skb
);
837 err
= udp_tunnel_handle_offloads(skb
, udp_sum
);
841 gnvh
= (struct genevehdr
*)__skb_push(skb
, sizeof(*gnvh
) + opt_len
);
842 geneve_build_header(gnvh
, tun_flags
, vni
, opt_len
, opt
);
844 ovs_skb_set_inner_protocol(skb
, htons(ETH_P_TEB
));
853 static struct rtable
*geneve_get_v4_rt(struct sk_buff
*skb
,
854 struct net_device
*dev
,
856 struct ip_tunnel_info
*info
,
857 __be16 dport
, __be16 sport
)
859 bool use_cache
= ip_tunnel_dst_cache_usable(skb
, info
);
860 struct geneve_dev
*geneve
= netdev_priv(dev
);
861 struct dst_cache
*dst_cache
;
862 struct rtable
*rt
= NULL
;
865 if (!rcu_dereference(geneve
->sock4
))
866 return ERR_PTR(-EIO
);
868 memset(fl4
, 0, sizeof(*fl4
));
869 fl4
->flowi4_mark
= skb
->mark
;
870 fl4
->flowi4_proto
= IPPROTO_UDP
;
871 fl4
->fl4_dport
= dport
;
872 fl4
->fl4_sport
= sport
;
875 fl4
->daddr
= info
->key
.u
.ipv4
.dst
;
876 fl4
->saddr
= info
->key
.u
.ipv4
.src
;
877 fl4
->flowi4_tos
= RT_TOS(info
->key
.tos
);
878 dst_cache
= &info
->dst_cache
;
882 const struct iphdr
*iip
= ip_hdr(skb
);
884 tos
= ip_tunnel_get_dsfield(iip
, skb
);
888 fl4
->flowi4_tos
= RT_TOS(tos
);
889 fl4
->daddr
= geneve
->remote
.sin
.sin_addr
.s_addr
;
890 dst_cache
= &geneve
->dst_cache
;
894 rt
= dst_cache_get_ip4(dst_cache
, &fl4
->saddr
);
899 rt
= ip_route_output_key(geneve
->net
, fl4
);
901 netdev_dbg(dev
, "no route to %pI4\n", &fl4
->daddr
);
902 return ERR_PTR(-ENETUNREACH
);
904 if (rt
->dst
.dev
== dev
) { /* is this necessary? */
905 netdev_dbg(dev
, "circular route to %pI4\n", &fl4
->daddr
);
907 return ERR_PTR(-ELOOP
);
910 dst_cache_set_ip4(dst_cache
, &rt
->dst
, fl4
->saddr
);
914 #if IS_ENABLED(CONFIG_IPV6)
915 static struct dst_entry
*geneve_get_v6_dst(struct sk_buff
*skb
,
916 struct net_device
*dev
,
918 struct ip_tunnel_info
*info
,
919 __be16 dport
, __be16 sport
)
921 bool use_cache
= ip_tunnel_dst_cache_usable(skb
, info
);
922 struct geneve_dev
*geneve
= netdev_priv(dev
);
923 struct dst_entry
*dst
= NULL
;
924 struct dst_cache
*dst_cache
;
925 struct geneve_sock
*gs6
;
928 gs6
= rcu_dereference(geneve
->sock6
);
930 return ERR_PTR(-EIO
);
932 memset(fl6
, 0, sizeof(*fl6
));
933 fl6
->flowi6_mark
= skb
->mark
;
934 fl6
->flowi6_proto
= IPPROTO_UDP
;
935 fl6
->fl6_dport
= dport
;
936 fl6
->fl6_sport
= sport
;
939 fl6
->daddr
= info
->key
.u
.ipv6
.dst
;
940 fl6
->saddr
= info
->key
.u
.ipv6
.src
;
941 fl6
->flowlabel
= ip6_make_flowinfo(RT_TOS(info
->key
.tos
),
943 dst_cache
= &info
->dst_cache
;
947 const struct iphdr
*iip
= ip_hdr(skb
);
949 prio
= ip_tunnel_get_dsfield(iip
, skb
);
953 fl6
->flowlabel
= ip6_make_flowinfo(RT_TOS(prio
),
955 fl6
->daddr
= geneve
->remote
.sin6
.sin6_addr
;
956 dst_cache
= &geneve
->dst_cache
;
960 dst
= dst_cache_get_ip6(dst_cache
, &fl6
->saddr
);
965 #ifdef HAVE_IPV6_DST_LOOKUP_NET
966 if (ipv6_stub
->ipv6_dst_lookup(geneve
->net
, gs6
->sock
->sk
, &dst
, fl6
)) {
968 #ifdef HAVE_IPV6_STUB
969 if (ipv6_stub
->ipv6_dst_lookup(gs6
->sock
->sk
, &dst
, fl6
)) {
971 if (ip6_dst_lookup(gs6
->sock
->sk
, &dst
, fl6
)) {
974 netdev_dbg(dev
, "no route to %pI6\n", &fl6
->daddr
);
975 return ERR_PTR(-ENETUNREACH
);
977 if (dst
->dev
== dev
) { /* is this necessary? */
978 netdev_dbg(dev
, "circular route to %pI6\n", &fl6
->daddr
);
980 return ERR_PTR(-ELOOP
);
984 dst_cache_set_ip6(dst_cache
, dst
, &fl6
->saddr
);
989 /* Convert 64 bit tunnel ID to 24 bit VNI. */
990 static void tunnel_id_to_vni(__be64 tun_id
, __u8
*vni
)
993 vni
[0] = (__force __u8
)(tun_id
>> 16);
994 vni
[1] = (__force __u8
)(tun_id
>> 8);
995 vni
[2] = (__force __u8
)tun_id
;
997 vni
[0] = (__force __u8
)((__force u64
)tun_id
>> 40);
998 vni
[1] = (__force __u8
)((__force u64
)tun_id
>> 48);
999 vni
[2] = (__force __u8
)((__force u64
)tun_id
>> 56);
1003 static netdev_tx_t
geneve_xmit_skb(struct sk_buff
*skb
, struct net_device
*dev
,
1004 struct ip_tunnel_info
*info
)
1006 struct geneve_dev
*geneve
= netdev_priv(dev
);
1007 struct geneve_sock
*gs4
;
1008 struct rtable
*rt
= NULL
;
1009 const struct iphdr
*iip
; /* interior IP header */
1015 bool xnet
= !net_eq(geneve
->net
, dev_net(geneve
->dev
));
1016 u32 flags
= geneve
->flags
;
1018 gs4
= rcu_dereference(geneve
->sock4
);
1022 if (geneve
->collect_md
) {
1023 if (unlikely(!info
|| !(info
->mode
& IP_TUNNEL_INFO_TX
))) {
1024 netdev_dbg(dev
, "no tunnel metadata\n");
1027 if (info
&& ip_tunnel_info_af(info
) != AF_INET
)
1031 sport
= udp_flow_src_port(geneve
->net
, skb
, 1, USHRT_MAX
, true);
1032 rt
= geneve_get_v4_rt(skb
, dev
, &fl4
, info
, geneve
->dst_port
, sport
);
1038 skb_reset_mac_header(skb
);
1043 const struct ip_tunnel_key
*key
= &info
->key
;
1047 tunnel_id_to_vni(key
->tun_id
, vni
);
1048 if (info
->options_len
)
1049 opts
= ip_tunnel_info_opts(info
);
1051 if (key
->tun_flags
& TUNNEL_CSUM
)
1052 flags
&= ~GENEVE_F_UDP_ZERO_CSUM_TX
;
1054 flags
|= GENEVE_F_UDP_ZERO_CSUM_TX
;
1056 err
= geneve_build_skb(rt
, skb
, key
->tun_flags
, vni
,
1057 info
->options_len
, opts
, flags
, xnet
);
1061 tos
= ip_tunnel_ecn_encap(key
->tos
, iip
, skb
);
1063 df
= key
->tun_flags
& TUNNEL_DONT_FRAGMENT
? htons(IP_DF
) : 0;
1065 err
= geneve_build_skb(rt
, skb
, 0, geneve
->vni
,
1066 0, NULL
, flags
, xnet
);
1070 tos
= ip_tunnel_ecn_encap(fl4
.flowi4_tos
, iip
, skb
);
1072 if (!ttl
&& IN_MULTICAST(ntohl(fl4
.daddr
)))
1074 ttl
= ttl
? : ip4_dst_hoplimit(&rt
->dst
);
1077 udp_tunnel_xmit_skb(rt
, gs4
->sock
->sk
, skb
, fl4
.saddr
, fl4
.daddr
,
1078 tos
, ttl
, df
, sport
, geneve
->dst_port
,
1079 !net_eq(geneve
->net
, dev_net(geneve
->dev
)),
1080 !!(flags
& GENEVE_F_UDP_ZERO_CSUM_TX
));
1082 return NETDEV_TX_OK
;
1088 dev
->stats
.collisions
++;
1089 else if (err
== -ENETUNREACH
)
1090 dev
->stats
.tx_carrier_errors
++;
1092 dev
->stats
.tx_errors
++;
1093 return NETDEV_TX_OK
;
1096 #if IS_ENABLED(CONFIG_IPV6)
1097 static netdev_tx_t
geneve6_xmit_skb(struct sk_buff
*skb
, struct net_device
*dev
,
1098 struct ip_tunnel_info
*info
)
1100 struct geneve_dev
*geneve
= netdev_priv(dev
);
1101 struct dst_entry
*dst
= NULL
;
1102 const struct iphdr
*iip
; /* interior IP header */
1103 struct geneve_sock
*gs6
;
1109 bool xnet
= !net_eq(geneve
->net
, dev_net(geneve
->dev
));
1110 u32 flags
= geneve
->flags
;
1112 gs6
= rcu_dereference(geneve
->sock6
);
1116 if (geneve
->collect_md
) {
1117 if (unlikely(!info
|| !(info
->mode
& IP_TUNNEL_INFO_TX
))) {
1118 netdev_dbg(dev
, "no tunnel metadata\n");
1123 sport
= udp_flow_src_port(geneve
->net
, skb
, 1, USHRT_MAX
, true);
1124 dst
= geneve_get_v6_dst(skb
, dev
, &fl6
, info
, geneve
->dst_port
, sport
);
1130 skb_reset_mac_header(skb
);
1135 const struct ip_tunnel_key
*key
= &info
->key
;
1139 tunnel_id_to_vni(key
->tun_id
, vni
);
1140 if (info
->options_len
)
1141 opts
= ip_tunnel_info_opts(info
);
1143 if (key
->tun_flags
& TUNNEL_CSUM
)
1144 flags
&= ~GENEVE_F_UDP_ZERO_CSUM6_TX
;
1146 flags
|= GENEVE_F_UDP_ZERO_CSUM6_TX
;
1148 err
= geneve6_build_skb(dst
, skb
, key
->tun_flags
, vni
,
1149 info
->options_len
, opts
,
1154 prio
= ip_tunnel_ecn_encap(key
->tos
, iip
, skb
);
1156 label
= info
->key
.label
;
1158 err
= geneve6_build_skb(dst
, skb
, 0, geneve
->vni
,
1159 0, NULL
, flags
, xnet
);
1163 prio
= ip_tunnel_ecn_encap(ip6_tclass(fl6
.flowlabel
),
1166 if (!ttl
&& ipv6_addr_is_multicast(&fl6
.daddr
))
1168 ttl
= ttl
? : ip6_dst_hoplimit(dst
);
1169 label
= geneve
->label
;
1171 udp_tunnel6_xmit_skb(dst
, gs6
->sock
->sk
, skb
, dev
,
1172 &fl6
.saddr
, &fl6
.daddr
, prio
, ttl
, label
,
1173 sport
, geneve
->dst_port
,
1174 !!(flags
& GENEVE_F_UDP_ZERO_CSUM6_TX
));
1175 return NETDEV_TX_OK
;
1181 dev
->stats
.collisions
++;
1182 else if (err
== -ENETUNREACH
)
1183 dev
->stats
.tx_carrier_errors
++;
1185 dev
->stats
.tx_errors
++;
1186 return NETDEV_TX_OK
;
1190 netdev_tx_t
rpl_geneve_xmit(struct sk_buff
*skb
)
1192 struct net_device
*dev
= skb
->dev
;
1193 struct geneve_dev
*geneve
= netdev_priv(dev
);
1194 struct ip_tunnel_info
*info
= NULL
;
1196 if (geneve
->collect_md
)
1197 info
= skb_tunnel_info(skb
);
1199 #if IS_ENABLED(CONFIG_IPV6)
1200 if ((info
&& ip_tunnel_info_af(info
) == AF_INET6
) ||
1201 (!info
&& geneve
->remote
.sa
.sa_family
== AF_INET6
))
1202 return geneve6_xmit_skb(skb
, dev
, info
);
1204 return geneve_xmit_skb(skb
, dev
, info
);
1206 EXPORT_SYMBOL_GPL(rpl_geneve_xmit
);
1208 static netdev_tx_t
geneve_dev_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
1210 /* Drop All packets coming from networking stack. OVS-CB is
1211 * not initialized for these packets.
1215 dev
->stats
.tx_dropped
++;
1216 return NETDEV_TX_OK
;
1219 static int __geneve_change_mtu(struct net_device
*dev
, int new_mtu
, bool strict
)
1221 struct geneve_dev
*geneve
= netdev_priv(dev
);
1222 /* The max_mtu calculation does not take account of GENEVE
1223 * options, to avoid excluding potentially valid
1226 int max_mtu
= IP_MAX_MTU
- GENEVE_BASE_HLEN
- dev
->hard_header_len
;
1228 if (geneve
->remote
.sa
.sa_family
== AF_INET6
)
1229 max_mtu
-= sizeof(struct ipv6hdr
);
1231 max_mtu
-= sizeof(struct iphdr
);
1236 if (new_mtu
> max_mtu
) {
1247 static int geneve_change_mtu(struct net_device
*dev
, int new_mtu
)
1249 return __geneve_change_mtu(dev
, new_mtu
, true);
1252 int ovs_geneve_fill_metadata_dst(struct net_device
*dev
, struct sk_buff
*skb
)
1254 struct ip_tunnel_info
*info
= skb_tunnel_info(skb
);
1255 struct geneve_dev
*geneve
= netdev_priv(dev
);
1259 #if IS_ENABLED(CONFIG_IPV6)
1260 struct dst_entry
*dst
;
1264 sport
= udp_flow_src_port(geneve
->net
, skb
,
1265 1, USHRT_MAX
, true);
1267 if (ip_tunnel_info_af(info
) == AF_INET
) {
1268 rt
= geneve_get_v4_rt(skb
, dev
, &fl4
, info
, geneve
->dst_port
, sport
);
1273 info
->key
.u
.ipv4
.src
= fl4
.saddr
;
1274 #if IS_ENABLED(CONFIG_IPV6)
1275 } else if (ip_tunnel_info_af(info
) == AF_INET6
) {
1276 dst
= geneve_get_v6_dst(skb
, dev
, &fl6
, info
, geneve
->dst_port
, sport
);
1278 return PTR_ERR(dst
);
1281 info
->key
.u
.ipv6
.src
= fl6
.saddr
;
1287 info
->key
.tp_src
= sport
;
1288 info
->key
.tp_dst
= geneve
->dst_port
;
1291 EXPORT_SYMBOL_GPL(ovs_geneve_fill_metadata_dst
);
1293 static const struct net_device_ops geneve_netdev_ops
= {
1294 .ndo_init
= geneve_init
,
1295 .ndo_uninit
= geneve_uninit
,
1296 .ndo_open
= geneve_open
,
1297 .ndo_stop
= geneve_stop
,
1298 .ndo_start_xmit
= geneve_dev_xmit
,
1299 .ndo_get_stats64
= ip_tunnel_get_stats64
,
1300 #ifdef HAVE_RHEL7_MAX_MTU
1301 .ndo_size
= sizeof(struct net_device_ops
),
1302 .extended
.ndo_change_mtu
= geneve_change_mtu
,
1304 .ndo_change_mtu
= geneve_change_mtu
,
1306 .ndo_validate_addr
= eth_validate_addr
,
1307 .ndo_set_mac_address
= eth_mac_addr
,
1308 #ifdef HAVE_NDO_FILL_METADATA_DST
1309 .ndo_fill_metadata_dst
= geneve_fill_metadata_dst
,
1313 static void geneve_get_drvinfo(struct net_device
*dev
,
1314 struct ethtool_drvinfo
*drvinfo
)
1316 strlcpy(drvinfo
->version
, GENEVE_NETDEV_VER
, sizeof(drvinfo
->version
));
1317 strlcpy(drvinfo
->driver
, "geneve", sizeof(drvinfo
->driver
));
1320 static const struct ethtool_ops geneve_ethtool_ops
= {
1321 .get_drvinfo
= geneve_get_drvinfo
,
1322 .get_link
= ethtool_op_get_link
,
1325 /* Info for udev, that this is a virtual tunnel endpoint */
1326 static struct device_type geneve_type
= {
1330 /* Calls the ndo_add_geneve_port or ndo_udp_tunnel_add of the caller
1331 * in order to supply the listening GENEVE udp ports. Callers are
1332 * expected to implement the ndo_add_geneve_port.
1334 static void geneve_push_rx_ports(struct net_device
*dev
)
1336 #ifdef HAVE_NDO_ADD_GENEVE_PORT
1337 struct net
*net
= dev_net(dev
);
1338 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1339 struct geneve_sock
*gs
;
1340 sa_family_t sa_family
;
1344 if (!dev
->netdev_ops
->ndo_add_geneve_port
)
1348 list_for_each_entry_rcu(gs
, &gn
->sock_list
, list
) {
1350 sa_family
= sk
->sk_family
;
1351 port
= inet_sk(sk
)->inet_sport
;
1352 dev
->netdev_ops
->ndo_add_geneve_port(dev
, sa_family
, port
);
1355 #elif defined(HAVE_NDO_UDP_TUNNEL_ADD)
1356 struct net
*net
= dev_net(dev
);
1357 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1358 struct geneve_sock
*gs
;
1361 if (!dev
->netdev_ops
->ndo_udp_tunnel_add
)
1365 list_for_each_entry_rcu(gs
, &gn
->sock_list
, list
) {
1366 struct udp_tunnel_info ti
;
1367 ti
.type
= UDP_TUNNEL_TYPE_GENEVE
;
1369 ti
.port
= inet_sk(sk
)->inet_sport
;
1370 ti
.sa_family
= sk
->sk_family
;
1371 dev
->netdev_ops
->ndo_udp_tunnel_add(dev
, &ti
);
1377 /* Initialize the device structure. */
1378 static void geneve_setup(struct net_device
*dev
)
1382 dev
->netdev_ops
= &geneve_netdev_ops
;
1383 dev
->ethtool_ops
= &geneve_ethtool_ops
;
1384 #ifndef HAVE_NEEDS_FREE_NETDEV
1385 dev
->destructor
= free_netdev
;
1387 dev
->needs_free_netdev
= true;
1390 SET_NETDEV_DEVTYPE(dev
, &geneve_type
);
1392 dev
->features
|= NETIF_F_LLTX
;
1393 dev
->features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
;
1394 dev
->features
|= NETIF_F_RXCSUM
;
1395 dev
->features
|= NETIF_F_GSO_SOFTWARE
;
1397 dev
->hw_features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
| NETIF_F_RXCSUM
;
1398 dev
->hw_features
|= NETIF_F_GSO_SOFTWARE
;
1401 netif_keep_dst(dev
);
1403 dev
->priv_flags
&= ~IFF_TX_SKB_SHARING
;
1404 dev
->priv_flags
|= IFF_LIVE_ADDR_CHANGE
| IFF_NO_QUEUE
;
1405 eth_hw_addr_random(dev
);
1408 static const struct nla_policy geneve_policy
[IFLA_GENEVE_MAX
+ 1] = {
1409 [IFLA_GENEVE_ID
] = { .type
= NLA_U32
},
1410 [IFLA_GENEVE_REMOTE
] = { .len
= sizeof_field(struct iphdr
, daddr
) },
1411 [IFLA_GENEVE_REMOTE6
] = { .len
= sizeof(struct in6_addr
) },
1412 [IFLA_GENEVE_TTL
] = { .type
= NLA_U8
},
1413 [IFLA_GENEVE_TOS
] = { .type
= NLA_U8
},
1414 [IFLA_GENEVE_LABEL
] = { .type
= NLA_U32
},
1415 [IFLA_GENEVE_PORT
] = { .type
= NLA_U16
},
1416 [IFLA_GENEVE_COLLECT_METADATA
] = { .type
= NLA_FLAG
},
1417 [IFLA_GENEVE_UDP_CSUM
] = { .type
= NLA_U8
},
1418 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX
] = { .type
= NLA_U8
},
1419 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX
] = { .type
= NLA_U8
},
1422 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
1423 static int geneve_validate(struct nlattr
*tb
[], struct nlattr
*data
[],
1424 struct netlink_ext_ack
*extack
)
1426 static int geneve_validate(struct nlattr
*tb
[], struct nlattr
*data
[])
1429 if (tb
[IFLA_ADDRESS
]) {
1430 if (nla_len(tb
[IFLA_ADDRESS
]) != ETH_ALEN
)
1433 if (!is_valid_ether_addr(nla_data(tb
[IFLA_ADDRESS
])))
1434 return -EADDRNOTAVAIL
;
1440 if (data
[IFLA_GENEVE_ID
]) {
1441 __u32 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
1443 if (vni
>= GENEVE_VID_MASK
)
1450 static struct geneve_dev
*geneve_find_dev(struct geneve_net
*gn
,
1452 union geneve_addr
*remote
,
1454 bool *tun_on_same_port
,
1455 bool *tun_collect_md
)
1457 struct geneve_dev
*geneve
, *t
;
1459 *tun_on_same_port
= false;
1460 *tun_collect_md
= false;
1462 list_for_each_entry(geneve
, &gn
->geneve_list
, next
) {
1463 if (geneve
->dst_port
== dst_port
) {
1464 *tun_collect_md
= geneve
->collect_md
;
1465 *tun_on_same_port
= true;
1467 if (!memcmp(vni
, geneve
->vni
, sizeof(geneve
->vni
)) &&
1468 !memcmp(remote
, &geneve
->remote
, sizeof(geneve
->remote
)) &&
1469 dst_port
== geneve
->dst_port
)
1475 static int geneve_configure(struct net
*net
, struct net_device
*dev
,
1476 union geneve_addr
*remote
,
1477 __u32 vni
, __u8 ttl
, __u8 tos
, __be32 label
,
1478 __be16 dst_port
, bool metadata
, u32 flags
)
1480 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1481 struct geneve_dev
*t
, *geneve
= netdev_priv(dev
);
1482 bool tun_collect_md
, tun_on_same_port
;
1488 (remote
->sa
.sa_family
!= AF_UNSPEC
|| vni
|| tos
|| ttl
|| label
))
1494 geneve
->vni
[0] = (vni
& 0x00ff0000) >> 16;
1495 geneve
->vni
[1] = (vni
& 0x0000ff00) >> 8;
1496 geneve
->vni
[2] = vni
& 0x000000ff;
1498 if ((remote
->sa
.sa_family
== AF_INET
&&
1499 IN_MULTICAST(ntohl(remote
->sin
.sin_addr
.s_addr
))) ||
1500 (remote
->sa
.sa_family
== AF_INET6
&&
1501 ipv6_addr_is_multicast(&remote
->sin6
.sin6_addr
)))
1503 if (label
&& remote
->sa
.sa_family
!= AF_INET6
)
1506 geneve
->remote
= *remote
;
1510 geneve
->label
= label
;
1511 geneve
->dst_port
= dst_port
;
1512 geneve
->collect_md
= metadata
;
1513 geneve
->flags
= flags
;
1515 t
= geneve_find_dev(gn
, dst_port
, remote
, geneve
->vni
,
1516 &tun_on_same_port
, &tun_collect_md
);
1520 /* make enough headroom for basic scenario */
1521 encap_len
= GENEVE_BASE_HLEN
+ ETH_HLEN
;
1522 if (remote
->sa
.sa_family
== AF_INET
)
1523 encap_len
+= sizeof(struct iphdr
);
1525 encap_len
+= sizeof(struct ipv6hdr
);
1526 dev
->needed_headroom
= encap_len
+ ETH_HLEN
;
1529 if (tun_on_same_port
)
1536 dst_cache_reset(&geneve
->dst_cache
);
1538 err
= register_netdevice(dev
);
1542 list_add(&geneve
->next
, &gn
->geneve_list
);
1546 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
1547 static int geneve_newlink(struct net
*net
, struct net_device
*dev
,
1548 struct nlattr
*tb
[], struct nlattr
*data
[],
1549 struct netlink_ext_ack
*extack
)
1551 static int geneve_newlink(struct net
*net
, struct net_device
*dev
,
1552 struct nlattr
*tb
[], struct nlattr
*data
[])
1555 __be16 dst_port
= htons(GENEVE_UDP_PORT
);
1556 __u8 ttl
= 0, tos
= 0;
1557 bool metadata
= false;
1558 union geneve_addr remote
= geneve_remote_unspec
;
1563 if (data
[IFLA_GENEVE_REMOTE
] && data
[IFLA_GENEVE_REMOTE6
])
1566 if (data
[IFLA_GENEVE_REMOTE
]) {
1567 remote
.sa
.sa_family
= AF_INET
;
1568 remote
.sin
.sin_addr
.s_addr
=
1569 nla_get_in_addr(data
[IFLA_GENEVE_REMOTE
]);
1572 if (data
[IFLA_GENEVE_REMOTE6
]) {
1573 if (!IS_ENABLED(CONFIG_IPV6
))
1574 return -EPFNOSUPPORT
;
1576 remote
.sa
.sa_family
= AF_INET6
;
1577 remote
.sin6
.sin6_addr
=
1578 nla_get_in6_addr(data
[IFLA_GENEVE_REMOTE6
]);
1580 if (ipv6_addr_type(&remote
.sin6
.sin6_addr
) &
1581 IPV6_ADDR_LINKLOCAL
) {
1582 netdev_dbg(dev
, "link-local remote is unsupported\n");
1587 if (data
[IFLA_GENEVE_ID
])
1588 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
1590 if (data
[IFLA_GENEVE_TTL
])
1591 ttl
= nla_get_u8(data
[IFLA_GENEVE_TTL
]);
1593 if (data
[IFLA_GENEVE_TOS
])
1594 tos
= nla_get_u8(data
[IFLA_GENEVE_TOS
]);
1596 if (data
[IFLA_GENEVE_LABEL
])
1597 label
= nla_get_be32(data
[IFLA_GENEVE_LABEL
]) &
1598 IPV6_FLOWLABEL_MASK
;
1600 if (data
[IFLA_GENEVE_PORT
])
1601 dst_port
= nla_get_be16(data
[IFLA_GENEVE_PORT
]);
1603 if (data
[IFLA_GENEVE_COLLECT_METADATA
])
1606 if (data
[IFLA_GENEVE_UDP_CSUM
] &&
1607 !nla_get_u8(data
[IFLA_GENEVE_UDP_CSUM
]))
1608 flags
|= GENEVE_F_UDP_ZERO_CSUM_TX
;
1610 if (data
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX
] &&
1611 nla_get_u8(data
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX
]))
1612 flags
|= GENEVE_F_UDP_ZERO_CSUM6_TX
;
1614 if (data
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
] &&
1615 nla_get_u8(data
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
]))
1616 flags
|= GENEVE_F_UDP_ZERO_CSUM6_RX
;
1618 return geneve_configure(net
, dev
, &remote
, vni
, ttl
, tos
, label
,
1619 dst_port
, metadata
, flags
);
1622 static void geneve_dellink(struct net_device
*dev
, struct list_head
*head
)
1624 struct geneve_dev
*geneve
= netdev_priv(dev
);
1626 list_del(&geneve
->next
);
1627 unregister_netdevice_queue(dev
, head
);
1630 static size_t geneve_get_size(const struct net_device
*dev
)
1632 return nla_total_size(sizeof(__u32
)) + /* IFLA_GENEVE_ID */
1633 nla_total_size(sizeof(struct in6_addr
)) + /* IFLA_GENEVE_REMOTE{6} */
1634 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TTL */
1635 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TOS */
1636 nla_total_size(sizeof(__be32
)) + /* IFLA_GENEVE_LABEL */
1637 nla_total_size(sizeof(__be16
)) + /* IFLA_GENEVE_PORT */
1638 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
1639 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_UDP_CSUM */
1640 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
1641 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
1645 static int geneve_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
1647 struct geneve_dev
*geneve
= netdev_priv(dev
);
1650 vni
= (geneve
->vni
[0] << 16) | (geneve
->vni
[1] << 8) | geneve
->vni
[2];
1651 if (nla_put_u32(skb
, IFLA_GENEVE_ID
, vni
))
1652 goto nla_put_failure
;
1654 if (geneve
->remote
.sa
.sa_family
== AF_INET
) {
1655 if (nla_put_in_addr(skb
, IFLA_GENEVE_REMOTE
,
1656 geneve
->remote
.sin
.sin_addr
.s_addr
))
1657 goto nla_put_failure
;
1658 #if IS_ENABLED(CONFIG_IPV6)
1660 if (nla_put_in6_addr(skb
, IFLA_GENEVE_REMOTE6
,
1661 &geneve
->remote
.sin6
.sin6_addr
))
1662 goto nla_put_failure
;
1666 if (nla_put_u8(skb
, IFLA_GENEVE_TTL
, geneve
->ttl
) ||
1667 nla_put_u8(skb
, IFLA_GENEVE_TOS
, geneve
->tos
) ||
1668 nla_put_be32(skb
, IFLA_GENEVE_LABEL
, geneve
->label
))
1669 goto nla_put_failure
;
1671 if (nla_put_be16(skb
, IFLA_GENEVE_PORT
, geneve
->dst_port
))
1672 goto nla_put_failure
;
1674 if (geneve
->collect_md
) {
1675 if (nla_put_flag(skb
, IFLA_GENEVE_COLLECT_METADATA
))
1676 goto nla_put_failure
;
1679 if (nla_put_u8(skb
, IFLA_GENEVE_UDP_CSUM
,
1680 !(geneve
->flags
& GENEVE_F_UDP_ZERO_CSUM_TX
)) ||
1681 nla_put_u8(skb
, IFLA_GENEVE_UDP_ZERO_CSUM6_TX
,
1682 !!(geneve
->flags
& GENEVE_F_UDP_ZERO_CSUM6_TX
)) ||
1683 nla_put_u8(skb
, IFLA_GENEVE_UDP_ZERO_CSUM6_RX
,
1684 !!(geneve
->flags
& GENEVE_F_UDP_ZERO_CSUM6_RX
)))
1685 goto nla_put_failure
;
1693 static struct rtnl_link_ops geneve_link_ops __read_mostly
= {
1694 .kind
= "ovs_geneve",
1695 .maxtype
= IFLA_GENEVE_MAX
,
1696 .policy
= geneve_policy
,
1697 .priv_size
= sizeof(struct geneve_dev
),
1698 .setup
= geneve_setup
,
1699 .validate
= geneve_validate
,
1700 .newlink
= geneve_newlink
,
1701 .dellink
= geneve_dellink
,
1702 .get_size
= geneve_get_size
,
1703 .fill_info
= geneve_fill_info
,
1706 struct net_device
*rpl_geneve_dev_create_fb(struct net
*net
, const char *name
,
1707 u8 name_assign_type
, u16 dst_port
)
1709 struct nlattr
*tb
[IFLA_MAX
+ 1];
1710 struct net_device
*dev
;
1711 LIST_HEAD(list_kill
);
1714 memset(tb
, 0, sizeof(tb
));
1715 dev
= rtnl_create_link(net
, name
, name_assign_type
,
1716 &geneve_link_ops
, tb
);
1720 err
= geneve_configure(net
, dev
, &geneve_remote_unspec
,
1721 0, 0, 0, 0, htons(dst_port
), true,
1722 GENEVE_F_UDP_ZERO_CSUM6_RX
);
1725 return ERR_PTR(err
);
1728 /* openvswitch users expect packet sizes to be unrestricted,
1729 * so set the largest MTU we can.
1731 err
= __geneve_change_mtu(dev
, IP_MAX_MTU
, false);
1735 err
= rtnl_configure_link(dev
, NULL
);
1742 geneve_dellink(dev
, &list_kill
);
1743 unregister_netdevice_many(&list_kill
);
1744 return ERR_PTR(err
);
1746 EXPORT_SYMBOL_GPL(rpl_geneve_dev_create_fb
);
1748 static int geneve_netdevice_event(struct notifier_block
*unused
,
1749 unsigned long event
, void *ptr
)
1751 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
1753 if (event
== NETDEV_OFFLOAD_PUSH_GENEVE
)
1754 geneve_push_rx_ports(dev
);
1759 static struct notifier_block geneve_notifier_block __read_mostly
= {
1760 .notifier_call
= geneve_netdevice_event
,
1763 static __net_init
int geneve_init_net(struct net
*net
)
1765 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1767 INIT_LIST_HEAD(&gn
->geneve_list
);
1768 INIT_LIST_HEAD(&gn
->sock_list
);
1772 static void __net_exit
geneve_exit_net(struct net
*net
)
1774 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1775 struct geneve_dev
*geneve
, *next
;
1776 struct net_device
*dev
, *aux
;
1781 /* gather any geneve devices that were moved into this ns */
1782 for_each_netdev_safe(net
, dev
, aux
)
1783 if (dev
->rtnl_link_ops
== &geneve_link_ops
)
1784 unregister_netdevice_queue(dev
, &list
);
1786 /* now gather any other geneve devices that were created in this ns */
1787 list_for_each_entry_safe(geneve
, next
, &gn
->geneve_list
, next
) {
1788 /* If geneve->dev is in the same netns, it was already added
1789 * to the list by the previous loop.
1791 if (!net_eq(dev_net(geneve
->dev
), net
))
1792 unregister_netdevice_queue(geneve
->dev
, &list
);
1795 /* unregister the devices gathered above */
1796 unregister_netdevice_many(&list
);
1800 static struct pernet_operations geneve_net_ops
= {
1801 .init
= geneve_init_net
,
1802 .exit
= geneve_exit_net
,
1803 .id
= &geneve_net_id
,
1804 .size
= sizeof(struct geneve_net
),
1807 int rpl_geneve_init_module(void)
1811 rc
= register_pernet_subsys(&geneve_net_ops
);
1815 rc
= register_netdevice_notifier(&geneve_notifier_block
);
1819 rc
= rtnl_link_register(&geneve_link_ops
);
1823 pr_info("Geneve tunneling driver\n");
1827 unregister_netdevice_notifier(&geneve_notifier_block
);
1829 unregister_pernet_subsys(&geneve_net_ops
);
1831 pr_err("Error while initializing GENEVE %d\n", rc
);
1835 void rpl_geneve_cleanup_module(void)
1837 rtnl_link_unregister(&geneve_link_ops
);
1838 unregister_netdevice_notifier(&geneve_notifier_block
);
1839 unregister_pernet_subsys(&geneve_net_ops
);