// SPDX-License-Identifier: GPL-2.0-only
/*
 * GENEVE: Generic Network Virtualization Encapsulation
 *
 * Copyright (c) 2015 Red Hat, Inc.
 */
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/kernel.h>
11 #include <linux/module.h>
12 #include <linux/etherdevice.h>
13 #include <linux/hash.h>
14 #include <net/ipv6_stubs.h>
15 #include <net/dst_metadata.h>
16 #include <net/gro_cells.h>
17 #include <net/rtnetlink.h>
18 #include <net/geneve.h>
19 #include <net/protocol.h>
/* Driver version string reported through ethtool drvinfo. */
#define GENEVE_NETDEV_VER "0.6"

/* VNIs are 24 bits wide: number of distinct IDs and the matching mask. */
#define GENEVE_N_VID (1u << 24)
#define GENEVE_VID_MASK (GENEVE_N_VID - 1)

/* Each socket keeps its devices in a VNI hash table of this many buckets. */
#define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1 << VNI_HASH_BITS)
29 static bool log_ecn_error
= true;
30 module_param(log_ecn_error
, bool, 0644);
31 MODULE_PARM_DESC(log_ecn_error
, "Log packets received with corrupted ECN");
/* UDP header plus the fixed Geneve header (options not included). */
#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
/* Base headers plus an Ethernet header and the IPv4 / IPv6 header. */
#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
38 /* per-network namespace private data for this module */
40 struct list_head geneve_list
;
41 struct list_head sock_list
;
44 static unsigned int geneve_net_id
;
46 struct geneve_dev_node
{
47 struct hlist_node hlist
;
48 struct geneve_dev
*geneve
;
51 struct geneve_config
{
52 struct ip_tunnel_info info
;
54 bool use_udp6_rx_checksums
;
56 enum ifla_geneve_df df
;
59 /* Pseudo network device */
61 struct geneve_dev_node hlist4
; /* vni hash table for IPv4 socket */
62 #if IS_ENABLED(CONFIG_IPV6)
63 struct geneve_dev_node hlist6
; /* vni hash table for IPv6 socket */
65 struct net
*net
; /* netns for packet i/o */
66 struct net_device
*dev
; /* netdev for geneve tunnel */
67 struct geneve_sock __rcu
*sock4
; /* IPv4 socket used for geneve tunnel */
68 #if IS_ENABLED(CONFIG_IPV6)
69 struct geneve_sock __rcu
*sock6
; /* IPv6 socket used for geneve tunnel */
71 struct list_head next
; /* geneve's per namespace list */
72 struct gro_cells gro_cells
;
73 struct geneve_config cfg
;
78 struct list_head list
;
82 struct hlist_head vni_list
[VNI_HASH_SIZE
];
85 static inline __u32
geneve_net_vni_hash(u8 vni
[3])
89 vnid
= (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
90 return hash_32(vnid
, VNI_HASH_BITS
);
93 static __be64
vni_to_tunnel_id(const __u8
*vni
)
96 return (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
98 return (__force __be64
)(((__force u64
)vni
[0] << 40) |
99 ((__force u64
)vni
[1] << 48) |
100 ((__force u64
)vni
[2] << 56));
104 /* Convert 64 bit tunnel ID to 24 bit VNI. */
105 static void tunnel_id_to_vni(__be64 tun_id
, __u8
*vni
)
108 vni
[0] = (__force __u8
)(tun_id
>> 16);
109 vni
[1] = (__force __u8
)(tun_id
>> 8);
110 vni
[2] = (__force __u8
)tun_id
;
112 vni
[0] = (__force __u8
)((__force u64
)tun_id
>> 40);
113 vni
[1] = (__force __u8
)((__force u64
)tun_id
>> 48);
114 vni
[2] = (__force __u8
)((__force u64
)tun_id
>> 56);
118 static bool eq_tun_id_and_vni(u8
*tun_id
, u8
*vni
)
120 return !memcmp(vni
, &tun_id
[5], 3);
123 static sa_family_t
geneve_get_sk_family(struct geneve_sock
*gs
)
125 return gs
->sock
->sk
->sk_family
;
128 static struct geneve_dev
*geneve_lookup(struct geneve_sock
*gs
,
129 __be32 addr
, u8 vni
[])
131 struct hlist_head
*vni_list_head
;
132 struct geneve_dev_node
*node
;
135 /* Find the device for this VNI */
136 hash
= geneve_net_vni_hash(vni
);
137 vni_list_head
= &gs
->vni_list
[hash
];
138 hlist_for_each_entry_rcu(node
, vni_list_head
, hlist
) {
139 if (eq_tun_id_and_vni((u8
*)&node
->geneve
->cfg
.info
.key
.tun_id
, vni
) &&
140 addr
== node
->geneve
->cfg
.info
.key
.u
.ipv4
.dst
)
146 #if IS_ENABLED(CONFIG_IPV6)
147 static struct geneve_dev
*geneve6_lookup(struct geneve_sock
*gs
,
148 struct in6_addr addr6
, u8 vni
[])
150 struct hlist_head
*vni_list_head
;
151 struct geneve_dev_node
*node
;
154 /* Find the device for this VNI */
155 hash
= geneve_net_vni_hash(vni
);
156 vni_list_head
= &gs
->vni_list
[hash
];
157 hlist_for_each_entry_rcu(node
, vni_list_head
, hlist
) {
158 if (eq_tun_id_and_vni((u8
*)&node
->geneve
->cfg
.info
.key
.tun_id
, vni
) &&
159 ipv6_addr_equal(&addr6
, &node
->geneve
->cfg
.info
.key
.u
.ipv6
.dst
))
166 static inline struct genevehdr
*geneve_hdr(const struct sk_buff
*skb
)
168 return (struct genevehdr
*)(udp_hdr(skb
) + 1);
171 static struct geneve_dev
*geneve_lookup_skb(struct geneve_sock
*gs
,
174 static u8 zero_vni
[3];
177 if (geneve_get_sk_family(gs
) == AF_INET
) {
181 iph
= ip_hdr(skb
); /* outer IP header... */
183 if (gs
->collect_md
) {
187 vni
= geneve_hdr(skb
)->vni
;
191 return geneve_lookup(gs
, addr
, vni
);
192 #if IS_ENABLED(CONFIG_IPV6)
193 } else if (geneve_get_sk_family(gs
) == AF_INET6
) {
194 static struct in6_addr zero_addr6
;
195 struct ipv6hdr
*ip6h
;
196 struct in6_addr addr6
;
198 ip6h
= ipv6_hdr(skb
); /* outer IPv6 header... */
200 if (gs
->collect_md
) {
204 vni
= geneve_hdr(skb
)->vni
;
208 return geneve6_lookup(gs
, addr6
, vni
);
214 /* geneve receive/decap routine */
215 static void geneve_rx(struct geneve_dev
*geneve
, struct geneve_sock
*gs
,
218 struct genevehdr
*gnvh
= geneve_hdr(skb
);
219 struct metadata_dst
*tun_dst
= NULL
;
220 struct pcpu_sw_netstats
*stats
;
225 if (ip_tunnel_collect_metadata() || gs
->collect_md
) {
228 flags
= TUNNEL_KEY
| TUNNEL_GENEVE_OPT
|
229 (gnvh
->oam
? TUNNEL_OAM
: 0) |
230 (gnvh
->critical
? TUNNEL_CRIT_OPT
: 0);
232 tun_dst
= udp_tun_rx_dst(skb
, geneve_get_sk_family(gs
), flags
,
233 vni_to_tunnel_id(gnvh
->vni
),
236 geneve
->dev
->stats
.rx_dropped
++;
239 /* Update tunnel dst according to Geneve options. */
240 ip_tunnel_info_opts_set(&tun_dst
->u
.tun_info
,
241 gnvh
->options
, gnvh
->opt_len
* 4,
244 /* Drop packets w/ critical options,
245 * since we don't support any...
247 if (gnvh
->critical
) {
248 geneve
->dev
->stats
.rx_frame_errors
++;
249 geneve
->dev
->stats
.rx_errors
++;
254 skb_reset_mac_header(skb
);
255 skb
->protocol
= eth_type_trans(skb
, geneve
->dev
);
256 skb_postpull_rcsum(skb
, eth_hdr(skb
), ETH_HLEN
);
259 skb_dst_set(skb
, &tun_dst
->dst
);
261 /* Ignore packet loops (and multicast echo) */
262 if (ether_addr_equal(eth_hdr(skb
)->h_source
, geneve
->dev
->dev_addr
)) {
263 geneve
->dev
->stats
.rx_errors
++;
267 oiph
= skb_network_header(skb
);
268 skb_reset_network_header(skb
);
270 if (geneve_get_sk_family(gs
) == AF_INET
)
271 err
= IP_ECN_decapsulate(oiph
, skb
);
272 #if IS_ENABLED(CONFIG_IPV6)
274 err
= IP6_ECN_decapsulate(oiph
, skb
);
279 if (geneve_get_sk_family(gs
) == AF_INET
)
280 net_info_ratelimited("non-ECT from %pI4 "
282 &((struct iphdr
*)oiph
)->saddr
,
283 ((struct iphdr
*)oiph
)->tos
);
284 #if IS_ENABLED(CONFIG_IPV6)
286 net_info_ratelimited("non-ECT from %pI6\n",
287 &((struct ipv6hdr
*)oiph
)->saddr
);
291 ++geneve
->dev
->stats
.rx_frame_errors
;
292 ++geneve
->dev
->stats
.rx_errors
;
298 err
= gro_cells_receive(&geneve
->gro_cells
, skb
);
299 if (likely(err
== NET_RX_SUCCESS
)) {
300 stats
= this_cpu_ptr(geneve
->dev
->tstats
);
301 u64_stats_update_begin(&stats
->syncp
);
303 stats
->rx_bytes
+= len
;
304 u64_stats_update_end(&stats
->syncp
);
308 /* Consume bad packet */
312 /* Setup stats when device is created */
313 static int geneve_init(struct net_device
*dev
)
315 struct geneve_dev
*geneve
= netdev_priv(dev
);
318 dev
->tstats
= netdev_alloc_pcpu_stats(struct pcpu_sw_netstats
);
322 err
= gro_cells_init(&geneve
->gro_cells
, dev
);
324 free_percpu(dev
->tstats
);
328 err
= dst_cache_init(&geneve
->cfg
.info
.dst_cache
, GFP_KERNEL
);
330 free_percpu(dev
->tstats
);
331 gro_cells_destroy(&geneve
->gro_cells
);
337 static void geneve_uninit(struct net_device
*dev
)
339 struct geneve_dev
*geneve
= netdev_priv(dev
);
341 dst_cache_destroy(&geneve
->cfg
.info
.dst_cache
);
342 gro_cells_destroy(&geneve
->gro_cells
);
343 free_percpu(dev
->tstats
);
346 /* Callback from net/ipv4/udp.c to receive packets */
347 static int geneve_udp_encap_recv(struct sock
*sk
, struct sk_buff
*skb
)
349 struct genevehdr
*geneveh
;
350 struct geneve_dev
*geneve
;
351 struct geneve_sock
*gs
;
354 /* Need UDP and Geneve header to be present */
355 if (unlikely(!pskb_may_pull(skb
, GENEVE_BASE_HLEN
)))
358 /* Return packets with reserved bits set */
359 geneveh
= geneve_hdr(skb
);
360 if (unlikely(geneveh
->ver
!= GENEVE_VER
))
363 if (unlikely(geneveh
->proto_type
!= htons(ETH_P_TEB
)))
366 gs
= rcu_dereference_sk_user_data(sk
);
370 geneve
= geneve_lookup_skb(gs
, skb
);
374 opts_len
= geneveh
->opt_len
* 4;
375 if (iptunnel_pull_header(skb
, GENEVE_BASE_HLEN
+ opts_len
,
377 !net_eq(geneve
->net
, dev_net(geneve
->dev
)))) {
378 geneve
->dev
->stats
.rx_dropped
++;
382 geneve_rx(geneve
, gs
, skb
);
386 /* Consume bad packet */
391 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
392 static int geneve_udp_encap_err_lookup(struct sock
*sk
, struct sk_buff
*skb
)
394 struct genevehdr
*geneveh
;
395 struct geneve_sock
*gs
;
396 u8 zero_vni
[3] = { 0 };
399 if (!pskb_may_pull(skb
, skb_transport_offset(skb
) + GENEVE_BASE_HLEN
))
402 geneveh
= geneve_hdr(skb
);
403 if (geneveh
->ver
!= GENEVE_VER
)
406 if (geneveh
->proto_type
!= htons(ETH_P_TEB
))
409 gs
= rcu_dereference_sk_user_data(sk
);
413 if (geneve_get_sk_family(gs
) == AF_INET
) {
414 struct iphdr
*iph
= ip_hdr(skb
);
417 if (!gs
->collect_md
) {
418 vni
= geneve_hdr(skb
)->vni
;
422 return geneve_lookup(gs
, addr4
, vni
) ? 0 : -ENOENT
;
425 #if IS_ENABLED(CONFIG_IPV6)
426 if (geneve_get_sk_family(gs
) == AF_INET6
) {
427 struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
428 struct in6_addr addr6
;
430 memset(&addr6
, 0, sizeof(struct in6_addr
));
432 if (!gs
->collect_md
) {
433 vni
= geneve_hdr(skb
)->vni
;
437 return geneve6_lookup(gs
, addr6
, vni
) ? 0 : -ENOENT
;
441 return -EPFNOSUPPORT
;
444 static struct socket
*geneve_create_sock(struct net
*net
, bool ipv6
,
445 __be16 port
, bool ipv6_rx_csum
)
448 struct udp_port_cfg udp_conf
;
451 memset(&udp_conf
, 0, sizeof(udp_conf
));
454 udp_conf
.family
= AF_INET6
;
455 udp_conf
.ipv6_v6only
= 1;
456 udp_conf
.use_udp6_rx_checksums
= ipv6_rx_csum
;
458 udp_conf
.family
= AF_INET
;
459 udp_conf
.local_ip
.s_addr
= htonl(INADDR_ANY
);
462 udp_conf
.local_udp_port
= port
;
464 /* Open UDP socket */
465 err
= udp_sock_create(net
, &udp_conf
, &sock
);
472 static int geneve_hlen(struct genevehdr
*gh
)
474 return sizeof(*gh
) + gh
->opt_len
* 4;
477 static struct sk_buff
*geneve_gro_receive(struct sock
*sk
,
478 struct list_head
*head
,
481 struct sk_buff
*pp
= NULL
;
483 struct genevehdr
*gh
, *gh2
;
484 unsigned int hlen
, gh_len
, off_gnv
;
485 const struct packet_offload
*ptype
;
489 off_gnv
= skb_gro_offset(skb
);
490 hlen
= off_gnv
+ sizeof(*gh
);
491 gh
= skb_gro_header_fast(skb
, off_gnv
);
492 if (skb_gro_header_hard(skb
, hlen
)) {
493 gh
= skb_gro_header_slow(skb
, hlen
, off_gnv
);
498 if (gh
->ver
!= GENEVE_VER
|| gh
->oam
)
500 gh_len
= geneve_hlen(gh
);
502 hlen
= off_gnv
+ gh_len
;
503 if (skb_gro_header_hard(skb
, hlen
)) {
504 gh
= skb_gro_header_slow(skb
, hlen
, off_gnv
);
509 list_for_each_entry(p
, head
, list
) {
510 if (!NAPI_GRO_CB(p
)->same_flow
)
513 gh2
= (struct genevehdr
*)(p
->data
+ off_gnv
);
514 if (gh
->opt_len
!= gh2
->opt_len
||
515 memcmp(gh
, gh2
, gh_len
)) {
516 NAPI_GRO_CB(p
)->same_flow
= 0;
521 type
= gh
->proto_type
;
524 ptype
= gro_find_receive_by_type(type
);
528 skb_gro_pull(skb
, gh_len
);
529 skb_gro_postpull_rcsum(skb
, gh
, gh_len
);
530 pp
= call_gro_receive(ptype
->callbacks
.gro_receive
, head
, skb
);
536 skb_gro_flush_final(skb
, pp
, flush
);
541 static int geneve_gro_complete(struct sock
*sk
, struct sk_buff
*skb
,
544 struct genevehdr
*gh
;
545 struct packet_offload
*ptype
;
550 gh
= (struct genevehdr
*)(skb
->data
+ nhoff
);
551 gh_len
= geneve_hlen(gh
);
552 type
= gh
->proto_type
;
555 ptype
= gro_find_complete_by_type(type
);
557 err
= ptype
->callbacks
.gro_complete(skb
, nhoff
+ gh_len
);
561 skb_set_inner_mac_header(skb
, nhoff
+ gh_len
);
566 /* Create new listen socket if needed */
567 static struct geneve_sock
*geneve_socket_create(struct net
*net
, __be16 port
,
568 bool ipv6
, bool ipv6_rx_csum
)
570 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
571 struct geneve_sock
*gs
;
573 struct udp_tunnel_sock_cfg tunnel_cfg
;
576 gs
= kzalloc(sizeof(*gs
), GFP_KERNEL
);
578 return ERR_PTR(-ENOMEM
);
580 sock
= geneve_create_sock(net
, ipv6
, port
, ipv6_rx_csum
);
583 return ERR_CAST(sock
);
588 for (h
= 0; h
< VNI_HASH_SIZE
; ++h
)
589 INIT_HLIST_HEAD(&gs
->vni_list
[h
]);
591 /* Initialize the geneve udp offloads structure */
592 udp_tunnel_notify_add_rx_port(gs
->sock
, UDP_TUNNEL_TYPE_GENEVE
);
594 /* Mark socket as an encapsulation socket */
595 memset(&tunnel_cfg
, 0, sizeof(tunnel_cfg
));
596 tunnel_cfg
.sk_user_data
= gs
;
597 tunnel_cfg
.encap_type
= 1;
598 tunnel_cfg
.gro_receive
= geneve_gro_receive
;
599 tunnel_cfg
.gro_complete
= geneve_gro_complete
;
600 tunnel_cfg
.encap_rcv
= geneve_udp_encap_recv
;
601 tunnel_cfg
.encap_err_lookup
= geneve_udp_encap_err_lookup
;
602 tunnel_cfg
.encap_destroy
= NULL
;
603 setup_udp_tunnel_sock(net
, sock
, &tunnel_cfg
);
604 list_add(&gs
->list
, &gn
->sock_list
);
608 static void __geneve_sock_release(struct geneve_sock
*gs
)
610 if (!gs
|| --gs
->refcnt
)
614 udp_tunnel_notify_del_rx_port(gs
->sock
, UDP_TUNNEL_TYPE_GENEVE
);
615 udp_tunnel_sock_release(gs
->sock
);
619 static void geneve_sock_release(struct geneve_dev
*geneve
)
621 struct geneve_sock
*gs4
= rtnl_dereference(geneve
->sock4
);
622 #if IS_ENABLED(CONFIG_IPV6)
623 struct geneve_sock
*gs6
= rtnl_dereference(geneve
->sock6
);
625 rcu_assign_pointer(geneve
->sock6
, NULL
);
628 rcu_assign_pointer(geneve
->sock4
, NULL
);
631 __geneve_sock_release(gs4
);
632 #if IS_ENABLED(CONFIG_IPV6)
633 __geneve_sock_release(gs6
);
637 static struct geneve_sock
*geneve_find_sock(struct geneve_net
*gn
,
641 struct geneve_sock
*gs
;
643 list_for_each_entry(gs
, &gn
->sock_list
, list
) {
644 if (inet_sk(gs
->sock
->sk
)->inet_sport
== dst_port
&&
645 geneve_get_sk_family(gs
) == family
) {
652 static int geneve_sock_add(struct geneve_dev
*geneve
, bool ipv6
)
654 struct net
*net
= geneve
->net
;
655 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
656 struct geneve_dev_node
*node
;
657 struct geneve_sock
*gs
;
661 gs
= geneve_find_sock(gn
, ipv6
? AF_INET6
: AF_INET
, geneve
->cfg
.info
.key
.tp_dst
);
667 gs
= geneve_socket_create(net
, geneve
->cfg
.info
.key
.tp_dst
, ipv6
,
668 geneve
->cfg
.use_udp6_rx_checksums
);
673 gs
->collect_md
= geneve
->cfg
.collect_md
;
674 #if IS_ENABLED(CONFIG_IPV6)
676 rcu_assign_pointer(geneve
->sock6
, gs
);
677 node
= &geneve
->hlist6
;
681 rcu_assign_pointer(geneve
->sock4
, gs
);
682 node
= &geneve
->hlist4
;
684 node
->geneve
= geneve
;
686 tunnel_id_to_vni(geneve
->cfg
.info
.key
.tun_id
, vni
);
687 hash
= geneve_net_vni_hash(vni
);
688 hlist_add_head_rcu(&node
->hlist
, &gs
->vni_list
[hash
]);
692 static int geneve_open(struct net_device
*dev
)
694 struct geneve_dev
*geneve
= netdev_priv(dev
);
695 bool metadata
= geneve
->cfg
.collect_md
;
699 ipv6
= geneve
->cfg
.info
.mode
& IP_TUNNEL_INFO_IPV6
|| metadata
;
700 ipv4
= !ipv6
|| metadata
;
701 #if IS_ENABLED(CONFIG_IPV6)
703 ret
= geneve_sock_add(geneve
, true);
704 if (ret
< 0 && ret
!= -EAFNOSUPPORT
)
709 ret
= geneve_sock_add(geneve
, false);
711 geneve_sock_release(geneve
);
716 static int geneve_stop(struct net_device
*dev
)
718 struct geneve_dev
*geneve
= netdev_priv(dev
);
720 hlist_del_init_rcu(&geneve
->hlist4
.hlist
);
721 #if IS_ENABLED(CONFIG_IPV6)
722 hlist_del_init_rcu(&geneve
->hlist6
.hlist
);
724 geneve_sock_release(geneve
);
728 static void geneve_build_header(struct genevehdr
*geneveh
,
729 const struct ip_tunnel_info
*info
)
731 geneveh
->ver
= GENEVE_VER
;
732 geneveh
->opt_len
= info
->options_len
/ 4;
733 geneveh
->oam
= !!(info
->key
.tun_flags
& TUNNEL_OAM
);
734 geneveh
->critical
= !!(info
->key
.tun_flags
& TUNNEL_CRIT_OPT
);
736 tunnel_id_to_vni(info
->key
.tun_id
, geneveh
->vni
);
737 geneveh
->proto_type
= htons(ETH_P_TEB
);
740 if (info
->key
.tun_flags
& TUNNEL_GENEVE_OPT
)
741 ip_tunnel_info_opts_get(geneveh
->options
, info
);
744 static int geneve_build_skb(struct dst_entry
*dst
, struct sk_buff
*skb
,
745 const struct ip_tunnel_info
*info
,
746 bool xnet
, int ip_hdr_len
)
748 bool udp_sum
= !!(info
->key
.tun_flags
& TUNNEL_CSUM
);
749 struct genevehdr
*gnvh
;
753 skb_reset_mac_header(skb
);
754 skb_scrub_packet(skb
, xnet
);
756 min_headroom
= LL_RESERVED_SPACE(dst
->dev
) + dst
->header_len
+
757 GENEVE_BASE_HLEN
+ info
->options_len
+ ip_hdr_len
;
758 err
= skb_cow_head(skb
, min_headroom
);
762 err
= udp_tunnel_handle_offloads(skb
, udp_sum
);
766 gnvh
= __skb_push(skb
, sizeof(*gnvh
) + info
->options_len
);
767 geneve_build_header(gnvh
, info
);
768 skb_set_inner_protocol(skb
, htons(ETH_P_TEB
));
776 static struct rtable
*geneve_get_v4_rt(struct sk_buff
*skb
,
777 struct net_device
*dev
,
778 struct geneve_sock
*gs4
,
780 const struct ip_tunnel_info
*info
)
782 bool use_cache
= ip_tunnel_dst_cache_usable(skb
, info
);
783 struct geneve_dev
*geneve
= netdev_priv(dev
);
784 struct dst_cache
*dst_cache
;
785 struct rtable
*rt
= NULL
;
789 return ERR_PTR(-EIO
);
791 memset(fl4
, 0, sizeof(*fl4
));
792 fl4
->flowi4_mark
= skb
->mark
;
793 fl4
->flowi4_proto
= IPPROTO_UDP
;
794 fl4
->daddr
= info
->key
.u
.ipv4
.dst
;
795 fl4
->saddr
= info
->key
.u
.ipv4
.src
;
798 if ((tos
== 1) && !geneve
->cfg
.collect_md
) {
799 tos
= ip_tunnel_get_dsfield(ip_hdr(skb
), skb
);
802 fl4
->flowi4_tos
= RT_TOS(tos
);
804 dst_cache
= (struct dst_cache
*)&info
->dst_cache
;
806 rt
= dst_cache_get_ip4(dst_cache
, &fl4
->saddr
);
810 rt
= ip_route_output_key(geneve
->net
, fl4
);
812 netdev_dbg(dev
, "no route to %pI4\n", &fl4
->daddr
);
813 return ERR_PTR(-ENETUNREACH
);
815 if (rt
->dst
.dev
== dev
) { /* is this necessary? */
816 netdev_dbg(dev
, "circular route to %pI4\n", &fl4
->daddr
);
818 return ERR_PTR(-ELOOP
);
821 dst_cache_set_ip4(dst_cache
, &rt
->dst
, fl4
->saddr
);
825 #if IS_ENABLED(CONFIG_IPV6)
826 static struct dst_entry
*geneve_get_v6_dst(struct sk_buff
*skb
,
827 struct net_device
*dev
,
828 struct geneve_sock
*gs6
,
830 const struct ip_tunnel_info
*info
)
832 bool use_cache
= ip_tunnel_dst_cache_usable(skb
, info
);
833 struct geneve_dev
*geneve
= netdev_priv(dev
);
834 struct dst_entry
*dst
= NULL
;
835 struct dst_cache
*dst_cache
;
839 return ERR_PTR(-EIO
);
841 memset(fl6
, 0, sizeof(*fl6
));
842 fl6
->flowi6_mark
= skb
->mark
;
843 fl6
->flowi6_proto
= IPPROTO_UDP
;
844 fl6
->daddr
= info
->key
.u
.ipv6
.dst
;
845 fl6
->saddr
= info
->key
.u
.ipv6
.src
;
846 prio
= info
->key
.tos
;
847 if ((prio
== 1) && !geneve
->cfg
.collect_md
) {
848 prio
= ip_tunnel_get_dsfield(ip_hdr(skb
), skb
);
852 fl6
->flowlabel
= ip6_make_flowinfo(RT_TOS(prio
),
854 dst_cache
= (struct dst_cache
*)&info
->dst_cache
;
856 dst
= dst_cache_get_ip6(dst_cache
, &fl6
->saddr
);
860 dst
= ipv6_stub
->ipv6_dst_lookup_flow(geneve
->net
, gs6
->sock
->sk
, fl6
,
863 netdev_dbg(dev
, "no route to %pI6\n", &fl6
->daddr
);
864 return ERR_PTR(-ENETUNREACH
);
866 if (dst
->dev
== dev
) { /* is this necessary? */
867 netdev_dbg(dev
, "circular route to %pI6\n", &fl6
->daddr
);
869 return ERR_PTR(-ELOOP
);
873 dst_cache_set_ip6(dst_cache
, dst
, &fl6
->saddr
);
878 static int geneve_xmit_skb(struct sk_buff
*skb
, struct net_device
*dev
,
879 struct geneve_dev
*geneve
,
880 const struct ip_tunnel_info
*info
)
882 bool xnet
= !net_eq(geneve
->net
, dev_net(geneve
->dev
));
883 struct geneve_sock
*gs4
= rcu_dereference(geneve
->sock4
);
884 const struct ip_tunnel_key
*key
= &info
->key
;
892 rt
= geneve_get_v4_rt(skb
, dev
, gs4
, &fl4
, info
);
896 err
= skb_tunnel_check_pmtu(skb
, &rt
->dst
,
897 GENEVE_IPV4_HLEN
+ info
->options_len
,
898 netif_is_any_bridge_port(dev
));
900 dst_release(&rt
->dst
);
903 struct ip_tunnel_info
*info
;
905 info
= skb_tunnel_info(skb
);
907 info
->key
.u
.ipv4
.dst
= fl4
.saddr
;
908 info
->key
.u
.ipv4
.src
= fl4
.daddr
;
911 if (!pskb_may_pull(skb
, ETH_HLEN
)) {
912 dst_release(&rt
->dst
);
916 skb
->protocol
= eth_type_trans(skb
, geneve
->dev
);
918 dst_release(&rt
->dst
);
922 sport
= udp_flow_src_port(geneve
->net
, skb
, 1, USHRT_MAX
, true);
923 if (geneve
->cfg
.collect_md
) {
924 tos
= ip_tunnel_ecn_encap(key
->tos
, ip_hdr(skb
), skb
);
927 df
= key
->tun_flags
& TUNNEL_DONT_FRAGMENT
? htons(IP_DF
) : 0;
929 tos
= ip_tunnel_ecn_encap(fl4
.flowi4_tos
, ip_hdr(skb
), skb
);
930 if (geneve
->cfg
.ttl_inherit
)
931 ttl
= ip_tunnel_get_ttl(ip_hdr(skb
), skb
);
934 ttl
= ttl
? : ip4_dst_hoplimit(&rt
->dst
);
936 if (geneve
->cfg
.df
== GENEVE_DF_SET
) {
938 } else if (geneve
->cfg
.df
== GENEVE_DF_INHERIT
) {
939 struct ethhdr
*eth
= eth_hdr(skb
);
941 if (ntohs(eth
->h_proto
) == ETH_P_IPV6
) {
943 } else if (ntohs(eth
->h_proto
) == ETH_P_IP
) {
944 struct iphdr
*iph
= ip_hdr(skb
);
946 if (iph
->frag_off
& htons(IP_DF
))
952 err
= geneve_build_skb(&rt
->dst
, skb
, info
, xnet
, sizeof(struct iphdr
));
956 udp_tunnel_xmit_skb(rt
, gs4
->sock
->sk
, skb
, fl4
.saddr
, fl4
.daddr
,
957 tos
, ttl
, df
, sport
, geneve
->cfg
.info
.key
.tp_dst
,
958 !net_eq(geneve
->net
, dev_net(geneve
->dev
)),
959 !(info
->key
.tun_flags
& TUNNEL_CSUM
));
963 #if IS_ENABLED(CONFIG_IPV6)
964 static int geneve6_xmit_skb(struct sk_buff
*skb
, struct net_device
*dev
,
965 struct geneve_dev
*geneve
,
966 const struct ip_tunnel_info
*info
)
968 bool xnet
= !net_eq(geneve
->net
, dev_net(geneve
->dev
));
969 struct geneve_sock
*gs6
= rcu_dereference(geneve
->sock6
);
970 const struct ip_tunnel_key
*key
= &info
->key
;
971 struct dst_entry
*dst
= NULL
;
977 dst
= geneve_get_v6_dst(skb
, dev
, gs6
, &fl6
, info
);
981 err
= skb_tunnel_check_pmtu(skb
, dst
,
982 GENEVE_IPV6_HLEN
+ info
->options_len
,
983 netif_is_any_bridge_port(dev
));
988 struct ip_tunnel_info
*info
= skb_tunnel_info(skb
);
991 info
->key
.u
.ipv6
.dst
= fl6
.saddr
;
992 info
->key
.u
.ipv6
.src
= fl6
.daddr
;
995 if (!pskb_may_pull(skb
, ETH_HLEN
)) {
1000 skb
->protocol
= eth_type_trans(skb
, geneve
->dev
);
1006 sport
= udp_flow_src_port(geneve
->net
, skb
, 1, USHRT_MAX
, true);
1007 if (geneve
->cfg
.collect_md
) {
1008 prio
= ip_tunnel_ecn_encap(key
->tos
, ip_hdr(skb
), skb
);
1011 prio
= ip_tunnel_ecn_encap(ip6_tclass(fl6
.flowlabel
),
1013 if (geneve
->cfg
.ttl_inherit
)
1014 ttl
= ip_tunnel_get_ttl(ip_hdr(skb
), skb
);
1017 ttl
= ttl
? : ip6_dst_hoplimit(dst
);
1019 err
= geneve_build_skb(dst
, skb
, info
, xnet
, sizeof(struct ipv6hdr
));
1023 udp_tunnel6_xmit_skb(dst
, gs6
->sock
->sk
, skb
, dev
,
1024 &fl6
.saddr
, &fl6
.daddr
, prio
, ttl
,
1025 info
->key
.label
, sport
, geneve
->cfg
.info
.key
.tp_dst
,
1026 !(info
->key
.tun_flags
& TUNNEL_CSUM
));
1031 static netdev_tx_t
geneve_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
1033 struct geneve_dev
*geneve
= netdev_priv(dev
);
1034 struct ip_tunnel_info
*info
= NULL
;
1037 if (geneve
->cfg
.collect_md
) {
1038 info
= skb_tunnel_info(skb
);
1039 if (unlikely(!info
|| !(info
->mode
& IP_TUNNEL_INFO_TX
))) {
1040 netdev_dbg(dev
, "no tunnel metadata\n");
1042 dev
->stats
.tx_dropped
++;
1043 return NETDEV_TX_OK
;
1046 info
= &geneve
->cfg
.info
;
1050 #if IS_ENABLED(CONFIG_IPV6)
1051 if (info
->mode
& IP_TUNNEL_INFO_IPV6
)
1052 err
= geneve6_xmit_skb(skb
, dev
, geneve
, info
);
1055 err
= geneve_xmit_skb(skb
, dev
, geneve
, info
);
1059 return NETDEV_TX_OK
;
1061 if (err
!= -EMSGSIZE
)
1065 dev
->stats
.collisions
++;
1066 else if (err
== -ENETUNREACH
)
1067 dev
->stats
.tx_carrier_errors
++;
1069 dev
->stats
.tx_errors
++;
1070 return NETDEV_TX_OK
;
1073 static int geneve_change_mtu(struct net_device
*dev
, int new_mtu
)
1075 if (new_mtu
> dev
->max_mtu
)
1076 new_mtu
= dev
->max_mtu
;
1077 else if (new_mtu
< dev
->min_mtu
)
1078 new_mtu
= dev
->min_mtu
;
1084 static int geneve_fill_metadata_dst(struct net_device
*dev
, struct sk_buff
*skb
)
1086 struct ip_tunnel_info
*info
= skb_tunnel_info(skb
);
1087 struct geneve_dev
*geneve
= netdev_priv(dev
);
1089 if (ip_tunnel_info_af(info
) == AF_INET
) {
1092 struct geneve_sock
*gs4
= rcu_dereference(geneve
->sock4
);
1094 rt
= geneve_get_v4_rt(skb
, dev
, gs4
, &fl4
, info
);
1099 info
->key
.u
.ipv4
.src
= fl4
.saddr
;
1100 #if IS_ENABLED(CONFIG_IPV6)
1101 } else if (ip_tunnel_info_af(info
) == AF_INET6
) {
1102 struct dst_entry
*dst
;
1104 struct geneve_sock
*gs6
= rcu_dereference(geneve
->sock6
);
1106 dst
= geneve_get_v6_dst(skb
, dev
, gs6
, &fl6
, info
);
1108 return PTR_ERR(dst
);
1111 info
->key
.u
.ipv6
.src
= fl6
.saddr
;
1117 info
->key
.tp_src
= udp_flow_src_port(geneve
->net
, skb
,
1118 1, USHRT_MAX
, true);
1119 info
->key
.tp_dst
= geneve
->cfg
.info
.key
.tp_dst
;
1123 static const struct net_device_ops geneve_netdev_ops
= {
1124 .ndo_init
= geneve_init
,
1125 .ndo_uninit
= geneve_uninit
,
1126 .ndo_open
= geneve_open
,
1127 .ndo_stop
= geneve_stop
,
1128 .ndo_start_xmit
= geneve_xmit
,
1129 .ndo_get_stats64
= ip_tunnel_get_stats64
,
1130 .ndo_change_mtu
= geneve_change_mtu
,
1131 .ndo_validate_addr
= eth_validate_addr
,
1132 .ndo_set_mac_address
= eth_mac_addr
,
1133 .ndo_fill_metadata_dst
= geneve_fill_metadata_dst
,
1136 static void geneve_get_drvinfo(struct net_device
*dev
,
1137 struct ethtool_drvinfo
*drvinfo
)
1139 strlcpy(drvinfo
->version
, GENEVE_NETDEV_VER
, sizeof(drvinfo
->version
));
1140 strlcpy(drvinfo
->driver
, "geneve", sizeof(drvinfo
->driver
));
1143 static const struct ethtool_ops geneve_ethtool_ops
= {
1144 .get_drvinfo
= geneve_get_drvinfo
,
1145 .get_link
= ethtool_op_get_link
,
1148 /* Info for udev, that this is a virtual tunnel endpoint */
1149 static struct device_type geneve_type
= {
/* Calls the ndo_udp_tunnel_add of the caller in order to
 * supply the listening GENEVE udp ports. Callers are expected
 * to implement the ndo_udp_tunnel_add.
 */
1157 static void geneve_offload_rx_ports(struct net_device
*dev
, bool push
)
1159 struct net
*net
= dev_net(dev
);
1160 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1161 struct geneve_sock
*gs
;
1164 list_for_each_entry_rcu(gs
, &gn
->sock_list
, list
) {
1166 udp_tunnel_push_rx_port(dev
, gs
->sock
,
1167 UDP_TUNNEL_TYPE_GENEVE
);
1169 udp_tunnel_drop_rx_port(dev
, gs
->sock
,
1170 UDP_TUNNEL_TYPE_GENEVE
);
1176 /* Initialize the device structure. */
1177 static void geneve_setup(struct net_device
*dev
)
1181 dev
->netdev_ops
= &geneve_netdev_ops
;
1182 dev
->ethtool_ops
= &geneve_ethtool_ops
;
1183 dev
->needs_free_netdev
= true;
1185 SET_NETDEV_DEVTYPE(dev
, &geneve_type
);
1187 dev
->features
|= NETIF_F_LLTX
;
1188 dev
->features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
;
1189 dev
->features
|= NETIF_F_RXCSUM
;
1190 dev
->features
|= NETIF_F_GSO_SOFTWARE
;
1192 dev
->hw_features
|= NETIF_F_SG
| NETIF_F_HW_CSUM
| NETIF_F_RXCSUM
;
1193 dev
->hw_features
|= NETIF_F_GSO_SOFTWARE
;
1195 /* MTU range: 68 - (something less than 65535) */
1196 dev
->min_mtu
= ETH_MIN_MTU
;
1197 /* The max_mtu calculation does not take account of GENEVE
1198 * options, to avoid excluding potentially valid
1199 * configurations. This will be further reduced by IPvX hdr size.
1201 dev
->max_mtu
= IP_MAX_MTU
- GENEVE_BASE_HLEN
- dev
->hard_header_len
;
1203 netif_keep_dst(dev
);
1204 dev
->priv_flags
&= ~IFF_TX_SKB_SHARING
;
1205 dev
->priv_flags
|= IFF_LIVE_ADDR_CHANGE
| IFF_NO_QUEUE
;
1206 eth_hw_addr_random(dev
);
1209 static const struct nla_policy geneve_policy
[IFLA_GENEVE_MAX
+ 1] = {
1210 [IFLA_GENEVE_ID
] = { .type
= NLA_U32
},
1211 [IFLA_GENEVE_REMOTE
] = { .len
= sizeof_field(struct iphdr
, daddr
) },
1212 [IFLA_GENEVE_REMOTE6
] = { .len
= sizeof(struct in6_addr
) },
1213 [IFLA_GENEVE_TTL
] = { .type
= NLA_U8
},
1214 [IFLA_GENEVE_TOS
] = { .type
= NLA_U8
},
1215 [IFLA_GENEVE_LABEL
] = { .type
= NLA_U32
},
1216 [IFLA_GENEVE_PORT
] = { .type
= NLA_U16
},
1217 [IFLA_GENEVE_COLLECT_METADATA
] = { .type
= NLA_FLAG
},
1218 [IFLA_GENEVE_UDP_CSUM
] = { .type
= NLA_U8
},
1219 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX
] = { .type
= NLA_U8
},
1220 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX
] = { .type
= NLA_U8
},
1221 [IFLA_GENEVE_TTL_INHERIT
] = { .type
= NLA_U8
},
1222 [IFLA_GENEVE_DF
] = { .type
= NLA_U8
},
1225 static int geneve_validate(struct nlattr
*tb
[], struct nlattr
*data
[],
1226 struct netlink_ext_ack
*extack
)
1228 if (tb
[IFLA_ADDRESS
]) {
1229 if (nla_len(tb
[IFLA_ADDRESS
]) != ETH_ALEN
) {
1230 NL_SET_ERR_MSG_ATTR(extack
, tb
[IFLA_ADDRESS
],
1231 "Provided link layer address is not Ethernet");
1235 if (!is_valid_ether_addr(nla_data(tb
[IFLA_ADDRESS
]))) {
1236 NL_SET_ERR_MSG_ATTR(extack
, tb
[IFLA_ADDRESS
],
1237 "Provided Ethernet address is not unicast");
1238 return -EADDRNOTAVAIL
;
1243 NL_SET_ERR_MSG(extack
,
1244 "Not enough attributes provided to perform the operation");
1248 if (data
[IFLA_GENEVE_ID
]) {
1249 __u32 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
1251 if (vni
>= GENEVE_N_VID
) {
1252 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_ID
],
1253 "Geneve ID must be lower than 16777216");
1258 if (data
[IFLA_GENEVE_DF
]) {
1259 enum ifla_geneve_df df
= nla_get_u8(data
[IFLA_GENEVE_DF
]);
1261 if (df
< 0 || df
> GENEVE_DF_MAX
) {
1262 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_DF
],
1263 "Invalid DF attribute");
1271 static struct geneve_dev
*geneve_find_dev(struct geneve_net
*gn
,
1272 const struct ip_tunnel_info
*info
,
1273 bool *tun_on_same_port
,
1274 bool *tun_collect_md
)
1276 struct geneve_dev
*geneve
, *t
= NULL
;
1278 *tun_on_same_port
= false;
1279 *tun_collect_md
= false;
1280 list_for_each_entry(geneve
, &gn
->geneve_list
, next
) {
1281 if (info
->key
.tp_dst
== geneve
->cfg
.info
.key
.tp_dst
) {
1282 *tun_collect_md
= geneve
->cfg
.collect_md
;
1283 *tun_on_same_port
= true;
1285 if (info
->key
.tun_id
== geneve
->cfg
.info
.key
.tun_id
&&
1286 info
->key
.tp_dst
== geneve
->cfg
.info
.key
.tp_dst
&&
1287 !memcmp(&info
->key
.u
, &geneve
->cfg
.info
.key
.u
, sizeof(info
->key
.u
)))
1293 static bool is_tnl_info_zero(const struct ip_tunnel_info
*info
)
1295 return !(info
->key
.tun_id
|| info
->key
.tun_flags
|| info
->key
.tos
||
1296 info
->key
.ttl
|| info
->key
.label
|| info
->key
.tp_src
||
1297 memchr_inv(&info
->key
.u
, 0, sizeof(info
->key
.u
)));
1300 static bool geneve_dst_addr_equal(struct ip_tunnel_info
*a
,
1301 struct ip_tunnel_info
*b
)
1303 if (ip_tunnel_info_af(a
) == AF_INET
)
1304 return a
->key
.u
.ipv4
.dst
== b
->key
.u
.ipv4
.dst
;
1306 return ipv6_addr_equal(&a
->key
.u
.ipv6
.dst
, &b
->key
.u
.ipv6
.dst
);
/* Validate @cfg against the geneve devices already present in @net and
 * register @dev with it.
 *
 * Steps visible in this excerpt:
 *  - a collect_md configuration that also carries tunnel key attributes is
 *    reported through @extack;
 *  - geneve_find_dev() is asked for a matching device and fills in
 *    tun_on_same_port / tun_collect_md;
 *  - needed_headroom and max_mtu of @dev are adjusted for the outer headers;
 *  - collect_md conflicts on the destination port are reported via @extack;
 *  - the device's dst cache is reset, @cfg is copied into the private data,
 *    register_netdevice() is called and the device is linked into the
 *    per-netns geneve_list.
 *
 * NOTE(review): the error-return statements, the declarations of err and
 * encap_len, and the use made of t are not visible in this excerpt;
 * presumably each NL_SET_ERR_MSG() is followed by a negative errno return.
 */
1309 static int geneve_configure(struct net
*net
, struct net_device
*dev
,
1310 struct netlink_ext_ack
*extack
,
1311 const struct geneve_config
*cfg
)
1313 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1314 struct geneve_dev
*t
, *geneve
= netdev_priv(dev
);
1315 const struct ip_tunnel_info
*info
= &cfg
->info
;
1316 bool tun_collect_md
, tun_on_same_port
;
/* An externally controlled device must not carry its own tunnel key. */
1319 if (cfg
->collect_md
&& !is_tnl_info_zero(info
)) {
1320 NL_SET_ERR_MSG(extack
,
1321 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
1328 t
= geneve_find_dev(gn
, info
, &tun_on_same_port
, &tun_collect_md
);
1332 /* make enough headroom for basic scenario */
1333 encap_len
= GENEVE_BASE_HLEN
+ ETH_HLEN
;
/* The IPv4 header size is used only for a non-collect_md AF_INET remote;
 * the other visible pair of statements accounts for an IPv6 header
 * (presumably the else branch).
 */
1334 if (!cfg
->collect_md
&& ip_tunnel_info_af(info
) == AF_INET
) {
1335 encap_len
+= sizeof(struct iphdr
);
1336 dev
->max_mtu
-= sizeof(struct iphdr
);
1338 encap_len
+= sizeof(struct ipv6hdr
);
1339 dev
->max_mtu
-= sizeof(struct ipv6hdr
);
1341 dev
->needed_headroom
= encap_len
+ ETH_HLEN
;
1343 if (cfg
->collect_md
) {
1344 if (tun_on_same_port
) {
1345 NL_SET_ERR_MSG(extack
,
1346 "There can be only one externally controlled device on a destination port");
1350 if (tun_collect_md
) {
1351 NL_SET_ERR_MSG(extack
,
1352 "There already exists an externally controlled device on this destination port");
/* Reset the device's dst cache, then install the new configuration. */
1357 dst_cache_reset(&geneve
->cfg
.info
.dst_cache
);
1358 memcpy(&geneve
->cfg
, cfg
, sizeof(*cfg
));
1360 err
= register_netdevice(dev
);
1364 list_add(&geneve
->next
, &gn
->geneve_list
);
1368 static void init_tnl_info(struct ip_tunnel_info
*info
, __u16 dst_port
)
1370 memset(info
, 0, sizeof(*info
));
1371 info
->key
.tp_dst
= htons(dst_port
);
/* Translate the IFLA_GENEVE_* netlink attributes in @data into @cfg.
 *
 * @changelink distinguishes modification of an existing device from
 * creation.  For attributes that must not change on an existing device the
 * code records the attribute id in attrtype, and the final
 * NL_SET_ERR_MSG_ATTR() reports it through @extack.
 *
 * Returns -EPFNOSUPPORT for the IPv6-only attributes when the visible
 * "IPv6 support not enabled in the kernel" paths are taken.
 *
 * NOTE(review): several guarding conditions, the declarations of attrtype,
 * vni, tvni and tunid, and the remaining return statements are not visible
 * in this excerpt.
 */
1374 static int geneve_nl2info(struct nlattr
*tb
[], struct nlattr
*data
[],
1375 struct netlink_ext_ack
*extack
,
1376 struct geneve_config
*cfg
, bool changelink
)
1378 struct ip_tunnel_info
*info
= &cfg
->info
;
/* IPv4 and IPv6 remotes are mutually exclusive. */
1381 if (data
[IFLA_GENEVE_REMOTE
] && data
[IFLA_GENEVE_REMOTE6
]) {
1382 NL_SET_ERR_MSG(extack
,
1383 "Cannot specify both IPv4 and IPv6 Remote addresses");
/* IPv4 remote endpoint; a multicast address is reported as an error. */
1387 if (data
[IFLA_GENEVE_REMOTE
]) {
1388 if (changelink
&& (ip_tunnel_info_af(info
) == AF_INET6
)) {
1389 attrtype
= IFLA_GENEVE_REMOTE
;
1393 info
->key
.u
.ipv4
.dst
=
1394 nla_get_in_addr(data
[IFLA_GENEVE_REMOTE
]);
1396 if (ipv4_is_multicast(info
->key
.u
.ipv4
.dst
)) {
1397 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_REMOTE
],
1398 "Remote IPv4 address cannot be Multicast");
/* IPv6 remote endpoint; link-local and multicast addresses are reported
 * as errors.
 */
1403 if (data
[IFLA_GENEVE_REMOTE6
]) {
1404 #if IS_ENABLED(CONFIG_IPV6)
1405 if (changelink
&& (ip_tunnel_info_af(info
) == AF_INET
)) {
1406 attrtype
= IFLA_GENEVE_REMOTE6
;
1410 info
->mode
= IP_TUNNEL_INFO_IPV6
;
1411 info
->key
.u
.ipv6
.dst
=
1412 nla_get_in6_addr(data
[IFLA_GENEVE_REMOTE6
]);
1414 if (ipv6_addr_type(&info
->key
.u
.ipv6
.dst
) &
1415 IPV6_ADDR_LINKLOCAL
) {
1416 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_REMOTE6
],
1417 "Remote IPv6 address cannot be link-local");
1420 if (ipv6_addr_is_multicast(&info
->key
.u
.ipv6
.dst
)) {
1421 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_REMOTE6
],
1422 "Remote IPv6 address cannot be Multicast");
/* An IPv6 remote turns on TUNNEL_CSUM and RX checksum use; the
 * ZERO_CSUM6 attributes handled further down can clear them again.
 */
1425 info
->key
.tun_flags
|= TUNNEL_CSUM
;
1426 cfg
->use_udp6_rx_checksums
= true;
1428 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_REMOTE6
],
1429 "IPv6 support not enabled in the kernel");
1430 return -EPFNOSUPPORT
;
/* VNI: bits 23..0 of the attribute are split into three bytes, most
 * significant first, and converted with vni_to_tunnel_id().
 */
1434 if (data
[IFLA_GENEVE_ID
]) {
1439 vni
= nla_get_u32(data
[IFLA_GENEVE_ID
]);
1440 tvni
[0] = (vni
& 0x00ff0000) >> 16;
1441 tvni
[1] = (vni
& 0x0000ff00) >> 8;
1442 tvni
[2] = vni
& 0x000000ff;
1444 tunid
= vni_to_tunnel_id(tvni
);
1445 if (changelink
&& (tunid
!= info
->key
.tun_id
)) {
1446 attrtype
= IFLA_GENEVE_ID
;
1449 info
->key
.tun_id
= tunid
;
/* TTL: IFLA_GENEVE_TTL is consulted only when IFLA_GENEVE_TTL_INHERIT is
 * absent; an explicit TTL turns inheritance off.
 */
1452 if (data
[IFLA_GENEVE_TTL_INHERIT
]) {
1453 if (nla_get_u8(data
[IFLA_GENEVE_TTL_INHERIT
]))
1454 cfg
->ttl_inherit
= true;
1456 cfg
->ttl_inherit
= false;
1457 } else if (data
[IFLA_GENEVE_TTL
]) {
1458 info
->key
.ttl
= nla_get_u8(data
[IFLA_GENEVE_TTL
]);
1459 cfg
->ttl_inherit
= false;
1462 if (data
[IFLA_GENEVE_TOS
])
1463 info
->key
.tos
= nla_get_u8(data
[IFLA_GENEVE_TOS
]);
1465 if (data
[IFLA_GENEVE_DF
])
1466 cfg
->df
= nla_get_u8(data
[IFLA_GENEVE_DF
]);
/* Flow label: masked with IPV6_FLOWLABEL_MASK; a non-zero label on a
 * device whose mode lacks IP_TUNNEL_INFO_IPV6 is reported as an error.
 */
1468 if (data
[IFLA_GENEVE_LABEL
]) {
1469 info
->key
.label
= nla_get_be32(data
[IFLA_GENEVE_LABEL
]) &
1470 IPV6_FLOWLABEL_MASK
;
1471 if (info
->key
.label
&& (!(info
->mode
& IP_TUNNEL_INFO_IPV6
))) {
1472 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_LABEL
],
1473 "Label attribute only applies for IPv6 Geneve devices");
/* UDP destination port, read as a big-endian 16-bit value. */
1478 if (data
[IFLA_GENEVE_PORT
]) {
1480 attrtype
= IFLA_GENEVE_PORT
;
1483 info
->key
.tp_dst
= nla_get_be16(data
[IFLA_GENEVE_PORT
]);
1486 if (data
[IFLA_GENEVE_COLLECT_METADATA
]) {
1488 attrtype
= IFLA_GENEVE_COLLECT_METADATA
;
1491 cfg
->collect_md
= true;
1494 if (data
[IFLA_GENEVE_UDP_CSUM
]) {
1496 attrtype
= IFLA_GENEVE_UDP_CSUM
;
1499 if (nla_get_u8(data
[IFLA_GENEVE_UDP_CSUM
]))
1500 info
->key
.tun_flags
|= TUNNEL_CSUM
;
/* A non-zero ZERO_CSUM6_TX clears TUNNEL_CSUM. */
1503 if (data
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX
]) {
1504 #if IS_ENABLED(CONFIG_IPV6)
1506 attrtype
= IFLA_GENEVE_UDP_ZERO_CSUM6_TX
;
1509 if (nla_get_u8(data
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX
]))
1510 info
->key
.tun_flags
&= ~TUNNEL_CSUM
;
1512 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX
],
1513 "IPv6 support not enabled in the kernel");
1514 return -EPFNOSUPPORT
;
/* A non-zero ZERO_CSUM6_RX turns RX checksum use off. */
1518 if (data
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
]) {
1519 #if IS_ENABLED(CONFIG_IPV6)
1521 attrtype
= IFLA_GENEVE_UDP_ZERO_CSUM6_RX
;
1524 if (nla_get_u8(data
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
]))
1525 cfg
->use_udp6_rx_checksums
= false;
1527 NL_SET_ERR_MSG_ATTR(extack
, data
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
],
1528 "IPv6 support not enabled in the kernel");
1529 return -EPFNOSUPPORT
;
/* Shared error report for attributes recorded in attrtype. */
1535 NL_SET_ERR_MSG_ATTR(extack
, data
[attrtype
],
1536 "Changing VNI, Port, endpoint IP address family, external, and UDP checksum attributes are not supported");
/* Choose an MTU for the tunnel device @dev.
 *
 * Two paths are visible: an IFLA_MTU value from @tb is handed straight to
 * geneve_change_mtu(); otherwise the route to the remote endpoint in @info
 * is looked up (ip_route_output_key() for IPv4, rt6_lookup() for IPv6), the
 * egress device's MTU minus GENEVE_IPV4_HLEN / GENEVE_IPV6_HLEN is taken as
 * ldev_mtu, and geneve_change_mtu() is called with ldev_mtu less
 * info->options_len.
 *
 * NOTE(review): the conditions selecting between these paths, the case
 * labels, the declaration of ldev_mtu and any release of the looked-up
 * routes are not visible in this excerpt.
 */
1540 static void geneve_link_config(struct net_device
*dev
,
1541 struct ip_tunnel_info
*info
, struct nlattr
*tb
[])
1543 struct geneve_dev
*geneve
= netdev_priv(dev
);
1547 geneve_change_mtu(dev
, nla_get_u32(tb
[IFLA_MTU
]));
1551 switch (ip_tunnel_info_af(info
)) {
1553 struct flowi4 fl4
= { .daddr
= info
->key
.u
.ipv4
.dst
};
1554 struct rtable
*rt
= ip_route_output_key(geneve
->net
, &fl4
);
1556 if (!IS_ERR(rt
) && rt
->dst
.dev
) {
1557 ldev_mtu
= rt
->dst
.dev
->mtu
- GENEVE_IPV4_HLEN
;
1562 #if IS_ENABLED(CONFIG_IPV6)
1564 struct rt6_info
*rt
;
1566 if (!__in6_dev_get(dev
))
1569 rt
= rt6_lookup(geneve
->net
, &info
->key
.u
.ipv6
.dst
, NULL
, 0,
1572 if (rt
&& rt
->dst
.dev
)
1573 ldev_mtu
= rt
->dst
.dev
->mtu
- GENEVE_IPV6_HLEN
;
/* Leave room for the Geneve options carried in @info. */
1583 geneve_change_mtu(dev
, ldev_mtu
- info
->options_len
);
/* rtnl newlink handler (installed as .newlink in geneve_link_ops).
 *
 * Starts from a default configuration (df = GENEVE_DF_UNSET, all boolean
 * options false) with the tunnel info reset to GENEVE_UDP_PORT, applies the
 * netlink attributes through geneve_nl2info() with changelink = false,
 * registers the device through geneve_configure() and finally derives the
 * MTU with geneve_link_config().
 *
 * NOTE(review): the declaration of err, the checks on it and the return
 * statements are not visible in this excerpt.
 */
1586 static int geneve_newlink(struct net
*net
, struct net_device
*dev
,
1587 struct nlattr
*tb
[], struct nlattr
*data
[],
1588 struct netlink_ext_ack
*extack
)
1590 struct geneve_config cfg
= {
1591 .df
= GENEVE_DF_UNSET
,
1592 .use_udp6_rx_checksums
= false,
1593 .ttl_inherit
= false,
1594 .collect_md
= false,
1598 init_tnl_info(&cfg
.info
, GENEVE_UDP_PORT
);
1599 err
= geneve_nl2info(tb
, data
, extack
, &cfg
, false);
1603 err
= geneve_configure(net
, dev
, extack
, &cfg
);
1607 geneve_link_config(dev
, &cfg
.info
, tb
);
/* Quiesces the geneve device data path for both TX and RX.
 *
 * On transmit, geneve checks for a non-NULL geneve_sock before it proceeds.
 * So, if we set that socket to NULL under RCU and wait for synchronize_net()
 * to complete, so that the existing set of in-flight packets has been
 * transmitted, then we have quiesced the transmit data path. All future
 * packets will be dropped until we unquiesce the data path.
 *
 * On receive, geneve dereferences the geneve_sock stashed in the socket. So,
 * if we set that to NULL under RCU and wait for synchronize_net() to
 * complete, then we have quiesced the receive data path.
 */
/* Detach @geneve from its sockets.
 *
 * The current sock4 / sock6 pointers are handed back through @gs4 / @gs6,
 * the device's RCU-published pointers are set to NULL, and sk_user_data of
 * the underlying sockets is cleared.  The pointers are read with
 * rtnl_dereference(), i.e. under RTNL protection.
 *
 * NOTE(review): any NULL checks around the sk_user_data updates, the
 * non-IPv6 branch and any RCU grace-period wait are not visible in this
 * excerpt.
 */
1624 static void geneve_quiesce(struct geneve_dev
*geneve
, struct geneve_sock
**gs4
,
1625 struct geneve_sock
**gs6
)
1627 *gs4
= rtnl_dereference(geneve
->sock4
);
1628 rcu_assign_pointer(geneve
->sock4
, NULL
);
1630 rcu_assign_sk_user_data((*gs4
)->sock
->sk
, NULL
);
1631 #if IS_ENABLED(CONFIG_IPV6)
1632 *gs6
= rtnl_dereference(geneve
->sock6
);
1633 rcu_assign_pointer(geneve
->sock6
, NULL
);
1635 rcu_assign_sk_user_data((*gs6
)->sock
->sk
, NULL
);
1642 /* Resumes the geneve device data path for both TX and RX. */
/* Republishes @gs4 as geneve->sock4 and as sk_user_data of its socket, and
 * does the same for @gs6 / sock6 when CONFIG_IPV6 is enabled.  @gs6 is
 * marked __maybe_unused because it is only referenced inside the
 * CONFIG_IPV6 section.
 *
 * NOTE(review): any NULL checks around the sk_user_data updates and any
 * RCU grace-period wait are not visible in this excerpt.
 */
1643 static void geneve_unquiesce(struct geneve_dev
*geneve
, struct geneve_sock
*gs4
,
1644 struct geneve_sock __maybe_unused
*gs6
)
1646 rcu_assign_pointer(geneve
->sock4
, gs4
);
1648 rcu_assign_sk_user_data(gs4
->sock
->sk
, gs4
);
1649 #if IS_ENABLED(CONFIG_IPV6)
1650 rcu_assign_pointer(geneve
->sock6
, gs6
);
1652 rcu_assign_sk_user_data(gs6
->sock
->sk
, gs6
);
/* rtnl changelink handler (installed as .changelink in geneve_link_ops).
 *
 * Works on a copy of the device's current configuration: the netlink
 * attributes are applied to the copy through geneve_nl2info() with
 * changelink = true, and the copy is installed into geneve->cfg between
 * geneve_quiesce() and geneve_unquiesce().  The collect_md flag of the
 * current configuration is tested before anything else.
 *
 * NOTE(review): the declaration of err, the checks on it and the return
 * statements are not visible in this excerpt.
 */
1657 static int geneve_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
1658 struct nlattr
*data
[],
1659 struct netlink_ext_ack
*extack
)
1661 struct geneve_dev
*geneve
= netdev_priv(dev
);
1662 struct geneve_sock
*gs4
, *gs6
;
1663 struct geneve_config cfg
;
1666 /* If the geneve device is configured for metadata (or externally
1667 * controlled, for example, OVS), then nothing can be changed.
1669 if (geneve
->cfg
.collect_md
)
1672 /* Start with the existing info. */
1673 memcpy(&cfg
, &geneve
->cfg
, sizeof(cfg
));
1674 err
= geneve_nl2info(tb
, data
, extack
, &cfg
, true);
/* Remote address changed: reset the dst cache and recompute the MTU. */
1678 if (!geneve_dst_addr_equal(&geneve
->cfg
.info
, &cfg
.info
)) {
1679 dst_cache_reset(&cfg
.info
.dst_cache
);
1680 geneve_link_config(dev
, &cfg
.info
, tb
);
/* Install the new configuration while the data path is quiesced. */
1683 geneve_quiesce(geneve
, &gs4
, &gs6
);
1684 memcpy(&geneve
->cfg
, &cfg
, sizeof(cfg
));
1685 geneve_unquiesce(geneve
, gs4
, gs6
);
1690 static void geneve_dellink(struct net_device
*dev
, struct list_head
*head
)
1692 struct geneve_dev
*geneve
= netdev_priv(dev
);
1694 list_del(&geneve
->next
);
1695 unregister_netdevice_queue(dev
, head
);
/* Size of the netlink attribute space for a geneve device (installed as
 * .get_size in geneve_link_ops): the sum of nla_total_size() for each
 * IFLA_GENEVE_* attribute named in the trailing comments.  The remote
 * address term is sized for struct in6_addr, so the one term covers both
 * the REMOTE and REMOTE6 forms.
 *
 * NOTE(review): the final operand of the sum is not visible in this
 * excerpt.
 */
1698 static size_t geneve_get_size(const struct net_device
*dev
)
1700 return nla_total_size(sizeof(__u32
)) + /* IFLA_GENEVE_ID */
1701 nla_total_size(sizeof(struct in6_addr
)) + /* IFLA_GENEVE_REMOTE{6} */
1702 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TTL */
1703 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TOS */
1704 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_DF */
1705 nla_total_size(sizeof(__be32
)) + /* IFLA_GENEVE_LABEL */
1706 nla_total_size(sizeof(__be16
)) + /* IFLA_GENEVE_PORT */
1707 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
1708 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_UDP_CSUM */
1709 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
1710 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
1711 nla_total_size(sizeof(__u8
)) + /* IFLA_GENEVE_TTL_INHERIT */
/* Dump the device configuration into @skb as IFLA_GENEVE_* attributes
 * (installed as .fill_info in geneve_link_ops).
 *
 * Every nla_put_*() failure jumps to the nla_put_failure label.
 *
 * NOTE(review): the declarations of tmp_vni and vni, the success return and
 * the body of the nla_put_failure label are not visible in this excerpt.
 */
1715 static int geneve_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
1717 struct geneve_dev
*geneve
= netdev_priv(dev
);
1718 struct ip_tunnel_info
*info
= &geneve
->cfg
.info
;
1719 bool ttl_inherit
= geneve
->cfg
.ttl_inherit
;
1720 bool metadata
= geneve
->cfg
.collect_md
;
/* Rebuild the 24-bit VNI from the three bytes filled in by
 * tunnel_id_to_vni(), most significant byte first.
 */
1724 tunnel_id_to_vni(info
->key
.tun_id
, tmp_vni
);
1725 vni
= (tmp_vni
[0] << 16) | (tmp_vni
[1] << 8) | tmp_vni
[2];
1726 if (nla_put_u32(skb
, IFLA_GENEVE_ID
, vni
))
1727 goto nla_put_failure
;
/* The remote address and the TX checksum setting are dumped only when the
 * device is not in collect_md mode: REMOTE + UDP_CSUM for AF_INET,
 * REMOTE6 + UDP_ZERO_CSUM6_TX otherwise.
 */
1729 if (!metadata
&& ip_tunnel_info_af(info
) == AF_INET
) {
1730 if (nla_put_in_addr(skb
, IFLA_GENEVE_REMOTE
,
1731 info
->key
.u
.ipv4
.dst
))
1732 goto nla_put_failure
;
1733 if (nla_put_u8(skb
, IFLA_GENEVE_UDP_CSUM
,
1734 !!(info
->key
.tun_flags
& TUNNEL_CSUM
)))
1735 goto nla_put_failure
;
1737 #if IS_ENABLED(CONFIG_IPV6)
1738 } else if (!metadata
) {
1739 if (nla_put_in6_addr(skb
, IFLA_GENEVE_REMOTE6
,
1740 &info
->key
.u
.ipv6
.dst
))
1741 goto nla_put_failure
;
1742 if (nla_put_u8(skb
, IFLA_GENEVE_UDP_ZERO_CSUM6_TX
,
1743 !(info
->key
.tun_flags
& TUNNEL_CSUM
)))
1744 goto nla_put_failure
;
1748 if (nla_put_u8(skb
, IFLA_GENEVE_TTL
, info
->key
.ttl
) ||
1749 nla_put_u8(skb
, IFLA_GENEVE_TOS
, info
->key
.tos
) ||
1750 nla_put_be32(skb
, IFLA_GENEVE_LABEL
, info
->key
.label
))
1751 goto nla_put_failure
;
1753 if (nla_put_u8(skb
, IFLA_GENEVE_DF
, geneve
->cfg
.df
))
1754 goto nla_put_failure
;
1756 if (nla_put_be16(skb
, IFLA_GENEVE_PORT
, info
->key
.tp_dst
))
1757 goto nla_put_failure
;
/* COLLECT_METADATA is a flag attribute, present only when set. */
1759 if (metadata
&& nla_put_flag(skb
, IFLA_GENEVE_COLLECT_METADATA
))
1760 goto nla_put_failure
;
/* ZERO_CSUM6_RX is the inverse of use_udp6_rx_checksums. */
1762 #if IS_ENABLED(CONFIG_IPV6)
1763 if (nla_put_u8(skb
, IFLA_GENEVE_UDP_ZERO_CSUM6_RX
,
1764 !geneve
->cfg
.use_udp6_rx_checksums
))
1765 goto nla_put_failure
;
1768 if (nla_put_u8(skb
, IFLA_GENEVE_TTL_INHERIT
, ttl_inherit
))
1769 goto nla_put_failure
;
/* rtnl_link_ops for geneve devices: ties the attribute policy, the private
 * data size (struct geneve_dev) and the setup / validate / newlink /
 * changelink / dellink / get_size / fill_info handlers together.
 *
 * NOTE(review): at least one initializer line of this structure is not
 * visible in this excerpt.
 */
1777 static struct rtnl_link_ops geneve_link_ops __read_mostly
= {
1779 .maxtype
= IFLA_GENEVE_MAX
,
1780 .policy
= geneve_policy
,
1781 .priv_size
= sizeof(struct geneve_dev
),
1782 .setup
= geneve_setup
,
1783 .validate
= geneve_validate
,
1784 .newlink
= geneve_newlink
,
1785 .changelink
= geneve_changelink
,
1786 .dellink
= geneve_dellink
,
1787 .get_size
= geneve_get_size
,
1788 .fill_info
= geneve_fill_info
,
/* Create and register a geneve device from inside the kernel (exported
 * with EXPORT_SYMBOL_GPL) rather than from a netlink request.
 *
 * The link is created with rtnl_create_link() using an all-zero attribute
 * table, configured through geneve_configure() with tunnel info reset to
 * @dst_port, given the largest MTU (IP_MAX_MTU) and finished with
 * rtnl_configure_link().  Failures are returned as ERR_PTR(err); the last
 * visible failure path first tears the device down with geneve_dellink()
 * and unregister_netdevice_many().
 *
 * NOTE(review): the declaration of err, the checks on it, the success
 * return and part of the cfg initializer are not visible in this excerpt.
 */
1791 struct net_device
*geneve_dev_create_fb(struct net
*net
, const char *name
,
1792 u8 name_assign_type
, u16 dst_port
)
1794 struct nlattr
*tb
[IFLA_MAX
+ 1];
1795 struct net_device
*dev
;
1796 LIST_HEAD(list_kill
);
1798 struct geneve_config cfg
= {
1799 .df
= GENEVE_DF_UNSET
,
1800 .use_udp6_rx_checksums
= true,
1801 .ttl_inherit
= false,
1805 memset(tb
, 0, sizeof(tb
));
1806 dev
= rtnl_create_link(net
, name
, name_assign_type
,
1807 &geneve_link_ops
, tb
, NULL
);
1811 init_tnl_info(&cfg
.info
, dst_port
);
1812 err
= geneve_configure(net
, dev
, NULL
, &cfg
);
1815 return ERR_PTR(err
);
1818 /* openvswitch users expect packet sizes to be unrestricted,
1819 * so set the largest MTU we can.
1821 err
= geneve_change_mtu(dev
, IP_MAX_MTU
);
1825 err
= rtnl_configure_link(dev
, NULL
);
1831 geneve_dellink(dev
, &list_kill
);
1832 unregister_netdevice_many(&list_kill
);
1833 return ERR_PTR(err
);
1835 EXPORT_SYMBOL_GPL(geneve_dev_create_fb
);
/* Netdevice notifier callback (installed in geneve_notifier_block).
 *
 * Calls geneve_offload_rx_ports() for the device the event refers to:
 *  - NETDEV_UDP_TUNNEL_PUSH_INFO / NETDEV_UDP_TUNNEL_DROP_INFO: with true
 *    for PUSH_INFO and false for DROP_INFO;
 *  - NETDEV_UNREGISTER / NETDEV_REGISTER: with false / true respectively,
 *    but only when the device has no udp_tunnel_nic_info.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1837 static int geneve_netdevice_event(struct notifier_block
*unused
,
1838 unsigned long event
, void *ptr
)
1840 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
1842 if (event
== NETDEV_UDP_TUNNEL_PUSH_INFO
||
1843 event
== NETDEV_UDP_TUNNEL_DROP_INFO
) {
1844 geneve_offload_rx_ports(dev
, event
== NETDEV_UDP_TUNNEL_PUSH_INFO
);
1845 } else if (event
== NETDEV_UNREGISTER
) {
1846 if (!dev
->udp_tunnel_nic_info
)
1847 geneve_offload_rx_ports(dev
, false);
1848 } else if (event
== NETDEV_REGISTER
) {
1849 if (!dev
->udp_tunnel_nic_info
)
1850 geneve_offload_rx_ports(dev
, true);
1856 static struct notifier_block geneve_notifier_block __read_mostly
= {
1857 .notifier_call
= geneve_netdevice_event
,
/* Per-namespace init hook (installed as .init in geneve_net_ops):
 * initialises the namespace's geneve device list and socket list to empty.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1860 static __net_init
int geneve_init_net(struct net
*net
)
1862 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1864 INIT_LIST_HEAD(&gn
->geneve_list
);
1865 INIT_LIST_HEAD(&gn
->sock_list
);
1869 static void geneve_destroy_tunnels(struct net
*net
, struct list_head
*head
)
1871 struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1872 struct geneve_dev
*geneve
, *next
;
1873 struct net_device
*dev
, *aux
;
1875 /* gather any geneve devices that were moved into this ns */
1876 for_each_netdev_safe(net
, dev
, aux
)
1877 if (dev
->rtnl_link_ops
== &geneve_link_ops
)
1878 unregister_netdevice_queue(dev
, head
);
1880 /* now gather any other geneve devices that were created in this ns */
1881 list_for_each_entry_safe(geneve
, next
, &gn
->geneve_list
, next
) {
1882 /* If geneve->dev is in the same netns, it was already added
1883 * to the list by the previous loop.
1885 if (!net_eq(dev_net(geneve
->dev
), net
))
1886 unregister_netdevice_queue(geneve
->dev
, head
);
/* Batched per-namespace exit hook (installed as .exit_batch in
 * geneve_net_ops).
 *
 * For every namespace on @net_list the geneve devices are gathered with
 * geneve_destroy_tunnels() into one list, which is then unregistered in a
 * single unregister_netdevice_many() call.  Afterwards each namespace's
 * sock_list is expected to be empty; a non-empty list triggers a one-time
 * warning.
 *
 * NOTE(review): the declarations of net and list and any locking around
 * the unregistration are not visible in this excerpt.
 */
1890 static void __net_exit
geneve_exit_batch_net(struct list_head
*net_list
)
1896 list_for_each_entry(net
, net_list
, exit_list
)
1897 geneve_destroy_tunnels(net
, &list
);
1899 /* unregister the devices gathered above */
1900 unregister_netdevice_many(&list
);
1903 list_for_each_entry(net
, net_list
, exit_list
) {
1904 const struct geneve_net
*gn
= net_generic(net
, geneve_net_id
);
1906 WARN_ON_ONCE(!list_empty(&gn
->sock_list
));
1910 static struct pernet_operations geneve_net_ops
= {
1911 .init
= geneve_init_net
,
1912 .exit_batch
= geneve_exit_batch_net
,
1913 .id
= &geneve_net_id
,
1914 .size
= sizeof(struct geneve_net
),
/* Module init, run as a late_initcall.
 *
 * Registers, in this order, the per-namespace operations, the netdevice
 * notifier and the rtnl link operations.  The visible unwind calls undo the
 * notifier and the per-namespace registrations.
 *
 * NOTE(review): the declaration of rc, the checks on it, the unwind labels
 * and the return statements are not visible in this excerpt.
 */
1917 static int __init
geneve_init_module(void)
1921 rc
= register_pernet_subsys(&geneve_net_ops
);
1925 rc
= register_netdevice_notifier(&geneve_notifier_block
);
1929 rc
= rtnl_link_register(&geneve_link_ops
);
1935 unregister_netdevice_notifier(&geneve_notifier_block
);
1937 unregister_pernet_subsys(&geneve_net_ops
);
1941 late_initcall(geneve_init_module
);
1943 static void __exit
geneve_cleanup_module(void)
1945 rtnl_link_unregister(&geneve_link_ops
);
1946 unregister_netdevice_notifier(&geneve_notifier_block
);
1947 unregister_pernet_subsys(&geneve_net_ops
);
1949 module_exit(geneve_cleanup_module
);
1951 MODULE_LICENSE("GPL");
1952 MODULE_VERSION(GENEVE_NETDEV_VER
);
1953 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
1954 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
1955 MODULE_ALIAS_RTNL_LINK("geneve");