]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - drivers/net/geneve.c
Merge tag 'sh-pfc-for-v5.1-tag2' of git://git.kernel.org/pub/scm/linux/kernel/git...
[mirror_ubuntu-focal-kernel.git] / drivers / net / geneve.c
CommitLineData
2d07dc79
JL
1/*
2 * GENEVE: Generic Network Virtualization Encapsulation
3 *
4 * Copyright (c) 2015 Red Hat, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13#include <linux/kernel.h>
14#include <linux/module.h>
2d07dc79
JL
15#include <linux/etherdevice.h>
16#include <linux/hash.h>
e305ac6c 17#include <net/dst_metadata.h>
8e816df8 18#include <net/gro_cells.h>
2d07dc79
JL
19#include <net/rtnetlink.h>
20#include <net/geneve.h>
371bd106 21#include <net/protocol.h>
2d07dc79
JL
22
23#define GENEVE_NETDEV_VER "0.6"
24
25#define GENEVE_UDP_PORT 6081
26
27#define GENEVE_N_VID (1u << 24)
28#define GENEVE_VID_MASK (GENEVE_N_VID - 1)
29
30#define VNI_HASH_BITS 10
31#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
32
33static bool log_ecn_error = true;
34module_param(log_ecn_error, bool, 0644);
35MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
36
371bd106
PS
37#define GENEVE_VER 0
38#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
5edbea69
AK
39#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
40#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
371bd106 41
2d07dc79
JL
42/* per-network namespace private data for this module */
43struct geneve_net {
371bd106 44 struct list_head geneve_list;
371bd106 45 struct list_head sock_list;
2d07dc79
JL
46};
47
c7d03a00 48static unsigned int geneve_net_id;
371bd106 49
4b4c21fa
JB
50struct geneve_dev_node {
51 struct hlist_node hlist;
52 struct geneve_dev *geneve;
53};
54
2d07dc79
JL
55/* Pseudo network device */
56struct geneve_dev {
4b4c21fa
JB
57 struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */
58#if IS_ENABLED(CONFIG_IPV6)
59 struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */
60#endif
2d07dc79
JL
61 struct net *net; /* netns for packet i/o */
62 struct net_device *dev; /* netdev for geneve tunnel */
9b4437a5 63 struct ip_tunnel_info info;
fceb9c3e 64 struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */
8ed66f0e 65#if IS_ENABLED(CONFIG_IPV6)
fceb9c3e 66 struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */
8ed66f0e 67#endif
2d07dc79 68 struct list_head next; /* geneve's per namespace list */
8e816df8 69 struct gro_cells gro_cells;
9b4437a5 70 bool collect_md;
71 bool use_udp6_rx_checksums;
52d0d404 72 bool ttl_inherit;
a025fb5f 73 enum ifla_geneve_df df;
2d07dc79
JL
74};
75
371bd106
PS
76struct geneve_sock {
77 bool collect_md;
371bd106
PS
78 struct list_head list;
79 struct socket *sock;
80 struct rcu_head rcu;
81 int refcnt;
66d47003 82 struct hlist_head vni_list[VNI_HASH_SIZE];
371bd106 83};
2d07dc79
JL
84
85static inline __u32 geneve_net_vni_hash(u8 vni[3])
86{
87 __u32 vnid;
88
89 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
90 return hash_32(vnid, VNI_HASH_BITS);
91}
92
e305ac6c
PS
93static __be64 vni_to_tunnel_id(const __u8 *vni)
94{
95#ifdef __BIG_ENDIAN
96 return (vni[0] << 16) | (vni[1] << 8) | vni[2];
97#else
98 return (__force __be64)(((__force u64)vni[0] << 40) |
99 ((__force u64)vni[1] << 48) |
100 ((__force u64)vni[2] << 56));
101#endif
102}
103
9b4437a5 104/* Convert 64 bit tunnel ID to 24 bit VNI. */
105static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
106{
107#ifdef __BIG_ENDIAN
108 vni[0] = (__force __u8)(tun_id >> 16);
109 vni[1] = (__force __u8)(tun_id >> 8);
110 vni[2] = (__force __u8)tun_id;
111#else
112 vni[0] = (__force __u8)((__force u64)tun_id >> 40);
113 vni[1] = (__force __u8)((__force u64)tun_id >> 48);
114 vni[2] = (__force __u8)((__force u64)tun_id >> 56);
115#endif
116}
117
2e0b26e1 118static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
119{
2e0b26e1 120 return !memcmp(vni, &tun_id[5], 3);
2e0b26e1 121}
122
1e9f12ec
JB
123static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
124{
125 return gs->sock->sk->sk_family;
126}
127
66d47003 128static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
371bd106 129 __be32 addr, u8 vni[])
2d07dc79 130{
2d07dc79 131 struct hlist_head *vni_list_head;
4b4c21fa 132 struct geneve_dev_node *node;
2d07dc79
JL
133 __u32 hash;
134
2d07dc79 135 /* Find the device for this VNI */
371bd106 136 hash = geneve_net_vni_hash(vni);
66d47003 137 vni_list_head = &gs->vni_list[hash];
4b4c21fa
JB
138 hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
139 if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
140 addr == node->geneve->info.key.u.ipv4.dst)
141 return node->geneve;
8ed66f0e
JL
142 }
143 return NULL;
144}
145
146#if IS_ENABLED(CONFIG_IPV6)
147static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
148 struct in6_addr addr6, u8 vni[])
149{
150 struct hlist_head *vni_list_head;
4b4c21fa 151 struct geneve_dev_node *node;
8ed66f0e
JL
152 __u32 hash;
153
154 /* Find the device for this VNI */
155 hash = geneve_net_vni_hash(vni);
156 vni_list_head = &gs->vni_list[hash];
4b4c21fa
JB
157 hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
158 if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
159 ipv6_addr_equal(&addr6, &node->geneve->info.key.u.ipv6.dst))
160 return node->geneve;
2d07dc79 161 }
e305ac6c
PS
162 return NULL;
163}
8ed66f0e 164#endif
e305ac6c 165
371bd106
PS
166static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
167{
168 return (struct genevehdr *)(udp_hdr(skb) + 1);
169}
170
9fc47545
JB
171static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
172 struct sk_buff *skb)
e305ac6c 173{
8ed66f0e 174 static u8 zero_vni[3];
9b4437a5 175 u8 *vni;
e305ac6c 176
1e9f12ec 177 if (geneve_get_sk_family(gs) == AF_INET) {
9fc47545 178 struct iphdr *iph;
9b4437a5 179 __be32 addr;
9fc47545 180
8ed66f0e 181 iph = ip_hdr(skb); /* outer IP header... */
371bd106 182
8ed66f0e
JL
183 if (gs->collect_md) {
184 vni = zero_vni;
185 addr = 0;
186 } else {
9fc47545 187 vni = geneve_hdr(skb)->vni;
8ed66f0e
JL
188 addr = iph->saddr;
189 }
190
9fc47545 191 return geneve_lookup(gs, addr, vni);
8ed66f0e 192#if IS_ENABLED(CONFIG_IPV6)
1e9f12ec 193 } else if (geneve_get_sk_family(gs) == AF_INET6) {
9b4437a5 194 static struct in6_addr zero_addr6;
9fc47545
JB
195 struct ipv6hdr *ip6h;
196 struct in6_addr addr6;
197
8ed66f0e 198 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
371bd106 199
8ed66f0e
JL
200 if (gs->collect_md) {
201 vni = zero_vni;
202 addr6 = zero_addr6;
203 } else {
9fc47545 204 vni = geneve_hdr(skb)->vni;
8ed66f0e
JL
205 addr6 = ip6h->saddr;
206 }
207
9fc47545 208 return geneve6_lookup(gs, addr6, vni);
8ed66f0e
JL
209#endif
210 }
9fc47545
JB
211 return NULL;
212}
213
214/* geneve receive/decap routine */
215static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
216 struct sk_buff *skb)
217{
218 struct genevehdr *gnvh = geneve_hdr(skb);
219 struct metadata_dst *tun_dst = NULL;
220 struct pcpu_sw_netstats *stats;
fe741e23 221 unsigned int len;
9fc47545
JB
222 int err = 0;
223 void *oiph;
2d07dc79 224
371bd106 225 if (ip_tunnel_collect_metadata() || gs->collect_md) {
e305ac6c 226 __be16 flags;
e305ac6c
PS
227
228 flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
229 (gnvh->oam ? TUNNEL_OAM : 0) |
230 (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
231
1e9f12ec 232 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
e305ac6c
PS
233 vni_to_tunnel_id(gnvh->vni),
234 gnvh->opt_len * 4);
fe741e23
GM
235 if (!tun_dst) {
236 geneve->dev->stats.rx_dropped++;
e305ac6c 237 goto drop;
fe741e23 238 }
e305ac6c 239 /* Update tunnel dst according to Geneve options. */
4c222798 240 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
256c87c1
PJV
241 gnvh->options, gnvh->opt_len * 4,
242 TUNNEL_GENEVE_OPT);
e305ac6c
PS
243 } else {
244 /* Drop packets w/ critical options,
245 * since we don't support any...
246 */
fe741e23
GM
247 if (gnvh->critical) {
248 geneve->dev->stats.rx_frame_errors++;
249 geneve->dev->stats.rx_errors++;
e305ac6c 250 goto drop;
fe741e23 251 }
e305ac6c 252 }
2d07dc79
JL
253
254 skb_reset_mac_header(skb);
2d07dc79
JL
255 skb->protocol = eth_type_trans(skb, geneve->dev);
256 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
257
e305ac6c
PS
258 if (tun_dst)
259 skb_dst_set(skb, &tun_dst->dst);
260
2d07dc79 261 /* Ignore packet loops (and multicast echo) */
fe741e23
GM
262 if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) {
263 geneve->dev->stats.rx_errors++;
2d07dc79 264 goto drop;
fe741e23 265 }
2d07dc79 266
9fc47545 267 oiph = skb_network_header(skb);
2d07dc79
JL
268 skb_reset_network_header(skb);
269
9fc47545
JB
270 if (geneve_get_sk_family(gs) == AF_INET)
271 err = IP_ECN_decapsulate(oiph, skb);
8ed66f0e 272#if IS_ENABLED(CONFIG_IPV6)
9fc47545
JB
273 else
274 err = IP6_ECN_decapsulate(oiph, skb);
8ed66f0e 275#endif
2d07dc79
JL
276
277 if (unlikely(err)) {
8ed66f0e 278 if (log_ecn_error) {
9fc47545 279 if (geneve_get_sk_family(gs) == AF_INET)
8ed66f0e
JL
280 net_info_ratelimited("non-ECT from %pI4 "
281 "with TOS=%#x\n",
9fc47545
JB
282 &((struct iphdr *)oiph)->saddr,
283 ((struct iphdr *)oiph)->tos);
8ed66f0e 284#if IS_ENABLED(CONFIG_IPV6)
9fc47545 285 else
8ed66f0e 286 net_info_ratelimited("non-ECT from %pI6\n",
9fc47545 287 &((struct ipv6hdr *)oiph)->saddr);
8ed66f0e
JL
288#endif
289 }
2d07dc79
JL
290 if (err > 1) {
291 ++geneve->dev->stats.rx_frame_errors;
292 ++geneve->dev->stats.rx_errors;
293 goto drop;
294 }
295 }
296
fe741e23
GM
297 len = skb->len;
298 err = gro_cells_receive(&geneve->gro_cells, skb);
299 if (likely(err == NET_RX_SUCCESS)) {
300 stats = this_cpu_ptr(geneve->dev->tstats);
301 u64_stats_update_begin(&stats->syncp);
302 stats->rx_packets++;
303 stats->rx_bytes += len;
304 u64_stats_update_end(&stats->syncp);
305 }
2d07dc79
JL
306 return;
307drop:
308 /* Consume bad packet */
309 kfree_skb(skb);
310}
311
312/* Setup stats when device is created */
313static int geneve_init(struct net_device *dev)
314{
8e816df8
JG
315 struct geneve_dev *geneve = netdev_priv(dev);
316 int err;
317
2d07dc79
JL
318 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
319 if (!dev->tstats)
320 return -ENOMEM;
8e816df8
JG
321
322 err = gro_cells_init(&geneve->gro_cells, dev);
323 if (err) {
324 free_percpu(dev->tstats);
325 return err;
326 }
327
9b4437a5 328 err = dst_cache_init(&geneve->info.dst_cache, GFP_KERNEL);
468dfffc
PA
329 if (err) {
330 free_percpu(dev->tstats);
331 gro_cells_destroy(&geneve->gro_cells);
332 return err;
333 }
2d07dc79
JL
334 return 0;
335}
336
337static void geneve_uninit(struct net_device *dev)
338{
8e816df8
JG
339 struct geneve_dev *geneve = netdev_priv(dev);
340
9b4437a5 341 dst_cache_destroy(&geneve->info.dst_cache);
8e816df8 342 gro_cells_destroy(&geneve->gro_cells);
2d07dc79
JL
343 free_percpu(dev->tstats);
344}
345
371bd106
PS
346/* Callback from net/ipv4/udp.c to receive packets */
347static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
348{
349 struct genevehdr *geneveh;
9fc47545 350 struct geneve_dev *geneve;
371bd106
PS
351 struct geneve_sock *gs;
352 int opts_len;
353
fe741e23 354 /* Need UDP and Geneve header to be present */
371bd106 355 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
e5aed006 356 goto drop;
371bd106
PS
357
358 /* Return packets with reserved bits set */
359 geneveh = geneve_hdr(skb);
360 if (unlikely(geneveh->ver != GENEVE_VER))
e5aed006 361 goto drop;
371bd106
PS
362
363 if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
e5aed006 364 goto drop;
371bd106 365
9fc47545
JB
366 gs = rcu_dereference_sk_user_data(sk);
367 if (!gs)
368 goto drop;
369
370 geneve = geneve_lookup_skb(gs, skb);
371 if (!geneve)
372 goto drop;
373
371bd106
PS
374 opts_len = geneveh->opt_len * 4;
375 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
7f290c94 376 htons(ETH_P_TEB),
fe741e23
GM
377 !net_eq(geneve->net, dev_net(geneve->dev)))) {
378 geneve->dev->stats.rx_dropped++;
371bd106 379 goto drop;
fe741e23 380 }
371bd106 381
9fc47545 382 geneve_rx(geneve, gs, skb);
371bd106
PS
383 return 0;
384
385drop:
386 /* Consume bad packet */
387 kfree_skb(skb);
388 return 0;
371bd106
PS
389}
390
a0796644
SB
391/* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
392static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
393{
394 struct genevehdr *geneveh;
395 struct geneve_sock *gs;
396 u8 zero_vni[3] = { 0 };
397 u8 *vni = zero_vni;
398
399 if (skb->len < GENEVE_BASE_HLEN)
400 return -EINVAL;
401
402 geneveh = geneve_hdr(skb);
403 if (geneveh->ver != GENEVE_VER)
404 return -EINVAL;
405
406 if (geneveh->proto_type != htons(ETH_P_TEB))
407 return -EINVAL;
408
409 gs = rcu_dereference_sk_user_data(sk);
410 if (!gs)
411 return -ENOENT;
412
413 if (geneve_get_sk_family(gs) == AF_INET) {
414 struct iphdr *iph = ip_hdr(skb);
415 __be32 addr4 = 0;
416
417 if (!gs->collect_md) {
418 vni = geneve_hdr(skb)->vni;
419 addr4 = iph->daddr;
420 }
421
422 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
423 }
424
425#if IS_ENABLED(CONFIG_IPV6)
426 if (geneve_get_sk_family(gs) == AF_INET6) {
427 struct ipv6hdr *ip6h = ipv6_hdr(skb);
8a962c4a
NC
428 struct in6_addr addr6;
429
430 memset(&addr6, 0, sizeof(struct in6_addr));
a0796644
SB
431
432 if (!gs->collect_md) {
433 vni = geneve_hdr(skb)->vni;
434 addr6 = ip6h->daddr;
435 }
436
437 return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
438 }
439#endif
440
441 return -EPFNOSUPPORT;
442}
443
371bd106 444static struct socket *geneve_create_sock(struct net *net, bool ipv6,
9b4437a5 445 __be16 port, bool ipv6_rx_csum)
371bd106
PS
446{
447 struct socket *sock;
448 struct udp_port_cfg udp_conf;
449 int err;
450
451 memset(&udp_conf, 0, sizeof(udp_conf));
452
453 if (ipv6) {
454 udp_conf.family = AF_INET6;
8ed66f0e 455 udp_conf.ipv6_v6only = 1;
9b4437a5 456 udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
371bd106
PS
457 } else {
458 udp_conf.family = AF_INET;
459 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
460 }
461
462 udp_conf.local_udp_port = port;
463
464 /* Open UDP socket */
465 err = udp_sock_create(net, &udp_conf, &sock);
466 if (err < 0)
467 return ERR_PTR(err);
468
469 return sock;
470}
471
371bd106
PS
472static int geneve_hlen(struct genevehdr *gh)
473{
474 return sizeof(*gh) + gh->opt_len * 4;
475}
476
d4546c25
DM
477static struct sk_buff *geneve_gro_receive(struct sock *sk,
478 struct list_head *head,
479 struct sk_buff *skb)
371bd106 480{
d4546c25
DM
481 struct sk_buff *pp = NULL;
482 struct sk_buff *p;
371bd106
PS
483 struct genevehdr *gh, *gh2;
484 unsigned int hlen, gh_len, off_gnv;
485 const struct packet_offload *ptype;
486 __be16 type;
487 int flush = 1;
488
489 off_gnv = skb_gro_offset(skb);
490 hlen = off_gnv + sizeof(*gh);
491 gh = skb_gro_header_fast(skb, off_gnv);
492 if (skb_gro_header_hard(skb, hlen)) {
493 gh = skb_gro_header_slow(skb, hlen, off_gnv);
494 if (unlikely(!gh))
495 goto out;
496 }
497
498 if (gh->ver != GENEVE_VER || gh->oam)
499 goto out;
500 gh_len = geneve_hlen(gh);
501
502 hlen = off_gnv + gh_len;
503 if (skb_gro_header_hard(skb, hlen)) {
504 gh = skb_gro_header_slow(skb, hlen, off_gnv);
505 if (unlikely(!gh))
506 goto out;
507 }
508
d4546c25 509 list_for_each_entry(p, head, list) {
371bd106
PS
510 if (!NAPI_GRO_CB(p)->same_flow)
511 continue;
512
513 gh2 = (struct genevehdr *)(p->data + off_gnv);
514 if (gh->opt_len != gh2->opt_len ||
515 memcmp(gh, gh2, gh_len)) {
516 NAPI_GRO_CB(p)->same_flow = 0;
517 continue;
518 }
519 }
520
521 type = gh->proto_type;
522
523 rcu_read_lock();
524 ptype = gro_find_receive_by_type(type);
c194cf93 525 if (!ptype)
371bd106 526 goto out_unlock;
371bd106
PS
527
528 skb_gro_pull(skb, gh_len);
529 skb_gro_postpull_rcsum(skb, gh, gh_len);
fcd91dd4 530 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
c194cf93 531 flush = 0;
371bd106
PS
532
533out_unlock:
534 rcu_read_unlock();
535out:
603d4cf8 536 skb_gro_flush_final(skb, pp, flush);
371bd106
PS
537
538 return pp;
539}
540
4a0090a9
TH
541static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
542 int nhoff)
371bd106
PS
543{
544 struct genevehdr *gh;
545 struct packet_offload *ptype;
546 __be16 type;
547 int gh_len;
548 int err = -ENOSYS;
549
371bd106
PS
550 gh = (struct genevehdr *)(skb->data + nhoff);
551 gh_len = geneve_hlen(gh);
552 type = gh->proto_type;
553
554 rcu_read_lock();
555 ptype = gro_find_complete_by_type(type);
556 if (ptype)
557 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
558
559 rcu_read_unlock();
229740c6
JR
560
561 skb_set_inner_mac_header(skb, nhoff + gh_len);
562
371bd106
PS
563 return err;
564}
565
566/* Create new listen socket if needed */
567static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
9b4437a5 568 bool ipv6, bool ipv6_rx_csum)
371bd106
PS
569{
570 struct geneve_net *gn = net_generic(net, geneve_net_id);
571 struct geneve_sock *gs;
572 struct socket *sock;
573 struct udp_tunnel_sock_cfg tunnel_cfg;
66d47003 574 int h;
371bd106
PS
575
576 gs = kzalloc(sizeof(*gs), GFP_KERNEL);
577 if (!gs)
578 return ERR_PTR(-ENOMEM);
579
9b4437a5 580 sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
371bd106
PS
581 if (IS_ERR(sock)) {
582 kfree(gs);
583 return ERR_CAST(sock);
584 }
585
586 gs->sock = sock;
587 gs->refcnt = 1;
66d47003
PS
588 for (h = 0; h < VNI_HASH_SIZE; ++h)
589 INIT_HLIST_HEAD(&gs->vni_list[h]);
371bd106
PS
590
591 /* Initialize the geneve udp offloads structure */
e7b3db5e 592 udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
371bd106
PS
593
594 /* Mark socket as an encapsulation socket */
4a0090a9 595 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
371bd106
PS
596 tunnel_cfg.sk_user_data = gs;
597 tunnel_cfg.encap_type = 1;
4a0090a9
TH
598 tunnel_cfg.gro_receive = geneve_gro_receive;
599 tunnel_cfg.gro_complete = geneve_gro_complete;
371bd106 600 tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
a0796644 601 tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
371bd106
PS
602 tunnel_cfg.encap_destroy = NULL;
603 setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
371bd106
PS
604 list_add(&gs->list, &gn->sock_list);
605 return gs;
606}
607
8ed66f0e 608static void __geneve_sock_release(struct geneve_sock *gs)
371bd106 609{
8ed66f0e 610 if (!gs || --gs->refcnt)
371bd106
PS
611 return;
612
613 list_del(&gs->list);
e7b3db5e 614 udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
371bd106
PS
615 udp_tunnel_sock_release(gs->sock);
616 kfree_rcu(gs, rcu);
617}
618
8ed66f0e
JL
619static void geneve_sock_release(struct geneve_dev *geneve)
620{
fceb9c3e 621 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
8ed66f0e 622#if IS_ENABLED(CONFIG_IPV6)
fceb9c3e 623 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
624
625 rcu_assign_pointer(geneve->sock6, NULL);
626#endif
627
628 rcu_assign_pointer(geneve->sock4, NULL);
629 synchronize_net();
630
631 __geneve_sock_release(gs4);
632#if IS_ENABLED(CONFIG_IPV6)
633 __geneve_sock_release(gs6);
8ed66f0e
JL
634#endif
635}
636
371bd106 637static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
8ed66f0e 638 sa_family_t family,
371bd106
PS
639 __be16 dst_port)
640{
641 struct geneve_sock *gs;
642
643 list_for_each_entry(gs, &gn->sock_list, list) {
644 if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
1e9f12ec 645 geneve_get_sk_family(gs) == family) {
371bd106
PS
646 return gs;
647 }
648 }
649 return NULL;
650}
651
8ed66f0e 652static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
2d07dc79 653{
2d07dc79 654 struct net *net = geneve->net;
371bd106 655 struct geneve_net *gn = net_generic(net, geneve_net_id);
4b4c21fa 656 struct geneve_dev_node *node;
2d07dc79 657 struct geneve_sock *gs;
9b4437a5 658 __u8 vni[3];
66d47003 659 __u32 hash;
2d07dc79 660
9b4437a5 661 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->info.key.tp_dst);
371bd106
PS
662 if (gs) {
663 gs->refcnt++;
664 goto out;
665 }
666
9b4437a5 667 gs = geneve_socket_create(net, geneve->info.key.tp_dst, ipv6,
668 geneve->use_udp6_rx_checksums);
2d07dc79
JL
669 if (IS_ERR(gs))
670 return PTR_ERR(gs);
671
371bd106
PS
672out:
673 gs->collect_md = geneve->collect_md;
8ed66f0e 674#if IS_ENABLED(CONFIG_IPV6)
4b4c21fa 675 if (ipv6) {
fceb9c3e 676 rcu_assign_pointer(geneve->sock6, gs);
4b4c21fa
JB
677 node = &geneve->hlist6;
678 } else
8ed66f0e 679#endif
4b4c21fa 680 {
fceb9c3e 681 rcu_assign_pointer(geneve->sock4, gs);
4b4c21fa
JB
682 node = &geneve->hlist4;
683 }
684 node->geneve = geneve;
66d47003 685
9b4437a5 686 tunnel_id_to_vni(geneve->info.key.tun_id, vni);
687 hash = geneve_net_vni_hash(vni);
4b4c21fa 688 hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
2d07dc79
JL
689 return 0;
690}
691
8ed66f0e
JL
692static int geneve_open(struct net_device *dev)
693{
694 struct geneve_dev *geneve = netdev_priv(dev);
9b4437a5 695 bool ipv6 = !!(geneve->info.mode & IP_TUNNEL_INFO_IPV6);
8ed66f0e
JL
696 bool metadata = geneve->collect_md;
697 int ret = 0;
698
8ed66f0e 699#if IS_ENABLED(CONFIG_IPV6)
8ed66f0e
JL
700 if (ipv6 || metadata)
701 ret = geneve_sock_add(geneve, true);
702#endif
703 if (!ret && (!ipv6 || metadata))
704 ret = geneve_sock_add(geneve, false);
705 if (ret < 0)
706 geneve_sock_release(geneve);
707
708 return ret;
709}
710
2d07dc79
JL
711static int geneve_stop(struct net_device *dev)
712{
713 struct geneve_dev *geneve = netdev_priv(dev);
2d07dc79 714
4b4c21fa
JB
715 hlist_del_init_rcu(&geneve->hlist4.hlist);
716#if IS_ENABLED(CONFIG_IPV6)
717 hlist_del_init_rcu(&geneve->hlist6.hlist);
718#endif
8ed66f0e 719 geneve_sock_release(geneve);
371bd106
PS
720 return 0;
721}
722
8ed66f0e 723static void geneve_build_header(struct genevehdr *geneveh,
c3ef5aa5 724 const struct ip_tunnel_info *info)
8ed66f0e
JL
725{
726 geneveh->ver = GENEVE_VER;
c3ef5aa5 727 geneveh->opt_len = info->options_len / 4;
728 geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
729 geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
8ed66f0e 730 geneveh->rsvd1 = 0;
c3ef5aa5 731 tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
8ed66f0e
JL
732 geneveh->proto_type = htons(ETH_P_TEB);
733 geneveh->rsvd2 = 0;
734
256c87c1
PJV
735 if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
736 ip_tunnel_info_opts_get(geneveh->options, info);
8ed66f0e
JL
737}
738
c3ef5aa5 739static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
740 const struct ip_tunnel_info *info,
741 bool xnet, int ip_hdr_len)
371bd106 742{
c3ef5aa5 743 bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
8ed66f0e
JL
744 struct genevehdr *gnvh;
745 int min_headroom;
746 int err;
747
c3ef5aa5 748 skb_reset_mac_header(skb);
8ed66f0e
JL
749 skb_scrub_packet(skb, xnet);
750
c3ef5aa5 751 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
752 GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
8ed66f0e 753 err = skb_cow_head(skb, min_headroom);
aed069df 754 if (unlikely(err))
8ed66f0e 755 goto free_dst;
8ed66f0e 756
aed069df 757 err = udp_tunnel_handle_offloads(skb, udp_sum);
1ba64fac 758 if (err)
8ed66f0e 759 goto free_dst;
8ed66f0e 760
d58ff351 761 gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
c3ef5aa5 762 geneve_build_header(gnvh, info);
8ed66f0e
JL
763 skb_set_inner_protocol(skb, htons(ETH_P_TEB));
764 return 0;
765
766free_dst:
767 dst_release(dst);
768 return err;
769}
8ed66f0e
JL
770
771static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
772 struct net_device *dev,
5b861f6b 773 struct geneve_sock *gs4,
8ed66f0e 774 struct flowi4 *fl4,
c3ef5aa5 775 const struct ip_tunnel_info *info)
e305ac6c 776{
db3c6139 777 bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
e305ac6c 778 struct geneve_dev *geneve = netdev_priv(dev);
468dfffc 779 struct dst_cache *dst_cache;
e305ac6c
PS
780 struct rtable *rt = NULL;
781 __u8 tos;
782
5b861f6b 783 if (!gs4)
fceb9c3e 784 return ERR_PTR(-EIO);
785
e305ac6c
PS
786 memset(fl4, 0, sizeof(*fl4));
787 fl4->flowi4_mark = skb->mark;
788 fl4->flowi4_proto = IPPROTO_UDP;
9b4437a5 789 fl4->daddr = info->key.u.ipv4.dst;
790 fl4->saddr = info->key.u.ipv4.src;
e305ac6c 791
9b4437a5 792 tos = info->key.tos;
793 if ((tos == 1) && !geneve->collect_md) {
794 tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
795 use_cache = false;
468dfffc 796 }
9b4437a5 797 fl4->flowi4_tos = RT_TOS(tos);
468dfffc 798
c3ef5aa5 799 dst_cache = (struct dst_cache *)&info->dst_cache;
468dfffc
PA
800 if (use_cache) {
801 rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
802 if (rt)
803 return rt;
e305ac6c 804 }
e305ac6c
PS
805 rt = ip_route_output_key(geneve->net, fl4);
806 if (IS_ERR(rt)) {
807 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
fc4099f1 808 return ERR_PTR(-ENETUNREACH);
e305ac6c
PS
809 }
810 if (rt->dst.dev == dev) { /* is this necessary? */
811 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
e305ac6c 812 ip_rt_put(rt);
fc4099f1 813 return ERR_PTR(-ELOOP);
e305ac6c 814 }
468dfffc
PA
815 if (use_cache)
816 dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
e305ac6c
PS
817 return rt;
818}
819
8ed66f0e
JL
820#if IS_ENABLED(CONFIG_IPV6)
821static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
822 struct net_device *dev,
5b861f6b 823 struct geneve_sock *gs6,
8ed66f0e 824 struct flowi6 *fl6,
c3ef5aa5 825 const struct ip_tunnel_info *info)
8ed66f0e 826{
db3c6139 827 bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
8ed66f0e 828 struct geneve_dev *geneve = netdev_priv(dev);
8ed66f0e 829 struct dst_entry *dst = NULL;
468dfffc 830 struct dst_cache *dst_cache;
3a56f86f 831 __u8 prio;
8ed66f0e 832
fceb9c3e 833 if (!gs6)
834 return ERR_PTR(-EIO);
835
8ed66f0e
JL
836 memset(fl6, 0, sizeof(*fl6));
837 fl6->flowi6_mark = skb->mark;
838 fl6->flowi6_proto = IPPROTO_UDP;
9b4437a5 839 fl6->daddr = info->key.u.ipv6.dst;
840 fl6->saddr = info->key.u.ipv6.src;
841 prio = info->key.tos;
842 if ((prio == 1) && !geneve->collect_md) {
843 prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
844 use_cache = false;
468dfffc
PA
845 }
846
9b4437a5 847 fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
848 info->key.label);
c3ef5aa5 849 dst_cache = (struct dst_cache *)&info->dst_cache;
468dfffc
PA
850 if (use_cache) {
851 dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
852 if (dst)
853 return dst;
8ed66f0e 854 }
8ed66f0e
JL
855 if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
856 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
857 return ERR_PTR(-ENETUNREACH);
858 }
859 if (dst->dev == dev) { /* is this necessary? */
860 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
861 dst_release(dst);
862 return ERR_PTR(-ELOOP);
863 }
864
468dfffc
PA
865 if (use_cache)
866 dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
8ed66f0e
JL
867 return dst;
868}
869#endif
870
9b4437a5 871static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
c3ef5aa5 872 struct geneve_dev *geneve,
873 const struct ip_tunnel_info *info)
2d07dc79 874{
9b4437a5 875 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
876 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
877 const struct ip_tunnel_key *key = &info->key;
878 struct rtable *rt;
2d07dc79 879 struct flowi4 fl4;
8760ce58 880 __u8 tos, ttl;
a025fb5f 881 __be16 df = 0;
e305ac6c 882 __be16 sport;
bcceeec3 883 int err;
980c394c 884
5b861f6b 885 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
9b4437a5 886 if (IS_ERR(rt))
887 return PTR_ERR(rt);
371bd106 888
6b4f92af
SB
889 skb_tunnel_check_pmtu(skb, &rt->dst,
890 GENEVE_IPV4_HLEN + info->options_len);
52a589d5 891
371bd106 892 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
9b4437a5 893 if (geneve->collect_md) {
894 tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
371bd106 895 ttl = key->ttl;
a025fb5f
SB
896
897 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
e305ac6c 898 } else {
9b4437a5 899 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
52d0d404
HL
900 if (geneve->ttl_inherit)
901 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
902 else
903 ttl = key->ttl;
904 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
a025fb5f
SB
905
906 if (geneve->df == GENEVE_DF_SET) {
907 df = htons(IP_DF);
908 } else if (geneve->df == GENEVE_DF_INHERIT) {
909 struct ethhdr *eth = eth_hdr(skb);
910
911 if (ntohs(eth->h_proto) == ETH_P_IPV6) {
912 df = htons(IP_DF);
913 } else if (ntohs(eth->h_proto) == ETH_P_IP) {
914 struct iphdr *iph = ip_hdr(skb);
915
916 if (iph->frag_off & htons(IP_DF))
917 df = htons(IP_DF);
918 }
919 }
2d07dc79 920 }
2d07dc79 921
c3ef5aa5 922 err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
9b4437a5 923 if (unlikely(err))
924 return err;
efeb2267 925
9b4437a5 926 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
927 tos, ttl, df, sport, geneve->info.key.tp_dst,
928 !net_eq(geneve->net, dev_net(geneve->dev)),
929 !(info->key.tun_flags & TUNNEL_CSUM));
930 return 0;
2d07dc79
JL
931}
932
8ed66f0e 933#if IS_ENABLED(CONFIG_IPV6)
9b4437a5 934static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
c3ef5aa5 935 struct geneve_dev *geneve,
936 const struct ip_tunnel_info *info)
8ed66f0e 937{
9b4437a5 938 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
939 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
940 const struct ip_tunnel_key *key = &info->key;
8ed66f0e 941 struct dst_entry *dst = NULL;
8ed66f0e 942 struct flowi6 fl6;
3a56f86f 943 __u8 prio, ttl;
8ed66f0e 944 __be16 sport;
bcceeec3 945 int err;
8ed66f0e 946
5b861f6b 947 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
9b4437a5 948 if (IS_ERR(dst))
949 return PTR_ERR(dst);
8ed66f0e 950
6b4f92af 951 skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
52a589d5 952
8ed66f0e 953 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
9b4437a5 954 if (geneve->collect_md) {
955 prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
956 ttl = key->ttl;
957 } else {
958 prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
959 ip_hdr(skb), skb);
52d0d404
HL
960 if (geneve->ttl_inherit)
961 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
962 else
963 ttl = key->ttl;
964 ttl = ttl ? : ip6_dst_hoplimit(dst);
9b4437a5 965 }
31ac1c19 966 err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr));
9b4437a5 967 if (unlikely(err))
968 return err;
8ed66f0e 969
9b4437a5 970 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
971 &fl6.saddr, &fl6.daddr, prio, ttl,
972 info->key.label, sport, geneve->info.key.tp_dst,
973 !(info->key.tun_flags & TUNNEL_CSUM));
974 return 0;
975}
976#endif
8ed66f0e 977
9b4437a5 978static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
979{
980 struct geneve_dev *geneve = netdev_priv(dev);
981 struct ip_tunnel_info *info = NULL;
982 int err;
abe492b4 983
9b4437a5 984 if (geneve->collect_md) {
985 info = skb_tunnel_info(skb);
986 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
987 err = -EINVAL;
988 netdev_dbg(dev, "no tunnel metadata\n");
aed069df 989 goto tx_error;
9b4437a5 990 }
8ed66f0e 991 } else {
9b4437a5 992 info = &geneve->info;
8ed66f0e 993 }
8eb3b995 994
a717e3f7 995 rcu_read_lock();
9b4437a5 996#if IS_ENABLED(CONFIG_IPV6)
997 if (info->mode & IP_TUNNEL_INFO_IPV6)
998 err = geneve6_xmit_skb(skb, dev, geneve, info);
999 else
1000#endif
1001 err = geneve_xmit_skb(skb, dev, geneve, info);
a717e3f7 1002 rcu_read_unlock();
8ed66f0e 1003
9b4437a5 1004 if (likely(!err))
1005 return NETDEV_TX_OK;
8ed66f0e
JL
1006tx_error:
1007 dev_kfree_skb(skb);
aed069df 1008
8ed66f0e
JL
1009 if (err == -ELOOP)
1010 dev->stats.collisions++;
1011 else if (err == -ENETUNREACH)
1012 dev->stats.tx_carrier_errors++;
efeb2267
HY
1013
1014 dev->stats.tx_errors++;
8ed66f0e
JL
1015 return NETDEV_TX_OK;
1016}
8ed66f0e 1017
91572088 1018static int geneve_change_mtu(struct net_device *dev, int new_mtu)
55e5bfb5 1019{
91572088
JW
1020 if (new_mtu > dev->max_mtu)
1021 new_mtu = dev->max_mtu;
321acc1c
AK
1022 else if (new_mtu < dev->min_mtu)
1023 new_mtu = dev->min_mtu;
aeee0e66 1024
55e5bfb5
DW
1025 dev->mtu = new_mtu;
1026 return 0;
1027}
1028
fc4099f1
PS
1029static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
1030{
1031 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1032 struct geneve_dev *geneve = netdev_priv(dev);
fc4099f1 1033
b8812fa8 1034 if (ip_tunnel_info_af(info) == AF_INET) {
9b4437a5 1035 struct rtable *rt;
1036 struct flowi4 fl4;
5b861f6b 1037 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
9b4437a5 1038
5b861f6b 1039 rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
b8812fa8
JL
1040 if (IS_ERR(rt))
1041 return PTR_ERR(rt);
fc4099f1 1042
b8812fa8
JL
1043 ip_rt_put(rt);
1044 info->key.u.ipv4.src = fl4.saddr;
1045#if IS_ENABLED(CONFIG_IPV6)
1046 } else if (ip_tunnel_info_af(info) == AF_INET6) {
9b4437a5 1047 struct dst_entry *dst;
1048 struct flowi6 fl6;
5b861f6b 1049 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
9b4437a5 1050
5b861f6b 1051 dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
b8812fa8
JL
1052 if (IS_ERR(dst))
1053 return PTR_ERR(dst);
1054
1055 dst_release(dst);
1056 info->key.u.ipv6.src = fl6.saddr;
1057#endif
1058 } else {
1059 return -EINVAL;
1060 }
fc4099f1 1061
fc4099f1
PS
1062 info->key.tp_src = udp_flow_src_port(geneve->net, skb,
1063 1, USHRT_MAX, true);
9b4437a5 1064 info->key.tp_dst = geneve->info.key.tp_dst;
fc4099f1
PS
1065 return 0;
1066}
1067
2d07dc79
JL
1068static const struct net_device_ops geneve_netdev_ops = {
1069 .ndo_init = geneve_init,
1070 .ndo_uninit = geneve_uninit,
1071 .ndo_open = geneve_open,
1072 .ndo_stop = geneve_stop,
1073 .ndo_start_xmit = geneve_xmit,
1074 .ndo_get_stats64 = ip_tunnel_get_stats64,
55e5bfb5 1075 .ndo_change_mtu = geneve_change_mtu,
2d07dc79
JL
1076 .ndo_validate_addr = eth_validate_addr,
1077 .ndo_set_mac_address = eth_mac_addr,
fc4099f1 1078 .ndo_fill_metadata_dst = geneve_fill_metadata_dst,
2d07dc79
JL
1079};
1080
1081static void geneve_get_drvinfo(struct net_device *dev,
1082 struct ethtool_drvinfo *drvinfo)
1083{
1084 strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
1085 strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
1086}
1087
1088static const struct ethtool_ops geneve_ethtool_ops = {
1089 .get_drvinfo = geneve_get_drvinfo,
1090 .get_link = ethtool_op_get_link,
1091};
1092
1093/* Info for udev, that this is a virtual tunnel endpoint */
1094static struct device_type geneve_type = {
1095 .name = "geneve",
1096};
1097
e5de25dc 1098/* Calls the ndo_udp_tunnel_add of the caller in order to
05ca4029 1099 * supply the listening GENEVE udp ports. Callers are expected
e5de25dc 1100 * to implement the ndo_udp_tunnel_add.
05ca4029 1101 */
2d2b13fc 1102static void geneve_offload_rx_ports(struct net_device *dev, bool push)
05ca4029
SA
1103{
1104 struct net *net = dev_net(dev);
1105 struct geneve_net *gn = net_generic(net, geneve_net_id);
1106 struct geneve_sock *gs;
681e683f 1107
05ca4029 1108 rcu_read_lock();
2d2b13fc
SD
1109 list_for_each_entry_rcu(gs, &gn->sock_list, list) {
1110 if (push) {
1111 udp_tunnel_push_rx_port(dev, gs->sock,
1112 UDP_TUNNEL_TYPE_GENEVE);
1113 } else {
1114 udp_tunnel_drop_rx_port(dev, gs->sock,
1115 UDP_TUNNEL_TYPE_GENEVE);
1116 }
1117 }
05ca4029
SA
1118 rcu_read_unlock();
1119}
05ca4029 1120
2d07dc79
JL
1121/* Initialize the device structure. */
1122static void geneve_setup(struct net_device *dev)
1123{
1124 ether_setup(dev);
1125
1126 dev->netdev_ops = &geneve_netdev_ops;
1127 dev->ethtool_ops = &geneve_ethtool_ops;
cf124db5 1128 dev->needs_free_netdev = true;
2d07dc79
JL
1129
1130 SET_NETDEV_DEVTYPE(dev, &geneve_type);
1131
2d07dc79
JL
1132 dev->features |= NETIF_F_LLTX;
1133 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
1134 dev->features |= NETIF_F_RXCSUM;
1135 dev->features |= NETIF_F_GSO_SOFTWARE;
1136
2d07dc79
JL
1137 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
1138 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
2d07dc79 1139
91572088
JW
1140 /* MTU range: 68 - (something less than 65535) */
1141 dev->min_mtu = ETH_MIN_MTU;
1142 /* The max_mtu calculation does not take account of GENEVE
1143 * options, to avoid excluding potentially valid
1144 * configurations. This will be further reduced by IPvX hdr size.
1145 */
1146 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
1147
2d07dc79 1148 netif_keep_dst(dev);
fc41cdb3 1149 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
ed961ac2 1150 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
87cd3dca 1151 eth_hw_addr_random(dev);
2d07dc79
JL
1152}
1153
1154static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
1155 [IFLA_GENEVE_ID] = { .type = NLA_U32 },
1156 [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
8ed66f0e 1157 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
8760ce58 1158 [IFLA_GENEVE_TTL] = { .type = NLA_U8 },
d8951125 1159 [IFLA_GENEVE_TOS] = { .type = NLA_U8 },
8eb3b995 1160 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 },
cd7918b3 1161 [IFLA_GENEVE_PORT] = { .type = NLA_U16 },
e305ac6c 1162 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
abe492b4
TH
1163 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 },
1164 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
1165 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
52d0d404 1166 [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
a025fb5f 1167 [IFLA_GENEVE_DF] = { .type = NLA_U8 },
2d07dc79
JL
1168};
1169
a8b8a889
MS
1170static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
1171 struct netlink_ext_ack *extack)
2d07dc79
JL
1172{
1173 if (tb[IFLA_ADDRESS]) {
c5ebc440
GM
1174 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
1175 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1176 "Provided link layer address is not Ethernet");
2d07dc79 1177 return -EINVAL;
c5ebc440 1178 }
2d07dc79 1179
c5ebc440
GM
1180 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
1181 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1182 "Provided Ethernet address is not unicast");
2d07dc79 1183 return -EADDRNOTAVAIL;
c5ebc440 1184 }
2d07dc79
JL
1185 }
1186
c5ebc440
GM
1187 if (!data) {
1188 NL_SET_ERR_MSG(extack,
1189 "Not enough attributes provided to perform the operation");
2d07dc79 1190 return -EINVAL;
c5ebc440 1191 }
2d07dc79
JL
1192
1193 if (data[IFLA_GENEVE_ID]) {
1194 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1195
c5ebc440
GM
1196 if (vni >= GENEVE_N_VID) {
1197 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
1198 "Geneve ID must be lower than 16777216");
2d07dc79 1199 return -ERANGE;
c5ebc440 1200 }
2d07dc79
JL
1201 }
1202
a025fb5f
SB
1203 if (data[IFLA_GENEVE_DF]) {
1204 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
1205
1206 if (df < 0 || df > GENEVE_DF_MAX) {
1207 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_GENEVE_DF],
1208 "Invalid DF attribute");
1209 return -EINVAL;
1210 }
1211 }
1212
2d07dc79
JL
1213 return 0;
1214}
1215
371bd106 1216static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
9b4437a5 1217 const struct ip_tunnel_info *info,
371bd106
PS
1218 bool *tun_on_same_port,
1219 bool *tun_collect_md)
1220{
9b4437a5 1221 struct geneve_dev *geneve, *t = NULL;
371bd106
PS
1222
1223 *tun_on_same_port = false;
1224 *tun_collect_md = false;
371bd106 1225 list_for_each_entry(geneve, &gn->geneve_list, next) {
9b4437a5 1226 if (info->key.tp_dst == geneve->info.key.tp_dst) {
371bd106
PS
1227 *tun_collect_md = geneve->collect_md;
1228 *tun_on_same_port = true;
1229 }
9b4437a5 1230 if (info->key.tun_id == geneve->info.key.tun_id &&
1231 info->key.tp_dst == geneve->info.key.tp_dst &&
1232 !memcmp(&info->key.u, &geneve->info.key.u, sizeof(info->key.u)))
371bd106
PS
1233 t = geneve;
1234 }
1235 return t;
1236}
1237
9b4437a5 1238static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
1239{
3fa5f11d
SB
1240 return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
1241 info->key.ttl || info->key.label || info->key.tp_src ||
1242 memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
9b4437a5 1243}
1244
5b861f6b
GM
1245static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
1246 struct ip_tunnel_info *b)
1247{
1248 if (ip_tunnel_info_af(a) == AF_INET)
1249 return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
1250 else
1251 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
1252}
1253
e305ac6c 1254static int geneve_configure(struct net *net, struct net_device *dev,
c5ebc440 1255 struct netlink_ext_ack *extack,
9b4437a5 1256 const struct ip_tunnel_info *info,
52d0d404 1257 bool metadata, bool ipv6_rx_csum,
a025fb5f 1258 bool ttl_inherit, enum ifla_geneve_df df)
2d07dc79
JL
1259{
1260 struct geneve_net *gn = net_generic(net, geneve_net_id);
371bd106
PS
1261 struct geneve_dev *t, *geneve = netdev_priv(dev);
1262 bool tun_collect_md, tun_on_same_port;
184fc8b5 1263 int err, encap_len;
2d07dc79 1264
c5ebc440
GM
1265 if (metadata && !is_tnl_info_zero(info)) {
1266 NL_SET_ERR_MSG(extack,
1267 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
8ed66f0e 1268 return -EINVAL;
c5ebc440 1269 }
2d07dc79
JL
1270
1271 geneve->net = net;
1272 geneve->dev = dev;
1273
9b4437a5 1274 t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
371bd106
PS
1275 if (t)
1276 return -EBUSY;
1277
184fc8b5
PA
1278 /* make enough headroom for basic scenario */
1279 encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
9a1c44d9 1280 if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
184fc8b5 1281 encap_len += sizeof(struct iphdr);
91572088
JW
1282 dev->max_mtu -= sizeof(struct iphdr);
1283 } else {
184fc8b5 1284 encap_len += sizeof(struct ipv6hdr);
91572088
JW
1285 dev->max_mtu -= sizeof(struct ipv6hdr);
1286 }
184fc8b5
PA
1287 dev->needed_headroom = encap_len + ETH_HLEN;
1288
371bd106 1289 if (metadata) {
c5ebc440
GM
1290 if (tun_on_same_port) {
1291 NL_SET_ERR_MSG(extack,
1292 "There can be only one externally controlled device on a destination port");
371bd106 1293 return -EPERM;
c5ebc440 1294 }
371bd106 1295 } else {
c5ebc440
GM
1296 if (tun_collect_md) {
1297 NL_SET_ERR_MSG(extack,
1298 "There already exists an externally controlled device on this destination port");
371bd106 1299 return -EPERM;
c5ebc440 1300 }
371bd106
PS
1301 }
1302
9b4437a5 1303 dst_cache_reset(&geneve->info.dst_cache);
1304 geneve->info = *info;
1305 geneve->collect_md = metadata;
1306 geneve->use_udp6_rx_checksums = ipv6_rx_csum;
52d0d404 1307 geneve->ttl_inherit = ttl_inherit;
a025fb5f 1308 geneve->df = df;
468dfffc 1309
2d07dc79
JL
1310 err = register_netdevice(dev);
1311 if (err)
1312 return err;
1313
e305ac6c 1314 list_add(&geneve->next, &gn->geneve_list);
e305ac6c
PS
1315 return 0;
1316}
1317
9b4437a5 1318static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
1319{
1320 memset(info, 0, sizeof(*info));
1321 info->key.tp_dst = htons(dst_port);
1322}
1323
c5ebc440
GM
1324static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
1325 struct netlink_ext_ack *extack,
1326 struct ip_tunnel_info *info, bool *metadata,
52d0d404 1327 bool *use_udp6_rx_checksums, bool *ttl_inherit,
a025fb5f 1328 enum ifla_geneve_df *df, bool changelink)
e305ac6c 1329{
c5ebc440
GM
1330 int attrtype;
1331
1332 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
1333 NL_SET_ERR_MSG(extack,
1334 "Cannot specify both IPv4 and IPv6 Remote addresses");
8ed66f0e 1335 return -EINVAL;
c5ebc440 1336 }
8ed66f0e
JL
1337
1338 if (data[IFLA_GENEVE_REMOTE]) {
c5ebc440
GM
1339 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
1340 attrtype = IFLA_GENEVE_REMOTE;
1341 goto change_notsup;
1342 }
5b861f6b
GM
1343
1344 info->key.u.ipv4.dst =
8ed66f0e 1345 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
9b4437a5 1346
5b861f6b 1347 if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) {
c5ebc440
GM
1348 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
1349 "Remote IPv4 address cannot be Multicast");
9b4437a5 1350 return -EINVAL;
1351 }
8ed66f0e
JL
1352 }
1353
1354 if (data[IFLA_GENEVE_REMOTE6]) {
4c52a889 1355#if IS_ENABLED(CONFIG_IPV6)
c5ebc440
GM
1356 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
1357 attrtype = IFLA_GENEVE_REMOTE6;
1358 goto change_notsup;
1359 }
5b861f6b
GM
1360
1361 info->mode = IP_TUNNEL_INFO_IPV6;
1362 info->key.u.ipv6.dst =
8ed66f0e
JL
1363 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
1364
5b861f6b 1365 if (ipv6_addr_type(&info->key.u.ipv6.dst) &
8ed66f0e 1366 IPV6_ADDR_LINKLOCAL) {
c5ebc440
GM
1367 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1368 "Remote IPv6 address cannot be link-local");
8ed66f0e
JL
1369 return -EINVAL;
1370 }
5b861f6b 1371 if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
c5ebc440
GM
1372 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1373 "Remote IPv6 address cannot be Multicast");
9b4437a5 1374 return -EINVAL;
1375 }
5b861f6b
GM
1376 info->key.tun_flags |= TUNNEL_CSUM;
1377 *use_udp6_rx_checksums = true;
9b4437a5 1378#else
c5ebc440
GM
1379 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1380 "IPv6 support not enabled in the kernel");
9b4437a5 1381 return -EPFNOSUPPORT;
1382#endif
8ed66f0e
JL
1383 }
1384
9b4437a5 1385 if (data[IFLA_GENEVE_ID]) {
1386 __u32 vni;
1387 __u8 tvni[3];
5b861f6b 1388 __be64 tunid;
9b4437a5 1389
e277de5f 1390 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
9b4437a5 1391 tvni[0] = (vni & 0x00ff0000) >> 16;
1392 tvni[1] = (vni & 0x0000ff00) >> 8;
1393 tvni[2] = vni & 0x000000ff;
e305ac6c 1394
5b861f6b 1395 tunid = vni_to_tunnel_id(tvni);
c5ebc440
GM
1396 if (changelink && (tunid != info->key.tun_id)) {
1397 attrtype = IFLA_GENEVE_ID;
1398 goto change_notsup;
1399 }
5b861f6b 1400 info->key.tun_id = tunid;
9b4437a5 1401 }
5b861f6b 1402
a97d97ba
HL
1403 if (data[IFLA_GENEVE_TTL_INHERIT]) {
1404 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
1405 *ttl_inherit = true;
1406 else
1407 *ttl_inherit = false;
1408 } else if (data[IFLA_GENEVE_TTL]) {
5b861f6b 1409 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
a97d97ba
HL
1410 *ttl_inherit = false;
1411 }
52d0d404 1412
d8951125 1413 if (data[IFLA_GENEVE_TOS])
5b861f6b 1414 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
d8951125 1415
a025fb5f
SB
1416 if (data[IFLA_GENEVE_DF])
1417 *df = nla_get_u8(data[IFLA_GENEVE_DF]);
1418
9b4437a5 1419 if (data[IFLA_GENEVE_LABEL]) {
5b861f6b 1420 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
9b4437a5 1421 IPV6_FLOWLABEL_MASK;
c5ebc440
GM
1422 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
1423 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
1424 "Label attribute only applies for IPv6 Geneve devices");
9b4437a5 1425 return -EINVAL;
c5ebc440 1426 }
9b4437a5 1427 }
8eb3b995 1428
5b861f6b 1429 if (data[IFLA_GENEVE_PORT]) {
c5ebc440
GM
1430 if (changelink) {
1431 attrtype = IFLA_GENEVE_PORT;
1432 goto change_notsup;
1433 }
5b861f6b
GM
1434 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
1435 }
2d07dc79 1436
5b861f6b 1437 if (data[IFLA_GENEVE_COLLECT_METADATA]) {
c5ebc440
GM
1438 if (changelink) {
1439 attrtype = IFLA_GENEVE_COLLECT_METADATA;
1440 goto change_notsup;
1441 }
5b861f6b
GM
1442 *metadata = true;
1443 }
2d07dc79 1444
5b861f6b 1445 if (data[IFLA_GENEVE_UDP_CSUM]) {
c5ebc440
GM
1446 if (changelink) {
1447 attrtype = IFLA_GENEVE_UDP_CSUM;
1448 goto change_notsup;
1449 }
5b861f6b
GM
1450 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
1451 info->key.tun_flags |= TUNNEL_CSUM;
1452 }
abe492b4 1453
5b861f6b 1454 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
f9094b76 1455#if IS_ENABLED(CONFIG_IPV6)
c5ebc440
GM
1456 if (changelink) {
1457 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
1458 goto change_notsup;
1459 }
5b861f6b
GM
1460 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
1461 info->key.tun_flags &= ~TUNNEL_CSUM;
f9094b76
HL
1462#else
1463 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
1464 "IPv6 support not enabled in the kernel");
1465 return -EPFNOSUPPORT;
1466#endif
5b861f6b 1467 }
abe492b4 1468
5b861f6b 1469 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
f9094b76 1470#if IS_ENABLED(CONFIG_IPV6)
c5ebc440
GM
1471 if (changelink) {
1472 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
1473 goto change_notsup;
1474 }
5b861f6b
GM
1475 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
1476 *use_udp6_rx_checksums = false;
f9094b76
HL
1477#else
1478 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
1479 "IPv6 support not enabled in the kernel");
1480 return -EPFNOSUPPORT;
1481#endif
5b861f6b
GM
1482 }
1483
1484 return 0;
c5ebc440
GM
1485change_notsup:
1486 NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
1487 "Changing VNI, Port, endpoint IP address family, external, and UDP checksum attributes are not supported");
1488 return -EOPNOTSUPP;
5b861f6b
GM
1489}
1490
c40e89fd
AK
1491static void geneve_link_config(struct net_device *dev,
1492 struct ip_tunnel_info *info, struct nlattr *tb[])
1493{
1494 struct geneve_dev *geneve = netdev_priv(dev);
1495 int ldev_mtu = 0;
1496
1497 if (tb[IFLA_MTU]) {
1498 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
1499 return;
1500 }
1501
1502 switch (ip_tunnel_info_af(info)) {
1503 case AF_INET: {
1504 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
1505 struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
1506
1507 if (!IS_ERR(rt) && rt->dst.dev) {
1508 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
1509 ip_rt_put(rt);
1510 }
1511 break;
1512 }
1513#if IS_ENABLED(CONFIG_IPV6)
1514 case AF_INET6: {
c0a47e44
HL
1515 struct rt6_info *rt;
1516
1517 if (!__in6_dev_get(dev))
1518 break;
1519
1520 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
1521 NULL, 0);
c40e89fd
AK
1522
1523 if (rt && rt->dst.dev)
1524 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
1525 ip6_rt_put(rt);
1526 break;
1527 }
1528#endif
1529 }
1530
1531 if (ldev_mtu <= 0)
1532 return;
1533
1534 geneve_change_mtu(dev, ldev_mtu - info->options_len);
1535}
1536
5b861f6b
GM
1537static int geneve_newlink(struct net *net, struct net_device *dev,
1538 struct nlattr *tb[], struct nlattr *data[],
1539 struct netlink_ext_ack *extack)
1540{
a025fb5f 1541 enum ifla_geneve_df df = GENEVE_DF_UNSET;
5b861f6b
GM
1542 bool use_udp6_rx_checksums = false;
1543 struct ip_tunnel_info info;
52d0d404 1544 bool ttl_inherit = false;
5b861f6b
GM
1545 bool metadata = false;
1546 int err;
1547
1548 init_tnl_info(&info, GENEVE_UDP_PORT);
c5ebc440 1549 err = geneve_nl2info(tb, data, extack, &info, &metadata,
a025fb5f 1550 &use_udp6_rx_checksums, &ttl_inherit, &df, false);
5b861f6b
GM
1551 if (err)
1552 return err;
abe492b4 1553
c40e89fd 1554 err = geneve_configure(net, dev, extack, &info, metadata,
a025fb5f 1555 use_udp6_rx_checksums, ttl_inherit, df);
c40e89fd
AK
1556 if (err)
1557 return err;
1558
1559 geneve_link_config(dev, &info, tb);
1560
1561 return 0;
2d07dc79
JL
1562}
1563
5b861f6b
GM
1564/* Quiesces the geneve device data path for both TX and RX.
1565 *
1566 * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
1567 * So, if we set that socket to NULL under RCU and wait for synchronize_net()
1568 * to complete for the existing set of in-flight packets to be transmitted,
1569 * then we would have quiesced the transmit data path. All the future packets
1570 * will get dropped until we unquiesce the data path.
1571 *
1572 * On receive geneve dereference the geneve_sock stashed in the socket. So,
1573 * if we set that to NULL under RCU and wait for synchronize_net() to
1574 * complete, then we would have quiesced the receive data path.
1575 */
1576static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
1577 struct geneve_sock **gs6)
1578{
1579 *gs4 = rtnl_dereference(geneve->sock4);
1580 rcu_assign_pointer(geneve->sock4, NULL);
1581 if (*gs4)
1582 rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
1583#if IS_ENABLED(CONFIG_IPV6)
1584 *gs6 = rtnl_dereference(geneve->sock6);
1585 rcu_assign_pointer(geneve->sock6, NULL);
1586 if (*gs6)
1587 rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
1588#else
1589 *gs6 = NULL;
1590#endif
1591 synchronize_net();
1592}
1593
1594/* Resumes the geneve device data path for both TX and RX. */
1595static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
1596 struct geneve_sock __maybe_unused *gs6)
1597{
1598 rcu_assign_pointer(geneve->sock4, gs4);
1599 if (gs4)
1600 rcu_assign_sk_user_data(gs4->sock->sk, gs4);
1601#if IS_ENABLED(CONFIG_IPV6)
1602 rcu_assign_pointer(geneve->sock6, gs6);
1603 if (gs6)
1604 rcu_assign_sk_user_data(gs6->sock->sk, gs6);
1605#endif
1606 synchronize_net();
1607}
1608
1609static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
1610 struct nlattr *data[],
1611 struct netlink_ext_ack *extack)
1612{
1613 struct geneve_dev *geneve = netdev_priv(dev);
1614 struct geneve_sock *gs4, *gs6;
1615 struct ip_tunnel_info info;
1616 bool metadata;
1617 bool use_udp6_rx_checksums;
a025fb5f 1618 enum ifla_geneve_df df;
52d0d404 1619 bool ttl_inherit;
5b861f6b
GM
1620 int err;
1621
1622 /* If the geneve device is configured for metadata (or externally
1623 * controlled, for example, OVS), then nothing can be changed.
1624 */
1625 if (geneve->collect_md)
1626 return -EOPNOTSUPP;
1627
1628 /* Start with the existing info. */
1629 memcpy(&info, &geneve->info, sizeof(info));
1630 metadata = geneve->collect_md;
1631 use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
52d0d404 1632 ttl_inherit = geneve->ttl_inherit;
c5ebc440 1633 err = geneve_nl2info(tb, data, extack, &info, &metadata,
a025fb5f 1634 &use_udp6_rx_checksums, &ttl_inherit, &df, true);
5b861f6b
GM
1635 if (err)
1636 return err;
1637
c40e89fd 1638 if (!geneve_dst_addr_equal(&geneve->info, &info)) {
5b861f6b 1639 dst_cache_reset(&info.dst_cache);
c40e89fd
AK
1640 geneve_link_config(dev, &info, tb);
1641 }
5b861f6b
GM
1642
1643 geneve_quiesce(geneve, &gs4, &gs6);
1644 geneve->info = info;
1645 geneve->collect_md = metadata;
1646 geneve->use_udp6_rx_checksums = use_udp6_rx_checksums;
52d0d404 1647 geneve->ttl_inherit = ttl_inherit;
5b861f6b
GM
1648 geneve_unquiesce(geneve, gs4, gs6);
1649
1650 return 0;
1651}
1652
2d07dc79
JL
1653static void geneve_dellink(struct net_device *dev, struct list_head *head)
1654{
1655 struct geneve_dev *geneve = netdev_priv(dev);
1656
2d07dc79
JL
1657 list_del(&geneve->next);
1658 unregister_netdevice_queue(dev, head);
1659}
1660
1661static size_t geneve_get_size(const struct net_device *dev)
1662{
1663 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */
8ed66f0e 1664 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
8760ce58 1665 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
d8951125 1666 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
a025fb5f 1667 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */
8eb3b995 1668 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
7bbe33ff 1669 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
e305ac6c 1670 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
abe492b4
TH
1671 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
1672 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
1673 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
52d0d404 1674 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
2d07dc79
JL
1675 0;
1676}
1677
1678static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
1679{
1680 struct geneve_dev *geneve = netdev_priv(dev);
9b4437a5 1681 struct ip_tunnel_info *info = &geneve->info;
52d0d404 1682 bool ttl_inherit = geneve->ttl_inherit;
fd7eafd0 1683 bool metadata = geneve->collect_md;
9b4437a5 1684 __u8 tmp_vni[3];
2d07dc79
JL
1685 __u32 vni;
1686
9b4437a5 1687 tunnel_id_to_vni(info->key.tun_id, tmp_vni);
1688 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
2d07dc79
JL
1689 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
1690 goto nla_put_failure;
1691
fd7eafd0 1692 if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
8ed66f0e 1693 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
9b4437a5 1694 info->key.u.ipv4.dst))
1695 goto nla_put_failure;
9b4437a5 1696 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
1697 !!(info->key.tun_flags & TUNNEL_CSUM)))
8ed66f0e 1698 goto nla_put_failure;
9b4437a5 1699
8ed66f0e 1700#if IS_ENABLED(CONFIG_IPV6)
fd7eafd0 1701 } else if (!metadata) {
8ed66f0e 1702 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
9b4437a5 1703 &info->key.u.ipv6.dst))
1704 goto nla_put_failure;
9b4437a5 1705 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
1706 !(info->key.tun_flags & TUNNEL_CSUM)))
1707 goto nla_put_failure;
11387fe4 1708#endif
fd7eafd0 1709 }
2d07dc79 1710
9b4437a5 1711 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
1712 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
1713 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
8760ce58
JL
1714 goto nla_put_failure;
1715
a025fb5f
SB
1716 if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->df))
1717 goto nla_put_failure;
1718
9b4437a5 1719 if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
cd7918b3
PS
1720 goto nla_put_failure;
1721
fd7eafd0 1722 if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
f9094b76 1723 goto nla_put_failure;
fd7eafd0 1724
f9094b76 1725#if IS_ENABLED(CONFIG_IPV6)
fd7eafd0
HL
1726 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
1727 !geneve->use_udp6_rx_checksums))
1728 goto nla_put_failure;
f9094b76 1729#endif
fd7eafd0 1730
52d0d404
HL
1731 if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
1732 goto nla_put_failure;
1733
2d07dc79
JL
1734 return 0;
1735
1736nla_put_failure:
1737 return -EMSGSIZE;
1738}
1739
1740static struct rtnl_link_ops geneve_link_ops __read_mostly = {
1741 .kind = "geneve",
1742 .maxtype = IFLA_GENEVE_MAX,
1743 .policy = geneve_policy,
1744 .priv_size = sizeof(struct geneve_dev),
1745 .setup = geneve_setup,
1746 .validate = geneve_validate,
1747 .newlink = geneve_newlink,
5b861f6b 1748 .changelink = geneve_changelink,
2d07dc79
JL
1749 .dellink = geneve_dellink,
1750 .get_size = geneve_get_size,
1751 .fill_info = geneve_fill_info,
1752};
1753
e305ac6c
PS
1754struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
1755 u8 name_assign_type, u16 dst_port)
1756{
1757 struct nlattr *tb[IFLA_MAX + 1];
9b4437a5 1758 struct ip_tunnel_info info;
e305ac6c 1759 struct net_device *dev;
106da663 1760 LIST_HEAD(list_kill);
e305ac6c
PS
1761 int err;
1762
1763 memset(tb, 0, sizeof(tb));
1764 dev = rtnl_create_link(net, name, name_assign_type,
d0522f1c 1765 &geneve_link_ops, tb, NULL);
e305ac6c
PS
1766 if (IS_ERR(dev))
1767 return dev;
1768
9b4437a5 1769 init_tnl_info(&info, dst_port);
a025fb5f
SB
1770 err = geneve_configure(net, dev, NULL, &info,
1771 true, true, false, GENEVE_DF_UNSET);
106da663
ND
1772 if (err) {
1773 free_netdev(dev);
1774 return ERR_PTR(err);
1775 }
7e059158
DW
1776
1777 /* openvswitch users expect packet sizes to be unrestricted,
1778 * so set the largest MTU we can.
1779 */
91572088 1780 err = geneve_change_mtu(dev, IP_MAX_MTU);
7e059158
DW
1781 if (err)
1782 goto err;
1783
41009481
ND
1784 err = rtnl_configure_link(dev, NULL);
1785 if (err < 0)
1786 goto err;
1787
e305ac6c 1788 return dev;
9b4437a5 1789err:
106da663
ND
1790 geneve_dellink(dev, &list_kill);
1791 unregister_netdevice_many(&list_kill);
7e059158 1792 return ERR_PTR(err);
e305ac6c
PS
1793}
1794EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
1795
681e683f
HFS
1796static int geneve_netdevice_event(struct notifier_block *unused,
1797 unsigned long event, void *ptr)
1798{
1799 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1800
2d2b13fc 1801 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
04584957 1802 event == NETDEV_UDP_TUNNEL_DROP_INFO) {
2d2b13fc 1803 geneve_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
04584957
SD
1804 } else if (event == NETDEV_UNREGISTER) {
1805 geneve_offload_rx_ports(dev, false);
1806 } else if (event == NETDEV_REGISTER) {
1807 geneve_offload_rx_ports(dev, true);
1808 }
681e683f
HFS
1809
1810 return NOTIFY_DONE;
1811}
1812
1813static struct notifier_block geneve_notifier_block __read_mostly = {
1814 .notifier_call = geneve_netdevice_event,
1815};
1816
2d07dc79
JL
1817static __net_init int geneve_init_net(struct net *net)
1818{
1819 struct geneve_net *gn = net_generic(net, geneve_net_id);
2d07dc79
JL
1820
1821 INIT_LIST_HEAD(&gn->geneve_list);
371bd106 1822 INIT_LIST_HEAD(&gn->sock_list);
2d07dc79
JL
1823 return 0;
1824}
1825
2843a253 1826static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
2d07dc79
JL
1827{
1828 struct geneve_net *gn = net_generic(net, geneve_net_id);
1829 struct geneve_dev *geneve, *next;
1830 struct net_device *dev, *aux;
2d07dc79
JL
1831
1832 /* gather any geneve devices that were moved into this ns */
1833 for_each_netdev_safe(net, dev, aux)
1834 if (dev->rtnl_link_ops == &geneve_link_ops)
2843a253 1835 unregister_netdevice_queue(dev, head);
2d07dc79
JL
1836
1837 /* now gather any other geneve devices that were created in this ns */
1838 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
1839 /* If geneve->dev is in the same netns, it was already added
1840 * to the list by the previous loop.
1841 */
1842 if (!net_eq(dev_net(geneve->dev), net))
2843a253 1843 unregister_netdevice_queue(geneve->dev, head);
2d07dc79
JL
1844 }
1845
2843a253
HY
1846 WARN_ON_ONCE(!list_empty(&gn->sock_list));
1847}
1848
1849static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
1850{
1851 struct net *net;
1852 LIST_HEAD(list);
1853
1854 rtnl_lock();
1855 list_for_each_entry(net, net_list, exit_list)
1856 geneve_destroy_tunnels(net, &list);
1857
2d07dc79
JL
1858 /* unregister the devices gathered above */
1859 unregister_netdevice_many(&list);
1860 rtnl_unlock();
1861}
1862
1863static struct pernet_operations geneve_net_ops = {
1864 .init = geneve_init_net,
2843a253 1865 .exit_batch = geneve_exit_batch_net,
2d07dc79
JL
1866 .id = &geneve_net_id,
1867 .size = sizeof(struct geneve_net),
1868};
1869
1870static int __init geneve_init_module(void)
1871{
1872 int rc;
1873
1874 rc = register_pernet_subsys(&geneve_net_ops);
1875 if (rc)
1876 goto out1;
1877
681e683f 1878 rc = register_netdevice_notifier(&geneve_notifier_block);
2d07dc79
JL
1879 if (rc)
1880 goto out2;
1881
681e683f
HFS
1882 rc = rtnl_link_register(&geneve_link_ops);
1883 if (rc)
1884 goto out3;
1885
2d07dc79 1886 return 0;
681e683f
HFS
1887out3:
1888 unregister_netdevice_notifier(&geneve_notifier_block);
2d07dc79
JL
1889out2:
1890 unregister_pernet_subsys(&geneve_net_ops);
1891out1:
1892 return rc;
1893}
1894late_initcall(geneve_init_module);
1895
1896static void __exit geneve_cleanup_module(void)
1897{
1898 rtnl_link_unregister(&geneve_link_ops);
681e683f 1899 unregister_netdevice_notifier(&geneve_notifier_block);
2d07dc79
JL
1900 unregister_pernet_subsys(&geneve_net_ops);
1901}
1902module_exit(geneve_cleanup_module);
1903
1904MODULE_LICENSE("GPL");
1905MODULE_VERSION(GENEVE_NETDEV_VER);
1906MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
1907MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
1908MODULE_ALIAS_RTNL_LINK("geneve");