]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blob - drivers/net/geneve.c
net: add netlink_ext_ack argument to rtnl_link_ops.validate
[mirror_ubuntu-hirsute-kernel.git] / drivers / net / geneve.c
1 /*
2 * GENEVE: Generic Network Virtualization Encapsulation
3 *
4 * Copyright (c) 2015 Red Hat, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/etherdevice.h>
16 #include <linux/hash.h>
17 #include <net/dst_metadata.h>
18 #include <net/gro_cells.h>
19 #include <net/rtnetlink.h>
20 #include <net/geneve.h>
21 #include <net/protocol.h>
22
/* Version string reported in the ethtool driver info. */
#define GENEVE_NETDEV_VER	"0.6"

/* UDP destination port a new link starts out with. */
#define GENEVE_UDP_PORT		6081

/* The VNI is 24 bits wide: number of distinct IDs and the matching mask. */
#define GENEVE_N_VID		(1u << 24)
#define GENEVE_VID_MASK		(GENEVE_N_VID - 1)

/* Dimensions of the per-socket VNI hash table. */
#define VNI_HASH_BITS		10
#define VNI_HASH_SIZE		(1 << VNI_HASH_BITS)
32
33 static bool log_ecn_error = true;
34 module_param(log_ecn_error, bool, 0644);
35 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
36
/* The only protocol version accepted on receive and emitted on transmit. */
#define GENEVE_VER		0
/* Encapsulation overhead without options: UDP header plus base GENEVE header. */
#define GENEVE_BASE_HLEN	(sizeof(struct udphdr) + sizeof(struct genevehdr))
39
40 /* per-network namespace private data for this module */
41 struct geneve_net {
42 struct list_head geneve_list;
43 struct list_head sock_list;
44 };
45
/* Key handed to net_generic() to look up this module's struct geneve_net. */
static unsigned int geneve_net_id;
47
48 /* Pseudo network device */
49 struct geneve_dev {
50 struct hlist_node hlist; /* vni hash table */
51 struct net *net; /* netns for packet i/o */
52 struct net_device *dev; /* netdev for geneve tunnel */
53 struct ip_tunnel_info info;
54 struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */
55 #if IS_ENABLED(CONFIG_IPV6)
56 struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */
57 #endif
58 struct list_head next; /* geneve's per namespace list */
59 struct gro_cells gro_cells;
60 bool collect_md;
61 bool use_udp6_rx_checksums;
62 };
63
64 struct geneve_sock {
65 bool collect_md;
66 struct list_head list;
67 struct socket *sock;
68 struct rcu_head rcu;
69 int refcnt;
70 struct hlist_head vni_list[VNI_HASH_SIZE];
71 };
72
73 static inline __u32 geneve_net_vni_hash(u8 vni[3])
74 {
75 __u32 vnid;
76
77 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
78 return hash_32(vnid, VNI_HASH_BITS);
79 }
80
81 static __be64 vni_to_tunnel_id(const __u8 *vni)
82 {
83 #ifdef __BIG_ENDIAN
84 return (vni[0] << 16) | (vni[1] << 8) | vni[2];
85 #else
86 return (__force __be64)(((__force u64)vni[0] << 40) |
87 ((__force u64)vni[1] << 48) |
88 ((__force u64)vni[2] << 56));
89 #endif
90 }
91
92 /* Convert 64 bit tunnel ID to 24 bit VNI. */
93 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
94 {
95 #ifdef __BIG_ENDIAN
96 vni[0] = (__force __u8)(tun_id >> 16);
97 vni[1] = (__force __u8)(tun_id >> 8);
98 vni[2] = (__force __u8)tun_id;
99 #else
100 vni[0] = (__force __u8)((__force u64)tun_id >> 40);
101 vni[1] = (__force __u8)((__force u64)tun_id >> 48);
102 vni[2] = (__force __u8)((__force u64)tun_id >> 56);
103 #endif
104 }
105
/*
 * Compare the VNI embedded in a tunnel ID with a 3-byte VNI.
 *
 * @tun_id: the eight bytes of a __be64 tunnel ID as they sit in memory
 * @vni:    3-byte VNI, most significant byte first
 *
 * vni_to_tunnel_id() places the VNI in the low 24 bits of the big-endian
 * value, i.e. in memory bytes 5..7 regardless of host endianness, so one
 * memcmp() is correct everywhere.  The former big-endian special case
 * compared bytes 2..0 instead and therefore never matched a real VNI.
 *
 * Returns true when both describe the same VNI.
 */
static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
{
	return !memcmp(vni, &tun_id[5], 3);
}
116
117 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
118 {
119 return gs->sock->sk->sk_family;
120 }
121
122 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
123 __be32 addr, u8 vni[])
124 {
125 struct hlist_head *vni_list_head;
126 struct geneve_dev *geneve;
127 __u32 hash;
128
129 /* Find the device for this VNI */
130 hash = geneve_net_vni_hash(vni);
131 vni_list_head = &gs->vni_list[hash];
132 hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
133 if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
134 addr == geneve->info.key.u.ipv4.dst)
135 return geneve;
136 }
137 return NULL;
138 }
139
140 #if IS_ENABLED(CONFIG_IPV6)
141 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
142 struct in6_addr addr6, u8 vni[])
143 {
144 struct hlist_head *vni_list_head;
145 struct geneve_dev *geneve;
146 __u32 hash;
147
148 /* Find the device for this VNI */
149 hash = geneve_net_vni_hash(vni);
150 vni_list_head = &gs->vni_list[hash];
151 hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
152 if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
153 ipv6_addr_equal(&addr6, &geneve->info.key.u.ipv6.dst))
154 return geneve;
155 }
156 return NULL;
157 }
158 #endif
159
160 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
161 {
162 return (struct genevehdr *)(udp_hdr(skb) + 1);
163 }
164
165 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
166 struct sk_buff *skb)
167 {
168 static u8 zero_vni[3];
169 u8 *vni;
170
171 if (geneve_get_sk_family(gs) == AF_INET) {
172 struct iphdr *iph;
173 __be32 addr;
174
175 iph = ip_hdr(skb); /* outer IP header... */
176
177 if (gs->collect_md) {
178 vni = zero_vni;
179 addr = 0;
180 } else {
181 vni = geneve_hdr(skb)->vni;
182 addr = iph->saddr;
183 }
184
185 return geneve_lookup(gs, addr, vni);
186 #if IS_ENABLED(CONFIG_IPV6)
187 } else if (geneve_get_sk_family(gs) == AF_INET6) {
188 static struct in6_addr zero_addr6;
189 struct ipv6hdr *ip6h;
190 struct in6_addr addr6;
191
192 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
193
194 if (gs->collect_md) {
195 vni = zero_vni;
196 addr6 = zero_addr6;
197 } else {
198 vni = geneve_hdr(skb)->vni;
199 addr6 = ip6h->saddr;
200 }
201
202 return geneve6_lookup(gs, addr6, vni);
203 #endif
204 }
205 return NULL;
206 }
207
208 /* geneve receive/decap routine */
209 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
210 struct sk_buff *skb)
211 {
212 struct genevehdr *gnvh = geneve_hdr(skb);
213 struct metadata_dst *tun_dst = NULL;
214 struct pcpu_sw_netstats *stats;
215 unsigned int len;
216 int err = 0;
217 void *oiph;
218
219 if (ip_tunnel_collect_metadata() || gs->collect_md) {
220 __be16 flags;
221
222 flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
223 (gnvh->oam ? TUNNEL_OAM : 0) |
224 (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
225
226 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
227 vni_to_tunnel_id(gnvh->vni),
228 gnvh->opt_len * 4);
229 if (!tun_dst) {
230 geneve->dev->stats.rx_dropped++;
231 goto drop;
232 }
233 /* Update tunnel dst according to Geneve options. */
234 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
235 gnvh->options, gnvh->opt_len * 4);
236 } else {
237 /* Drop packets w/ critical options,
238 * since we don't support any...
239 */
240 if (gnvh->critical) {
241 geneve->dev->stats.rx_frame_errors++;
242 geneve->dev->stats.rx_errors++;
243 goto drop;
244 }
245 }
246
247 skb_reset_mac_header(skb);
248 skb->protocol = eth_type_trans(skb, geneve->dev);
249 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
250
251 if (tun_dst)
252 skb_dst_set(skb, &tun_dst->dst);
253
254 /* Ignore packet loops (and multicast echo) */
255 if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) {
256 geneve->dev->stats.rx_errors++;
257 goto drop;
258 }
259
260 oiph = skb_network_header(skb);
261 skb_reset_network_header(skb);
262
263 if (geneve_get_sk_family(gs) == AF_INET)
264 err = IP_ECN_decapsulate(oiph, skb);
265 #if IS_ENABLED(CONFIG_IPV6)
266 else
267 err = IP6_ECN_decapsulate(oiph, skb);
268 #endif
269
270 if (unlikely(err)) {
271 if (log_ecn_error) {
272 if (geneve_get_sk_family(gs) == AF_INET)
273 net_info_ratelimited("non-ECT from %pI4 "
274 "with TOS=%#x\n",
275 &((struct iphdr *)oiph)->saddr,
276 ((struct iphdr *)oiph)->tos);
277 #if IS_ENABLED(CONFIG_IPV6)
278 else
279 net_info_ratelimited("non-ECT from %pI6\n",
280 &((struct ipv6hdr *)oiph)->saddr);
281 #endif
282 }
283 if (err > 1) {
284 ++geneve->dev->stats.rx_frame_errors;
285 ++geneve->dev->stats.rx_errors;
286 goto drop;
287 }
288 }
289
290 len = skb->len;
291 err = gro_cells_receive(&geneve->gro_cells, skb);
292 if (likely(err == NET_RX_SUCCESS)) {
293 stats = this_cpu_ptr(geneve->dev->tstats);
294 u64_stats_update_begin(&stats->syncp);
295 stats->rx_packets++;
296 stats->rx_bytes += len;
297 u64_stats_update_end(&stats->syncp);
298 }
299 return;
300 drop:
301 /* Consume bad packet */
302 kfree_skb(skb);
303 }
304
305 /* Setup stats when device is created */
306 static int geneve_init(struct net_device *dev)
307 {
308 struct geneve_dev *geneve = netdev_priv(dev);
309 int err;
310
311 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
312 if (!dev->tstats)
313 return -ENOMEM;
314
315 err = gro_cells_init(&geneve->gro_cells, dev);
316 if (err) {
317 free_percpu(dev->tstats);
318 return err;
319 }
320
321 err = dst_cache_init(&geneve->info.dst_cache, GFP_KERNEL);
322 if (err) {
323 free_percpu(dev->tstats);
324 gro_cells_destroy(&geneve->gro_cells);
325 return err;
326 }
327 return 0;
328 }
329
330 static void geneve_uninit(struct net_device *dev)
331 {
332 struct geneve_dev *geneve = netdev_priv(dev);
333
334 dst_cache_destroy(&geneve->info.dst_cache);
335 gro_cells_destroy(&geneve->gro_cells);
336 free_percpu(dev->tstats);
337 }
338
339 /* Callback from net/ipv4/udp.c to receive packets */
340 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
341 {
342 struct genevehdr *geneveh;
343 struct geneve_dev *geneve;
344 struct geneve_sock *gs;
345 int opts_len;
346
347 /* Need UDP and Geneve header to be present */
348 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
349 goto drop;
350
351 /* Return packets with reserved bits set */
352 geneveh = geneve_hdr(skb);
353 if (unlikely(geneveh->ver != GENEVE_VER))
354 goto drop;
355
356 if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
357 goto drop;
358
359 gs = rcu_dereference_sk_user_data(sk);
360 if (!gs)
361 goto drop;
362
363 geneve = geneve_lookup_skb(gs, skb);
364 if (!geneve)
365 goto drop;
366
367 opts_len = geneveh->opt_len * 4;
368 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
369 htons(ETH_P_TEB),
370 !net_eq(geneve->net, dev_net(geneve->dev)))) {
371 geneve->dev->stats.rx_dropped++;
372 goto drop;
373 }
374
375 geneve_rx(geneve, gs, skb);
376 return 0;
377
378 drop:
379 /* Consume bad packet */
380 kfree_skb(skb);
381 return 0;
382 }
383
384 static struct socket *geneve_create_sock(struct net *net, bool ipv6,
385 __be16 port, bool ipv6_rx_csum)
386 {
387 struct socket *sock;
388 struct udp_port_cfg udp_conf;
389 int err;
390
391 memset(&udp_conf, 0, sizeof(udp_conf));
392
393 if (ipv6) {
394 udp_conf.family = AF_INET6;
395 udp_conf.ipv6_v6only = 1;
396 udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
397 } else {
398 udp_conf.family = AF_INET;
399 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
400 }
401
402 udp_conf.local_udp_port = port;
403
404 /* Open UDP socket */
405 err = udp_sock_create(net, &udp_conf, &sock);
406 if (err < 0)
407 return ERR_PTR(err);
408
409 return sock;
410 }
411
412 static int geneve_hlen(struct genevehdr *gh)
413 {
414 return sizeof(*gh) + gh->opt_len * 4;
415 }
416
417 static struct sk_buff **geneve_gro_receive(struct sock *sk,
418 struct sk_buff **head,
419 struct sk_buff *skb)
420 {
421 struct sk_buff *p, **pp = NULL;
422 struct genevehdr *gh, *gh2;
423 unsigned int hlen, gh_len, off_gnv;
424 const struct packet_offload *ptype;
425 __be16 type;
426 int flush = 1;
427
428 off_gnv = skb_gro_offset(skb);
429 hlen = off_gnv + sizeof(*gh);
430 gh = skb_gro_header_fast(skb, off_gnv);
431 if (skb_gro_header_hard(skb, hlen)) {
432 gh = skb_gro_header_slow(skb, hlen, off_gnv);
433 if (unlikely(!gh))
434 goto out;
435 }
436
437 if (gh->ver != GENEVE_VER || gh->oam)
438 goto out;
439 gh_len = geneve_hlen(gh);
440
441 hlen = off_gnv + gh_len;
442 if (skb_gro_header_hard(skb, hlen)) {
443 gh = skb_gro_header_slow(skb, hlen, off_gnv);
444 if (unlikely(!gh))
445 goto out;
446 }
447
448 for (p = *head; p; p = p->next) {
449 if (!NAPI_GRO_CB(p)->same_flow)
450 continue;
451
452 gh2 = (struct genevehdr *)(p->data + off_gnv);
453 if (gh->opt_len != gh2->opt_len ||
454 memcmp(gh, gh2, gh_len)) {
455 NAPI_GRO_CB(p)->same_flow = 0;
456 continue;
457 }
458 }
459
460 type = gh->proto_type;
461
462 rcu_read_lock();
463 ptype = gro_find_receive_by_type(type);
464 if (!ptype)
465 goto out_unlock;
466
467 skb_gro_pull(skb, gh_len);
468 skb_gro_postpull_rcsum(skb, gh, gh_len);
469 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
470 flush = 0;
471
472 out_unlock:
473 rcu_read_unlock();
474 out:
475 NAPI_GRO_CB(skb)->flush |= flush;
476
477 return pp;
478 }
479
480 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
481 int nhoff)
482 {
483 struct genevehdr *gh;
484 struct packet_offload *ptype;
485 __be16 type;
486 int gh_len;
487 int err = -ENOSYS;
488
489 gh = (struct genevehdr *)(skb->data + nhoff);
490 gh_len = geneve_hlen(gh);
491 type = gh->proto_type;
492
493 rcu_read_lock();
494 ptype = gro_find_complete_by_type(type);
495 if (ptype)
496 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
497
498 rcu_read_unlock();
499
500 skb_set_inner_mac_header(skb, nhoff + gh_len);
501
502 return err;
503 }
504
505 /* Create new listen socket if needed */
506 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
507 bool ipv6, bool ipv6_rx_csum)
508 {
509 struct geneve_net *gn = net_generic(net, geneve_net_id);
510 struct geneve_sock *gs;
511 struct socket *sock;
512 struct udp_tunnel_sock_cfg tunnel_cfg;
513 int h;
514
515 gs = kzalloc(sizeof(*gs), GFP_KERNEL);
516 if (!gs)
517 return ERR_PTR(-ENOMEM);
518
519 sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
520 if (IS_ERR(sock)) {
521 kfree(gs);
522 return ERR_CAST(sock);
523 }
524
525 gs->sock = sock;
526 gs->refcnt = 1;
527 for (h = 0; h < VNI_HASH_SIZE; ++h)
528 INIT_HLIST_HEAD(&gs->vni_list[h]);
529
530 /* Initialize the geneve udp offloads structure */
531 udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
532
533 /* Mark socket as an encapsulation socket */
534 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
535 tunnel_cfg.sk_user_data = gs;
536 tunnel_cfg.encap_type = 1;
537 tunnel_cfg.gro_receive = geneve_gro_receive;
538 tunnel_cfg.gro_complete = geneve_gro_complete;
539 tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
540 tunnel_cfg.encap_destroy = NULL;
541 setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
542 list_add(&gs->list, &gn->sock_list);
543 return gs;
544 }
545
546 static void __geneve_sock_release(struct geneve_sock *gs)
547 {
548 if (!gs || --gs->refcnt)
549 return;
550
551 list_del(&gs->list);
552 udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
553 udp_tunnel_sock_release(gs->sock);
554 kfree_rcu(gs, rcu);
555 }
556
557 static void geneve_sock_release(struct geneve_dev *geneve)
558 {
559 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
560 #if IS_ENABLED(CONFIG_IPV6)
561 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
562
563 rcu_assign_pointer(geneve->sock6, NULL);
564 #endif
565
566 rcu_assign_pointer(geneve->sock4, NULL);
567 synchronize_net();
568
569 __geneve_sock_release(gs4);
570 #if IS_ENABLED(CONFIG_IPV6)
571 __geneve_sock_release(gs6);
572 #endif
573 }
574
575 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
576 sa_family_t family,
577 __be16 dst_port)
578 {
579 struct geneve_sock *gs;
580
581 list_for_each_entry(gs, &gn->sock_list, list) {
582 if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
583 geneve_get_sk_family(gs) == family) {
584 return gs;
585 }
586 }
587 return NULL;
588 }
589
590 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
591 {
592 struct net *net = geneve->net;
593 struct geneve_net *gn = net_generic(net, geneve_net_id);
594 struct geneve_sock *gs;
595 __u8 vni[3];
596 __u32 hash;
597
598 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->info.key.tp_dst);
599 if (gs) {
600 gs->refcnt++;
601 goto out;
602 }
603
604 gs = geneve_socket_create(net, geneve->info.key.tp_dst, ipv6,
605 geneve->use_udp6_rx_checksums);
606 if (IS_ERR(gs))
607 return PTR_ERR(gs);
608
609 out:
610 gs->collect_md = geneve->collect_md;
611 #if IS_ENABLED(CONFIG_IPV6)
612 if (ipv6)
613 rcu_assign_pointer(geneve->sock6, gs);
614 else
615 #endif
616 rcu_assign_pointer(geneve->sock4, gs);
617
618 tunnel_id_to_vni(geneve->info.key.tun_id, vni);
619 hash = geneve_net_vni_hash(vni);
620 hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]);
621 return 0;
622 }
623
624 static int geneve_open(struct net_device *dev)
625 {
626 struct geneve_dev *geneve = netdev_priv(dev);
627 bool ipv6 = !!(geneve->info.mode & IP_TUNNEL_INFO_IPV6);
628 bool metadata = geneve->collect_md;
629 int ret = 0;
630
631 #if IS_ENABLED(CONFIG_IPV6)
632 if (ipv6 || metadata)
633 ret = geneve_sock_add(geneve, true);
634 #endif
635 if (!ret && (!ipv6 || metadata))
636 ret = geneve_sock_add(geneve, false);
637 if (ret < 0)
638 geneve_sock_release(geneve);
639
640 return ret;
641 }
642
643 static int geneve_stop(struct net_device *dev)
644 {
645 struct geneve_dev *geneve = netdev_priv(dev);
646
647 if (!hlist_unhashed(&geneve->hlist))
648 hlist_del_rcu(&geneve->hlist);
649 geneve_sock_release(geneve);
650 return 0;
651 }
652
653 static void geneve_build_header(struct genevehdr *geneveh,
654 const struct ip_tunnel_info *info)
655 {
656 geneveh->ver = GENEVE_VER;
657 geneveh->opt_len = info->options_len / 4;
658 geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
659 geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
660 geneveh->rsvd1 = 0;
661 tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
662 geneveh->proto_type = htons(ETH_P_TEB);
663 geneveh->rsvd2 = 0;
664
665 ip_tunnel_info_opts_get(geneveh->options, info);
666 }
667
668 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
669 const struct ip_tunnel_info *info,
670 bool xnet, int ip_hdr_len)
671 {
672 bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
673 struct genevehdr *gnvh;
674 int min_headroom;
675 int err;
676
677 skb_reset_mac_header(skb);
678 skb_scrub_packet(skb, xnet);
679
680 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
681 GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
682 err = skb_cow_head(skb, min_headroom);
683 if (unlikely(err))
684 goto free_dst;
685
686 err = udp_tunnel_handle_offloads(skb, udp_sum);
687 if (err)
688 goto free_dst;
689
690 gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
691 geneve_build_header(gnvh, info);
692 skb_set_inner_protocol(skb, htons(ETH_P_TEB));
693 return 0;
694
695 free_dst:
696 dst_release(dst);
697 return err;
698 }
699
700 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
701 struct net_device *dev,
702 struct flowi4 *fl4,
703 const struct ip_tunnel_info *info)
704 {
705 bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
706 struct geneve_dev *geneve = netdev_priv(dev);
707 struct dst_cache *dst_cache;
708 struct rtable *rt = NULL;
709 __u8 tos;
710
711 if (!rcu_dereference(geneve->sock4))
712 return ERR_PTR(-EIO);
713
714 memset(fl4, 0, sizeof(*fl4));
715 fl4->flowi4_mark = skb->mark;
716 fl4->flowi4_proto = IPPROTO_UDP;
717 fl4->daddr = info->key.u.ipv4.dst;
718 fl4->saddr = info->key.u.ipv4.src;
719
720 tos = info->key.tos;
721 if ((tos == 1) && !geneve->collect_md) {
722 tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
723 use_cache = false;
724 }
725 fl4->flowi4_tos = RT_TOS(tos);
726
727 dst_cache = (struct dst_cache *)&info->dst_cache;
728 if (use_cache) {
729 rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
730 if (rt)
731 return rt;
732 }
733 rt = ip_route_output_key(geneve->net, fl4);
734 if (IS_ERR(rt)) {
735 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
736 return ERR_PTR(-ENETUNREACH);
737 }
738 if (rt->dst.dev == dev) { /* is this necessary? */
739 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
740 ip_rt_put(rt);
741 return ERR_PTR(-ELOOP);
742 }
743 if (use_cache)
744 dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
745 return rt;
746 }
747
748 #if IS_ENABLED(CONFIG_IPV6)
749 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
750 struct net_device *dev,
751 struct flowi6 *fl6,
752 const struct ip_tunnel_info *info)
753 {
754 bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
755 struct geneve_dev *geneve = netdev_priv(dev);
756 struct dst_entry *dst = NULL;
757 struct dst_cache *dst_cache;
758 struct geneve_sock *gs6;
759 __u8 prio;
760
761 gs6 = rcu_dereference(geneve->sock6);
762 if (!gs6)
763 return ERR_PTR(-EIO);
764
765 memset(fl6, 0, sizeof(*fl6));
766 fl6->flowi6_mark = skb->mark;
767 fl6->flowi6_proto = IPPROTO_UDP;
768 fl6->daddr = info->key.u.ipv6.dst;
769 fl6->saddr = info->key.u.ipv6.src;
770 prio = info->key.tos;
771 if ((prio == 1) && !geneve->collect_md) {
772 prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
773 use_cache = false;
774 }
775
776 fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
777 info->key.label);
778 dst_cache = (struct dst_cache *)&info->dst_cache;
779 if (use_cache) {
780 dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
781 if (dst)
782 return dst;
783 }
784 if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
785 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
786 return ERR_PTR(-ENETUNREACH);
787 }
788 if (dst->dev == dev) { /* is this necessary? */
789 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
790 dst_release(dst);
791 return ERR_PTR(-ELOOP);
792 }
793
794 if (use_cache)
795 dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
796 return dst;
797 }
798 #endif
799
800 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
801 struct geneve_dev *geneve,
802 const struct ip_tunnel_info *info)
803 {
804 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
805 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
806 const struct ip_tunnel_key *key = &info->key;
807 struct rtable *rt;
808 struct flowi4 fl4;
809 __u8 tos, ttl;
810 __be16 sport;
811 __be16 df;
812 int err;
813
814 rt = geneve_get_v4_rt(skb, dev, &fl4, info);
815 if (IS_ERR(rt))
816 return PTR_ERR(rt);
817
818 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
819 if (geneve->collect_md) {
820 tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
821 ttl = key->ttl;
822 } else {
823 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
824 ttl = key->ttl ? : ip4_dst_hoplimit(&rt->dst);
825 }
826 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
827
828 err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
829 if (unlikely(err))
830 return err;
831
832 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
833 tos, ttl, df, sport, geneve->info.key.tp_dst,
834 !net_eq(geneve->net, dev_net(geneve->dev)),
835 !(info->key.tun_flags & TUNNEL_CSUM));
836 return 0;
837 }
838
839 #if IS_ENABLED(CONFIG_IPV6)
840 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
841 struct geneve_dev *geneve,
842 const struct ip_tunnel_info *info)
843 {
844 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
845 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
846 const struct ip_tunnel_key *key = &info->key;
847 struct dst_entry *dst = NULL;
848 struct flowi6 fl6;
849 __u8 prio, ttl;
850 __be16 sport;
851 int err;
852
853 dst = geneve_get_v6_dst(skb, dev, &fl6, info);
854 if (IS_ERR(dst))
855 return PTR_ERR(dst);
856
857 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
858 if (geneve->collect_md) {
859 prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
860 ttl = key->ttl;
861 } else {
862 prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
863 ip_hdr(skb), skb);
864 ttl = key->ttl ? : ip6_dst_hoplimit(dst);
865 }
866 err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr));
867 if (unlikely(err))
868 return err;
869
870 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
871 &fl6.saddr, &fl6.daddr, prio, ttl,
872 info->key.label, sport, geneve->info.key.tp_dst,
873 !(info->key.tun_flags & TUNNEL_CSUM));
874 return 0;
875 }
876 #endif
877
878 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
879 {
880 struct geneve_dev *geneve = netdev_priv(dev);
881 struct ip_tunnel_info *info = NULL;
882 int err;
883
884 if (geneve->collect_md) {
885 info = skb_tunnel_info(skb);
886 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
887 err = -EINVAL;
888 netdev_dbg(dev, "no tunnel metadata\n");
889 goto tx_error;
890 }
891 } else {
892 info = &geneve->info;
893 }
894
895 rcu_read_lock();
896 #if IS_ENABLED(CONFIG_IPV6)
897 if (info->mode & IP_TUNNEL_INFO_IPV6)
898 err = geneve6_xmit_skb(skb, dev, geneve, info);
899 else
900 #endif
901 err = geneve_xmit_skb(skb, dev, geneve, info);
902 rcu_read_unlock();
903
904 if (likely(!err))
905 return NETDEV_TX_OK;
906 tx_error:
907 dev_kfree_skb(skb);
908
909 if (err == -ELOOP)
910 dev->stats.collisions++;
911 else if (err == -ENETUNREACH)
912 dev->stats.tx_carrier_errors++;
913
914 dev->stats.tx_errors++;
915 return NETDEV_TX_OK;
916 }
917
918 static int geneve_change_mtu(struct net_device *dev, int new_mtu)
919 {
920 /* Only possible if called internally, ndo_change_mtu path's new_mtu
921 * is guaranteed to be between dev->min_mtu and dev->max_mtu.
922 */
923 if (new_mtu > dev->max_mtu)
924 new_mtu = dev->max_mtu;
925
926 dev->mtu = new_mtu;
927 return 0;
928 }
929
930 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
931 {
932 struct ip_tunnel_info *info = skb_tunnel_info(skb);
933 struct geneve_dev *geneve = netdev_priv(dev);
934
935 if (ip_tunnel_info_af(info) == AF_INET) {
936 struct rtable *rt;
937 struct flowi4 fl4;
938
939 rt = geneve_get_v4_rt(skb, dev, &fl4, info);
940 if (IS_ERR(rt))
941 return PTR_ERR(rt);
942
943 ip_rt_put(rt);
944 info->key.u.ipv4.src = fl4.saddr;
945 #if IS_ENABLED(CONFIG_IPV6)
946 } else if (ip_tunnel_info_af(info) == AF_INET6) {
947 struct dst_entry *dst;
948 struct flowi6 fl6;
949
950 dst = geneve_get_v6_dst(skb, dev, &fl6, info);
951 if (IS_ERR(dst))
952 return PTR_ERR(dst);
953
954 dst_release(dst);
955 info->key.u.ipv6.src = fl6.saddr;
956 #endif
957 } else {
958 return -EINVAL;
959 }
960
961 info->key.tp_src = udp_flow_src_port(geneve->net, skb,
962 1, USHRT_MAX, true);
963 info->key.tp_dst = geneve->info.key.tp_dst;
964 return 0;
965 }
966
967 static const struct net_device_ops geneve_netdev_ops = {
968 .ndo_init = geneve_init,
969 .ndo_uninit = geneve_uninit,
970 .ndo_open = geneve_open,
971 .ndo_stop = geneve_stop,
972 .ndo_start_xmit = geneve_xmit,
973 .ndo_get_stats64 = ip_tunnel_get_stats64,
974 .ndo_change_mtu = geneve_change_mtu,
975 .ndo_validate_addr = eth_validate_addr,
976 .ndo_set_mac_address = eth_mac_addr,
977 .ndo_fill_metadata_dst = geneve_fill_metadata_dst,
978 };
979
980 static void geneve_get_drvinfo(struct net_device *dev,
981 struct ethtool_drvinfo *drvinfo)
982 {
983 strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
984 strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
985 }
986
987 static const struct ethtool_ops geneve_ethtool_ops = {
988 .get_drvinfo = geneve_get_drvinfo,
989 .get_link = ethtool_op_get_link,
990 };
991
992 /* Info for udev, that this is a virtual tunnel endpoint */
993 static struct device_type geneve_type = {
994 .name = "geneve",
995 };
996
997 /* Calls the ndo_udp_tunnel_add of the caller in order to
998 * supply the listening GENEVE udp ports. Callers are expected
999 * to implement the ndo_udp_tunnel_add.
1000 */
1001 static void geneve_push_rx_ports(struct net_device *dev)
1002 {
1003 struct net *net = dev_net(dev);
1004 struct geneve_net *gn = net_generic(net, geneve_net_id);
1005 struct geneve_sock *gs;
1006
1007 rcu_read_lock();
1008 list_for_each_entry_rcu(gs, &gn->sock_list, list)
1009 udp_tunnel_push_rx_port(dev, gs->sock,
1010 UDP_TUNNEL_TYPE_GENEVE);
1011 rcu_read_unlock();
1012 }
1013
1014 /* Initialize the device structure. */
1015 static void geneve_setup(struct net_device *dev)
1016 {
1017 ether_setup(dev);
1018
1019 dev->netdev_ops = &geneve_netdev_ops;
1020 dev->ethtool_ops = &geneve_ethtool_ops;
1021 dev->needs_free_netdev = true;
1022
1023 SET_NETDEV_DEVTYPE(dev, &geneve_type);
1024
1025 dev->features |= NETIF_F_LLTX;
1026 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
1027 dev->features |= NETIF_F_RXCSUM;
1028 dev->features |= NETIF_F_GSO_SOFTWARE;
1029
1030 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
1031 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1032
1033 /* MTU range: 68 - (something less than 65535) */
1034 dev->min_mtu = ETH_MIN_MTU;
1035 /* The max_mtu calculation does not take account of GENEVE
1036 * options, to avoid excluding potentially valid
1037 * configurations. This will be further reduced by IPvX hdr size.
1038 */
1039 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
1040
1041 netif_keep_dst(dev);
1042 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1043 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
1044 eth_hw_addr_random(dev);
1045 }
1046
1047 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
1048 [IFLA_GENEVE_ID] = { .type = NLA_U32 },
1049 [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1050 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
1051 [IFLA_GENEVE_TTL] = { .type = NLA_U8 },
1052 [IFLA_GENEVE_TOS] = { .type = NLA_U8 },
1053 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 },
1054 [IFLA_GENEVE_PORT] = { .type = NLA_U16 },
1055 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
1056 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 },
1057 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
1058 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
1059 };
1060
1061 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
1062 struct netlink_ext_ack *extack)
1063 {
1064 if (tb[IFLA_ADDRESS]) {
1065 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1066 return -EINVAL;
1067
1068 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1069 return -EADDRNOTAVAIL;
1070 }
1071
1072 if (!data)
1073 return -EINVAL;
1074
1075 if (data[IFLA_GENEVE_ID]) {
1076 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1077
1078 if (vni >= GENEVE_VID_MASK)
1079 return -ERANGE;
1080 }
1081
1082 return 0;
1083 }
1084
1085 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
1086 const struct ip_tunnel_info *info,
1087 bool *tun_on_same_port,
1088 bool *tun_collect_md)
1089 {
1090 struct geneve_dev *geneve, *t = NULL;
1091
1092 *tun_on_same_port = false;
1093 *tun_collect_md = false;
1094 list_for_each_entry(geneve, &gn->geneve_list, next) {
1095 if (info->key.tp_dst == geneve->info.key.tp_dst) {
1096 *tun_collect_md = geneve->collect_md;
1097 *tun_on_same_port = true;
1098 }
1099 if (info->key.tun_id == geneve->info.key.tun_id &&
1100 info->key.tp_dst == geneve->info.key.tp_dst &&
1101 !memcmp(&info->key.u, &geneve->info.key.u, sizeof(info->key.u)))
1102 t = geneve;
1103 }
1104 return t;
1105 }
1106
/*
 * Check whether a buffer consists of zero bytes only.
 *
 * @fp:   start of the buffer
 * @size: number of bytes to examine
 *
 * Returns true when all @size bytes are zero; an empty buffer counts as
 * all-zero.  The index has the same type as @size, which avoids a
 * signed/unsigned comparison and an overflowing counter for large sizes.
 */
static bool is_all_zero(const u8 *fp, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (fp[i])
			return false;
	}

	return true;
}
1116
1117 static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
1118 {
1119 if (info->key.tun_id || info->key.tun_flags || info->key.tos ||
1120 info->key.ttl || info->key.label || info->key.tp_src ||
1121 !is_all_zero((const u8 *)&info->key.u, sizeof(info->key.u)))
1122 return false;
1123 else
1124 return true;
1125 }
1126
1127 static int geneve_configure(struct net *net, struct net_device *dev,
1128 const struct ip_tunnel_info *info,
1129 bool metadata, bool ipv6_rx_csum)
1130 {
1131 struct geneve_net *gn = net_generic(net, geneve_net_id);
1132 struct geneve_dev *t, *geneve = netdev_priv(dev);
1133 bool tun_collect_md, tun_on_same_port;
1134 int err, encap_len;
1135
1136 if (metadata && !is_tnl_info_zero(info))
1137 return -EINVAL;
1138
1139 geneve->net = net;
1140 geneve->dev = dev;
1141
1142 t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
1143 if (t)
1144 return -EBUSY;
1145
1146 /* make enough headroom for basic scenario */
1147 encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
1148 if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
1149 encap_len += sizeof(struct iphdr);
1150 dev->max_mtu -= sizeof(struct iphdr);
1151 } else {
1152 encap_len += sizeof(struct ipv6hdr);
1153 dev->max_mtu -= sizeof(struct ipv6hdr);
1154 }
1155 dev->needed_headroom = encap_len + ETH_HLEN;
1156
1157 if (metadata) {
1158 if (tun_on_same_port)
1159 return -EPERM;
1160 } else {
1161 if (tun_collect_md)
1162 return -EPERM;
1163 }
1164
1165 dst_cache_reset(&geneve->info.dst_cache);
1166 geneve->info = *info;
1167 geneve->collect_md = metadata;
1168 geneve->use_udp6_rx_checksums = ipv6_rx_csum;
1169
1170 err = register_netdevice(dev);
1171 if (err)
1172 return err;
1173
1174 list_add(&geneve->next, &gn->geneve_list);
1175 return 0;
1176 }
1177
1178 static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
1179 {
1180 memset(info, 0, sizeof(*info));
1181 info->key.tp_dst = htons(dst_port);
1182 }
1183
1184 static int geneve_newlink(struct net *net, struct net_device *dev,
1185 struct nlattr *tb[], struct nlattr *data[],
1186 struct netlink_ext_ack *extack)
1187 {
1188 bool use_udp6_rx_checksums = false;
1189 struct ip_tunnel_info info;
1190 bool metadata = false;
1191
1192 init_tnl_info(&info, GENEVE_UDP_PORT);
1193
1194 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6])
1195 return -EINVAL;
1196
1197 if (data[IFLA_GENEVE_REMOTE]) {
1198 info.key.u.ipv4.dst =
1199 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
1200
1201 if (IN_MULTICAST(ntohl(info.key.u.ipv4.dst))) {
1202 netdev_dbg(dev, "multicast remote is unsupported\n");
1203 return -EINVAL;
1204 }
1205 }
1206
1207 if (data[IFLA_GENEVE_REMOTE6]) {
1208 #if IS_ENABLED(CONFIG_IPV6)
1209 info.mode = IP_TUNNEL_INFO_IPV6;
1210 info.key.u.ipv6.dst =
1211 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
1212
1213 if (ipv6_addr_type(&info.key.u.ipv6.dst) &
1214 IPV6_ADDR_LINKLOCAL) {
1215 netdev_dbg(dev, "link-local remote is unsupported\n");
1216 return -EINVAL;
1217 }
1218 if (ipv6_addr_is_multicast(&info.key.u.ipv6.dst)) {
1219 netdev_dbg(dev, "multicast remote is unsupported\n");
1220 return -EINVAL;
1221 }
1222 info.key.tun_flags |= TUNNEL_CSUM;
1223 use_udp6_rx_checksums = true;
1224 #else
1225 return -EPFNOSUPPORT;
1226 #endif
1227 }
1228
1229 if (data[IFLA_GENEVE_ID]) {
1230 __u32 vni;
1231 __u8 tvni[3];
1232
1233 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1234 tvni[0] = (vni & 0x00ff0000) >> 16;
1235 tvni[1] = (vni & 0x0000ff00) >> 8;
1236 tvni[2] = vni & 0x000000ff;
1237
1238 info.key.tun_id = vni_to_tunnel_id(tvni);
1239 }
1240 if (data[IFLA_GENEVE_TTL])
1241 info.key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
1242
1243 if (data[IFLA_GENEVE_TOS])
1244 info.key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
1245
1246 if (data[IFLA_GENEVE_LABEL]) {
1247 info.key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
1248 IPV6_FLOWLABEL_MASK;
1249 if (info.key.label && (!(info.mode & IP_TUNNEL_INFO_IPV6)))
1250 return -EINVAL;
1251 }
1252
1253 if (data[IFLA_GENEVE_PORT])
1254 info.key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
1255
1256 if (data[IFLA_GENEVE_COLLECT_METADATA])
1257 metadata = true;
1258
1259 if (data[IFLA_GENEVE_UDP_CSUM] &&
1260 nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
1261 info.key.tun_flags |= TUNNEL_CSUM;
1262
1263 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] &&
1264 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
1265 info.key.tun_flags &= ~TUNNEL_CSUM;
1266
1267 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] &&
1268 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
1269 use_udp6_rx_checksums = false;
1270
1271 return geneve_configure(net, dev, &info, metadata, use_udp6_rx_checksums);
1272 }
1273
1274 static void geneve_dellink(struct net_device *dev, struct list_head *head)
1275 {
1276 struct geneve_dev *geneve = netdev_priv(dev);
1277
1278 list_del(&geneve->next);
1279 unregister_netdevice_queue(dev, head);
1280 }
1281
1282 static size_t geneve_get_size(const struct net_device *dev)
1283 {
1284 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */
1285 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
1286 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
1287 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
1288 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
1289 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
1290 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
1291 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
1292 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
1293 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
1294 0;
1295 }
1296
1297 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
1298 {
1299 struct geneve_dev *geneve = netdev_priv(dev);
1300 struct ip_tunnel_info *info = &geneve->info;
1301 __u8 tmp_vni[3];
1302 __u32 vni;
1303
1304 tunnel_id_to_vni(info->key.tun_id, tmp_vni);
1305 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
1306 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
1307 goto nla_put_failure;
1308
1309 if (rtnl_dereference(geneve->sock4)) {
1310 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
1311 info->key.u.ipv4.dst))
1312 goto nla_put_failure;
1313
1314 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
1315 !!(info->key.tun_flags & TUNNEL_CSUM)))
1316 goto nla_put_failure;
1317
1318 }
1319
1320 #if IS_ENABLED(CONFIG_IPV6)
1321 if (rtnl_dereference(geneve->sock6)) {
1322 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
1323 &info->key.u.ipv6.dst))
1324 goto nla_put_failure;
1325
1326 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
1327 !(info->key.tun_flags & TUNNEL_CSUM)))
1328 goto nla_put_failure;
1329
1330 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
1331 !geneve->use_udp6_rx_checksums))
1332 goto nla_put_failure;
1333 }
1334 #endif
1335
1336 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
1337 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
1338 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
1339 goto nla_put_failure;
1340
1341 if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
1342 goto nla_put_failure;
1343
1344 if (geneve->collect_md) {
1345 if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
1346 goto nla_put_failure;
1347 }
1348 return 0;
1349
1350 nla_put_failure:
1351 return -EMSGSIZE;
1352 }
1353
1354 static struct rtnl_link_ops geneve_link_ops __read_mostly = {
1355 .kind = "geneve",
1356 .maxtype = IFLA_GENEVE_MAX,
1357 .policy = geneve_policy,
1358 .priv_size = sizeof(struct geneve_dev),
1359 .setup = geneve_setup,
1360 .validate = geneve_validate,
1361 .newlink = geneve_newlink,
1362 .dellink = geneve_dellink,
1363 .get_size = geneve_get_size,
1364 .fill_info = geneve_fill_info,
1365 };
1366
1367 struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
1368 u8 name_assign_type, u16 dst_port)
1369 {
1370 struct nlattr *tb[IFLA_MAX + 1];
1371 struct ip_tunnel_info info;
1372 struct net_device *dev;
1373 LIST_HEAD(list_kill);
1374 int err;
1375
1376 memset(tb, 0, sizeof(tb));
1377 dev = rtnl_create_link(net, name, name_assign_type,
1378 &geneve_link_ops, tb);
1379 if (IS_ERR(dev))
1380 return dev;
1381
1382 init_tnl_info(&info, dst_port);
1383 err = geneve_configure(net, dev, &info, true, true);
1384 if (err) {
1385 free_netdev(dev);
1386 return ERR_PTR(err);
1387 }
1388
1389 /* openvswitch users expect packet sizes to be unrestricted,
1390 * so set the largest MTU we can.
1391 */
1392 err = geneve_change_mtu(dev, IP_MAX_MTU);
1393 if (err)
1394 goto err;
1395
1396 err = rtnl_configure_link(dev, NULL);
1397 if (err < 0)
1398 goto err;
1399
1400 return dev;
1401 err:
1402 geneve_dellink(dev, &list_kill);
1403 unregister_netdevice_many(&list_kill);
1404 return ERR_PTR(err);
1405 }
1406 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
1407
1408 static int geneve_netdevice_event(struct notifier_block *unused,
1409 unsigned long event, void *ptr)
1410 {
1411 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1412
1413 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
1414 geneve_push_rx_ports(dev);
1415
1416 return NOTIFY_DONE;
1417 }
1418
1419 static struct notifier_block geneve_notifier_block __read_mostly = {
1420 .notifier_call = geneve_netdevice_event,
1421 };
1422
1423 static __net_init int geneve_init_net(struct net *net)
1424 {
1425 struct geneve_net *gn = net_generic(net, geneve_net_id);
1426
1427 INIT_LIST_HEAD(&gn->geneve_list);
1428 INIT_LIST_HEAD(&gn->sock_list);
1429 return 0;
1430 }
1431
1432 static void __net_exit geneve_exit_net(struct net *net)
1433 {
1434 struct geneve_net *gn = net_generic(net, geneve_net_id);
1435 struct geneve_dev *geneve, *next;
1436 struct net_device *dev, *aux;
1437 LIST_HEAD(list);
1438
1439 rtnl_lock();
1440
1441 /* gather any geneve devices that were moved into this ns */
1442 for_each_netdev_safe(net, dev, aux)
1443 if (dev->rtnl_link_ops == &geneve_link_ops)
1444 unregister_netdevice_queue(dev, &list);
1445
1446 /* now gather any other geneve devices that were created in this ns */
1447 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
1448 /* If geneve->dev is in the same netns, it was already added
1449 * to the list by the previous loop.
1450 */
1451 if (!net_eq(dev_net(geneve->dev), net))
1452 unregister_netdevice_queue(geneve->dev, &list);
1453 }
1454
1455 /* unregister the devices gathered above */
1456 unregister_netdevice_many(&list);
1457 rtnl_unlock();
1458 }
1459
1460 static struct pernet_operations geneve_net_ops = {
1461 .init = geneve_init_net,
1462 .exit = geneve_exit_net,
1463 .id = &geneve_net_id,
1464 .size = sizeof(struct geneve_net),
1465 };
1466
1467 static int __init geneve_init_module(void)
1468 {
1469 int rc;
1470
1471 rc = register_pernet_subsys(&geneve_net_ops);
1472 if (rc)
1473 goto out1;
1474
1475 rc = register_netdevice_notifier(&geneve_notifier_block);
1476 if (rc)
1477 goto out2;
1478
1479 rc = rtnl_link_register(&geneve_link_ops);
1480 if (rc)
1481 goto out3;
1482
1483 return 0;
1484 out3:
1485 unregister_netdevice_notifier(&geneve_notifier_block);
1486 out2:
1487 unregister_pernet_subsys(&geneve_net_ops);
1488 out1:
1489 return rc;
1490 }
1491 late_initcall(geneve_init_module);
1492
1493 static void __exit geneve_cleanup_module(void)
1494 {
1495 rtnl_link_unregister(&geneve_link_ops);
1496 unregister_netdevice_notifier(&geneve_notifier_block);
1497 unregister_pernet_subsys(&geneve_net_ops);
1498 }
1499 module_exit(geneve_cleanup_module);
1500
1501 MODULE_LICENSE("GPL");
1502 MODULE_VERSION(GENEVE_NETDEV_VER);
1503 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
1504 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
1505 MODULE_ALIAS_RTNL_LINK("geneve");