]> git.proxmox.com Git - mirror_ovs.git/blob - datapath/linux/compat/geneve.c
297593ce6e1ad3a0d74fdc299392a7a306b061e9
[mirror_ovs.git] / datapath / linux / compat / geneve.c
1 /*
2 * GENEVE: Generic Network Virtualization Encapsulation
3 *
4 * Copyright (c) 2015 Red Hat, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/netdevice.h>
16 #include <linux/etherdevice.h>
17 #include <linux/hash.h>
18 #include <linux/if_link.h>
19
20 #include <net/dst_metadata.h>
21 #include <net/net_namespace.h>
22 #include <net/netns/generic.h>
23 #include <net/rtnetlink.h>
24 #include <net/geneve.h>
25 #include <net/protocol.h>
26
27 #include "gso.h"
28 #include "vport-netdev.h"
29 #include "compat.h"
30
31 #ifndef HAVE_METADATA_DST
/* Version string reported through the ethtool get_drvinfo callback */
#define GENEVE_NETDEV_VER "0.6"

/* Default UDP destination port, used when IFLA_GENEVE_PORT is not supplied */
#define GENEVE_UDP_PORT 6081

/* A VNI is 24 bits wide: count of distinct VNIs and the matching bit mask */
#define GENEVE_N_VID (1u << 24)
#define GENEVE_VID_MASK (GENEVE_N_VID - 1)

/* Each socket hashes its devices by VNI into (1 << VNI_HASH_BITS) buckets */
#define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)

/* The only Geneve version accepted on receive and written on transmit */
#define GENEVE_VER 0
/* UDP header plus fixed Geneve header; Geneve options are not included */
#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
44
/* per-network namespace private data for this module */
struct geneve_net {
	struct list_head geneve_list;	/* configured devices, linked via geneve_dev.next */
	struct list_head sock_list;	/* listening sockets, linked via geneve_sock.list */
};
50
/* Key handed to net_generic() to fetch this module's struct geneve_net;
 * it is published through geneve_net_ops.id.
 */
static int geneve_net_id;
52
/* Pseudo network device: private area of every "ovs_geneve" net_device */
struct geneve_dev {
	struct hlist_node hlist; /* vni hash table */
	struct net *net; /* netns for packet i/o */
	struct net_device *dev; /* netdev for geneve tunnel */
	struct geneve_sock *sock; /* socket used for geneve tunnel */
	u8 vni[3]; /* virtual network ID for tunnel */
	u8 ttl; /* TTL override */
	u8 tos; /* TOS override */
	struct sockaddr_in remote; /* IPv4 address for link partner */
	struct list_head next; /* geneve's per namespace list */
	__be16 dst_port; /* UDP port (network order): local listen port and transmit destination */
	bool collect_md; /* metadata mode: transmit reads per-packet tunnel info from the skb */
};
67
/* One UDP listening socket, shared by every open device using the same port */
struct geneve_sock {
	bool collect_md;		/* copied from the device that most recently opened this socket */
	struct list_head list;		/* entry in geneve_net.sock_list */
	struct socket *sock;		/* underlying kernel UDP socket */
	struct rcu_head rcu;		/* used by call_rcu() to defer kfree() of this struct */
	int refcnt;			/* number of open devices using the socket; plain int,
					 * NOTE(review): presumably serialized by RTNL - confirm
					 */
#ifdef HAVE_UDP_OFFLOAD
	struct udp_offload udp_offloads;	/* GRO callbacks registered for this port */
#endif
	struct hlist_head vni_list[VNI_HASH_SIZE];	/* devices hashed by VNI */
};
79
80 static inline __u32 geneve_net_vni_hash(u8 vni[3])
81 {
82 __u32 vnid;
83
84 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
85 return hash_32(vnid, VNI_HASH_BITS);
86 }
87
88 static __be64 vni_to_tunnel_id(const __u8 *vni)
89 {
90 #ifdef __BIG_ENDIAN
91 return (vni[0] << 16) | (vni[1] << 8) | vni[2];
92 #else
93 return (__force __be64)(((__force u64)vni[0] << 40) |
94 ((__force u64)vni[1] << 48) |
95 ((__force u64)vni[2] << 56));
96 #endif
97 }
98
99 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
100 __be32 addr, u8 vni[])
101 {
102 struct hlist_head *vni_list_head;
103 struct geneve_dev *geneve;
104 __u32 hash;
105
106 /* Find the device for this VNI */
107 hash = geneve_net_vni_hash(vni);
108 vni_list_head = &gs->vni_list[hash];
109 hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
110 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
111 addr == geneve->remote.sin_addr.s_addr)
112 return geneve;
113 }
114 return NULL;
115 }
116
117 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
118 {
119 return (struct genevehdr *)(udp_hdr(skb) + 1);
120 }
121
/* geneve receive/decap routine
 *
 * Called from geneve_udp_encap_recv() after the outer headers have been
 * pulled.  Looks up the receiving device, builds tunnel metadata from the
 * Geneve header, and passes the inner Ethernet frame to
 * netdev_port_receive().  The skb is freed on every failure path.
 *
 * Packets that take the non-metadata branch are always dropped: tun_dst is
 * left NULL there and the later "if (tun_dst) ... else goto drop" rejects it.
 */
static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
{
	struct genevehdr *gnvh = geneve_hdr(skb);
	struct metadata_dst *tun_dst;
	struct geneve_dev *geneve = NULL;
#ifdef HAVE_DEV_TSTATS
	struct pcpu_sw_netstats *stats;
#endif
	struct iphdr *iph;
	u8 *vni;
	__be32 addr;
	int err;
	/* On-stack metadata_dst followed by 256 spare bytes for the options
	 * copied below (opt_len * 4 bytes).
	 * NOTE(review): presumably 256 covers the largest possible option
	 * block - confirm against the width of genevehdr.opt_len.
	 */
	union {
		struct metadata_dst dst;
		char buf[sizeof(struct metadata_dst) + 256];
	} buf;

	iph = ip_hdr(skb); /* outer IP header... */

	if (gs->collect_md) {
		/* Metadata-mode devices are configured with VNI 0 and remote
		 * address 0 (see geneve_configure()), so look them up that way.
		 */
		static u8 zero_vni[3];

		vni = zero_vni;
		addr = 0;
	} else {
		vni = gnvh->vni;
		addr = iph->saddr;
	}

	geneve = geneve_lookup(gs, addr, vni);
	if (!geneve)
		goto drop;

	if (ip_tunnel_collect_metadata() || gs->collect_md) {
		__be16 flags;

		/* Mirror the OAM and critical bits of the header into flags */
		flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
			(gnvh->oam ? TUNNEL_OAM : 0) |
			(gnvh->critical ? TUNNEL_CRIT_OPT : 0);

		tun_dst = &buf.dst;
		ovs_udp_tun_rx_dst(&tun_dst->u.tun_info, skb, AF_INET, flags,
				   vni_to_tunnel_id(gnvh->vni), gnvh->opt_len * 4);
		/* Update tunnel dst according to Geneve options. */
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					gnvh->options, gnvh->opt_len * 4);
	} else {
		/* Drop packets w/ critical options,
		 * since we don't support any...
		 */
		tun_dst = NULL;
		if (gnvh->critical)
			goto drop;
	}

	/* The inner Ethernet frame now starts at skb->data */
	skb_reset_mac_header(skb);
	skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev)));
	skb->protocol = eth_type_trans(skb, geneve->dev);
	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

	/* Without tunnel metadata the packet cannot be delivered */
	if (tun_dst)
		ovs_skb_dst_set(skb, &tun_dst->dst);
	else
		goto drop;
	/* Ignore packet loops (and multicast echo) */
	if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
		goto drop;

	skb_reset_network_header(skb);

	/* iph still refers to the outer IP header captured above */
	err = IP_ECN_decapsulate(iph, skb);

	if (unlikely(err)) {
		/* Only results greater than 1 are counted as errors and dropped */
		if (err > 1) {
			++geneve->dev->stats.rx_frame_errors;
			++geneve->dev->stats.rx_errors;
			goto drop;
		}
	}

#ifdef HAVE_DEV_TSTATS
	stats = this_cpu_ptr((struct pcpu_sw_netstats __percpu *)geneve->dev->tstats);
	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += skb->len;
	u64_stats_update_end(&stats->syncp);
#endif
	netdev_port_receive(skb, &tun_dst->u.tun_info);
	return;
drop:
	/* Consume bad packet */
	kfree_skb(skb);
}
216
217 #ifdef HAVE_DEV_TSTATS
218 /* Setup stats when device is created */
219 static int geneve_init(struct net_device *dev)
220 {
221 dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
222 if (!dev->tstats)
223 return -ENOMEM;
224
225 return 0;
226 }
227
228 static void geneve_uninit(struct net_device *dev)
229 {
230 free_percpu(dev->tstats);
231 }
232 #endif
233
234 /* Callback from net/ipv4/udp.c to receive packets */
235 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
236 {
237 struct genevehdr *geneveh;
238 struct geneve_sock *gs;
239 int opts_len;
240
241 /* Need Geneve and inner Ethernet header to be present */
242 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
243 goto error;
244
245 /* Return packets with reserved bits set */
246 geneveh = geneve_hdr(skb);
247 if (unlikely(geneveh->ver != GENEVE_VER))
248 goto error;
249
250 if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
251 goto error;
252
253 opts_len = geneveh->opt_len * 4;
254 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
255 htons(ETH_P_TEB)))
256 goto drop;
257
258 gs = rcu_dereference_sk_user_data(sk);
259 if (!gs)
260 goto drop;
261
262 geneve_rx(gs, skb);
263 return 0;
264
265 drop:
266 /* Consume bad packet */
267 kfree_skb(skb);
268 return 0;
269
270 error:
271 /* Let the UDP layer deal with the skb */
272 return 1;
273 }
274
275 static struct socket *geneve_create_sock(struct net *net, bool ipv6,
276 __be16 port)
277 {
278 struct socket *sock;
279 struct udp_port_cfg udp_conf;
280 int err;
281
282 memset(&udp_conf, 0, sizeof(udp_conf));
283
284 if (ipv6) {
285 udp_conf.family = AF_INET6;
286 } else {
287 udp_conf.family = AF_INET;
288 udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
289 }
290
291 udp_conf.local_udp_port = port;
292
293 /* Open UDP socket */
294 err = udp_sock_create(net, &udp_conf, &sock);
295 if (err < 0)
296 return ERR_PTR(err);
297
298 return sock;
299 }
300
301 #ifdef HAVE_UDP_OFFLOAD
302 static void geneve_notify_add_rx_port(struct geneve_sock *gs)
303 {
304 struct sock *sk = gs->sock->sk;
305 sa_family_t sa_family = sk->sk_family;
306 int err;
307
308 if (sa_family == AF_INET) {
309 err = udp_add_offload(&gs->udp_offloads);
310 if (err)
311 pr_warn("geneve: udp_add_offload failed with status %d\n",
312 err);
313 }
314 }
315
316 static int geneve_hlen(struct genevehdr *gh)
317 {
318 return sizeof(*gh) + gh->opt_len * 4;
319 }
320
/* GRO receive callback registered through geneve_sock.udp_offloads.
 *
 * Makes the full Geneve header (fixed part plus options) available, clears
 * same_flow on every held packet whose Geneve header differs from this one,
 * then hands the packet to the gro_receive callback registered for the
 * inner protocol type.  The signature depends on whether the kernel passes
 * the struct udp_offload to the callback.
 *
 * Returns whatever the inner callback returns, or NULL when the packet is
 * not processed; in that case the packet's flush flag is set.
 */
#ifndef HAVE_UDP_OFFLOAD_ARG_UOFF
static struct sk_buff **geneve_gro_receive(struct sk_buff **head,
					   struct sk_buff *skb)
#else
static struct sk_buff **geneve_gro_receive(struct sk_buff **head,
					   struct sk_buff *skb,
					   struct udp_offload *uoff)
#endif
{
	struct sk_buff *p, **pp = NULL;
	struct genevehdr *gh, *gh2;
	unsigned int hlen, gh_len, off_gnv;
	const struct packet_offload *ptype;
	__be16 type;
	int flush = 1;

	/* First make the fixed-size part of the Geneve header accessible */
	off_gnv = skb_gro_offset(skb);
	hlen = off_gnv + sizeof(*gh);
	gh = skb_gro_header_fast(skb, off_gnv);
	if (skb_gro_header_hard(skb, hlen)) {
		gh = skb_gro_header_slow(skb, hlen, off_gnv);
		if (unlikely(!gh))
			goto out;
	}

	/* Wrong-version and OAM packets are not aggregated (flush stays 1) */
	if (gh->ver != GENEVE_VER || gh->oam)
		goto out;
	gh_len = geneve_hlen(gh);

	/* Now that the option length is known, make the options accessible too */
	hlen = off_gnv + gh_len;
	if (skb_gro_header_hard(skb, hlen)) {
		gh = skb_gro_header_slow(skb, hlen, off_gnv);
		if (unlikely(!gh))
			goto out;
	}

	flush = 0;

	/* A held packet stays in the same flow only if its whole Geneve
	 * header, options included, is byte-identical to this packet's.
	 */
	for (p = *head; p; p = p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		gh2 = (struct genevehdr *)(p->data + off_gnv);
		if (gh->opt_len != gh2->opt_len ||
		    memcmp(gh, gh2, gh_len)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	type = gh->proto_type;

	/* The offload lookup and the inner callback run under rcu_read_lock() */
	rcu_read_lock();
	ptype = gro_find_receive_by_type(type);
	if (!ptype) {
		flush = 1;
		goto out_unlock;
	}

	/* Step past the Geneve header before calling the inner handler */
	skb_gro_pull(skb, gh_len);
	skb_gro_postpull_rcsum(skb, gh, gh_len);
	pp = ptype->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}
391
392 #ifndef HAVE_UDP_OFFLOAD_ARG_UOFF
393 static int geneve_gro_complete(struct sk_buff *skb, int nhoff)
394 #else
395 static int geneve_gro_complete(struct sk_buff *skb, int nhoff,
396 struct udp_offload *uoff)
397 #endif
398 {
399 struct genevehdr *gh;
400 struct packet_offload *ptype;
401 __be16 type;
402 int gh_len;
403 int err = -ENOSYS;
404
405 udp_tunnel_gro_complete(skb, nhoff);
406
407 gh = (struct genevehdr *)(skb->data + nhoff);
408 gh_len = geneve_hlen(gh);
409 type = gh->proto_type;
410
411 rcu_read_lock();
412 ptype = gro_find_complete_by_type(type);
413 if (ptype)
414 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
415
416 rcu_read_unlock();
417 return err;
418 }
419 #endif
420
421 /* Create new listen socket if needed */
422 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
423 bool ipv6)
424 {
425 struct geneve_net *gn = net_generic(net, geneve_net_id);
426 struct geneve_sock *gs;
427 struct socket *sock;
428 struct udp_tunnel_sock_cfg tunnel_cfg;
429 int h;
430
431 gs = kzalloc(sizeof(*gs), GFP_KERNEL);
432 if (!gs)
433 return ERR_PTR(-ENOMEM);
434
435 sock = geneve_create_sock(net, ipv6, port);
436 if (IS_ERR(sock)) {
437 kfree(gs);
438 return ERR_CAST(sock);
439 }
440
441 gs->sock = sock;
442 gs->refcnt = 1;
443 for (h = 0; h < VNI_HASH_SIZE; ++h)
444 INIT_HLIST_HEAD(&gs->vni_list[h]);
445
446 /* Initialize the geneve udp offloads structure */
447 #ifdef HAVE_UDP_OFFLOAD
448 gs->udp_offloads.port = port;
449 gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive;
450 gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete;
451 geneve_notify_add_rx_port(gs);
452 #endif
453 /* Mark socket as an encapsulation socket */
454 tunnel_cfg.sk_user_data = gs;
455 tunnel_cfg.encap_type = 1;
456 tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
457 tunnel_cfg.encap_destroy = NULL;
458 setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
459 list_add(&gs->list, &gn->sock_list);
460 return gs;
461 }
462
/* Unregister the GRO offload callbacks of @gs (AF_INET sockets only).
 * Compiles to an empty function when HAVE_UDP_OFFLOAD is not defined.
 */
static void geneve_notify_del_rx_port(struct geneve_sock *gs)
{
#ifdef HAVE_UDP_OFFLOAD
	if (gs->sock->sk->sk_family == AF_INET)
		udp_del_offload(&gs->udp_offloads);
#endif
}
473
474 static void free_gs_rcu(struct rcu_head *rcu)
475 {
476 struct geneve_sock *gs = container_of(rcu, struct geneve_sock, rcu);
477
478 kfree(gs);
479 }
480
481 static void geneve_sock_release(struct geneve_sock *gs)
482 {
483 if (--gs->refcnt)
484 return;
485
486 list_del(&gs->list);
487 geneve_notify_del_rx_port(gs);
488 udp_tunnel_sock_release(gs->sock);
489 call_rcu(&gs->rcu, free_gs_rcu);
490 }
491
492 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
493 __be16 dst_port)
494 {
495 struct geneve_sock *gs;
496
497 list_for_each_entry(gs, &gn->sock_list, list) {
498 if (inet_sport(gs->sock->sk) == dst_port &&
499 inet_sk(gs->sock->sk)->sk.sk_family == AF_INET) {
500 return gs;
501 }
502 }
503 return NULL;
504 }
505
506 static int geneve_open(struct net_device *dev)
507 {
508 struct geneve_dev *geneve = netdev_priv(dev);
509 struct net *net = geneve->net;
510 struct geneve_net *gn = net_generic(net, geneve_net_id);
511 struct geneve_sock *gs;
512 __u32 hash;
513
514 gs = geneve_find_sock(gn, geneve->dst_port);
515 if (gs) {
516 gs->refcnt++;
517 goto out;
518 }
519
520 gs = geneve_socket_create(net, geneve->dst_port, false);
521 if (IS_ERR(gs))
522 return PTR_ERR(gs);
523
524 out:
525 gs->collect_md = geneve->collect_md;
526 geneve->sock = gs;
527
528 hash = geneve_net_vni_hash(geneve->vni);
529 hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]);
530 return 0;
531 }
532
533 static int geneve_stop(struct net_device *dev)
534 {
535 struct geneve_dev *geneve = netdev_priv(dev);
536 struct geneve_sock *gs = geneve->sock;
537
538 if (!hlist_unhashed(&geneve->hlist))
539 hlist_del_rcu(&geneve->hlist);
540 geneve_sock_release(gs);
541 return 0;
542 }
543
544 static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb,
545 __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
546 bool csum)
547 {
548 struct genevehdr *gnvh;
549 int min_headroom;
550 int err;
551
552 min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
553 + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
554 + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
555 err = skb_cow_head(skb, min_headroom);
556 if (unlikely(err)) {
557 kfree_skb(skb);
558 goto free_rt;
559 }
560
561 skb = vlan_hwaccel_push_inside(skb);
562 if (!skb) {
563 err = -ENOMEM;
564 goto free_rt;
565 }
566
567 skb = udp_tunnel_handle_offloads(skb, csum, 0, false);
568 if (IS_ERR(skb)) {
569 err = PTR_ERR(skb);
570 goto free_rt;
571 }
572 gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
573 gnvh->ver = GENEVE_VER;
574 gnvh->opt_len = opt_len / 4;
575 gnvh->oam = !!(tun_flags & TUNNEL_OAM);
576 gnvh->critical = !!(tun_flags & TUNNEL_CRIT_OPT);
577 gnvh->rsvd1 = 0;
578 memcpy(gnvh->vni, vni, 3);
579 gnvh->proto_type = htons(ETH_P_TEB);
580 gnvh->rsvd2 = 0;
581 memcpy(gnvh->options, opt, opt_len);
582
583 ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB));
584 return 0;
585
586 free_rt:
587 ip_rt_put(rt);
588 return err;
589 }
590
591 static struct rtable *geneve_get_rt(struct sk_buff *skb,
592 struct net_device *dev,
593 struct flowi4 *fl4,
594 struct ip_tunnel_info *info)
595 {
596 struct geneve_dev *geneve = netdev_priv(dev);
597 struct rtable *rt = NULL;
598 __u8 tos;
599
600 memset(fl4, 0, sizeof(*fl4));
601 fl4->flowi4_mark = skb->mark;
602 fl4->flowi4_proto = IPPROTO_UDP;
603
604 if (info) {
605 fl4->daddr = info->key.u.ipv4.dst;
606 fl4->saddr = info->key.u.ipv4.src;
607 fl4->flowi4_tos = RT_TOS(info->key.tos);
608 } else {
609 tos = geneve->tos;
610 if (tos == 1) {
611 const struct iphdr *iip = ip_hdr(skb);
612
613 tos = ip_tunnel_get_dsfield(iip, skb);
614 }
615
616 fl4->flowi4_tos = RT_TOS(tos);
617 fl4->daddr = geneve->remote.sin_addr.s_addr;
618 }
619
620 rt = ip_route_output_key(geneve->net, fl4);
621 if (IS_ERR(rt)) {
622 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
623 dev->stats.tx_carrier_errors++;
624 return rt;
625 }
626 if (rt_dst(rt).dev == dev) { /* is this necessary? */
627 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
628 dev->stats.collisions++;
629 ip_rt_put(rt);
630 return ERR_PTR(-EINVAL);
631 }
632 return rt;
633 }
634
635 /* Convert 64 bit tunnel ID to 24 bit VNI. */
636 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
637 {
638 #ifdef __BIG_ENDIAN
639 vni[0] = (__force __u8)(tun_id >> 16);
640 vni[1] = (__force __u8)(tun_id >> 8);
641 vni[2] = (__force __u8)tun_id;
642 #else
643 vni[0] = (__force __u8)((__force u64)tun_id >> 40);
644 vni[1] = (__force __u8)((__force u64)tun_id >> 48);
645 vni[2] = (__force __u8)((__force u64)tun_id >> 56);
646 #endif
647 }
648
649 netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
650 {
651 struct net_device *dev = skb->dev;
652 struct geneve_dev *geneve = netdev_priv(dev);
653 struct geneve_sock *gs = geneve->sock;
654 struct ip_tunnel_info *info = NULL;
655 struct rtable *rt = NULL;
656 const struct iphdr *iip; /* interior IP header */
657 struct flowi4 fl4;
658 __u8 tos, ttl;
659 __be16 sport;
660 bool udp_csum;
661 __be16 df;
662 int err;
663
664 if (geneve->collect_md) {
665 info = skb_tunnel_info(skb);
666 if (unlikely(info && !(info->mode & IP_TUNNEL_INFO_TX))) {
667 netdev_dbg(dev, "no tunnel metadata\n");
668 goto tx_error;
669 }
670 if (info && ip_tunnel_info_af(info) != AF_INET)
671 goto tx_error;
672 }
673
674 rt = geneve_get_rt(skb, dev, &fl4, info);
675 if (IS_ERR(rt)) {
676 netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
677 dev->stats.tx_carrier_errors++;
678 goto tx_error;
679 }
680
681 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
682 skb_reset_mac_header(skb);
683
684 iip = ip_hdr(skb);
685
686 if (info) {
687 const struct ip_tunnel_key *key = &info->key;
688 u8 *opts = NULL;
689 u8 vni[3];
690
691 tunnel_id_to_vni(key->tun_id, vni);
692 if (key->tun_flags & TUNNEL_GENEVE_OPT)
693 opts = ip_tunnel_info_opts(info);
694
695 udp_csum = !!(key->tun_flags & TUNNEL_CSUM);
696 err = geneve_build_skb(rt, skb, key->tun_flags, vni,
697 info->options_len, opts, udp_csum);
698 if (unlikely(err))
699 goto err;
700
701 tos = ip_tunnel_ecn_encap(key->tos, iip, skb);
702 ttl = key->ttl;
703 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
704 } else {
705 udp_csum = false;
706 err = geneve_build_skb(rt, skb, 0, geneve->vni,
707 0, NULL, udp_csum);
708 if (unlikely(err))
709 goto err;
710
711 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb);
712 ttl = geneve->ttl;
713 if (!ttl && IN_MULTICAST(ntohl(fl4.daddr)))
714 ttl = 1;
715 ttl = ttl ? : ip4_dst_hoplimit(&rt_dst(rt));
716 df = 0;
717 }
718 err = udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, fl4.saddr, fl4.daddr,
719 tos, ttl, df, sport, geneve->dst_port,
720 !net_eq(geneve->net, dev_net(geneve->dev)),
721 !udp_csum);
722
723 iptunnel_xmit_stats(err, &dev->stats, (struct pcpu_sw_netstats __percpu *) dev->tstats);
724 return NETDEV_TX_OK;
725
726 tx_error:
727 dev_kfree_skb(skb);
728 err:
729 dev->stats.tx_errors++;
730 return NETDEV_TX_OK;
731 }
732 EXPORT_SYMBOL(rpl_geneve_xmit);
733
734 static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct net_device *dev)
735 {
736 /* Drop All packets coming from networking stack. OVS-CB is
737 * not initialized for these packets.
738 */
739
740 dev_kfree_skb(skb);
741 dev->stats.tx_dropped++;
742 return NETDEV_TX_OK;
743 }
744
/* net_device callbacks installed on every geneve device by geneve_setup() */
static const struct net_device_ops geneve_netdev_ops = {
#ifdef HAVE_DEV_TSTATS
	/* per-CPU statistics are only wired up when the kernel has dev->tstats */
	.ndo_init = geneve_init,
	.ndo_uninit = geneve_uninit,
	.ndo_get_stats64 = ip_tunnel_get_stats64,
#endif
	.ndo_open = geneve_open,
	.ndo_stop = geneve_stop,
	.ndo_start_xmit = geneve_dev_xmit,	/* drops everything; see geneve_dev_xmit() */
	.ndo_change_mtu = eth_change_mtu,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_set_mac_address = eth_mac_addr,
};
758
759 static void geneve_get_drvinfo(struct net_device *dev,
760 struct ethtool_drvinfo *drvinfo)
761 {
762 strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
763 strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
764 }
765
/* ethtool callbacks installed on every geneve device by geneve_setup() */
static const struct ethtool_ops geneve_ethtool_ops = {
	.get_drvinfo = geneve_get_drvinfo,
	.get_link = ethtool_op_get_link,
};
770
/* Info for udev, that this is a virtual tunnel endpoint; attached to each
 * device with SET_NETDEV_DEVTYPE() in geneve_setup().
 */
static struct device_type geneve_type = {
	.name = "geneve",
};
775
776 /* Initialize the device structure. */
777 static void geneve_setup(struct net_device *dev)
778 {
779 ether_setup(dev);
780
781 dev->netdev_ops = &geneve_netdev_ops;
782 dev->ethtool_ops = &geneve_ethtool_ops;
783 dev->destructor = free_netdev;
784
785 SET_NETDEV_DEVTYPE(dev, &geneve_type);
786
787 dev->features |= NETIF_F_LLTX;
788 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
789 dev->features |= NETIF_F_RXCSUM;
790 dev->features |= NETIF_F_GSO_SOFTWARE;
791
792 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
793 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
794 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
795 #endif
796 #if 0
797 /* Not required */
798 netif_keep_dst(dev);
799 #endif
800 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
801 eth_hw_addr_random(dev);
802 }
803
/* Netlink attribute policy for the IFLA_GENEVE_* link attributes */
static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
	[IFLA_GENEVE_ID] = { .type = NLA_U32 },		/* VNI */
	[IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },	/* IPv4 address sized */
	[IFLA_GENEVE_TTL] = { .type = NLA_U8 },
	[IFLA_GENEVE_TOS] = { .type = NLA_U8 },
	[IFLA_GENEVE_PORT] = { .type = NLA_U16 },	/* read with nla_get_be16() in geneve_newlink() */
	[IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
};
812
813 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[])
814 {
815 if (tb[IFLA_ADDRESS]) {
816 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
817 return -EINVAL;
818
819 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
820 return -EADDRNOTAVAIL;
821 }
822
823 if (!data)
824 return -EINVAL;
825
826 if (data[IFLA_GENEVE_ID]) {
827 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
828
829 if (vni >= GENEVE_VID_MASK)
830 return -ERANGE;
831 }
832
833 return 0;
834 }
835
836 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
837 __be16 dst_port,
838 __be32 rem_addr,
839 u8 vni[],
840 bool *tun_on_same_port,
841 bool *tun_collect_md)
842 {
843 struct geneve_dev *geneve, *t;
844
845 *tun_on_same_port = false;
846 *tun_collect_md = false;
847 t = NULL;
848 list_for_each_entry(geneve, &gn->geneve_list, next) {
849 if (geneve->dst_port == dst_port) {
850 *tun_collect_md = geneve->collect_md;
851 *tun_on_same_port = true;
852 }
853 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
854 rem_addr == geneve->remote.sin_addr.s_addr &&
855 dst_port == geneve->dst_port)
856 t = geneve;
857 }
858 return t;
859 }
860
861 static int geneve_configure(struct net *net, struct net_device *dev,
862 __be32 rem_addr, __u32 vni, __u8 ttl, __u8 tos,
863 __be16 dst_port, bool metadata)
864 {
865 struct geneve_net *gn = net_generic(net, geneve_net_id);
866 struct geneve_dev *t, *geneve = netdev_priv(dev);
867 bool tun_collect_md, tun_on_same_port;
868 int err;
869
870 if (metadata) {
871 if (rem_addr || vni || tos || ttl)
872 return -EINVAL;
873 }
874
875 geneve->net = net;
876 geneve->dev = dev;
877
878 geneve->vni[0] = (vni & 0x00ff0000) >> 16;
879 geneve->vni[1] = (vni & 0x0000ff00) >> 8;
880 geneve->vni[2] = vni & 0x000000ff;
881
882 geneve->remote.sin_addr.s_addr = rem_addr;
883 if (IN_MULTICAST(ntohl(geneve->remote.sin_addr.s_addr)))
884 return -EINVAL;
885
886 geneve->ttl = ttl;
887 geneve->tos = tos;
888 geneve->dst_port = dst_port;
889 geneve->collect_md = metadata;
890
891 t = geneve_find_dev(gn, dst_port, rem_addr, geneve->vni,
892 &tun_on_same_port, &tun_collect_md);
893 if (t)
894 return -EBUSY;
895
896 if (metadata) {
897 if (tun_on_same_port)
898 return -EPERM;
899 } else {
900 if (tun_collect_md)
901 return -EPERM;
902 }
903
904 err = register_netdevice(dev);
905 if (err)
906 return err;
907
908 list_add(&geneve->next, &gn->geneve_list);
909 return 0;
910 }
911
912 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
913 static int geneve_newlink(struct net_device *dev,
914 struct nlattr *tb[], struct nlattr *data[])
915 {
916 struct net *net = &init_net;
917 #else
918 static int geneve_newlink(struct net *net, struct net_device *dev,
919 struct nlattr *tb[], struct nlattr *data[])
920 {
921 #endif
922 __be16 dst_port = htons(GENEVE_UDP_PORT);
923 __u8 ttl = 0, tos = 0;
924 bool metadata = false;
925 __be32 rem_addr;
926 __u32 vni;
927
928 if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE])
929 return -EINVAL;
930
931 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
932 rem_addr = nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
933
934 if (data[IFLA_GENEVE_TTL])
935 ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
936
937 if (data[IFLA_GENEVE_TOS])
938 tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
939
940 if (data[IFLA_GENEVE_PORT])
941 dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]);
942
943 if (data[IFLA_GENEVE_COLLECT_METADATA])
944 metadata = true;
945
946 return geneve_configure(net, dev, rem_addr, vni,
947 ttl, tos, dst_port, metadata);
948 }
949
950 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
951 static void geneve_dellink(struct net_device *dev)
952 #else
953 static void geneve_dellink(struct net_device *dev, struct list_head *head)
954 #endif
955 {
956 struct geneve_dev *geneve = netdev_priv(dev);
957
958 list_del(&geneve->next);
959 unregister_netdevice_queue(dev, head);
960 }
961
962 static size_t geneve_get_size(const struct net_device *dev)
963 {
964 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */
965 nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */
966 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
967 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
968 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
969 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
970 0;
971 }
972
973 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
974 {
975 struct geneve_dev *geneve = netdev_priv(dev);
976 __u32 vni;
977
978 vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2];
979 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
980 goto nla_put_failure;
981
982 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
983 geneve->remote.sin_addr.s_addr))
984 goto nla_put_failure;
985
986 if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) ||
987 nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos))
988 goto nla_put_failure;
989
990 if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port))
991 goto nla_put_failure;
992
993 if (geneve->collect_md) {
994 if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
995 goto nla_put_failure;
996 }
997
998 return 0;
999
1000 nla_put_failure:
1001 return -EMSGSIZE;
1002 }
1003
/* rtnetlink link type "ovs_geneve"; registered in rpl_geneve_init_module()
 * and also used by rpl_geneve_dev_create_fb() to create devices directly.
 */
static struct rtnl_link_ops geneve_link_ops __read_mostly = {
	.kind = "ovs_geneve",
	.maxtype = IFLA_GENEVE_MAX,
	.policy = geneve_policy,
	.priv_size = sizeof(struct geneve_dev),	/* netdev_priv() area of each device */
	.setup = geneve_setup,
	.validate = geneve_validate,
	.newlink = geneve_newlink,
	.dellink = geneve_dellink,
	.get_size = geneve_get_size,
	.fill_info = geneve_fill_info,
};
1016
1017 struct net_device *rpl_geneve_dev_create_fb(struct net *net, const char *name,
1018 u8 name_assign_type, u16 dst_port)
1019 {
1020 struct nlattr *tb[IFLA_MAX + 1];
1021 struct net_device *dev;
1022 int err;
1023
1024 memset(tb, 0, sizeof(tb));
1025 dev = rtnl_create_link(net, (char *) name, name_assign_type,
1026 &geneve_link_ops, tb);
1027 if (IS_ERR(dev))
1028 return dev;
1029
1030 err = geneve_configure(net, dev, 0, 0, 0, 0, htons(dst_port), true);
1031 if (err) {
1032 free_netdev(dev);
1033 return ERR_PTR(err);
1034 }
1035 return dev;
1036 }
1037 EXPORT_SYMBOL_GPL(rpl_geneve_dev_create_fb);
1038
1039 static __net_init int geneve_init_net(struct net *net)
1040 {
1041 struct geneve_net *gn = net_generic(net, geneve_net_id);
1042
1043 INIT_LIST_HEAD(&gn->geneve_list);
1044 INIT_LIST_HEAD(&gn->sock_list);
1045 return 0;
1046 }
1047
1048 static void __net_exit geneve_exit_net(struct net *net)
1049 {
1050 struct geneve_net *gn = net_generic(net, geneve_net_id);
1051 struct geneve_dev *geneve, *next;
1052 struct net_device *dev, *aux;
1053 LIST_HEAD(list);
1054
1055 rtnl_lock();
1056
1057 /* gather any geneve devices that were moved into this ns */
1058 for_each_netdev_safe(net, dev, aux)
1059 if (dev->rtnl_link_ops == &geneve_link_ops)
1060 unregister_netdevice_queue(dev, &list);
1061
1062 /* now gather any other geneve devices that were created in this ns */
1063 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
1064 /* If geneve->dev is in the same netns, it was already added
1065 * to the list by the previous loop.
1066 */
1067 if (!net_eq(dev_net(geneve->dev), net))
1068 unregister_netdevice_queue(geneve->dev, &list);
1069 }
1070
1071 /* unregister the devices gathered above */
1072 unregister_netdevice_many(&list);
1073 rtnl_unlock();
1074 }
1075
/* Per-namespace hooks: .id receives the key later used with net_generic(),
 * and .size is the amount of per-namespace storage (one struct geneve_net).
 */
static struct pernet_operations geneve_net_ops = {
	.init = geneve_init_net,
	.exit = geneve_exit_net,
	.id = &geneve_net_id,
	.size = sizeof(struct geneve_net),
};
1082
1083 DEFINE_COMPAT_PNET_REG_FUNC(device)
1084 int rpl_geneve_init_module(void)
1085 {
1086 int rc;
1087
1088 rc = register_pernet_subsys(&geneve_net_ops);
1089 if (rc)
1090 goto out1;
1091
1092 rc = rtnl_link_register(&geneve_link_ops);
1093 if (rc)
1094 goto out2;
1095
1096 pr_info("Geneve tunneling driver\n");
1097 return 0;
1098 out2:
1099 unregister_pernet_subsys(&geneve_net_ops);
1100 out1:
1101 return rc;
1102 }
1103
1104 void rpl_geneve_cleanup_module(void)
1105 {
1106 rtnl_link_unregister(&geneve_link_ops);
1107 unregister_pernet_subsys(&geneve_net_ops);
1108 }
1109 #endif