]> git.proxmox.com Git - mirror_ovs.git/blob - lib/netdev-native-tnl.c
netdev-native-tnl: Fix a build error on NetBSD 7.0
[mirror_ovs.git] / lib / netdev-native-tnl.c
1 /*
2 * Copyright (c) 2016 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <sys/socket.h>
22 #include <net/if.h>
23 #include <netinet/in.h>
24 #include <netinet/ip6.h>
25 #include <sys/ioctl.h>
26
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <sys/time.h>
30
31 #include "openvswitch/list.h"
32 #include "byte-order.h"
33 #include "csum.h"
34 #include "daemon.h"
35 #include "dirs.h"
36 #include "dpif.h"
37 #include "dp-packet.h"
38 #include "entropy.h"
39 #include "flow.h"
40 #include "hash.h"
41 #include "hmap.h"
42 #include "id-pool.h"
43 #include "netdev-provider.h"
44 #include "netdev-vport.h"
45 #include "netdev-vport-private.h"
46 #include "odp-netlink.h"
47 #include "dp-packet.h"
48 #include "ovs-router.h"
49 #include "packets.h"
50 #include "poll-loop.h"
51 #include "random.h"
52 #include "route-table.h"
53 #include "shash.h"
54 #include "socket-util.h"
55 #include "timeval.h"
56 #include "netdev-native-tnl.h"
57 #include "openvswitch/vlog.h"
58 #include "unaligned.h"
59 #include "unixctl.h"
60 #include "util.h"
61
62 VLOG_DEFINE_THIS_MODULE(native_tnl);
63 static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
64
65 #define VXLAN_HLEN (sizeof(struct udp_header) + \
66 sizeof(struct vxlanhdr))
67
68 #define GENEVE_BASE_HLEN (sizeof(struct udp_header) + \
69 sizeof(struct genevehdr))
70
71 uint16_t tnl_udp_port_min = 32768;
72 uint16_t tnl_udp_port_max = 61000;
73
74 void *
75 netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
76 unsigned int *hlen)
77 {
78 void *nh;
79 struct ip_header *ip;
80 struct ovs_16aligned_ip6_hdr *ip6;
81 void *l4;
82 int l3_size;
83
84 nh = dp_packet_l3(packet);
85 ip = nh;
86 ip6 = nh;
87 l4 = dp_packet_l4(packet);
88
89 if (!nh || !l4) {
90 return NULL;
91 }
92
93 *hlen = sizeof(struct eth_header);
94
95 l3_size = dp_packet_size(packet) -
96 ((char *)nh - (char *)dp_packet_data(packet));
97
98 if (IP_VER(ip->ip_ihl_ver) == 4) {
99
100 ovs_be32 ip_src, ip_dst;
101
102 if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
103 VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
104 return NULL;
105 }
106
107 if (ntohs(ip->ip_tot_len) > l3_size) {
108 VLOG_WARN_RL(&err_rl, "ip packet is truncated (IP length %d, actual %d)",
109 ntohs(ip->ip_tot_len), l3_size);
110 return NULL;
111 }
112 if (IP_IHL(ip->ip_ihl_ver) * 4 > sizeof(struct ip_header)) {
113 VLOG_WARN_RL(&err_rl, "ip options not supported on tunnel packets "
114 "(%d bytes)", IP_IHL(ip->ip_ihl_ver) * 4);
115 return NULL;
116 }
117
118 ip_src = get_16aligned_be32(&ip->ip_src);
119 ip_dst = get_16aligned_be32(&ip->ip_dst);
120
121 tnl->ip_src = ip_src;
122 tnl->ip_dst = ip_dst;
123 tnl->ip_tos = ip->ip_tos;
124 tnl->ip_ttl = ip->ip_ttl;
125
126 *hlen += IP_HEADER_LEN;
127
128 } else if (IP_VER(ip->ip_ihl_ver) == 6) {
129
130 memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6->ip6_src);
131 memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6->ip6_dst);
132 tnl->ip_tos = 0;
133 tnl->ip_ttl = ip6->ip6_hlim;
134
135 *hlen += IPV6_HEADER_LEN;
136
137 } else {
138 VLOG_WARN_RL(&err_rl, "ipv4 packet has invalid version (%d)",
139 IP_VER(ip->ip_ihl_ver));
140 return NULL;
141 }
142
143 return l4;
144 }
145
146 /* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
147 * reallocating the packet if necessary. 'header' should contain an Ethernet
148 * header, followed by an IPv4 header (without options), and an L4 header.
149 *
150 * This function sets the IP header's ip_tot_len field (which should be zeroed
151 * as part of 'header') and puts its value into '*ip_tot_size' as well. Also
152 * updates IP header checksum.
153 *
154 * Return pointer to the L4 header added to 'packet'. */
155 void *
156 netdev_tnl_push_ip_header(struct dp_packet *packet,
157 const void *header, int size, int *ip_tot_size)
158 {
159 struct eth_header *eth;
160 struct ip_header *ip;
161 struct ovs_16aligned_ip6_hdr *ip6;
162
163 eth = dp_packet_push_uninit(packet, size);
164 *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);
165
166 memcpy(eth, header, size);
167
168 if (netdev_tnl_is_header_ipv6(header)) {
169 ip6 = netdev_tnl_ipv6_hdr(eth);
170 *ip_tot_size -= IPV6_HEADER_LEN;
171 ip6->ip6_plen = htons(*ip_tot_size);
172 return ip6 + 1;
173 } else {
174 ip = netdev_tnl_ip_hdr(eth);
175 ip->ip_tot_len = htons(*ip_tot_size);
176 ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
177 *ip_tot_size -= IP_HEADER_LEN;
178 return ip + 1;
179 }
180 }
181
182 static void *
183 udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
184 unsigned int *hlen)
185 {
186 struct udp_header *udp;
187
188 udp = netdev_tnl_ip_extract_tnl_md(packet, tnl, hlen);
189 if (!udp) {
190 return NULL;
191 }
192
193 if (udp->udp_csum) {
194 uint32_t csum;
195 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
196 csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
197 } else {
198 csum = packet_csum_pseudoheader(dp_packet_l3(packet));
199 }
200
201 csum = csum_continue(csum, udp, dp_packet_size(packet) -
202 ((const unsigned char *)udp -
203 (const unsigned char *)dp_packet_l2(packet)));
204 if (csum_finish(csum)) {
205 return NULL;
206 }
207 tnl->flags |= FLOW_TNL_F_CSUM;
208 }
209
210 tnl->tp_src = udp->udp_src;
211 tnl->tp_dst = udp->udp_dst;
212
213 return udp + 1;
214 }
215
216
217 void
218 netdev_tnl_push_udp_header(struct dp_packet *packet,
219 const struct ovs_action_push_tnl *data)
220 {
221 struct udp_header *udp;
222 int ip_tot_size;
223
224 udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
225
226 /* set udp src port */
227 udp->udp_src = netdev_tnl_get_src_port(packet);
228 udp->udp_len = htons(ip_tot_size);
229
230 if (udp->udp_csum) {
231 uint32_t csum;
232 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
233 csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(dp_packet_data(packet)));
234 } else {
235 csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet)));
236 }
237
238 csum = csum_continue(csum, udp, ip_tot_size);
239 udp->udp_csum = csum_finish(csum);
240
241 if (!udp->udp_csum) {
242 udp->udp_csum = htons(0xffff);
243 }
244 }
245 }
246
247 static void *
248 udp_build_header(struct netdev_tunnel_config *tnl_cfg,
249 const struct flow *tnl_flow,
250 struct ovs_action_push_tnl *data,
251 unsigned int *hlen)
252 {
253 struct ip_header *ip;
254 struct ovs_16aligned_ip6_hdr *ip6;
255 struct udp_header *udp;
256 bool is_ipv6;
257
258 *hlen = sizeof(struct eth_header);
259
260 is_ipv6 = netdev_tnl_is_header_ipv6(data->header);
261
262 if (is_ipv6) {
263 ip6 = netdev_tnl_ipv6_hdr(data->header);
264 ip6->ip6_nxt = IPPROTO_UDP;
265 udp = (struct udp_header *) (ip6 + 1);
266 *hlen += IPV6_HEADER_LEN;
267 } else {
268 ip = netdev_tnl_ip_hdr(data->header);
269 ip->ip_proto = IPPROTO_UDP;
270 udp = (struct udp_header *) (ip + 1);
271 *hlen += IP_HEADER_LEN;
272 }
273
274 udp->udp_dst = tnl_cfg->dst_port;
275
276 if (is_ipv6 || tnl_flow->tunnel.flags & FLOW_TNL_F_CSUM) {
277 /* Write a value in now to mark that we should compute the checksum
278 * later. 0xffff is handy because it is transparent to the
279 * calculation. */
280 udp->udp_csum = htons(0xffff);
281 }
282
283 return udp + 1;
284 }
285
286 static int
287 gre_header_len(ovs_be16 flags)
288 {
289 int hlen = 4;
290
291 if (flags & htons(GRE_CSUM)) {
292 hlen += 4;
293 }
294 if (flags & htons(GRE_KEY)) {
295 hlen += 4;
296 }
297 if (flags & htons(GRE_SEQ)) {
298 hlen += 4;
299 }
300 return hlen;
301 }
302
303 static int
304 parse_gre_header(struct dp_packet *packet,
305 struct flow_tnl *tnl)
306 {
307 const struct gre_base_hdr *greh;
308 ovs_16aligned_be32 *options;
309 int hlen;
310 unsigned int ulen;
311
312 greh = netdev_tnl_ip_extract_tnl_md(packet, tnl, &ulen);
313 if (!greh) {
314 return -EINVAL;
315 }
316
317 if (greh->flags & ~(htons(GRE_CSUM | GRE_KEY | GRE_SEQ))) {
318 return -EINVAL;
319 }
320
321 if (greh->protocol != htons(ETH_TYPE_TEB)) {
322 return -EINVAL;
323 }
324
325 hlen = ulen + gre_header_len(greh->flags);
326 if (hlen > dp_packet_size(packet)) {
327 return -EINVAL;
328 }
329
330 options = (ovs_16aligned_be32 *)(greh + 1);
331 if (greh->flags & htons(GRE_CSUM)) {
332 ovs_be16 pkt_csum;
333
334 pkt_csum = csum(greh, dp_packet_size(packet) -
335 ((const unsigned char *)greh -
336 (const unsigned char *)dp_packet_l2(packet)));
337 if (pkt_csum) {
338 return -EINVAL;
339 }
340 tnl->flags = FLOW_TNL_F_CSUM;
341 options++;
342 }
343
344 if (greh->flags & htons(GRE_KEY)) {
345 tnl->tun_id = (OVS_FORCE ovs_be64) ((OVS_FORCE uint64_t)(get_16aligned_be32(options)) << 32);
346 tnl->flags |= FLOW_TNL_F_KEY;
347 options++;
348 }
349
350 if (greh->flags & htons(GRE_SEQ)) {
351 options++;
352 }
353
354 return hlen;
355 }
356
357 struct dp_packet *
358 netdev_gre_pop_header(struct dp_packet *packet)
359 {
360 struct pkt_metadata *md = &packet->md;
361 struct flow_tnl *tnl = &md->tunnel;
362 int hlen = sizeof(struct eth_header) + 4;
363
364 hlen += netdev_tnl_is_header_ipv6(dp_packet_data(packet)) ?
365 IPV6_HEADER_LEN : IP_HEADER_LEN;
366
367 pkt_metadata_init_tnl(md);
368 if (hlen > dp_packet_size(packet)) {
369 goto err;
370 }
371
372 hlen = parse_gre_header(packet, tnl);
373 if (hlen < 0) {
374 goto err;
375 }
376
377 dp_packet_reset_packet(packet, hlen);
378
379 return packet;
380 err:
381 dp_packet_delete(packet);
382 return NULL;
383 }
384
385 void
386 netdev_gre_push_header(struct dp_packet *packet,
387 const struct ovs_action_push_tnl *data)
388 {
389 struct gre_base_hdr *greh;
390 int ip_tot_size;
391
392 greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
393
394 if (greh->flags & htons(GRE_CSUM)) {
395 ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
396 *csum_opt = csum(greh, ip_tot_size);
397 }
398 }
399
400 int
401 netdev_gre_build_header(const struct netdev *netdev,
402 struct ovs_action_push_tnl *data,
403 const struct flow *tnl_flow)
404 {
405 struct netdev_vport *dev = netdev_vport_cast(netdev);
406 struct netdev_tunnel_config *tnl_cfg;
407 struct ip_header *ip;
408 struct ovs_16aligned_ip6_hdr *ip6;
409 struct gre_base_hdr *greh;
410 ovs_16aligned_be32 *options;
411 int hlen;
412 bool is_ipv6;
413
414 is_ipv6 = netdev_tnl_is_header_ipv6(data->header);
415
416 /* XXX: RCUfy tnl_cfg. */
417 ovs_mutex_lock(&dev->mutex);
418 tnl_cfg = &dev->tnl_cfg;
419
420 if (is_ipv6) {
421 ip6 = netdev_tnl_ipv6_hdr(data->header);
422 ip6->ip6_nxt = IPPROTO_GRE;
423 greh = (struct gre_base_hdr *) (ip6 + 1);
424 } else {
425 ip = netdev_tnl_ip_hdr(data->header);
426 ip->ip_proto = IPPROTO_GRE;
427 greh = (struct gre_base_hdr *) (ip + 1);
428 }
429
430 greh->protocol = htons(ETH_TYPE_TEB);
431 greh->flags = 0;
432
433 options = (ovs_16aligned_be32 *) (greh + 1);
434 if (tnl_flow->tunnel.flags & FLOW_TNL_F_CSUM) {
435 greh->flags |= htons(GRE_CSUM);
436 put_16aligned_be32(options, 0);
437 options++;
438 }
439
440 if (tnl_cfg->out_key_present) {
441 greh->flags |= htons(GRE_KEY);
442 put_16aligned_be32(options, (OVS_FORCE ovs_be32)
443 ((OVS_FORCE uint64_t) tnl_flow->tunnel.tun_id >> 32));
444 options++;
445 }
446
447 ovs_mutex_unlock(&dev->mutex);
448
449 hlen = (uint8_t *) options - (uint8_t *) greh;
450
451 data->header_len = sizeof(struct eth_header) + hlen +
452 (is_ipv6 ? IPV6_HEADER_LEN : IP_HEADER_LEN);
453 data->tnl_type = OVS_VPORT_TYPE_GRE;
454 return 0;
455 }
456
457 struct dp_packet *
458 netdev_vxlan_pop_header(struct dp_packet *packet)
459 {
460 struct pkt_metadata *md = &packet->md;
461 struct flow_tnl *tnl = &md->tunnel;
462 struct vxlanhdr *vxh;
463 unsigned int hlen;
464
465 pkt_metadata_init_tnl(md);
466 if (VXLAN_HLEN > dp_packet_l4_size(packet)) {
467 goto err;
468 }
469
470 vxh = udp_extract_tnl_md(packet, tnl, &hlen);
471 if (!vxh) {
472 goto err;
473 }
474
475 if (get_16aligned_be32(&vxh->vx_flags) != htonl(VXLAN_FLAGS) ||
476 (get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
477 VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
478 ntohl(get_16aligned_be32(&vxh->vx_flags)),
479 ntohl(get_16aligned_be32(&vxh->vx_vni)));
480 goto err;
481 }
482 tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
483 tnl->flags |= FLOW_TNL_F_KEY;
484
485 dp_packet_reset_packet(packet, hlen + VXLAN_HLEN);
486
487 return packet;
488 err:
489 dp_packet_delete(packet);
490 return NULL;
491 }
492
493 int
494 netdev_vxlan_build_header(const struct netdev *netdev,
495 struct ovs_action_push_tnl *data,
496 const struct flow *tnl_flow)
497 {
498 struct netdev_vport *dev = netdev_vport_cast(netdev);
499 struct netdev_tunnel_config *tnl_cfg;
500 struct vxlanhdr *vxh;
501 unsigned int hlen;
502
503 /* XXX: RCUfy tnl_cfg. */
504 ovs_mutex_lock(&dev->mutex);
505 tnl_cfg = &dev->tnl_cfg;
506
507 vxh = udp_build_header(tnl_cfg, tnl_flow, data, &hlen);
508
509 put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
510 put_16aligned_be32(&vxh->vx_vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
511
512 ovs_mutex_unlock(&dev->mutex);
513 data->header_len = hlen + VXLAN_HLEN;
514 data->tnl_type = OVS_VPORT_TYPE_VXLAN;
515 return 0;
516 }
517
518 struct dp_packet *
519 netdev_geneve_pop_header(struct dp_packet *packet)
520 {
521 struct pkt_metadata *md = &packet->md;
522 struct flow_tnl *tnl = &md->tunnel;
523 struct genevehdr *gnh;
524 unsigned int hlen, opts_len, ulen;
525
526 pkt_metadata_init_tnl(md);
527 if (GENEVE_BASE_HLEN > dp_packet_l4_size(packet)) {
528 VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%"PRIuSIZE"\n",
529 (unsigned int)GENEVE_BASE_HLEN, dp_packet_l4_size(packet));
530 goto err;
531 }
532
533 gnh = udp_extract_tnl_md(packet, tnl, &ulen);
534 if (!gnh) {
535 goto err;
536 }
537
538 opts_len = gnh->opt_len * 4;
539 hlen = ulen + GENEVE_BASE_HLEN + opts_len;
540 if (hlen > dp_packet_size(packet)) {
541 VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n",
542 hlen, dp_packet_size(packet));
543 goto err;
544 }
545
546 if (gnh->ver != 0) {
547 VLOG_WARN_RL(&err_rl, "unknown geneve version: %"PRIu8"\n", gnh->ver);
548 goto err;
549 }
550
551 if (gnh->proto_type != htons(ETH_TYPE_TEB)) {
552 VLOG_WARN_RL(&err_rl, "unknown geneve encapsulated protocol: %#x\n",
553 ntohs(gnh->proto_type));
554 goto err;
555 }
556
557 tnl->flags |= gnh->oam ? FLOW_TNL_F_OAM : 0;
558 tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
559 tnl->flags |= FLOW_TNL_F_KEY;
560
561 memcpy(tnl->metadata.opts.gnv, gnh->options, opts_len);
562 tnl->metadata.present.len = opts_len;
563 tnl->flags |= FLOW_TNL_F_UDPIF;
564
565 dp_packet_reset_packet(packet, hlen);
566
567 return packet;
568 err:
569 dp_packet_delete(packet);
570 return NULL;
571 }
572
573 int
574 netdev_geneve_build_header(const struct netdev *netdev,
575 struct ovs_action_push_tnl *data,
576 const struct flow *tnl_flow)
577 {
578 struct netdev_vport *dev = netdev_vport_cast(netdev);
579 struct netdev_tunnel_config *tnl_cfg;
580 struct genevehdr *gnh;
581 int opt_len;
582 bool crit_opt;
583 unsigned int hlen;
584
585 /* XXX: RCUfy tnl_cfg. */
586 ovs_mutex_lock(&dev->mutex);
587 tnl_cfg = &dev->tnl_cfg;
588
589 gnh = udp_build_header(tnl_cfg, tnl_flow, data, &hlen);
590
591 put_16aligned_be32(&gnh->vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
592
593 ovs_mutex_unlock(&dev->mutex);
594
595 opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel,
596 gnh->options, &crit_opt);
597
598 gnh->opt_len = opt_len / 4;
599 gnh->oam = !!(tnl_flow->tunnel.flags & FLOW_TNL_F_OAM);
600 gnh->critical = crit_opt ? 1 : 0;
601 gnh->proto_type = htons(ETH_TYPE_TEB);
602
603 data->header_len = hlen + GENEVE_BASE_HLEN + opt_len;
604 data->tnl_type = OVS_VPORT_TYPE_GENEVE;
605 return 0;
606 }
607
608 \f
609 void
610 netdev_tnl_egress_port_range(struct unixctl_conn *conn, int argc,
611 const char *argv[], void *aux OVS_UNUSED)
612 {
613 int val1, val2;
614
615 if (argc < 3) {
616 struct ds ds = DS_EMPTY_INITIALIZER;
617
618 ds_put_format(&ds, "Tunnel UDP source port range: %"PRIu16"-%"PRIu16"\n",
619 tnl_udp_port_min, tnl_udp_port_max);
620
621 unixctl_command_reply(conn, ds_cstr(&ds));
622 ds_destroy(&ds);
623 return;
624 }
625
626 if (argc != 3) {
627 return;
628 }
629
630 val1 = atoi(argv[1]);
631 if (val1 <= 0 || val1 > UINT16_MAX) {
632 unixctl_command_reply(conn, "Invalid min.");
633 return;
634 }
635 val2 = atoi(argv[2]);
636 if (val2 <= 0 || val2 > UINT16_MAX) {
637 unixctl_command_reply(conn, "Invalid max.");
638 return;
639 }
640
641 if (val1 > val2) {
642 tnl_udp_port_min = val2;
643 tnl_udp_port_max = val1;
644 } else {
645 tnl_udp_port_min = val1;
646 tnl_udp_port_max = val2;
647 }
648 seq_change(tnl_conf_seq);
649
650 unixctl_command_reply(conn, "OK");
651 }