]> git.proxmox.com Git - mirror_ovs.git/blob - lib/netdev-native-tnl.c
netdev: Return number of packet from netdev_pop_header()
[mirror_ovs.git] / lib / netdev-native-tnl.c
1 /*
2 * Copyright (c) 2016 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <sys/socket.h>
22 #include <net/if.h>
23 #include <netinet/ip6.h>
24 #include <sys/ioctl.h>
25
26 #include <errno.h>
27 #include <stdlib.h>
28 #include <sys/time.h>
29
30 #include "openvswitch/list.h"
31 #include "byte-order.h"
32 #include "csum.h"
33 #include "daemon.h"
34 #include "dirs.h"
35 #include "dpif.h"
36 #include "dp-packet.h"
37 #include "entropy.h"
38 #include "flow.h"
39 #include "hash.h"
40 #include "hmap.h"
41 #include "id-pool.h"
42 #include "netdev-provider.h"
43 #include "netdev-vport.h"
44 #include "netdev-vport-private.h"
45 #include "odp-netlink.h"
46 #include "dp-packet.h"
47 #include "ovs-router.h"
48 #include "packets.h"
49 #include "poll-loop.h"
50 #include "random.h"
51 #include "route-table.h"
52 #include "shash.h"
53 #include "socket-util.h"
54 #include "timeval.h"
55 #include "netdev-native-tnl.h"
56 #include "openvswitch/vlog.h"
57 #include "unaligned.h"
58 #include "unixctl.h"
59 #include "util.h"
60
VLOG_DEFINE_THIS_MODULE(native_tnl);
/* Rate limiter shared by the VLOG_WARN_RL() calls in this file. */
static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);

/* Combined size of the UDP header and the VXLAN header. */
#define VXLAN_HLEN   (sizeof(struct udp_header) +         \
                      sizeof(struct vxlanhdr))

/* Combined size of the UDP header and the fixed part of the Geneve header.
 * Geneve options are accounted for separately by the code that uses this. */
#define GENEVE_BASE_HLEN   (sizeof(struct udp_header) +         \
                            sizeof(struct genevehdr))

/* Bounds of the tunnel UDP source port range.  Reported and updated by
 * netdev_tnl_egress_port_range(). */
uint16_t tnl_udp_port_min = 32768;
uint16_t tnl_udp_port_max = 61000;
72
73 void *
74 netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
75 unsigned int *hlen)
76 {
77 void *nh;
78 struct ip_header *ip;
79 struct ovs_16aligned_ip6_hdr *ip6;
80 void *l4;
81 int l3_size;
82
83 nh = dp_packet_l3(packet);
84 ip = nh;
85 ip6 = nh;
86 l4 = dp_packet_l4(packet);
87
88 if (!nh || !l4) {
89 return NULL;
90 }
91
92 *hlen = sizeof(struct eth_header);
93
94 l3_size = dp_packet_size(packet) -
95 ((char *)nh - (char *)dp_packet_data(packet));
96
97 if (IP_VER(ip->ip_ihl_ver) == 4) {
98
99 ovs_be32 ip_src, ip_dst;
100
101 if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
102 VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
103 return NULL;
104 }
105
106 if (ntohs(ip->ip_tot_len) > l3_size) {
107 VLOG_WARN_RL(&err_rl, "ip packet is truncated (IP length %d, actual %d)",
108 ntohs(ip->ip_tot_len), l3_size);
109 return NULL;
110 }
111 if (IP_IHL(ip->ip_ihl_ver) * 4 > sizeof(struct ip_header)) {
112 VLOG_WARN_RL(&err_rl, "ip options not supported on tunnel packets "
113 "(%d bytes)", IP_IHL(ip->ip_ihl_ver) * 4);
114 return NULL;
115 }
116
117 ip_src = get_16aligned_be32(&ip->ip_src);
118 ip_dst = get_16aligned_be32(&ip->ip_dst);
119
120 tnl->ip_src = ip_src;
121 tnl->ip_dst = ip_dst;
122 tnl->ip_tos = ip->ip_tos;
123 tnl->ip_ttl = ip->ip_ttl;
124
125 *hlen += IP_HEADER_LEN;
126
127 } else if (IP_VER(ip->ip_ihl_ver) == 6) {
128
129 memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6->ip6_src);
130 memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6->ip6_dst);
131 tnl->ip_tos = 0;
132 tnl->ip_ttl = ip6->ip6_hlim;
133
134 *hlen += IPV6_HEADER_LEN;
135
136 } else {
137 VLOG_WARN_RL(&err_rl, "ipv4 packet has invalid version (%d)",
138 IP_VER(ip->ip_ihl_ver));
139 return NULL;
140 }
141
142 return l4;
143 }
144
145 /* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
146 * reallocating the packet if necessary. 'header' should contain an Ethernet
147 * header, followed by an IPv4 header (without options), and an L4 header.
148 *
149 * This function sets the IP header's ip_tot_len field (which should be zeroed
150 * as part of 'header') and puts its value into '*ip_tot_size' as well. Also
151 * updates IP header checksum.
152 *
153 * Return pointer to the L4 header added to 'packet'. */
154 void *
155 netdev_tnl_push_ip_header(struct dp_packet *packet,
156 const void *header, int size, int *ip_tot_size)
157 {
158 struct eth_header *eth;
159 struct ip_header *ip;
160 struct ovs_16aligned_ip6_hdr *ip6;
161
162 eth = dp_packet_push_uninit(packet, size);
163 *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);
164
165 memcpy(eth, header, size);
166
167 if (netdev_tnl_is_header_ipv6(header)) {
168 ip6 = netdev_tnl_ipv6_hdr(eth);
169 *ip_tot_size -= IPV6_HEADER_LEN;
170 ip6->ip6_plen = htons(*ip_tot_size);
171 return ip6 + 1;
172 } else {
173 ip = netdev_tnl_ip_hdr(eth);
174 ip->ip_tot_len = htons(*ip_tot_size);
175 ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
176 *ip_tot_size -= IP_HEADER_LEN;
177 return ip + 1;
178 }
179 }
180
181 static void *
182 udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
183 unsigned int *hlen)
184 {
185 struct udp_header *udp;
186
187 udp = netdev_tnl_ip_extract_tnl_md(packet, tnl, hlen);
188 if (!udp) {
189 return NULL;
190 }
191
192 if (udp->udp_csum) {
193 uint32_t csum;
194 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
195 csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
196 } else {
197 csum = packet_csum_pseudoheader(dp_packet_l3(packet));
198 }
199
200 csum = csum_continue(csum, udp, dp_packet_size(packet) -
201 ((const unsigned char *)udp -
202 (const unsigned char *)dp_packet_l2(packet)));
203 if (csum_finish(csum)) {
204 return NULL;
205 }
206 tnl->flags |= FLOW_TNL_F_CSUM;
207 }
208
209 tnl->tp_src = udp->udp_src;
210 tnl->tp_dst = udp->udp_dst;
211
212 return udp + 1;
213 }
214
215
216 void
217 netdev_tnl_push_udp_header(struct dp_packet *packet,
218 const struct ovs_action_push_tnl *data)
219 {
220 struct udp_header *udp;
221 int ip_tot_size;
222
223 udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
224
225 /* set udp src port */
226 udp->udp_src = netdev_tnl_get_src_port(packet);
227 udp->udp_len = htons(ip_tot_size);
228
229 if (udp->udp_csum) {
230 uint32_t csum;
231 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
232 csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(dp_packet_data(packet)));
233 } else {
234 csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet)));
235 }
236
237 csum = csum_continue(csum, udp, ip_tot_size);
238 udp->udp_csum = csum_finish(csum);
239
240 if (!udp->udp_csum) {
241 udp->udp_csum = htons(0xffff);
242 }
243 }
244 }
245
246 static void *
247 udp_build_header(struct netdev_tunnel_config *tnl_cfg,
248 const struct flow *tnl_flow,
249 struct ovs_action_push_tnl *data,
250 unsigned int *hlen)
251 {
252 struct ip_header *ip;
253 struct ovs_16aligned_ip6_hdr *ip6;
254 struct udp_header *udp;
255 bool is_ipv6;
256
257 *hlen = sizeof(struct eth_header);
258
259 is_ipv6 = netdev_tnl_is_header_ipv6(data->header);
260
261 if (is_ipv6) {
262 ip6 = netdev_tnl_ipv6_hdr(data->header);
263 ip6->ip6_nxt = IPPROTO_UDP;
264 udp = (struct udp_header *) (ip6 + 1);
265 *hlen += IPV6_HEADER_LEN;
266 } else {
267 ip = netdev_tnl_ip_hdr(data->header);
268 ip->ip_proto = IPPROTO_UDP;
269 udp = (struct udp_header *) (ip + 1);
270 *hlen += IP_HEADER_LEN;
271 }
272
273 udp->udp_dst = tnl_cfg->dst_port;
274
275 if (is_ipv6 || tnl_flow->tunnel.flags & FLOW_TNL_F_CSUM) {
276 /* Write a value in now to mark that we should compute the checksum
277 * later. 0xffff is handy because it is transparent to the
278 * calculation. */
279 udp->udp_csum = htons(0xffff);
280 }
281
282 return udp + 1;
283 }
284
285 static int
286 gre_header_len(ovs_be16 flags)
287 {
288 int hlen = 4;
289
290 if (flags & htons(GRE_CSUM)) {
291 hlen += 4;
292 }
293 if (flags & htons(GRE_KEY)) {
294 hlen += 4;
295 }
296 if (flags & htons(GRE_SEQ)) {
297 hlen += 4;
298 }
299 return hlen;
300 }
301
302 static int
303 parse_gre_header(struct dp_packet *packet,
304 struct flow_tnl *tnl)
305 {
306 const struct gre_base_hdr *greh;
307 ovs_16aligned_be32 *options;
308 int hlen;
309 unsigned int ulen;
310
311 greh = netdev_tnl_ip_extract_tnl_md(packet, tnl, &ulen);
312 if (!greh) {
313 return -EINVAL;
314 }
315
316 if (greh->flags & ~(htons(GRE_CSUM | GRE_KEY | GRE_SEQ))) {
317 return -EINVAL;
318 }
319
320 if (greh->protocol != htons(ETH_TYPE_TEB)) {
321 return -EINVAL;
322 }
323
324 hlen = ulen + gre_header_len(greh->flags);
325 if (hlen > dp_packet_size(packet)) {
326 return -EINVAL;
327 }
328
329 options = (ovs_16aligned_be32 *)(greh + 1);
330 if (greh->flags & htons(GRE_CSUM)) {
331 ovs_be16 pkt_csum;
332
333 pkt_csum = csum(greh, dp_packet_size(packet) -
334 ((const unsigned char *)greh -
335 (const unsigned char *)dp_packet_l2(packet)));
336 if (pkt_csum) {
337 return -EINVAL;
338 }
339 tnl->flags = FLOW_TNL_F_CSUM;
340 options++;
341 }
342
343 if (greh->flags & htons(GRE_KEY)) {
344 tnl->tun_id = (OVS_FORCE ovs_be64) ((OVS_FORCE uint64_t)(get_16aligned_be32(options)) << 32);
345 tnl->flags |= FLOW_TNL_F_KEY;
346 options++;
347 }
348
349 if (greh->flags & htons(GRE_SEQ)) {
350 options++;
351 }
352
353 return hlen;
354 }
355
356 struct dp_packet *
357 netdev_gre_pop_header(struct dp_packet *packet)
358 {
359 struct pkt_metadata *md = &packet->md;
360 struct flow_tnl *tnl = &md->tunnel;
361 int hlen = sizeof(struct eth_header) + 4;
362
363 hlen += netdev_tnl_is_header_ipv6(dp_packet_data(packet)) ?
364 IPV6_HEADER_LEN : IP_HEADER_LEN;
365
366 pkt_metadata_init_tnl(md);
367 if (hlen > dp_packet_size(packet)) {
368 goto err;
369 }
370
371 hlen = parse_gre_header(packet, tnl);
372 if (hlen < 0) {
373 goto err;
374 }
375
376 dp_packet_reset_packet(packet, hlen);
377
378 return packet;
379 err:
380 dp_packet_delete(packet);
381 return NULL;
382 }
383
384 void
385 netdev_gre_push_header(struct dp_packet *packet,
386 const struct ovs_action_push_tnl *data)
387 {
388 struct gre_base_hdr *greh;
389 int ip_tot_size;
390
391 greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
392
393 if (greh->flags & htons(GRE_CSUM)) {
394 ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
395 *csum_opt = csum(greh, ip_tot_size);
396 }
397 }
398
399 int
400 netdev_gre_build_header(const struct netdev *netdev,
401 struct ovs_action_push_tnl *data,
402 const struct flow *tnl_flow)
403 {
404 struct netdev_vport *dev = netdev_vport_cast(netdev);
405 struct netdev_tunnel_config *tnl_cfg;
406 struct ip_header *ip;
407 struct ovs_16aligned_ip6_hdr *ip6;
408 struct gre_base_hdr *greh;
409 ovs_16aligned_be32 *options;
410 int hlen;
411 bool is_ipv6;
412
413 is_ipv6 = netdev_tnl_is_header_ipv6(data->header);
414
415 /* XXX: RCUfy tnl_cfg. */
416 ovs_mutex_lock(&dev->mutex);
417 tnl_cfg = &dev->tnl_cfg;
418
419 if (is_ipv6) {
420 ip6 = netdev_tnl_ipv6_hdr(data->header);
421 ip6->ip6_nxt = IPPROTO_GRE;
422 greh = (struct gre_base_hdr *) (ip6 + 1);
423 } else {
424 ip = netdev_tnl_ip_hdr(data->header);
425 ip->ip_proto = IPPROTO_GRE;
426 greh = (struct gre_base_hdr *) (ip + 1);
427 }
428
429 greh->protocol = htons(ETH_TYPE_TEB);
430 greh->flags = 0;
431
432 options = (ovs_16aligned_be32 *) (greh + 1);
433 if (tnl_flow->tunnel.flags & FLOW_TNL_F_CSUM) {
434 greh->flags |= htons(GRE_CSUM);
435 put_16aligned_be32(options, 0);
436 options++;
437 }
438
439 if (tnl_cfg->out_key_present) {
440 greh->flags |= htons(GRE_KEY);
441 put_16aligned_be32(options, (OVS_FORCE ovs_be32)
442 ((OVS_FORCE uint64_t) tnl_flow->tunnel.tun_id >> 32));
443 options++;
444 }
445
446 ovs_mutex_unlock(&dev->mutex);
447
448 hlen = (uint8_t *) options - (uint8_t *) greh;
449
450 data->header_len = sizeof(struct eth_header) + hlen +
451 (is_ipv6 ? IPV6_HEADER_LEN : IP_HEADER_LEN);
452 data->tnl_type = OVS_VPORT_TYPE_GRE;
453 return 0;
454 }
455
456 struct dp_packet *
457 netdev_vxlan_pop_header(struct dp_packet *packet)
458 {
459 struct pkt_metadata *md = &packet->md;
460 struct flow_tnl *tnl = &md->tunnel;
461 struct vxlanhdr *vxh;
462 unsigned int hlen;
463
464 pkt_metadata_init_tnl(md);
465 if (VXLAN_HLEN > dp_packet_l4_size(packet)) {
466 goto err;
467 }
468
469 vxh = udp_extract_tnl_md(packet, tnl, &hlen);
470 if (!vxh) {
471 goto err;
472 }
473
474 if (get_16aligned_be32(&vxh->vx_flags) != htonl(VXLAN_FLAGS) ||
475 (get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
476 VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
477 ntohl(get_16aligned_be32(&vxh->vx_flags)),
478 ntohl(get_16aligned_be32(&vxh->vx_vni)));
479 goto err;
480 }
481 tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
482 tnl->flags |= FLOW_TNL_F_KEY;
483
484 dp_packet_reset_packet(packet, hlen + VXLAN_HLEN);
485
486 return packet;
487 err:
488 dp_packet_delete(packet);
489 return NULL;
490 }
491
492 int
493 netdev_vxlan_build_header(const struct netdev *netdev,
494 struct ovs_action_push_tnl *data,
495 const struct flow *tnl_flow)
496 {
497 struct netdev_vport *dev = netdev_vport_cast(netdev);
498 struct netdev_tunnel_config *tnl_cfg;
499 struct vxlanhdr *vxh;
500 unsigned int hlen;
501
502 /* XXX: RCUfy tnl_cfg. */
503 ovs_mutex_lock(&dev->mutex);
504 tnl_cfg = &dev->tnl_cfg;
505
506 vxh = udp_build_header(tnl_cfg, tnl_flow, data, &hlen);
507
508 put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
509 put_16aligned_be32(&vxh->vx_vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
510
511 ovs_mutex_unlock(&dev->mutex);
512 data->header_len = hlen + VXLAN_HLEN;
513 data->tnl_type = OVS_VPORT_TYPE_VXLAN;
514 return 0;
515 }
516
517 struct dp_packet *
518 netdev_geneve_pop_header(struct dp_packet *packet)
519 {
520 struct pkt_metadata *md = &packet->md;
521 struct flow_tnl *tnl = &md->tunnel;
522 struct genevehdr *gnh;
523 unsigned int hlen, opts_len, ulen;
524
525 pkt_metadata_init_tnl(md);
526 if (GENEVE_BASE_HLEN > dp_packet_l4_size(packet)) {
527 VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%"PRIuSIZE"\n",
528 (unsigned int)GENEVE_BASE_HLEN, dp_packet_l4_size(packet));
529 goto err;
530 }
531
532 gnh = udp_extract_tnl_md(packet, tnl, &ulen);
533 if (!gnh) {
534 goto err;
535 }
536
537 opts_len = gnh->opt_len * 4;
538 hlen = ulen + GENEVE_BASE_HLEN + opts_len;
539 if (hlen > dp_packet_size(packet)) {
540 VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n",
541 hlen, dp_packet_size(packet));
542 goto err;
543 }
544
545 if (gnh->ver != 0) {
546 VLOG_WARN_RL(&err_rl, "unknown geneve version: %"PRIu8"\n", gnh->ver);
547 goto err;
548 }
549
550 if (gnh->proto_type != htons(ETH_TYPE_TEB)) {
551 VLOG_WARN_RL(&err_rl, "unknown geneve encapsulated protocol: %#x\n",
552 ntohs(gnh->proto_type));
553 goto err;
554 }
555
556 tnl->flags |= gnh->oam ? FLOW_TNL_F_OAM : 0;
557 tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
558 tnl->flags |= FLOW_TNL_F_KEY;
559
560 memcpy(tnl->metadata.opts.gnv, gnh->options, opts_len);
561 tnl->metadata.present.len = opts_len;
562 tnl->flags |= FLOW_TNL_F_UDPIF;
563
564 dp_packet_reset_packet(packet, hlen);
565
566 return packet;
567 err:
568 dp_packet_delete(packet);
569 return NULL;
570 }
571
572 int
573 netdev_geneve_build_header(const struct netdev *netdev,
574 struct ovs_action_push_tnl *data,
575 const struct flow *tnl_flow)
576 {
577 struct netdev_vport *dev = netdev_vport_cast(netdev);
578 struct netdev_tunnel_config *tnl_cfg;
579 struct genevehdr *gnh;
580 int opt_len;
581 bool crit_opt;
582 unsigned int hlen;
583
584 /* XXX: RCUfy tnl_cfg. */
585 ovs_mutex_lock(&dev->mutex);
586 tnl_cfg = &dev->tnl_cfg;
587
588 gnh = udp_build_header(tnl_cfg, tnl_flow, data, &hlen);
589
590 put_16aligned_be32(&gnh->vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
591
592 ovs_mutex_unlock(&dev->mutex);
593
594 opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel,
595 gnh->options, &crit_opt);
596
597 gnh->opt_len = opt_len / 4;
598 gnh->oam = !!(tnl_flow->tunnel.flags & FLOW_TNL_F_OAM);
599 gnh->critical = crit_opt ? 1 : 0;
600 gnh->proto_type = htons(ETH_TYPE_TEB);
601
602 data->header_len = hlen + GENEVE_BASE_HLEN + opt_len;
603 data->tnl_type = OVS_VPORT_TYPE_GENEVE;
604 return 0;
605 }
606
607 \f
608 void
609 netdev_tnl_egress_port_range(struct unixctl_conn *conn, int argc,
610 const char *argv[], void *aux OVS_UNUSED)
611 {
612 int val1, val2;
613
614 if (argc < 3) {
615 struct ds ds = DS_EMPTY_INITIALIZER;
616
617 ds_put_format(&ds, "Tunnel UDP source port range: %"PRIu16"-%"PRIu16"\n",
618 tnl_udp_port_min, tnl_udp_port_max);
619
620 unixctl_command_reply(conn, ds_cstr(&ds));
621 ds_destroy(&ds);
622 return;
623 }
624
625 if (argc != 3) {
626 return;
627 }
628
629 val1 = atoi(argv[1]);
630 if (val1 <= 0 || val1 > UINT16_MAX) {
631 unixctl_command_reply(conn, "Invalid min.");
632 return;
633 }
634 val2 = atoi(argv[2]);
635 if (val2 <= 0 || val2 > UINT16_MAX) {
636 unixctl_command_reply(conn, "Invalid max.");
637 return;
638 }
639
640 if (val1 > val2) {
641 tnl_udp_port_min = val2;
642 tnl_udp_port_max = val1;
643 } else {
644 tnl_udp_port_min = val1;
645 tnl_udp_port_max = val2;
646 }
647 seq_change(tnl_conf_seq);
648
649 unixctl_command_reply(conn, "OK");
650 }