]>
git.proxmox.com Git - mirror_ovs.git/blob - lib/netdev-native-tnl.c
2 * Copyright (c) 2016 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
21 #include <sys/socket.h>
23 #include <netinet/ip6.h>
24 #include <sys/ioctl.h>
30 #include "openvswitch/list.h"
31 #include "byte-order.h"
36 #include "dp-packet.h"
42 #include "netdev-provider.h"
43 #include "netdev-vport.h"
44 #include "netdev-vport-private.h"
45 #include "odp-netlink.h"
46 #include "dp-packet.h"
47 #include "ovs-router.h"
49 #include "poll-loop.h"
51 #include "route-table.h"
53 #include "socket-util.h"
55 #include "netdev-native-tnl.h"
56 #include "openvswitch/vlog.h"
57 #include "unaligned.h"
/* NOTE(review): this text looks like a web code-viewer extract: every source
 * line carries its original line number as a prefix and some lines are absent.
 * The comments added here describe only what is visible. */
61 VLOG_DEFINE_THIS_MODULE(native_tnl
);
/* Rate limiter handed to the VLOG_WARN_RL() calls in this file. */
62 static struct vlog_rate_limit err_rl
= VLOG_RATE_LIMIT_INIT(60, 5);
/* Size of a UDP header followed by a VXLAN header. */
64 #define VXLAN_HLEN (sizeof(struct udp_header) + \
65 sizeof(struct vxlanhdr))
/* Size of a UDP header followed by struct genevehdr.
 * netdev_geneve_pop_header() adds the option length on top of this. */
67 #define GENEVE_BASE_HLEN (sizeof(struct udp_header) + \
68 sizeof(struct genevehdr))
/* Lower and upper bound of the tunnel UDP source port range.  Both are
 * reported and updated by netdev_tnl_egress_port_range(). */
70 uint16_t tnl_udp_port_min
= 32768;
71 uint16_t tnl_udp_port_max
= 61000;
/* Extracts outer IP tunnel metadata from packet into tnl and accumulates the
 * outer header length in *hlen.
 *
 * Visible behavior: *hlen starts at the Ethernet header size and grows by
 * IP_HEADER_LEN for IPv4 or IPV6_HEADER_LEN for IPv6.  For IPv4 the header
 * checksum, the total length and the absence of IP options are checked, each
 * failure producing a rate-limited warning, and then source, destination,
 * TOS and TTL are copied into tnl.  For IPv6 both addresses and the hop limit
 * are copied.  Any other IP version produces a warning.
 *
 * NOTE(review): the return type, the third parameter, several local
 * declarations, the assignments of ip and ip6, and every return statement
 * are absent from this extract.  Callers in this file use the result as a
 * pointer to the L4 header (UDP or GRE); presumably failures return NULL --
 * confirm against the complete file. */
74 netdev_tnl_ip_extract_tnl_md(struct dp_packet
*packet
, struct flow_tnl
*tnl
,
79 struct ovs_16aligned_ip6_hdr
*ip6
;
/* nh is the start of the outer L3 header, l4 the start of the L4 header. */
83 nh
= dp_packet_l3(packet
);
86 l4
= dp_packet_l4(packet
);
92 *hlen
= sizeof(struct eth_header
);
/* Bytes available from the start of the L3 header to the end of packet. */
94 l3_size
= dp_packet_size(packet
) -
95 ((char *)nh
- (char *)dp_packet_data(packet
));
97 if (IP_VER(ip
->ip_ihl_ver
) == 4) {
99 ovs_be32 ip_src
, ip_dst
;
/* A nonzero sum over the IPv4 header is reported as an invalid checksum. */
101 if (csum(ip
, IP_IHL(ip
->ip_ihl_ver
) * 4)) {
102 VLOG_WARN_RL(&err_rl
, "ip packet has invalid checksum");
/* The length claimed by the IP header must fit in the received bytes. */
106 if (ntohs(ip
->ip_tot_len
) > l3_size
) {
107 VLOG_WARN_RL(&err_rl
, "ip packet is truncated (IP length %d, actual %d)",
108 ntohs(ip
->ip_tot_len
), l3_size
);
/* Only IPv4 headers without options are accepted. */
111 if (IP_IHL(ip
->ip_ihl_ver
) * 4 > sizeof(struct ip_header
)) {
112 VLOG_WARN_RL(&err_rl
, "ip options not supported on tunnel packets "
113 "(%d bytes)", IP_IHL(ip
->ip_ihl_ver
) * 4);
117 ip_src
= get_16aligned_be32(&ip
->ip_src
);
118 ip_dst
= get_16aligned_be32(&ip
->ip_dst
);
120 tnl
->ip_src
= ip_src
;
121 tnl
->ip_dst
= ip_dst
;
122 tnl
->ip_tos
= ip
->ip_tos
;
123 tnl
->ip_ttl
= ip
->ip_ttl
;
125 *hlen
+= IP_HEADER_LEN
;
127 } else if (IP_VER(ip
->ip_ihl_ver
) == 6) {
/* IPv6: copy both addresses and use the hop limit as the tunnel TTL. */
129 memcpy(tnl
->ipv6_src
.s6_addr
, ip6
->ip6_src
.be16
, sizeof ip6
->ip6_src
);
130 memcpy(tnl
->ipv6_dst
.s6_addr
, ip6
->ip6_dst
.be16
, sizeof ip6
->ip6_dst
);
132 tnl
->ip_ttl
= ip6
->ip6_hlim
;
134 *hlen
+= IPV6_HEADER_LEN
;
/* Neither version 4 nor version 6. */
137 VLOG_WARN_RL(&err_rl
, "ipv4 packet has invalid version (%d)",
138 IP_VER(ip
->ip_ihl_ver
));
/* NOTE(review): the comment below mentions only IPv4, but the visible code
 * also handles an IPv6 header: it stores the payload length (packet size
 * minus the Ethernet and IPv6 headers) in ip6_plen.  In both cases
 * *ip_tot_size ends up as the number of bytes that follow the IP header,
 * because the IP header length is subtracted before returning.
 * The return type, the braces, the else keyword and the return statements
 * are absent from this extract. */
145 /* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
146 * reallocating the packet if necessary. 'header' should contain an Ethernet
147 * header, followed by an IPv4 header (without options), and an L4 header.
149 * This function sets the IP header's ip_tot_len field (which should be zeroed
150 * as part of 'header') and puts its value into '*ip_tot_size' as well. Also
151 * updates IP header checksum.
153 * Return pointer to the L4 header added to 'packet'. */
155 netdev_tnl_push_ip_header(struct dp_packet
*packet
,
156 const void *header
, int size
, int *ip_tot_size
)
158 struct eth_header
*eth
;
159 struct ip_header
*ip
;
160 struct ovs_16aligned_ip6_hdr
*ip6
;
/* Make room for size bytes at the front; the template is copied in below. */
162 eth
= dp_packet_push_uninit(packet
, size
);
/* Everything after the Ethernet header, including the new IP header. */
163 *ip_tot_size
= dp_packet_size(packet
) - sizeof (struct eth_header
);
165 memcpy(eth
, header
, size
);
167 if (netdev_tnl_is_header_ipv6(header
)) {
168 ip6
= netdev_tnl_ipv6_hdr(eth
);
/* IPv6: the stored payload length excludes the IPv6 header itself. */
169 *ip_tot_size
-= IPV6_HEADER_LEN
;
170 ip6
->ip6_plen
= htons(*ip_tot_size
);
/* IPv4: the total length includes the IP header.  The checksum is updated
 * incrementally for the change of ip_tot_len from 0 to its new value. */
173 ip
= netdev_tnl_ip_hdr(eth
);
174 ip
->ip_tot_len
= htons(*ip_tot_size
);
175 ip
->ip_csum
= recalc_csum16(ip
->ip_csum
, 0, ip
->ip_tot_len
);
176 *ip_tot_size
-= IP_HEADER_LEN
;
/* Extracts outer IP and UDP tunnel metadata from packet into tnl.
 *
 * After netdev_tnl_ip_extract_tnl_md() has located the UDP header, the
 * visible code sums the IPv4 or IPv6 pseudo-header plus everything from the
 * UDP header to the end of the packet, sets FLOW_TNL_F_CSUM, and copies the
 * UDP source and destination ports into tnl.
 *
 * NOTE(review): the return type, the third parameter, the declaration of
 * csum, the condition guarding the checksum block, the action taken when
 * csum_finish() returns nonzero and the return statement are absent from
 * this extract.  Callers treat the result as a pointer to the tunnel header
 * that follows UDP (VXLAN or Geneve). */
182 udp_extract_tnl_md(struct dp_packet
*packet
, struct flow_tnl
*tnl
,
185 struct udp_header
*udp
;
187 udp
= netdev_tnl_ip_extract_tnl_md(packet
, tnl
, hlen
);
/* Start the checksum with the pseudo-header of the matching IP version. */
194 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet
))) {
195 csum
= packet_csum_pseudoheader6(dp_packet_l3(packet
));
197 csum
= packet_csum_pseudoheader(dp_packet_l3(packet
));
/* Continue over the UDP header and payload up to the end of the packet. */
200 csum
= csum_continue(csum
, udp
, dp_packet_size(packet
) -
201 ((const unsigned char *)udp
-
202 (const unsigned char *)dp_packet_l2(packet
)));
/* A nonzero final sum is treated as a failure (handling not visible). */
203 if (csum_finish(csum
)) {
206 tnl
->flags
|= FLOW_TNL_F_CSUM
;
209 tnl
->tp_src
= udp
->udp_src
;
210 tnl
->tp_dst
= udp
->udp_dst
;
/* Pushes the prebuilt outer headers in data onto packet and completes the UDP
 * header: the source port comes from netdev_tnl_get_src_port(), the UDP
 * length is the L4 size reported by netdev_tnl_push_ip_header(), and the UDP
 * checksum is computed over the pseudo-header plus ip_tot_size bytes starting
 * at the UDP header.
 *
 * A computed checksum of zero is stored as 0xffff; presumably because a zero
 * UDP checksum field means that no checksum was computed -- confirm.
 *
 * NOTE(review): the return type, the declarations of ip_tot_size and csum,
 * and the condition that guards the checksum computation are absent from
 * this extract. */
217 netdev_tnl_push_udp_header(struct dp_packet
*packet
,
218 const struct ovs_action_push_tnl
*data
)
220 struct udp_header
*udp
;
223 udp
= netdev_tnl_push_ip_header(packet
, data
->header
, data
->header_len
, &ip_tot_size
);
225 /* set udp src port */
226 udp
->udp_src
= netdev_tnl_get_src_port(packet
);
227 udp
->udp_len
= htons(ip_tot_size
);
231 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet
))) {
232 csum
= packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(dp_packet_data(packet
)));
234 csum
= packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet
)));
237 csum
= csum_continue(csum
, udp
, ip_tot_size
);
238 udp
->udp_csum
= csum_finish(csum
);
240 if (!udp
->udp_csum
) {
241 udp
->udp_csum
= htons(0xffff);
/* Fills in the IP protocol field and the UDP header of the tunnel header
 * template in data->header, and stores the combined length of the Ethernet
 * and IP headers in *hlen.
 *
 * The UDP destination port comes from tnl_cfg->dst_port.  When the template
 * is IPv6, or the flow requests FLOW_TNL_F_CSUM, udp_csum is preset to 0xffff
 * as a marker that the checksum must be computed later.
 *
 * NOTE(review): the return type, the last parameter, the declaration of
 * is_ipv6, the if and else lines around the two header branches, the end of
 * the inner comment and the return statement are absent from this extract.
 * Callers use the result as a pointer to the header that follows UDP. */
247 udp_build_header(struct netdev_tunnel_config
*tnl_cfg
,
248 const struct flow
*tnl_flow
,
249 struct ovs_action_push_tnl
*data
,
252 struct ip_header
*ip
;
253 struct ovs_16aligned_ip6_hdr
*ip6
;
254 struct udp_header
*udp
;
257 *hlen
= sizeof(struct eth_header
);
259 is_ipv6
= netdev_tnl_is_header_ipv6(data
->header
);
/* IPv6 template: UDP follows directly after the fixed IPv6 header. */
262 ip6
= netdev_tnl_ipv6_hdr(data
->header
);
263 ip6
->ip6_nxt
= IPPROTO_UDP
;
264 udp
= (struct udp_header
*) (ip6
+ 1);
265 *hlen
+= IPV6_HEADER_LEN
;
/* IPv4 template: UDP follows directly after the IPv4 header. */
267 ip
= netdev_tnl_ip_hdr(data
->header
);
268 ip
->ip_proto
= IPPROTO_UDP
;
269 udp
= (struct udp_header
*) (ip
+ 1);
270 *hlen
+= IP_HEADER_LEN
;
273 udp
->udp_dst
= tnl_cfg
->dst_port
;
275 if (is_ipv6
|| tnl_flow
->tunnel
.flags
& FLOW_TNL_F_CSUM
) {
276 /* Write a value in now to mark that we should compute the checksum
277 * later. 0xffff is handy because it is transparent to the
279 udp
->udp_csum
= htons(0xffff);
/* Computes the GRE header length implied by flags; parse_gre_header() adds
 * the result to the outer Ethernet and IP header length.
 *
 * The visible code tests the GRE_CSUM, GRE_KEY and GRE_SEQ bits in turn.
 *
 * NOTE(review): the return type, the length variable, the body of each test
 * and the return statement are absent from this extract; presumably each
 * option that is present adds its size to a base length -- confirm against
 * the complete file. */
286 gre_header_len(ovs_be16 flags
)
290 if (flags
& htons(GRE_CSUM
)) {
293 if (flags
& htons(GRE_KEY
)) {
296 if (flags
& htons(GRE_SEQ
)) {
/* Parses the outer IP and GRE headers of packet and records the tunnel
 * metadata in tnl.
 *
 * Visible behavior: flags other than GRE_CSUM, GRE_KEY and GRE_SEQ, and any
 * protocol other than ETH_TYPE_TEB, are tested for; the total header length
 * is the outer length from netdev_tnl_ip_extract_tnl_md() plus
 * gre_header_len(); that length is compared against the packet size.  With
 * GRE_CSUM a checksum is computed from the GRE header to the end of the
 * packet and tnl->flags is set to FLOW_TNL_F_CSUM.  With GRE_KEY the key is
 * stored in tnl->tun_id and FLOW_TNL_F_KEY is set.
 *
 * NOTE(review): the return type, the declarations of hlen, ulen and
 * pkt_csum, the bodies of the rejecting tests, the statements that advance
 * options, the GRE_SEQ handling and the return statements are absent from
 * this extract.  netdev_gre_pop_header() uses the result as the number of
 * header bytes; presumably a negative value signals an error -- confirm. */
303 parse_gre_header(struct dp_packet
*packet
,
304 struct flow_tnl
*tnl
)
306 const struct gre_base_hdr
*greh
;
307 ovs_16aligned_be32
*options
;
311 greh
= netdev_tnl_ip_extract_tnl_md(packet
, tnl
, &ulen
);
/* Any flag bit outside the three supported ones. */
316 if (greh
->flags
& ~(htons(GRE_CSUM
| GRE_KEY
| GRE_SEQ
))) {
320 if (greh
->protocol
!= htons(ETH_TYPE_TEB
)) {
324 hlen
= ulen
+ gre_header_len(greh
->flags
);
325 if (hlen
> dp_packet_size(packet
)) {
/* Optional fields start right after the base GRE header. */
329 options
= (ovs_16aligned_be32
*)(greh
+ 1);
330 if (greh
->flags
& htons(GRE_CSUM
)) {
333 pkt_csum
= csum(greh
, dp_packet_size(packet
) -
334 ((const unsigned char *)greh
-
335 (const unsigned char *)dp_packet_l2(packet
)));
/* Plain assignment: any flags set earlier in tnl are replaced here. */
339 tnl
->flags
= FLOW_TNL_F_CSUM
;
343 if (greh
->flags
& htons(GRE_KEY
)) {
/* The raw 32-bit key is shifted into the upper half of the 64-bit tun_id
 * without any byte swapping. */
344 tnl
->tun_id
= (OVS_FORCE ovs_be64
) ((OVS_FORCE
uint64_t)(get_16aligned_be32(options
)) << 32);
345 tnl
->flags
|= FLOW_TNL_F_KEY
;
349 if (greh
->flags
& htons(GRE_SEQ
)) {
/* Removes the outer GRE tunnel headers from packet and records the tunnel
 * metadata in packet->md.tunnel.
 *
 * The minimum length checked first is the Ethernet header plus 4 bytes
 * (presumably the base GRE header -- confirm) plus the IPv4 or IPv6 header
 * length.  The real header length then comes from parse_gre_header() and is
 * passed to dp_packet_reset_packet().
 *
 * NOTE(review): the return type, the error branches, the labels and the
 * return statements are absent from this extract; dp_packet_delete() is
 * presumably reached only on the error path -- confirm against the complete
 * file. */
357 netdev_gre_pop_header(struct dp_packet
*packet
)
359 struct pkt_metadata
*md
= &packet
->md
;
360 struct flow_tnl
*tnl
= &md
->tunnel
;
361 int hlen
= sizeof(struct eth_header
) + 4;
363 hlen
+= netdev_tnl_is_header_ipv6(dp_packet_data(packet
)) ?
364 IPV6_HEADER_LEN
: IP_HEADER_LEN
;
366 pkt_metadata_init_tnl(md
);
367 if (hlen
> dp_packet_size(packet
)) {
371 hlen
= parse_gre_header(packet
, tnl
);
376 dp_packet_reset_packet(packet
, hlen
);
380 dp_packet_delete(packet
);
/* Pushes the prebuilt outer headers in data onto packet and, when the GRE
 * header has GRE_CSUM set, stores the checksum of the ip_tot_size bytes
 * starting at the GRE header in the 16-bit field that follows the base GRE
 * header.
 *
 * NOTE(review): the return type and the declaration of ip_tot_size are
 * absent from this extract. */
385 netdev_gre_push_header(struct dp_packet
*packet
,
386 const struct ovs_action_push_tnl
*data
)
388 struct gre_base_hdr
*greh
;
391 greh
= netdev_tnl_push_ip_header(packet
, data
->header
, data
->header_len
, &ip_tot_size
);
393 if (greh
->flags
& htons(GRE_CSUM
)) {
394 ovs_be16
*csum_opt
= (ovs_be16
*) (greh
+ 1);
395 *csum_opt
= csum(greh
, ip_tot_size
);
/* Builds the GRE part of the tunnel header template in data->header for
 * netdev, using the tunnel fields of tnl_flow.
 *
 * Visible behavior, with dev->mutex held while tnl_cfg is read: the IP
 * protocol (or IPv6 next header) is set to IPPROTO_GRE, the GRE protocol is
 * set to ETH_TYPE_TEB, GRE_CSUM plus a zeroed option word is added when the
 * flow has FLOW_TNL_F_CSUM, and GRE_KEY plus the upper 32 bits of tun_id is
 * added when tnl_cfg->out_key_present is set.  data->header_len becomes the
 * Ethernet header plus the IP header plus the distance from greh to options,
 * and data->tnl_type becomes OVS_VPORT_TYPE_GRE.
 *
 * NOTE(review): the return type, the declarations of is_ipv6 and hlen, the
 * if and else lines around the two IP branches, the initialization of
 * greh->flags, the statements that advance options and the return statement
 * are absent from this extract. */
400 netdev_gre_build_header(const struct netdev
*netdev
,
401 struct ovs_action_push_tnl
*data
,
402 const struct flow
*tnl_flow
)
404 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
405 struct netdev_tunnel_config
*tnl_cfg
;
406 struct ip_header
*ip
;
407 struct ovs_16aligned_ip6_hdr
*ip6
;
408 struct gre_base_hdr
*greh
;
409 ovs_16aligned_be32
*options
;
413 is_ipv6
= netdev_tnl_is_header_ipv6(data
->header
);
415 /* XXX: RCUfy tnl_cfg. */
416 ovs_mutex_lock(&dev
->mutex
);
417 tnl_cfg
= &dev
->tnl_cfg
;
420 ip6
= netdev_tnl_ipv6_hdr(data
->header
);
421 ip6
->ip6_nxt
= IPPROTO_GRE
;
422 greh
= (struct gre_base_hdr
*) (ip6
+ 1);
424 ip
= netdev_tnl_ip_hdr(data
->header
);
425 ip
->ip_proto
= IPPROTO_GRE
;
426 greh
= (struct gre_base_hdr
*) (ip
+ 1);
429 greh
->protocol
= htons(ETH_TYPE_TEB
);
/* Optional fields are written right after the base GRE header. */
432 options
= (ovs_16aligned_be32
*) (greh
+ 1);
433 if (tnl_flow
->tunnel
.flags
& FLOW_TNL_F_CSUM
) {
434 greh
->flags
|= htons(GRE_CSUM
);
435 put_16aligned_be32(options
, 0);
439 if (tnl_cfg
->out_key_present
) {
440 greh
->flags
|= htons(GRE_KEY
);
/* The key is the upper 32 bits of the raw 64-bit tun_id value. */
441 put_16aligned_be32(options
, (OVS_FORCE ovs_be32
)
442 ((OVS_FORCE
uint64_t) tnl_flow
->tunnel
.tun_id
>> 32));
446 ovs_mutex_unlock(&dev
->mutex
);
/* Length of the GRE header: distance from its start to the options cursor. */
448 hlen
= (uint8_t *) options
- (uint8_t *) greh
;
450 data
->header_len
= sizeof(struct eth_header
) + hlen
+
451 (is_ipv6
? IPV6_HEADER_LEN
: IP_HEADER_LEN
);
452 data
->tnl_type
= OVS_VPORT_TYPE_GRE
;
/* Removes the outer VXLAN tunnel headers from packet and records the tunnel
 * metadata in packet->md.tunnel.
 *
 * Visible behavior: the L4 size is compared against VXLAN_HLEN,
 * udp_extract_tnl_md() locates the VXLAN header, the flags word must equal
 * VXLAN_FLAGS and the lowest byte of the VNI word (host order) must be zero,
 * otherwise a rate-limited warning is logged.  The upper 24 bits of the VNI
 * word become tnl->tun_id, FLOW_TNL_F_KEY is set, and
 * dp_packet_reset_packet() is called with the outer header length plus
 * VXLAN_HLEN.
 *
 * NOTE(review): the return type, the declaration of hlen, the error
 * branches, the labels and the return statements are absent from this
 * extract; dp_packet_delete() is presumably reached only on the error path --
 * confirm against the complete file. */
457 netdev_vxlan_pop_header(struct dp_packet
*packet
)
459 struct pkt_metadata
*md
= &packet
->md
;
460 struct flow_tnl
*tnl
= &md
->tunnel
;
461 struct vxlanhdr
*vxh
;
464 pkt_metadata_init_tnl(md
);
465 if (VXLAN_HLEN
> dp_packet_l4_size(packet
)) {
469 vxh
= udp_extract_tnl_md(packet
, tnl
, &hlen
);
474 if (get_16aligned_be32(&vxh
->vx_flags
) != htonl(VXLAN_FLAGS
) ||
475 (get_16aligned_be32(&vxh
->vx_vni
) & htonl(0xff))) {
476 VLOG_WARN_RL(&err_rl
, "invalid vxlan flags=%#x vni=%#x\n",
477 ntohl(get_16aligned_be32(&vxh
->vx_flags
)),
478 ntohl(get_16aligned_be32(&vxh
->vx_vni
)));
/* Drop the lowest byte of the VNI word; the remaining bits are the id. */
481 tnl
->tun_id
= htonll(ntohl(get_16aligned_be32(&vxh
->vx_vni
)) >> 8);
482 tnl
->flags
|= FLOW_TNL_F_KEY
;
484 dp_packet_reset_packet(packet
, hlen
+ VXLAN_HLEN
);
488 dp_packet_delete(packet
);
/* Builds the VXLAN tunnel header template in data->header for netdev, using
 * the tunnel fields of tnl_flow.
 *
 * With dev->mutex held, udp_build_header() fills in the IP and UDP parts and
 * yields the VXLAN header, whose flags word is set to VXLAN_FLAGS and whose
 * VNI word is set to the tunnel id shifted left by 8 bits.  data->header_len
 * becomes the outer Ethernet and IP length plus VXLAN_HLEN, and
 * data->tnl_type becomes OVS_VPORT_TYPE_VXLAN.
 *
 * NOTE(review): the return type, the declaration of hlen and the return
 * statement are absent from this extract. */
493 netdev_vxlan_build_header(const struct netdev
*netdev
,
494 struct ovs_action_push_tnl
*data
,
495 const struct flow
*tnl_flow
)
497 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
498 struct netdev_tunnel_config
*tnl_cfg
;
499 struct vxlanhdr
*vxh
;
502 /* XXX: RCUfy tnl_cfg. */
503 ovs_mutex_lock(&dev
->mutex
);
504 tnl_cfg
= &dev
->tnl_cfg
;
506 vxh
= udp_build_header(tnl_cfg
, tnl_flow
, data
, &hlen
);
508 put_16aligned_be32(&vxh
->vx_flags
, htonl(VXLAN_FLAGS
));
/* Inverse of the shift applied in netdev_vxlan_pop_header(). */
509 put_16aligned_be32(&vxh
->vx_vni
, htonl(ntohll(tnl_flow
->tunnel
.tun_id
) << 8));
511 ovs_mutex_unlock(&dev
->mutex
);
512 data
->header_len
= hlen
+ VXLAN_HLEN
;
513 data
->tnl_type
= OVS_VPORT_TYPE_VXLAN
;
/* Removes the outer Geneve tunnel headers from packet and records the tunnel
 * metadata, including the raw options, in packet->md.tunnel.
 *
 * Visible behavior: the L4 size is compared against GENEVE_BASE_HLEN,
 * udp_extract_tnl_md() locates the Geneve header, the option length is
 * gnh->opt_len times 4 bytes, and the total header length (outer headers
 * plus GENEVE_BASE_HLEN plus options) is compared against the packet size.
 * The version and the encapsulated protocol (ETH_TYPE_TEB expected) are
 * checked with rate-limited warnings.  Then the OAM flag, the tunnel id
 * (upper 24 bits of the VNI word) and FLOW_TNL_F_KEY are recorded, the
 * options are copied into tnl->metadata.opts.gnv with their length, and
 * FLOW_TNL_F_UDPIF is set before dp_packet_reset_packet() is called with the
 * total header length.
 *
 * NOTE(review): the return type, the condition of the version check, the
 * error branches, the labels and the return statements are absent from this
 * extract; dp_packet_delete() is presumably reached only on the error path.
 * No check that opts_len fits the destination of the memcpy is visible here;
 * confirm that the width of opt_len bounds it. */
518 netdev_geneve_pop_header(struct dp_packet
*packet
)
520 struct pkt_metadata
*md
= &packet
->md
;
521 struct flow_tnl
*tnl
= &md
->tunnel
;
522 struct genevehdr
*gnh
;
523 unsigned int hlen
, opts_len
, ulen
;
525 pkt_metadata_init_tnl(md
);
526 if (GENEVE_BASE_HLEN
> dp_packet_l4_size(packet
)) {
527 VLOG_WARN_RL(&err_rl
, "geneve packet too small: min header=%u packet size=%"PRIuSIZE
"\n",
528 (unsigned int)GENEVE_BASE_HLEN
, dp_packet_l4_size(packet
));
532 gnh
= udp_extract_tnl_md(packet
, tnl
, &ulen
);
/* opt_len counts 4-byte units. */
537 opts_len
= gnh
->opt_len
* 4;
538 hlen
= ulen
+ GENEVE_BASE_HLEN
+ opts_len
;
539 if (hlen
> dp_packet_size(packet
)) {
540 VLOG_WARN_RL(&err_rl
, "geneve packet too small: header len=%u packet size=%u\n",
541 hlen
, dp_packet_size(packet
));
/* NOTE(review): the condition guarding this warning is not visible. */
546 VLOG_WARN_RL(&err_rl
, "unknown geneve version: %"PRIu8
"\n", gnh
->ver
);
550 if (gnh
->proto_type
!= htons(ETH_TYPE_TEB
)) {
551 VLOG_WARN_RL(&err_rl
, "unknown geneve encapsulated protocol: %#x\n",
552 ntohs(gnh
->proto_type
));
556 tnl
->flags
|= gnh
->oam
? FLOW_TNL_F_OAM
: 0;
/* Drop the lowest byte of the VNI word; the remaining bits are the id. */
557 tnl
->tun_id
= htonll(ntohl(get_16aligned_be32(&gnh
->vni
)) >> 8);
558 tnl
->flags
|= FLOW_TNL_F_KEY
;
/* Keep the options exactly as received, together with their byte length. */
560 memcpy(tnl
->metadata
.opts
.gnv
, gnh
->options
, opts_len
);
561 tnl
->metadata
.present
.len
= opts_len
;
562 tnl
->flags
|= FLOW_TNL_F_UDPIF
;
564 dp_packet_reset_packet(packet
, hlen
);
568 dp_packet_delete(packet
);
/* Builds the Geneve tunnel header template in data->header for netdev, using
 * the tunnel fields of tnl_flow.
 *
 * With dev->mutex held, udp_build_header() fills in the IP and UDP parts and
 * yields the Geneve header, whose VNI word is set to the tunnel id shifted
 * left by 8 bits.  After the mutex is released,
 * tun_metadata_to_geneve_header() writes the options and reports their
 * length and whether a critical option is present.  opt_len is stored in
 * 4-byte units, the OAM and critical bits and the ETH_TYPE_TEB protocol are
 * set, data->header_len becomes the outer length plus GENEVE_BASE_HLEN plus
 * the option length, and data->tnl_type becomes OVS_VPORT_TYPE_GENEVE.
 *
 * NOTE(review): the return type, the declarations of hlen, opt_len and
 * crit_opt, and the return statement are absent from this extract. */
573 netdev_geneve_build_header(const struct netdev
*netdev
,
574 struct ovs_action_push_tnl
*data
,
575 const struct flow
*tnl_flow
)
577 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
578 struct netdev_tunnel_config
*tnl_cfg
;
579 struct genevehdr
*gnh
;
584 /* XXX: RCUfy tnl_cfg. */
585 ovs_mutex_lock(&dev
->mutex
);
586 tnl_cfg
= &dev
->tnl_cfg
;
588 gnh
= udp_build_header(tnl_cfg
, tnl_flow
, data
, &hlen
);
/* Inverse of the shift applied in netdev_geneve_pop_header(). */
590 put_16aligned_be32(&gnh
->vni
, htonl(ntohll(tnl_flow
->tunnel
.tun_id
) << 8));
592 ovs_mutex_unlock(&dev
->mutex
);
594 opt_len
= tun_metadata_to_geneve_header(&tnl_flow
->tunnel
,
595 gnh
->options
, &crit_opt
);
/* The header field counts 4-byte units, opt_len itself is in bytes. */
597 gnh
->opt_len
= opt_len
/ 4;
598 gnh
->oam
= !!(tnl_flow
->tunnel
.flags
& FLOW_TNL_F_OAM
);
599 gnh
->critical
= crit_opt
? 1 : 0;
600 gnh
->proto_type
= htons(ETH_TYPE_TEB
);
602 data
->header_len
= hlen
+ GENEVE_BASE_HLEN
+ opt_len
;
603 data
->tnl_type
= OVS_VPORT_TYPE_GENEVE
;
/* unixctl command handler that reports or changes the tunnel UDP source port
 * range held in tnl_udp_port_min and tnl_udp_port_max.
 *
 * Visible behavior: one path formats the current range and sends it as the
 * reply.  The other path parses argv[1] and argv[2] with atoi(), rejects
 * values that are not in the range 1 to UINT16_MAX with the replies
 * Invalid min. or Invalid max., stores the two values as minimum and maximum
 * (both assignment orders are visible, so the values are presumably swapped
 * when given in descending order), signals tnl_conf_seq and replies OK.
 *
 * NOTE(review): the return type, the declarations of val1 and val2, the
 * argc checks, the comparison that selects the assignment order, the cleanup
 * of ds and the return statements are absent from this extract.  atoi()
 * cannot report malformed input; trailing garbage after a number is
 * accepted. */
609 netdev_tnl_egress_port_range(struct unixctl_conn
*conn
, int argc
,
610 const char *argv
[], void *aux OVS_UNUSED
)
615 struct ds ds
= DS_EMPTY_INITIALIZER
;
617 ds_put_format(&ds
, "Tunnel UDP source port range: %"PRIu16
"-%"PRIu16
"\n",
618 tnl_udp_port_min
, tnl_udp_port_max
);
620 unixctl_command_reply(conn
, ds_cstr(&ds
));
629 val1
= atoi(argv
[1]);
630 if (val1
<= 0 || val1
> UINT16_MAX
) {
631 unixctl_command_reply(conn
, "Invalid min.");
634 val2
= atoi(argv
[2]);
635 if (val2
<= 0 || val2
> UINT16_MAX
) {
636 unixctl_command_reply(conn
, "Invalid max.");
/* Descending input: the larger value becomes the maximum. */
641 tnl_udp_port_min
= val2
;
642 tnl_udp_port_max
= val1
;
/* Ascending input: stored as given. */
644 tnl_udp_port_min
= val1
;
645 tnl_udp_port_max
= val2
;
/* Signal the configuration change through tnl_conf_seq. */
647 seq_change(tnl_conf_seq
);
649 unixctl_command_reply(conn
, "OK");