/*
 * Copyright (c) 2016 Nicira, Inc.
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 #include "netdev-native-tnl.h"
24 #include <sys/socket.h>
26 #include <netinet/in.h>
27 #include <netinet/ip.h>
28 #include <netinet/ip6.h>
29 #include <sys/ioctl.h>
35 #include "byte-order.h"
37 #include "dp-packet.h"
39 #include "netdev-vport.h"
40 #include "netdev-vport-private.h"
41 #include "odp-netlink.h"
44 #include "unaligned.h"
46 #include "openvswitch/vlog.h"
48 VLOG_DEFINE_THIS_MODULE(native_tnl
);
49 static struct vlog_rate_limit err_rl
= VLOG_RATE_LIMIT_INIT(60, 5);
51 #define VXLAN_HLEN (sizeof(struct udp_header) + \
52 sizeof(struct vxlanhdr))
54 #define GENEVE_BASE_HLEN (sizeof(struct udp_header) + \
55 sizeof(struct genevehdr))
57 uint16_t tnl_udp_port_min
= 32768;
58 uint16_t tnl_udp_port_max
= 61000;
61 netdev_tnl_ip_extract_tnl_md(struct dp_packet
*packet
, struct flow_tnl
*tnl
,
66 struct ovs_16aligned_ip6_hdr
*ip6
;
70 nh
= dp_packet_l3(packet
);
73 l4
= dp_packet_l4(packet
);
79 *hlen
= sizeof(struct eth_header
);
81 l3_size
= dp_packet_size(packet
) -
82 ((char *)nh
- (char *)dp_packet_data(packet
));
84 if (IP_VER(ip
->ip_ihl_ver
) == 4) {
86 ovs_be32 ip_src
, ip_dst
;
88 if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(packet
))) {
89 if (csum(ip
, IP_IHL(ip
->ip_ihl_ver
) * 4)) {
90 VLOG_WARN_RL(&err_rl
, "ip packet has invalid checksum");
95 if (ntohs(ip
->ip_tot_len
) > l3_size
) {
96 VLOG_WARN_RL(&err_rl
, "ip packet is truncated (IP length %d, actual %d)",
97 ntohs(ip
->ip_tot_len
), l3_size
);
100 if (IP_IHL(ip
->ip_ihl_ver
) * 4 > sizeof(struct ip_header
)) {
101 VLOG_WARN_RL(&err_rl
, "ip options not supported on tunnel packets "
102 "(%d bytes)", IP_IHL(ip
->ip_ihl_ver
) * 4);
106 ip_src
= get_16aligned_be32(&ip
->ip_src
);
107 ip_dst
= get_16aligned_be32(&ip
->ip_dst
);
109 tnl
->ip_src
= ip_src
;
110 tnl
->ip_dst
= ip_dst
;
111 tnl
->ip_tos
= ip
->ip_tos
;
112 tnl
->ip_ttl
= ip
->ip_ttl
;
114 *hlen
+= IP_HEADER_LEN
;
116 } else if (IP_VER(ip
->ip_ihl_ver
) == 6) {
117 ovs_be32 tc_flow
= get_16aligned_be32(&ip6
->ip6_flow
);
119 memcpy(tnl
->ipv6_src
.s6_addr
, ip6
->ip6_src
.be16
, sizeof ip6
->ip6_src
);
120 memcpy(tnl
->ipv6_dst
.s6_addr
, ip6
->ip6_dst
.be16
, sizeof ip6
->ip6_dst
);
122 tnl
->ip_tos
= ntohl(tc_flow
) >> 20;
123 tnl
->ip_ttl
= ip6
->ip6_hlim
;
125 *hlen
+= IPV6_HEADER_LEN
;
128 VLOG_WARN_RL(&err_rl
, "ipv4 packet has invalid version (%d)",
129 IP_VER(ip
->ip_ihl_ver
));
/* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
 * reallocating the packet if necessary.  'header' should contain an Ethernet
 * header, followed by either an IPv4 header (without options) or an IPv6
 * header, and an L4 header.
 *
 * For IPv4, this function sets the IP header's ip_tot_len field (which should
 * be zeroed as part of 'header') and updates the IP header checksum to match.
 * For IPv6, it sets the ip6_plen field instead.  In both cases '*ip_tot_size'
 * is left holding the number of bytes that follow the IP header.
 *
 * Return pointer to the L4 header added to 'packet'. */
146 netdev_tnl_push_ip_header(struct dp_packet
*packet
,
147 const void *header
, int size
, int *ip_tot_size
)
149 struct eth_header
*eth
;
150 struct ip_header
*ip
;
151 struct ovs_16aligned_ip6_hdr
*ip6
;
153 eth
= dp_packet_push_uninit(packet
, size
);
154 *ip_tot_size
= dp_packet_size(packet
) - sizeof (struct eth_header
);
156 memcpy(eth
, header
, size
);
158 if (netdev_tnl_is_header_ipv6(header
)) {
159 ip6
= netdev_tnl_ipv6_hdr(eth
);
160 *ip_tot_size
-= IPV6_HEADER_LEN
;
161 ip6
->ip6_plen
= htons(*ip_tot_size
);
164 ip
= netdev_tnl_ip_hdr(eth
);
165 ip
->ip_tot_len
= htons(*ip_tot_size
);
166 ip
->ip_csum
= recalc_csum16(ip
->ip_csum
, 0, ip
->ip_tot_len
);
167 *ip_tot_size
-= IP_HEADER_LEN
;
173 udp_extract_tnl_md(struct dp_packet
*packet
, struct flow_tnl
*tnl
,
176 struct udp_header
*udp
;
178 udp
= netdev_tnl_ip_extract_tnl_md(packet
, tnl
, hlen
);
184 if (OVS_UNLIKELY(!dp_packet_l4_checksum_valid(packet
))) {
186 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet
))) {
187 csum
= packet_csum_pseudoheader6(dp_packet_l3(packet
));
189 csum
= packet_csum_pseudoheader(dp_packet_l3(packet
));
192 csum
= csum_continue(csum
, udp
, dp_packet_size(packet
) -
193 ((const unsigned char *)udp
-
194 (const unsigned char *)dp_packet_eth(packet
)
196 if (csum_finish(csum
)) {
200 tnl
->flags
|= FLOW_TNL_F_CSUM
;
203 tnl
->tp_src
= udp
->udp_src
;
204 tnl
->tp_dst
= udp
->udp_dst
;
211 netdev_tnl_push_udp_header(struct dp_packet
*packet
,
212 const struct ovs_action_push_tnl
*data
)
214 struct udp_header
*udp
;
217 udp
= netdev_tnl_push_ip_header(packet
, data
->header
, data
->header_len
, &ip_tot_size
);
219 /* set udp src port */
220 udp
->udp_src
= netdev_tnl_get_src_port(packet
);
221 udp
->udp_len
= htons(ip_tot_size
);
225 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet
))) {
226 csum
= packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(dp_packet_data(packet
)));
228 csum
= packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet
)));
231 csum
= csum_continue(csum
, udp
, ip_tot_size
);
232 udp
->udp_csum
= csum_finish(csum
);
234 if (!udp
->udp_csum
) {
235 udp
->udp_csum
= htons(0xffff);
241 eth_build_header(struct ovs_action_push_tnl
*data
,
242 const struct netdev_tnl_build_header_params
*params
)
244 uint16_t eth_proto
= params
->is_ipv6
? ETH_TYPE_IPV6
: ETH_TYPE_IP
;
245 struct eth_header
*eth
;
247 memset(data
->header
, 0, sizeof data
->header
);
249 eth
= (struct eth_header
*)data
->header
;
250 eth
->eth_dst
= params
->dmac
;
251 eth
->eth_src
= params
->smac
;
252 eth
->eth_type
= htons(eth_proto
);
253 data
->header_len
= sizeof(struct eth_header
);
258 netdev_tnl_ip_build_header(struct ovs_action_push_tnl
*data
,
259 const struct netdev_tnl_build_header_params
*params
,
264 l3
= eth_build_header(data
, params
);
265 if (!params
->is_ipv6
) {
266 ovs_be32 ip_src
= in6_addr_get_mapped_ipv4(params
->s_ip
);
267 struct ip_header
*ip
;
269 ip
= (struct ip_header
*) l3
;
271 ip
->ip_ihl_ver
= IP_IHL_VER(5, 4);
272 ip
->ip_tos
= params
->flow
->tunnel
.ip_tos
;
273 ip
->ip_ttl
= params
->flow
->tunnel
.ip_ttl
;
274 ip
->ip_proto
= next_proto
;
275 put_16aligned_be32(&ip
->ip_src
, ip_src
);
276 put_16aligned_be32(&ip
->ip_dst
, params
->flow
->tunnel
.ip_dst
);
278 ip
->ip_frag_off
= (params
->flow
->tunnel
.flags
& FLOW_TNL_F_DONT_FRAGMENT
) ?
281 /* Checksum has already been zeroed by eth_build_header. */
282 ip
->ip_csum
= csum(ip
, sizeof *ip
);
284 data
->header_len
+= IP_HEADER_LEN
;
287 struct ovs_16aligned_ip6_hdr
*ip6
;
289 ip6
= (struct ovs_16aligned_ip6_hdr
*) l3
;
291 put_16aligned_be32(&ip6
->ip6_flow
, htonl(6 << 28) |
292 htonl(params
->flow
->tunnel
.ip_tos
<< 20));
293 ip6
->ip6_hlim
= params
->flow
->tunnel
.ip_ttl
;
294 ip6
->ip6_nxt
= next_proto
;
295 memcpy(&ip6
->ip6_src
, params
->s_ip
, sizeof(ovs_be32
[4]));
296 memcpy(&ip6
->ip6_dst
, ¶ms
->flow
->tunnel
.ipv6_dst
, sizeof(ovs_be32
[4]));
298 data
->header_len
+= IPV6_HEADER_LEN
;
304 udp_build_header(struct netdev_tunnel_config
*tnl_cfg
,
305 struct ovs_action_push_tnl
*data
,
306 const struct netdev_tnl_build_header_params
*params
)
308 struct udp_header
*udp
;
310 udp
= netdev_tnl_ip_build_header(data
, params
, IPPROTO_UDP
);
311 udp
->udp_dst
= tnl_cfg
->dst_port
;
313 if (params
->is_ipv6
|| params
->flow
->tunnel
.flags
& FLOW_TNL_F_CSUM
) {
/* Write a value in now to mark that we should compute the checksum
 * later. 0xffff is handy because it is transparent to the
 * calculation. */
317 udp
->udp_csum
= htons(0xffff);
319 data
->header_len
+= sizeof *udp
;
324 gre_header_len(ovs_be16 flags
)
328 if (flags
& htons(GRE_CSUM
)) {
331 if (flags
& htons(GRE_KEY
)) {
334 if (flags
& htons(GRE_SEQ
)) {
341 parse_gre_header(struct dp_packet
*packet
,
342 struct flow_tnl
*tnl
)
344 const struct gre_base_hdr
*greh
;
345 ovs_16aligned_be32
*options
;
349 greh
= netdev_tnl_ip_extract_tnl_md(packet
, tnl
, &ulen
);
354 if (greh
->flags
& ~(htons(GRE_CSUM
| GRE_KEY
| GRE_SEQ
))) {
358 if (greh
->protocol
!= htons(ETH_TYPE_TEB
)) {
362 hlen
= ulen
+ gre_header_len(greh
->flags
);
363 if (hlen
> dp_packet_size(packet
)) {
367 options
= (ovs_16aligned_be32
*)(greh
+ 1);
368 if (greh
->flags
& htons(GRE_CSUM
)) {
371 pkt_csum
= csum(greh
, dp_packet_size(packet
) -
372 ((const unsigned char *)greh
-
373 (const unsigned char *)dp_packet_eth(packet
)));
377 tnl
->flags
= FLOW_TNL_F_CSUM
;
381 if (greh
->flags
& htons(GRE_KEY
)) {
382 tnl
->tun_id
= be32_to_be64(get_16aligned_be32(options
));
383 tnl
->flags
|= FLOW_TNL_F_KEY
;
387 if (greh
->flags
& htons(GRE_SEQ
)) {
395 netdev_gre_pop_header(struct dp_packet
*packet
)
397 struct pkt_metadata
*md
= &packet
->md
;
398 struct flow_tnl
*tnl
= &md
->tunnel
;
399 int hlen
= sizeof(struct eth_header
) + 4;
401 hlen
+= netdev_tnl_is_header_ipv6(dp_packet_data(packet
)) ?
402 IPV6_HEADER_LEN
: IP_HEADER_LEN
;
404 pkt_metadata_init_tnl(md
);
405 if (hlen
> dp_packet_size(packet
)) {
409 hlen
= parse_gre_header(packet
, tnl
);
414 dp_packet_reset_packet(packet
, hlen
);
418 dp_packet_delete(packet
);
423 netdev_gre_push_header(struct dp_packet
*packet
,
424 const struct ovs_action_push_tnl
*data
)
426 struct gre_base_hdr
*greh
;
429 greh
= netdev_tnl_push_ip_header(packet
, data
->header
, data
->header_len
, &ip_tot_size
);
431 if (greh
->flags
& htons(GRE_CSUM
)) {
432 ovs_be16
*csum_opt
= (ovs_be16
*) (greh
+ 1);
433 *csum_opt
= csum(greh
, ip_tot_size
);
438 netdev_gre_build_header(const struct netdev
*netdev
,
439 struct ovs_action_push_tnl
*data
,
440 const struct netdev_tnl_build_header_params
*params
)
442 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
443 struct netdev_tunnel_config
*tnl_cfg
;
444 struct gre_base_hdr
*greh
;
445 ovs_16aligned_be32
*options
;
448 /* XXX: RCUfy tnl_cfg. */
449 ovs_mutex_lock(&dev
->mutex
);
450 tnl_cfg
= &dev
->tnl_cfg
;
452 greh
= netdev_tnl_ip_build_header(data
, params
, IPPROTO_GRE
);
454 greh
->protocol
= htons(ETH_TYPE_TEB
);
457 options
= (ovs_16aligned_be32
*) (greh
+ 1);
458 if (params
->flow
->tunnel
.flags
& FLOW_TNL_F_CSUM
) {
459 greh
->flags
|= htons(GRE_CSUM
);
460 put_16aligned_be32(options
, 0);
464 if (tnl_cfg
->out_key_present
) {
465 greh
->flags
|= htons(GRE_KEY
);
466 put_16aligned_be32(options
, be64_to_be32(params
->flow
->tunnel
.tun_id
));
470 ovs_mutex_unlock(&dev
->mutex
);
472 hlen
= (uint8_t *) options
- (uint8_t *) greh
;
474 data
->header_len
+= hlen
;
475 data
->tnl_type
= OVS_VPORT_TYPE_GRE
;
480 netdev_vxlan_pop_header(struct dp_packet
*packet
)
482 struct pkt_metadata
*md
= &packet
->md
;
483 struct flow_tnl
*tnl
= &md
->tunnel
;
484 struct vxlanhdr
*vxh
;
487 pkt_metadata_init_tnl(md
);
488 if (VXLAN_HLEN
> dp_packet_l4_size(packet
)) {
492 vxh
= udp_extract_tnl_md(packet
, tnl
, &hlen
);
497 if (get_16aligned_be32(&vxh
->vx_flags
) != htonl(VXLAN_FLAGS
) ||
498 (get_16aligned_be32(&vxh
->vx_vni
) & htonl(0xff))) {
499 VLOG_WARN_RL(&err_rl
, "invalid vxlan flags=%#x vni=%#x\n",
500 ntohl(get_16aligned_be32(&vxh
->vx_flags
)),
501 ntohl(get_16aligned_be32(&vxh
->vx_vni
)));
504 tnl
->tun_id
= htonll(ntohl(get_16aligned_be32(&vxh
->vx_vni
)) >> 8);
505 tnl
->flags
|= FLOW_TNL_F_KEY
;
507 dp_packet_reset_packet(packet
, hlen
+ VXLAN_HLEN
);
511 dp_packet_delete(packet
);
516 netdev_vxlan_build_header(const struct netdev
*netdev
,
517 struct ovs_action_push_tnl
*data
,
518 const struct netdev_tnl_build_header_params
*params
)
520 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
521 struct netdev_tunnel_config
*tnl_cfg
;
522 struct vxlanhdr
*vxh
;
524 /* XXX: RCUfy tnl_cfg. */
525 ovs_mutex_lock(&dev
->mutex
);
526 tnl_cfg
= &dev
->tnl_cfg
;
528 vxh
= udp_build_header(tnl_cfg
, data
, params
);
530 put_16aligned_be32(&vxh
->vx_flags
, htonl(VXLAN_FLAGS
));
531 put_16aligned_be32(&vxh
->vx_vni
, htonl(ntohll(params
->flow
->tunnel
.tun_id
) << 8));
533 ovs_mutex_unlock(&dev
->mutex
);
534 data
->header_len
+= sizeof *vxh
;
535 data
->tnl_type
= OVS_VPORT_TYPE_VXLAN
;
540 netdev_geneve_pop_header(struct dp_packet
*packet
)
542 struct pkt_metadata
*md
= &packet
->md
;
543 struct flow_tnl
*tnl
= &md
->tunnel
;
544 struct genevehdr
*gnh
;
545 unsigned int hlen
, opts_len
, ulen
;
547 pkt_metadata_init_tnl(md
);
548 if (GENEVE_BASE_HLEN
> dp_packet_l4_size(packet
)) {
549 VLOG_WARN_RL(&err_rl
, "geneve packet too small: min header=%u packet size=%"PRIuSIZE
"\n",
550 (unsigned int)GENEVE_BASE_HLEN
, dp_packet_l4_size(packet
));
554 gnh
= udp_extract_tnl_md(packet
, tnl
, &ulen
);
559 opts_len
= gnh
->opt_len
* 4;
560 hlen
= ulen
+ GENEVE_BASE_HLEN
+ opts_len
;
561 if (hlen
> dp_packet_size(packet
)) {
562 VLOG_WARN_RL(&err_rl
, "geneve packet too small: header len=%u packet size=%u\n",
563 hlen
, dp_packet_size(packet
));
568 VLOG_WARN_RL(&err_rl
, "unknown geneve version: %"PRIu8
"\n", gnh
->ver
);
572 if (gnh
->proto_type
!= htons(ETH_TYPE_TEB
)) {
573 VLOG_WARN_RL(&err_rl
, "unknown geneve encapsulated protocol: %#x\n",
574 ntohs(gnh
->proto_type
));
578 tnl
->flags
|= gnh
->oam
? FLOW_TNL_F_OAM
: 0;
579 tnl
->tun_id
= htonll(ntohl(get_16aligned_be32(&gnh
->vni
)) >> 8);
580 tnl
->flags
|= FLOW_TNL_F_KEY
;
582 memcpy(tnl
->metadata
.opts
.gnv
, gnh
->options
, opts_len
);
583 tnl
->metadata
.present
.len
= opts_len
;
584 tnl
->flags
|= FLOW_TNL_F_UDPIF
;
586 dp_packet_reset_packet(packet
, hlen
);
590 dp_packet_delete(packet
);
595 netdev_geneve_build_header(const struct netdev
*netdev
,
596 struct ovs_action_push_tnl
*data
,
597 const struct netdev_tnl_build_header_params
*params
)
599 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
600 struct netdev_tunnel_config
*tnl_cfg
;
601 struct genevehdr
*gnh
;
605 /* XXX: RCUfy tnl_cfg. */
606 ovs_mutex_lock(&dev
->mutex
);
607 tnl_cfg
= &dev
->tnl_cfg
;
609 gnh
= udp_build_header(tnl_cfg
, data
, params
);
611 put_16aligned_be32(&gnh
->vni
, htonl(ntohll(params
->flow
->tunnel
.tun_id
) << 8));
613 ovs_mutex_unlock(&dev
->mutex
);
615 opt_len
= tun_metadata_to_geneve_header(¶ms
->flow
->tunnel
,
616 gnh
->options
, &crit_opt
);
618 gnh
->opt_len
= opt_len
/ 4;
619 gnh
->oam
= !!(params
->flow
->tunnel
.flags
& FLOW_TNL_F_OAM
);
620 gnh
->critical
= crit_opt
? 1 : 0;
621 gnh
->proto_type
= htons(ETH_TYPE_TEB
);
623 data
->header_len
+= sizeof *gnh
+ opt_len
;
624 data
->tnl_type
= OVS_VPORT_TYPE_GENEVE
;
630 netdev_tnl_egress_port_range(struct unixctl_conn
*conn
, int argc
,
631 const char *argv
[], void *aux OVS_UNUSED
)
636 struct ds ds
= DS_EMPTY_INITIALIZER
;
638 ds_put_format(&ds
, "Tunnel UDP source port range: %"PRIu16
"-%"PRIu16
"\n",
639 tnl_udp_port_min
, tnl_udp_port_max
);
641 unixctl_command_reply(conn
, ds_cstr(&ds
));
650 val1
= atoi(argv
[1]);
651 if (val1
<= 0 || val1
> UINT16_MAX
) {
652 unixctl_command_reply(conn
, "Invalid min.");
655 val2
= atoi(argv
[2]);
656 if (val2
<= 0 || val2
> UINT16_MAX
) {
657 unixctl_command_reply(conn
, "Invalid max.");
662 tnl_udp_port_min
= val2
;
663 tnl_udp_port_max
= val1
;
665 tnl_udp_port_min
= val1
;
666 tnl_udp_port_max
= val2
;
668 seq_change(tnl_conf_seq
);
670 unixctl_command_reply(conn
, "OK");