]>
git.proxmox.com Git - mirror_ovs.git/blob - lib/netdev-native-tnl.c
2 * Copyright (c) 2016 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
21 #include <sys/socket.h>
23 #include <netinet/in.h>
24 #include <netinet/ip6.h>
25 #include <sys/ioctl.h>
31 #include "openvswitch/list.h"
32 #include "byte-order.h"
37 #include "dp-packet.h"
43 #include "netdev-provider.h"
44 #include "netdev-vport.h"
45 #include "netdev-vport-private.h"
46 #include "odp-netlink.h"
47 #include "dp-packet.h"
48 #include "ovs-router.h"
50 #include "poll-loop.h"
52 #include "route-table.h"
54 #include "socket-util.h"
56 #include "netdev-native-tnl.h"
57 #include "openvswitch/vlog.h"
58 #include "unaligned.h"
62 VLOG_DEFINE_THIS_MODULE(native_tnl
);
63 static struct vlog_rate_limit err_rl
= VLOG_RATE_LIMIT_INIT(60, 5);
65 #define VXLAN_HLEN (sizeof(struct udp_header) + \
66 sizeof(struct vxlanhdr))
68 #define GENEVE_BASE_HLEN (sizeof(struct udp_header) + \
69 sizeof(struct genevehdr))
71 uint16_t tnl_udp_port_min
= 32768;
72 uint16_t tnl_udp_port_max
= 61000;
75 netdev_tnl_ip_extract_tnl_md(struct dp_packet
*packet
, struct flow_tnl
*tnl
,
80 struct ovs_16aligned_ip6_hdr
*ip6
;
84 nh
= dp_packet_l3(packet
);
87 l4
= dp_packet_l4(packet
);
93 *hlen
= sizeof(struct eth_header
);
95 l3_size
= dp_packet_size(packet
) -
96 ((char *)nh
- (char *)dp_packet_data(packet
));
98 if (IP_VER(ip
->ip_ihl_ver
) == 4) {
100 ovs_be32 ip_src
, ip_dst
;
102 if (csum(ip
, IP_IHL(ip
->ip_ihl_ver
) * 4)) {
103 VLOG_WARN_RL(&err_rl
, "ip packet has invalid checksum");
107 if (ntohs(ip
->ip_tot_len
) > l3_size
) {
108 VLOG_WARN_RL(&err_rl
, "ip packet is truncated (IP length %d, actual %d)",
109 ntohs(ip
->ip_tot_len
), l3_size
);
112 if (IP_IHL(ip
->ip_ihl_ver
) * 4 > sizeof(struct ip_header
)) {
113 VLOG_WARN_RL(&err_rl
, "ip options not supported on tunnel packets "
114 "(%d bytes)", IP_IHL(ip
->ip_ihl_ver
) * 4);
118 ip_src
= get_16aligned_be32(&ip
->ip_src
);
119 ip_dst
= get_16aligned_be32(&ip
->ip_dst
);
121 tnl
->ip_src
= ip_src
;
122 tnl
->ip_dst
= ip_dst
;
123 tnl
->ip_tos
= ip
->ip_tos
;
124 tnl
->ip_ttl
= ip
->ip_ttl
;
126 *hlen
+= IP_HEADER_LEN
;
128 } else if (IP_VER(ip
->ip_ihl_ver
) == 6) {
130 memcpy(tnl
->ipv6_src
.s6_addr
, ip6
->ip6_src
.be16
, sizeof ip6
->ip6_src
);
131 memcpy(tnl
->ipv6_dst
.s6_addr
, ip6
->ip6_dst
.be16
, sizeof ip6
->ip6_dst
);
133 tnl
->ip_ttl
= ip6
->ip6_hlim
;
135 *hlen
+= IPV6_HEADER_LEN
;
138 VLOG_WARN_RL(&err_rl
, "ipv4 packet has invalid version (%d)",
139 IP_VER(ip
->ip_ihl_ver
));
146 /* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
147 * reallocating the packet if necessary. 'header' should contain an Ethernet
148 * header, then an IPv4 (without options) or IPv6 header, and an L4 header.
150 * For IPv4, sets ip_tot_len (which should be zeroed as part of 'header') and
151 * updates the IP header checksum; for IPv6, sets ip6_plen. On return,
152 * '*ip_tot_size' holds the number of bytes that follow the IP header.
154 * Returns a pointer to the L4 header added to 'packet'. */
156 netdev_tnl_push_ip_header(struct dp_packet
*packet
,
157 const void *header
, int size
, int *ip_tot_size
)
159 struct eth_header
*eth
;
160 struct ip_header
*ip
;
161 struct ovs_16aligned_ip6_hdr
*ip6
;
163 eth
= dp_packet_push_uninit(packet
, size
);
164 *ip_tot_size
= dp_packet_size(packet
) - sizeof (struct eth_header
);
166 memcpy(eth
, header
, size
);
168 if (netdev_tnl_is_header_ipv6(header
)) {
169 ip6
= netdev_tnl_ipv6_hdr(eth
);
170 *ip_tot_size
-= IPV6_HEADER_LEN
;
171 ip6
->ip6_plen
= htons(*ip_tot_size
);
174 ip
= netdev_tnl_ip_hdr(eth
);
175 ip
->ip_tot_len
= htons(*ip_tot_size
);
176 ip
->ip_csum
= recalc_csum16(ip
->ip_csum
, 0, ip
->ip_tot_len
);
177 *ip_tot_size
-= IP_HEADER_LEN
;
183 udp_extract_tnl_md(struct dp_packet
*packet
, struct flow_tnl
*tnl
,
186 struct udp_header
*udp
;
188 udp
= netdev_tnl_ip_extract_tnl_md(packet
, tnl
, hlen
);
195 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet
))) {
196 csum
= packet_csum_pseudoheader6(dp_packet_l3(packet
));
198 csum
= packet_csum_pseudoheader(dp_packet_l3(packet
));
201 csum
= csum_continue(csum
, udp
, dp_packet_size(packet
) -
202 ((const unsigned char *)udp
-
203 (const unsigned char *)dp_packet_l2(packet
)));
204 if (csum_finish(csum
)) {
207 tnl
->flags
|= FLOW_TNL_F_CSUM
;
210 tnl
->tp_src
= udp
->udp_src
;
211 tnl
->tp_dst
= udp
->udp_dst
;
218 netdev_tnl_push_udp_header(struct dp_packet
*packet
,
219 const struct ovs_action_push_tnl
*data
)
221 struct udp_header
*udp
;
224 udp
= netdev_tnl_push_ip_header(packet
, data
->header
, data
->header_len
, &ip_tot_size
);
226 /* set udp src port */
227 udp
->udp_src
= netdev_tnl_get_src_port(packet
);
228 udp
->udp_len
= htons(ip_tot_size
);
232 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet
))) {
233 csum
= packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(dp_packet_data(packet
)));
235 csum
= packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet
)));
238 csum
= csum_continue(csum
, udp
, ip_tot_size
);
239 udp
->udp_csum
= csum_finish(csum
);
241 if (!udp
->udp_csum
) {
242 udp
->udp_csum
= htons(0xffff);
248 udp_build_header(struct netdev_tunnel_config
*tnl_cfg
,
249 const struct flow
*tnl_flow
,
250 struct ovs_action_push_tnl
*data
,
253 struct ip_header
*ip
;
254 struct ovs_16aligned_ip6_hdr
*ip6
;
255 struct udp_header
*udp
;
258 *hlen
= sizeof(struct eth_header
);
260 is_ipv6
= netdev_tnl_is_header_ipv6(data
->header
);
263 ip6
= netdev_tnl_ipv6_hdr(data
->header
);
264 ip6
->ip6_nxt
= IPPROTO_UDP
;
265 udp
= (struct udp_header
*) (ip6
+ 1);
266 *hlen
+= IPV6_HEADER_LEN
;
268 ip
= netdev_tnl_ip_hdr(data
->header
);
269 ip
->ip_proto
= IPPROTO_UDP
;
270 udp
= (struct udp_header
*) (ip
+ 1);
271 *hlen
+= IP_HEADER_LEN
;
274 udp
->udp_dst
= tnl_cfg
->dst_port
;
276 if (is_ipv6
|| tnl_flow
->tunnel
.flags
& FLOW_TNL_F_CSUM
) {
277 /* Write a value in now to mark that we should compute the checksum
278 * later. 0xffff is handy because it is transparent to the
280 udp
->udp_csum
= htons(0xffff);
287 gre_header_len(ovs_be16 flags
)
291 if (flags
& htons(GRE_CSUM
)) {
294 if (flags
& htons(GRE_KEY
)) {
297 if (flags
& htons(GRE_SEQ
)) {
304 parse_gre_header(struct dp_packet
*packet
,
305 struct flow_tnl
*tnl
)
307 const struct gre_base_hdr
*greh
;
308 ovs_16aligned_be32
*options
;
312 greh
= netdev_tnl_ip_extract_tnl_md(packet
, tnl
, &ulen
);
317 if (greh
->flags
& ~(htons(GRE_CSUM
| GRE_KEY
| GRE_SEQ
))) {
321 if (greh
->protocol
!= htons(ETH_TYPE_TEB
)) {
325 hlen
= ulen
+ gre_header_len(greh
->flags
);
326 if (hlen
> dp_packet_size(packet
)) {
330 options
= (ovs_16aligned_be32
*)(greh
+ 1);
331 if (greh
->flags
& htons(GRE_CSUM
)) {
334 pkt_csum
= csum(greh
, dp_packet_size(packet
) -
335 ((const unsigned char *)greh
-
336 (const unsigned char *)dp_packet_l2(packet
)));
340 tnl
->flags
= FLOW_TNL_F_CSUM
;
344 if (greh
->flags
& htons(GRE_KEY
)) {
345 tnl
->tun_id
= (OVS_FORCE ovs_be64
) ((OVS_FORCE
uint64_t)(get_16aligned_be32(options
)) << 32);
346 tnl
->flags
|= FLOW_TNL_F_KEY
;
350 if (greh
->flags
& htons(GRE_SEQ
)) {
358 netdev_gre_pop_header(struct dp_packet
*packet
)
360 struct pkt_metadata
*md
= &packet
->md
;
361 struct flow_tnl
*tnl
= &md
->tunnel
;
362 int hlen
= sizeof(struct eth_header
) + 4;
364 hlen
+= netdev_tnl_is_header_ipv6(dp_packet_data(packet
)) ?
365 IPV6_HEADER_LEN
: IP_HEADER_LEN
;
367 pkt_metadata_init_tnl(md
);
368 if (hlen
> dp_packet_size(packet
)) {
372 hlen
= parse_gre_header(packet
, tnl
);
377 dp_packet_reset_packet(packet
, hlen
);
381 dp_packet_delete(packet
);
386 netdev_gre_push_header(struct dp_packet
*packet
,
387 const struct ovs_action_push_tnl
*data
)
389 struct gre_base_hdr
*greh
;
392 greh
= netdev_tnl_push_ip_header(packet
, data
->header
, data
->header_len
, &ip_tot_size
);
394 if (greh
->flags
& htons(GRE_CSUM
)) {
395 ovs_be16
*csum_opt
= (ovs_be16
*) (greh
+ 1);
396 *csum_opt
= csum(greh
, ip_tot_size
);
401 netdev_gre_build_header(const struct netdev
*netdev
,
402 struct ovs_action_push_tnl
*data
,
403 const struct flow
*tnl_flow
)
405 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
406 struct netdev_tunnel_config
*tnl_cfg
;
407 struct ip_header
*ip
;
408 struct ovs_16aligned_ip6_hdr
*ip6
;
409 struct gre_base_hdr
*greh
;
410 ovs_16aligned_be32
*options
;
414 is_ipv6
= netdev_tnl_is_header_ipv6(data
->header
);
416 /* XXX: RCUfy tnl_cfg. */
417 ovs_mutex_lock(&dev
->mutex
);
418 tnl_cfg
= &dev
->tnl_cfg
;
421 ip6
= netdev_tnl_ipv6_hdr(data
->header
);
422 ip6
->ip6_nxt
= IPPROTO_GRE
;
423 greh
= (struct gre_base_hdr
*) (ip6
+ 1);
425 ip
= netdev_tnl_ip_hdr(data
->header
);
426 ip
->ip_proto
= IPPROTO_GRE
;
427 greh
= (struct gre_base_hdr
*) (ip
+ 1);
430 greh
->protocol
= htons(ETH_TYPE_TEB
);
433 options
= (ovs_16aligned_be32
*) (greh
+ 1);
434 if (tnl_flow
->tunnel
.flags
& FLOW_TNL_F_CSUM
) {
435 greh
->flags
|= htons(GRE_CSUM
);
436 put_16aligned_be32(options
, 0);
440 if (tnl_cfg
->out_key_present
) {
441 greh
->flags
|= htons(GRE_KEY
);
442 put_16aligned_be32(options
, (OVS_FORCE ovs_be32
)
443 ((OVS_FORCE
uint64_t) tnl_flow
->tunnel
.tun_id
>> 32));
447 ovs_mutex_unlock(&dev
->mutex
);
449 hlen
= (uint8_t *) options
- (uint8_t *) greh
;
451 data
->header_len
= sizeof(struct eth_header
) + hlen
+
452 (is_ipv6
? IPV6_HEADER_LEN
: IP_HEADER_LEN
);
453 data
->tnl_type
= OVS_VPORT_TYPE_GRE
;
458 netdev_vxlan_pop_header(struct dp_packet
*packet
)
460 struct pkt_metadata
*md
= &packet
->md
;
461 struct flow_tnl
*tnl
= &md
->tunnel
;
462 struct vxlanhdr
*vxh
;
465 pkt_metadata_init_tnl(md
);
466 if (VXLAN_HLEN
> dp_packet_l4_size(packet
)) {
470 vxh
= udp_extract_tnl_md(packet
, tnl
, &hlen
);
475 if (get_16aligned_be32(&vxh
->vx_flags
) != htonl(VXLAN_FLAGS
) ||
476 (get_16aligned_be32(&vxh
->vx_vni
) & htonl(0xff))) {
477 VLOG_WARN_RL(&err_rl
, "invalid vxlan flags=%#x vni=%#x\n",
478 ntohl(get_16aligned_be32(&vxh
->vx_flags
)),
479 ntohl(get_16aligned_be32(&vxh
->vx_vni
)));
482 tnl
->tun_id
= htonll(ntohl(get_16aligned_be32(&vxh
->vx_vni
)) >> 8);
483 tnl
->flags
|= FLOW_TNL_F_KEY
;
485 dp_packet_reset_packet(packet
, hlen
+ VXLAN_HLEN
);
489 dp_packet_delete(packet
);
494 netdev_vxlan_build_header(const struct netdev
*netdev
,
495 struct ovs_action_push_tnl
*data
,
496 const struct flow
*tnl_flow
)
498 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
499 struct netdev_tunnel_config
*tnl_cfg
;
500 struct vxlanhdr
*vxh
;
503 /* XXX: RCUfy tnl_cfg. */
504 ovs_mutex_lock(&dev
->mutex
);
505 tnl_cfg
= &dev
->tnl_cfg
;
507 vxh
= udp_build_header(tnl_cfg
, tnl_flow
, data
, &hlen
);
509 put_16aligned_be32(&vxh
->vx_flags
, htonl(VXLAN_FLAGS
));
510 put_16aligned_be32(&vxh
->vx_vni
, htonl(ntohll(tnl_flow
->tunnel
.tun_id
) << 8));
512 ovs_mutex_unlock(&dev
->mutex
);
513 data
->header_len
= hlen
+ VXLAN_HLEN
;
514 data
->tnl_type
= OVS_VPORT_TYPE_VXLAN
;
519 netdev_geneve_pop_header(struct dp_packet
*packet
)
521 struct pkt_metadata
*md
= &packet
->md
;
522 struct flow_tnl
*tnl
= &md
->tunnel
;
523 struct genevehdr
*gnh
;
524 unsigned int hlen
, opts_len
, ulen
;
526 pkt_metadata_init_tnl(md
);
527 if (GENEVE_BASE_HLEN
> dp_packet_l4_size(packet
)) {
528 VLOG_WARN_RL(&err_rl
, "geneve packet too small: min header=%u packet size=%"PRIuSIZE
"\n",
529 (unsigned int)GENEVE_BASE_HLEN
, dp_packet_l4_size(packet
));
533 gnh
= udp_extract_tnl_md(packet
, tnl
, &ulen
);
538 opts_len
= gnh
->opt_len
* 4;
539 hlen
= ulen
+ GENEVE_BASE_HLEN
+ opts_len
;
540 if (hlen
> dp_packet_size(packet
)) {
541 VLOG_WARN_RL(&err_rl
, "geneve packet too small: header len=%u packet size=%u\n",
542 hlen
, dp_packet_size(packet
));
547 VLOG_WARN_RL(&err_rl
, "unknown geneve version: %"PRIu8
"\n", gnh
->ver
);
551 if (gnh
->proto_type
!= htons(ETH_TYPE_TEB
)) {
552 VLOG_WARN_RL(&err_rl
, "unknown geneve encapsulated protocol: %#x\n",
553 ntohs(gnh
->proto_type
));
557 tnl
->flags
|= gnh
->oam
? FLOW_TNL_F_OAM
: 0;
558 tnl
->tun_id
= htonll(ntohl(get_16aligned_be32(&gnh
->vni
)) >> 8);
559 tnl
->flags
|= FLOW_TNL_F_KEY
;
561 memcpy(tnl
->metadata
.opts
.gnv
, gnh
->options
, opts_len
);
562 tnl
->metadata
.present
.len
= opts_len
;
563 tnl
->flags
|= FLOW_TNL_F_UDPIF
;
565 dp_packet_reset_packet(packet
, hlen
);
569 dp_packet_delete(packet
);
574 netdev_geneve_build_header(const struct netdev
*netdev
,
575 struct ovs_action_push_tnl
*data
,
576 const struct flow
*tnl_flow
)
578 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
579 struct netdev_tunnel_config
*tnl_cfg
;
580 struct genevehdr
*gnh
;
585 /* XXX: RCUfy tnl_cfg. */
586 ovs_mutex_lock(&dev
->mutex
);
587 tnl_cfg
= &dev
->tnl_cfg
;
589 gnh
= udp_build_header(tnl_cfg
, tnl_flow
, data
, &hlen
);
591 put_16aligned_be32(&gnh
->vni
, htonl(ntohll(tnl_flow
->tunnel
.tun_id
) << 8));
593 ovs_mutex_unlock(&dev
->mutex
);
595 opt_len
= tun_metadata_to_geneve_header(&tnl_flow
->tunnel
,
596 gnh
->options
, &crit_opt
);
598 gnh
->opt_len
= opt_len
/ 4;
599 gnh
->oam
= !!(tnl_flow
->tunnel
.flags
& FLOW_TNL_F_OAM
);
600 gnh
->critical
= crit_opt
? 1 : 0;
601 gnh
->proto_type
= htons(ETH_TYPE_TEB
);
603 data
->header_len
= hlen
+ GENEVE_BASE_HLEN
+ opt_len
;
604 data
->tnl_type
= OVS_VPORT_TYPE_GENEVE
;
610 netdev_tnl_egress_port_range(struct unixctl_conn
*conn
, int argc
,
611 const char *argv
[], void *aux OVS_UNUSED
)
616 struct ds ds
= DS_EMPTY_INITIALIZER
;
618 ds_put_format(&ds
, "Tunnel UDP source port range: %"PRIu16
"-%"PRIu16
"\n",
619 tnl_udp_port_min
, tnl_udp_port_max
);
621 unixctl_command_reply(conn
, ds_cstr(&ds
));
630 val1
= atoi(argv
[1]);
631 if (val1
<= 0 || val1
> UINT16_MAX
) {
632 unixctl_command_reply(conn
, "Invalid min.");
635 val2
= atoi(argv
[2]);
636 if (val2
<= 0 || val2
> UINT16_MAX
) {
637 unixctl_command_reply(conn
, "Invalid max.");
642 tnl_udp_port_min
= val2
;
643 tnl_udp_port_max
= val1
;
645 tnl_udp_port_min
= val1
;
646 tnl_udp_port_max
= val2
;
648 seq_change(tnl_conf_seq
);
650 unixctl_command_reply(conn
, "OK");