/*
 * Copyright (c) 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 #include "netdev-vport.h"
23 #include <sys/socket.h>
25 #include <sys/ioctl.h>
27 #include "byte-order.h"
32 #include "dp-packet.h"
33 #include "dynamic-string.h"
38 #include "netdev-provider.h"
39 #include "odp-netlink.h"
40 #include "dp-packet.h"
41 #include "ovs-router.h"
43 #include "poll-loop.h"
44 #include "route-table.h"
46 #include "socket-util.h"
47 #include "openvswitch/vlog.h"
48 #include "unaligned.h"
52 VLOG_DEFINE_THIS_MODULE(netdev_vport
);
53 static struct vlog_rate_limit err_rl
= VLOG_RATE_LIMIT_INIT(60, 5);
55 #define GENEVE_DST_PORT 6081
56 #define VXLAN_DST_PORT 4789
57 #define LISP_DST_PORT 4341
59 #define VXLAN_HLEN (sizeof(struct eth_header) + \
60 sizeof(struct ip_header) + \
61 sizeof(struct udp_header) + \
62 sizeof(struct vxlanhdr))
64 #define DEFAULT_TTL 64
69 /* Protects all members below. */
70 struct ovs_mutex mutex
;
72 uint8_t etheraddr
[ETH_ADDR_LEN
];
73 struct netdev_stats stats
;
76 struct netdev_tunnel_config tnl_cfg
;
77 char egress_iface
[IFNAMSIZ
];
85 const char *dpif_port
;
86 struct netdev_class netdev_class
;
/* Last read of the route-table's change number. */
static uint64_t rt_change_seqno;

static int netdev_vport_construct(struct netdev *);
static int get_patch_config(const struct netdev *netdev, struct smap *args);
static int get_tunnel_config(const struct netdev *, struct smap *args);
static bool tunnel_check_status_change__(struct netdev_vport *);

/* Bounds of the UDP source-port range used for tunnel entropy; adjustable
 * at runtime via the "tnl/egress_port_range" unixctl command. */
static uint16_t tnl_udp_port_min = 32768;
static uint16_t tnl_udp_port_max = 61000;
101 is_vport_class(const struct netdev_class
*class)
103 return class->construct
== netdev_vport_construct
;
/* Public wrapper around is_vport_class(). */
bool
netdev_vport_is_vport_class(const struct netdev_class *class)
{
    return is_vport_class(class);
}
112 static const struct vport_class
*
113 vport_class_cast(const struct netdev_class
*class)
115 ovs_assert(is_vport_class(class));
116 return CONTAINER_OF(class, struct vport_class
, netdev_class
);
119 static struct netdev_vport
*
120 netdev_vport_cast(const struct netdev
*netdev
)
122 ovs_assert(is_vport_class(netdev_get_class(netdev
)));
123 return CONTAINER_OF(netdev
, struct netdev_vport
, up
);
126 static const struct netdev_tunnel_config
*
127 get_netdev_tunnel_config(const struct netdev
*netdev
)
129 return &netdev_vport_cast(netdev
)->tnl_cfg
;
133 netdev_vport_is_patch(const struct netdev
*netdev
)
135 const struct netdev_class
*class = netdev_get_class(netdev
);
137 return class->get_config
== get_patch_config
;
/* Returns true if 'dev' carries L3 payloads (no Ethernet header), which is
 * the case only for LISP tunnels here. */
bool
netdev_vport_is_layer3(const struct netdev *dev)
{
    const char *type = netdev_get_type(dev);

    return (!strcmp("lisp", type));
}
149 netdev_vport_needs_dst_port(const struct netdev
*dev
)
151 const struct netdev_class
*class = netdev_get_class(dev
);
152 const char *type
= netdev_get_type(dev
);
154 return (class->get_config
== get_tunnel_config
&&
155 (!strcmp("geneve", type
) || !strcmp("vxlan", type
) ||
156 !strcmp("lisp", type
)));
160 netdev_vport_class_get_dpif_port(const struct netdev_class
*class)
162 return is_vport_class(class) ? vport_class_cast(class)->dpif_port
: NULL
;
166 netdev_vport_get_dpif_port(const struct netdev
*netdev
,
167 char namebuf
[], size_t bufsize
)
169 const struct netdev_class
*class = netdev_get_class(netdev
);
170 const char *dpif_port
= netdev_vport_class_get_dpif_port(class);
173 return netdev_get_name(netdev
);
176 if (netdev_vport_needs_dst_port(netdev
)) {
177 const struct netdev_vport
*vport
= netdev_vport_cast(netdev
);
180 * Note: IFNAMSIZ is 16 bytes long. Implementations should choose
181 * a dpif port name that is short enough to fit including any
182 * port numbers but assert just in case.
184 BUILD_ASSERT(NETDEV_VPORT_NAME_BUFSIZE
>= IFNAMSIZ
);
185 ovs_assert(strlen(dpif_port
) + 6 < IFNAMSIZ
);
186 snprintf(namebuf
, bufsize
, "%s_%d", dpif_port
,
187 ntohs(vport
->tnl_cfg
.dst_port
));
195 netdev_vport_get_dpif_port_strdup(const struct netdev
*netdev
)
197 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
199 return xstrdup(netdev_vport_get_dpif_port(netdev
, namebuf
,
203 /* Whenever the route-table change number is incremented,
204 * netdev_vport_route_changed() should be called to update
205 * the corresponding tunnel interface status. */
207 netdev_vport_route_changed(void)
209 struct netdev
**vports
;
212 vports
= netdev_get_vports(&n_vports
);
213 for (i
= 0; i
< n_vports
; i
++) {
214 struct netdev
*netdev_
= vports
[i
];
215 struct netdev_vport
*netdev
= netdev_vport_cast(netdev_
);
217 ovs_mutex_lock(&netdev
->mutex
);
218 /* Finds all tunnel vports. */
219 if (netdev
->tnl_cfg
.ip_dst
) {
220 if (tunnel_check_status_change__(netdev
)) {
221 netdev_change_seq_changed(netdev_
);
224 ovs_mutex_unlock(&netdev
->mutex
);
226 netdev_close(netdev_
);
232 static struct netdev
*
233 netdev_vport_alloc(void)
235 struct netdev_vport
*netdev
= xzalloc(sizeof *netdev
);
240 netdev_vport_construct(struct netdev
*netdev_
)
242 struct netdev_vport
*dev
= netdev_vport_cast(netdev_
);
243 const char *type
= netdev_get_type(netdev_
);
245 ovs_mutex_init(&dev
->mutex
);
246 eth_addr_random(dev
->etheraddr
);
248 /* Add a default destination port for tunnel ports if none specified. */
249 if (!strcmp(type
, "geneve")) {
250 dev
->tnl_cfg
.dst_port
= htons(GENEVE_DST_PORT
);
251 } else if (!strcmp(type
, "vxlan")) {
252 dev
->tnl_cfg
.dst_port
= htons(VXLAN_DST_PORT
);
253 } else if (!strcmp(type
, "lisp")) {
254 dev
->tnl_cfg
.dst_port
= htons(LISP_DST_PORT
);
261 netdev_vport_destruct(struct netdev
*netdev_
)
263 struct netdev_vport
*netdev
= netdev_vport_cast(netdev_
);
266 ovs_mutex_destroy(&netdev
->mutex
);
/* netdev_class "dealloc" callback: frees the container allocated by
 * netdev_vport_alloc().  (Reconstructed: free() lost in extraction.) */
static void
netdev_vport_dealloc(struct netdev *netdev_)
{
    struct netdev_vport *netdev = netdev_vport_cast(netdev_);

    free(netdev);
}
277 netdev_vport_set_etheraddr(struct netdev
*netdev_
,
278 const uint8_t mac
[ETH_ADDR_LEN
])
280 struct netdev_vport
*netdev
= netdev_vport_cast(netdev_
);
282 ovs_mutex_lock(&netdev
->mutex
);
283 memcpy(netdev
->etheraddr
, mac
, ETH_ADDR_LEN
);
284 ovs_mutex_unlock(&netdev
->mutex
);
285 netdev_change_seq_changed(netdev_
);
291 netdev_vport_get_etheraddr(const struct netdev
*netdev_
,
292 uint8_t mac
[ETH_ADDR_LEN
])
294 struct netdev_vport
*netdev
= netdev_vport_cast(netdev_
);
296 ovs_mutex_lock(&netdev
->mutex
);
297 memcpy(mac
, netdev
->etheraddr
, ETH_ADDR_LEN
);
298 ovs_mutex_unlock(&netdev
->mutex
);
303 /* Checks if the tunnel status has changed and returns a boolean.
304 * Updates the tunnel status if it has changed. */
306 tunnel_check_status_change__(struct netdev_vport
*netdev
)
307 OVS_REQUIRES(netdev
->mutex
)
309 char iface
[IFNAMSIZ
];
315 route
= netdev
->tnl_cfg
.ip_dst
;
316 if (ovs_router_lookup(route
, iface
, &gw
)) {
317 struct netdev
*egress_netdev
;
319 if (!netdev_open(iface
, "system", &egress_netdev
)) {
320 status
= netdev_get_carrier(egress_netdev
);
321 netdev_close(egress_netdev
);
325 if (strcmp(netdev
->egress_iface
, iface
)
326 || netdev
->carrier_status
!= status
) {
327 ovs_strlcpy(netdev
->egress_iface
, iface
, IFNAMSIZ
);
328 netdev
->carrier_status
= status
;
337 tunnel_get_status(const struct netdev
*netdev_
, struct smap
*smap
)
339 struct netdev_vport
*netdev
= netdev_vport_cast(netdev_
);
341 if (netdev
->egress_iface
[0]) {
342 smap_add(smap
, "tunnel_egress_iface", netdev
->egress_iface
);
344 smap_add(smap
, "tunnel_egress_iface_carrier",
345 netdev
->carrier_status
? "up" : "down");
352 netdev_vport_update_flags(struct netdev
*netdev OVS_UNUSED
,
353 enum netdev_flags off
,
354 enum netdev_flags on OVS_UNUSED
,
355 enum netdev_flags
*old_flagsp
)
357 if (off
& (NETDEV_UP
| NETDEV_PROMISC
)) {
361 *old_flagsp
= NETDEV_UP
| NETDEV_PROMISC
;
366 netdev_vport_run(void)
371 seq
= route_table_get_change_seq();
372 if (rt_change_seqno
!= seq
) {
373 rt_change_seqno
= seq
;
374 netdev_vport_route_changed();
379 netdev_vport_wait(void)
384 seq
= route_table_get_change_seq();
385 if (rt_change_seqno
!= seq
) {
386 poll_immediate_wake();
390 /* Code specific to tunnel types. */
393 parse_key(const struct smap
*args
, const char *name
,
394 bool *present
, bool *flow
)
401 s
= smap_get(args
, name
);
403 s
= smap_get(args
, "key");
411 if (!strcmp(s
, "flow")) {
415 return htonll(strtoull(s
, NULL
, 0));
420 set_tunnel_config(struct netdev
*dev_
, const struct smap
*args
)
422 struct netdev_vport
*dev
= netdev_vport_cast(dev_
);
423 const char *name
= netdev_get_name(dev_
);
424 const char *type
= netdev_get_type(dev_
);
425 bool ipsec_mech_set
, needs_dst_port
, has_csum
;
426 struct netdev_tunnel_config tnl_cfg
;
427 struct smap_node
*node
;
429 has_csum
= strstr(type
, "gre") || strstr(type
, "geneve") ||
430 strstr(type
, "vxlan");
431 ipsec_mech_set
= false;
432 memset(&tnl_cfg
, 0, sizeof tnl_cfg
);
434 /* Add a default destination port for tunnel ports if none specified. */
435 if (!strcmp(type
, "geneve")) {
436 tnl_cfg
.dst_port
= htons(GENEVE_DST_PORT
);
439 if (!strcmp(type
, "vxlan")) {
440 tnl_cfg
.dst_port
= htons(VXLAN_DST_PORT
);
443 if (!strcmp(type
, "lisp")) {
444 tnl_cfg
.dst_port
= htons(LISP_DST_PORT
);
447 needs_dst_port
= netdev_vport_needs_dst_port(dev_
);
448 tnl_cfg
.ipsec
= strstr(type
, "ipsec");
449 tnl_cfg
.dont_fragment
= true;
451 SMAP_FOR_EACH (node
, args
) {
452 if (!strcmp(node
->key
, "remote_ip")) {
453 struct in_addr in_addr
;
454 if (!strcmp(node
->value
, "flow")) {
455 tnl_cfg
.ip_dst_flow
= true;
456 tnl_cfg
.ip_dst
= htonl(0);
457 } else if (lookup_ip(node
->value
, &in_addr
)) {
458 VLOG_WARN("%s: bad %s 'remote_ip'", name
, type
);
459 } else if (ip_is_multicast(in_addr
.s_addr
)) {
460 VLOG_WARN("%s: multicast remote_ip="IP_FMT
" not allowed",
461 name
, IP_ARGS(in_addr
.s_addr
));
464 tnl_cfg
.ip_dst
= in_addr
.s_addr
;
466 } else if (!strcmp(node
->key
, "local_ip")) {
467 struct in_addr in_addr
;
468 if (!strcmp(node
->value
, "flow")) {
469 tnl_cfg
.ip_src_flow
= true;
470 tnl_cfg
.ip_src
= htonl(0);
471 } else if (lookup_ip(node
->value
, &in_addr
)) {
472 VLOG_WARN("%s: bad %s 'local_ip'", name
, type
);
474 tnl_cfg
.ip_src
= in_addr
.s_addr
;
476 } else if (!strcmp(node
->key
, "tos")) {
477 if (!strcmp(node
->value
, "inherit")) {
478 tnl_cfg
.tos_inherit
= true;
482 tos
= strtol(node
->value
, &endptr
, 0);
483 if (*endptr
== '\0' && tos
== (tos
& IP_DSCP_MASK
)) {
486 VLOG_WARN("%s: invalid TOS %s", name
, node
->value
);
489 } else if (!strcmp(node
->key
, "ttl")) {
490 if (!strcmp(node
->value
, "inherit")) {
491 tnl_cfg
.ttl_inherit
= true;
493 tnl_cfg
.ttl
= atoi(node
->value
);
495 } else if (!strcmp(node
->key
, "dst_port") && needs_dst_port
) {
496 tnl_cfg
.dst_port
= htons(atoi(node
->value
));
497 } else if (!strcmp(node
->key
, "csum") && has_csum
) {
498 if (!strcmp(node
->value
, "true")) {
501 } else if (!strcmp(node
->key
, "df_default")) {
502 if (!strcmp(node
->value
, "false")) {
503 tnl_cfg
.dont_fragment
= false;
505 } else if (!strcmp(node
->key
, "peer_cert") && tnl_cfg
.ipsec
) {
506 if (smap_get(args
, "certificate")) {
507 ipsec_mech_set
= true;
509 const char *use_ssl_cert
;
511 /* If the "use_ssl_cert" is true, then "certificate" and
512 * "private_key" will be pulled from the SSL table. The
513 * use of this option is strongly discouraged, since it
514 * will like be removed when multiple SSL configurations
515 * are supported by OVS.
517 use_ssl_cert
= smap_get(args
, "use_ssl_cert");
518 if (!use_ssl_cert
|| strcmp(use_ssl_cert
, "true")) {
519 VLOG_ERR("%s: 'peer_cert' requires 'certificate' argument",
523 ipsec_mech_set
= true;
525 } else if (!strcmp(node
->key
, "psk") && tnl_cfg
.ipsec
) {
526 ipsec_mech_set
= true;
527 } else if (tnl_cfg
.ipsec
528 && (!strcmp(node
->key
, "certificate")
529 || !strcmp(node
->key
, "private_key")
530 || !strcmp(node
->key
, "use_ssl_cert"))) {
531 /* Ignore options not used by the netdev. */
532 } else if (!strcmp(node
->key
, "key") ||
533 !strcmp(node
->key
, "in_key") ||
534 !strcmp(node
->key
, "out_key")) {
535 /* Handled separately below. */
536 } else if (!strcmp(node
->key
, "exts")) {
537 char *str
= xstrdup(node
->value
);
538 char *ext
, *save_ptr
= NULL
;
542 ext
= strtok_r(str
, ",", &save_ptr
);
544 if (!strcmp(type
, "vxlan") && !strcmp(ext
, "gbp")) {
545 tnl_cfg
.exts
|= (1 << OVS_VXLAN_EXT_GBP
);
547 VLOG_WARN("%s: unknown extension '%s'", name
, ext
);
550 ext
= strtok_r(NULL
, ",", &save_ptr
);
555 VLOG_WARN("%s: unknown %s argument '%s'", name
, type
, node
->key
);
560 static struct ovs_mutex mutex
= OVS_MUTEX_INITIALIZER
;
561 static pid_t pid
= 0;
564 ovs_mutex_lock(&mutex
);
566 char *file_name
= xasprintf("%s/%s", ovs_rundir(),
567 "ovs-monitor-ipsec.pid");
568 pid
= read_pidfile(file_name
);
571 ovs_mutex_unlock(&mutex
);
575 VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon",
580 if (smap_get(args
, "peer_cert") && smap_get(args
, "psk")) {
581 VLOG_ERR("%s: cannot define both 'peer_cert' and 'psk'", name
);
585 if (!ipsec_mech_set
) {
586 VLOG_ERR("%s: IPsec requires an 'peer_cert' or psk' argument",
592 if (!tnl_cfg
.ip_dst
&& !tnl_cfg
.ip_dst_flow
) {
593 VLOG_ERR("%s: %s type requires valid 'remote_ip' argument",
597 if (tnl_cfg
.ip_src_flow
&& !tnl_cfg
.ip_dst_flow
) {
598 VLOG_ERR("%s: %s type requires 'remote_ip=flow' with 'local_ip=flow'",
603 tnl_cfg
.ttl
= DEFAULT_TTL
;
606 tnl_cfg
.in_key
= parse_key(args
, "in_key",
607 &tnl_cfg
.in_key_present
,
608 &tnl_cfg
.in_key_flow
);
610 tnl_cfg
.out_key
= parse_key(args
, "out_key",
611 &tnl_cfg
.out_key_present
,
612 &tnl_cfg
.out_key_flow
);
614 ovs_mutex_lock(&dev
->mutex
);
615 dev
->tnl_cfg
= tnl_cfg
;
616 tunnel_check_status_change__(dev
);
617 netdev_change_seq_changed(dev_
);
618 ovs_mutex_unlock(&dev
->mutex
);
624 get_tunnel_config(const struct netdev
*dev
, struct smap
*args
)
626 struct netdev_vport
*netdev
= netdev_vport_cast(dev
);
627 struct netdev_tunnel_config tnl_cfg
;
629 ovs_mutex_lock(&netdev
->mutex
);
630 tnl_cfg
= netdev
->tnl_cfg
;
631 ovs_mutex_unlock(&netdev
->mutex
);
633 if (tnl_cfg
.ip_dst
) {
634 smap_add_format(args
, "remote_ip", IP_FMT
, IP_ARGS(tnl_cfg
.ip_dst
));
635 } else if (tnl_cfg
.ip_dst_flow
) {
636 smap_add(args
, "remote_ip", "flow");
639 if (tnl_cfg
.ip_src
) {
640 smap_add_format(args
, "local_ip", IP_FMT
, IP_ARGS(tnl_cfg
.ip_src
));
641 } else if (tnl_cfg
.ip_src_flow
) {
642 smap_add(args
, "local_ip", "flow");
645 if (tnl_cfg
.in_key_flow
&& tnl_cfg
.out_key_flow
) {
646 smap_add(args
, "key", "flow");
647 } else if (tnl_cfg
.in_key_present
&& tnl_cfg
.out_key_present
648 && tnl_cfg
.in_key
== tnl_cfg
.out_key
) {
649 smap_add_format(args
, "key", "%"PRIu64
, ntohll(tnl_cfg
.in_key
));
651 if (tnl_cfg
.in_key_flow
) {
652 smap_add(args
, "in_key", "flow");
653 } else if (tnl_cfg
.in_key_present
) {
654 smap_add_format(args
, "in_key", "%"PRIu64
,
655 ntohll(tnl_cfg
.in_key
));
658 if (tnl_cfg
.out_key_flow
) {
659 smap_add(args
, "out_key", "flow");
660 } else if (tnl_cfg
.out_key_present
) {
661 smap_add_format(args
, "out_key", "%"PRIu64
,
662 ntohll(tnl_cfg
.out_key
));
666 if (tnl_cfg
.ttl_inherit
) {
667 smap_add(args
, "ttl", "inherit");
668 } else if (tnl_cfg
.ttl
!= DEFAULT_TTL
) {
669 smap_add_format(args
, "ttl", "%"PRIu8
, tnl_cfg
.ttl
);
672 if (tnl_cfg
.tos_inherit
) {
673 smap_add(args
, "tos", "inherit");
674 } else if (tnl_cfg
.tos
) {
675 smap_add_format(args
, "tos", "0x%x", tnl_cfg
.tos
);
678 if (tnl_cfg
.dst_port
) {
679 uint16_t dst_port
= ntohs(tnl_cfg
.dst_port
);
680 const char *type
= netdev_get_type(dev
);
682 if ((!strcmp("geneve", type
) && dst_port
!= GENEVE_DST_PORT
) ||
683 (!strcmp("vxlan", type
) && dst_port
!= VXLAN_DST_PORT
) ||
684 (!strcmp("lisp", type
) && dst_port
!= LISP_DST_PORT
)) {
685 smap_add_format(args
, "dst_port", "%d", dst_port
);
690 smap_add(args
, "csum", "true");
693 if (!tnl_cfg
.dont_fragment
) {
694 smap_add(args
, "df_default", "false");
700 /* Code specific to patch ports. */
702 /* If 'netdev' is a patch port, returns the name of its peer as a malloc()'d
703 * string that the caller must free.
705 * If 'netdev' is not a patch port, returns NULL. */
707 netdev_vport_patch_peer(const struct netdev
*netdev_
)
711 if (netdev_vport_is_patch(netdev_
)) {
712 struct netdev_vport
*netdev
= netdev_vport_cast(netdev_
);
714 ovs_mutex_lock(&netdev
->mutex
);
716 peer
= xstrdup(netdev
->peer
);
718 ovs_mutex_unlock(&netdev
->mutex
);
725 netdev_vport_inc_rx(const struct netdev
*netdev
,
726 const struct dpif_flow_stats
*stats
)
728 if (is_vport_class(netdev_get_class(netdev
))) {
729 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
731 ovs_mutex_lock(&dev
->mutex
);
732 dev
->stats
.rx_packets
+= stats
->n_packets
;
733 dev
->stats
.rx_bytes
+= stats
->n_bytes
;
734 ovs_mutex_unlock(&dev
->mutex
);
739 netdev_vport_inc_tx(const struct netdev
*netdev
,
740 const struct dpif_flow_stats
*stats
)
742 if (is_vport_class(netdev_get_class(netdev
))) {
743 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
745 ovs_mutex_lock(&dev
->mutex
);
746 dev
->stats
.tx_packets
+= stats
->n_packets
;
747 dev
->stats
.tx_bytes
+= stats
->n_bytes
;
748 ovs_mutex_unlock(&dev
->mutex
);
753 get_patch_config(const struct netdev
*dev_
, struct smap
*args
)
755 struct netdev_vport
*dev
= netdev_vport_cast(dev_
);
757 ovs_mutex_lock(&dev
->mutex
);
759 smap_add(args
, "peer", dev
->peer
);
761 ovs_mutex_unlock(&dev
->mutex
);
767 set_patch_config(struct netdev
*dev_
, const struct smap
*args
)
769 struct netdev_vport
*dev
= netdev_vport_cast(dev_
);
770 const char *name
= netdev_get_name(dev_
);
773 peer
= smap_get(args
, "peer");
775 VLOG_ERR("%s: patch type requires valid 'peer' argument", name
);
779 if (smap_count(args
) > 1) {
780 VLOG_ERR("%s: patch type takes only a 'peer' argument", name
);
784 if (!strcmp(name
, peer
)) {
785 VLOG_ERR("%s: patch peer must not be self", name
);
789 ovs_mutex_lock(&dev
->mutex
);
791 dev
->peer
= xstrdup(peer
);
792 netdev_change_seq_changed(dev_
);
793 ovs_mutex_unlock(&dev
->mutex
);
799 get_stats(const struct netdev
*netdev
, struct netdev_stats
*stats
)
801 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
803 ovs_mutex_lock(&dev
->mutex
);
805 ovs_mutex_unlock(&dev
->mutex
);
811 /* Tunnel push pop ops. */
813 static struct ip_header
*
816 return (void *)((char *)eth
+ sizeof (struct eth_header
));
819 static struct gre_base_hdr
*
820 gre_hdr(struct ip_header
*ip
)
822 return (void *)((char *)ip
+ sizeof (struct ip_header
));
826 ip_extract_tnl_md(struct dp_packet
*packet
, struct flow_tnl
*tnl
)
828 struct ip_header
*nh
;
831 nh
= dp_packet_l3(packet
);
832 l4
= dp_packet_l4(packet
);
838 tnl
->ip_src
= get_16aligned_be32(&nh
->ip_src
);
839 tnl
->ip_dst
= get_16aligned_be32(&nh
->ip_dst
);
840 tnl
->ip_tos
= nh
->ip_tos
;
845 /* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
846 * reallocating the packet if necessary. 'header' should contain an Ethernet
847 * header, followed by an IPv4 header (without options), and an L4 header.
849 * This function sets the IP header's ip_tot_len field (which should be zeroed
850 * as part of 'header') and puts its value into '*ip_tot_size' as well. Also
851 * updates IP header checksum.
853 * Return pointer to the L4 header added to 'packet'. */
855 push_ip_header(struct dp_packet
*packet
,
856 const void *header
, int size
, int *ip_tot_size
)
858 struct eth_header
*eth
;
859 struct ip_header
*ip
;
861 eth
= dp_packet_push_uninit(packet
, size
);
862 *ip_tot_size
= dp_packet_size(packet
) - sizeof (struct eth_header
);
864 memcpy(eth
, header
, size
);
866 ip
->ip_tot_len
= htons(*ip_tot_size
);
869 ip
->ip_csum
= recalc_csum16(ip
->ip_csum
, 0, ip
->ip_tot_len
);
875 gre_header_len(ovs_be16 flags
)
877 int hlen
= sizeof(struct eth_header
) +
878 sizeof(struct ip_header
) + 4;
880 if (flags
& htons(GRE_CSUM
)) {
883 if (flags
& htons(GRE_KEY
)) {
886 if (flags
& htons(GRE_SEQ
)) {
893 parse_gre_header(struct dp_packet
*packet
,
894 struct flow_tnl
*tnl
)
896 const struct gre_base_hdr
*greh
;
897 ovs_16aligned_be32
*options
;
900 greh
= ip_extract_tnl_md(packet
, tnl
);
905 if (greh
->flags
& ~(htons(GRE_CSUM
| GRE_KEY
| GRE_SEQ
))) {
909 hlen
= gre_header_len(greh
->flags
);
910 if (hlen
> dp_packet_size(packet
)) {
914 options
= (ovs_16aligned_be32
*)(greh
+ 1);
915 if (greh
->flags
& htons(GRE_CSUM
)) {
918 pkt_csum
= csum(greh
, dp_packet_size(packet
) -
919 ((const unsigned char *)greh
-
920 (const unsigned char *)dp_packet_l2(packet
)));
924 tnl
->flags
= FLOW_TNL_F_CSUM
;
928 if (greh
->flags
& htons(GRE_KEY
)) {
929 tnl
->tun_id
= (OVS_FORCE ovs_be64
) ((OVS_FORCE
uint64_t)(get_16aligned_be32(options
)) << 32);
930 tnl
->flags
|= FLOW_TNL_F_KEY
;
934 if (greh
->flags
& htons(GRE_SEQ
)) {
942 reset_tnl_md(struct pkt_metadata
*md
)
944 memset(&md
->tunnel
, 0, sizeof(md
->tunnel
));
948 gre_extract_md(struct dp_packet
*packet
)
950 struct pkt_metadata
*md
= &packet
->md
;
951 struct flow_tnl
*tnl
= &md
->tunnel
;
952 int hlen
= sizeof(struct eth_header
) +
953 sizeof(struct ip_header
) + 4;
955 memset(md
, 0, sizeof *md
);
956 if (hlen
> dp_packet_size(packet
)) {
960 hlen
= parse_gre_header(packet
, tnl
);
965 dp_packet_reset_packet(packet
, hlen
);
969 netdev_gre_pop_header(struct netdev
*netdev_ OVS_UNUSED
,
970 struct dp_packet
**pkt
, int cnt
)
974 for (i
= 0; i
< cnt
; i
++) {
975 gre_extract_md(pkt
[i
]);
981 netdev_gre_push_header__(struct dp_packet
*packet
,
982 const void *header
, int size
)
984 struct gre_base_hdr
*greh
;
987 greh
= push_ip_header(packet
, header
, size
, &ip_tot_size
);
989 if (greh
->flags
& htons(GRE_CSUM
)) {
990 ovs_16aligned_be32
*options
= (ovs_16aligned_be32
*) (greh
+ 1);
992 put_16aligned_be32(options
,
993 (OVS_FORCE ovs_be32
) csum(greh
, ip_tot_size
- sizeof (struct ip_header
)));
998 netdev_gre_push_header(const struct netdev
*netdev OVS_UNUSED
,
999 struct dp_packet
**packets
, int cnt
,
1000 const struct ovs_action_push_tnl
*data
)
1004 for (i
= 0; i
< cnt
; i
++) {
1005 netdev_gre_push_header__(packets
[i
], data
->header
, data
->header_len
);
1006 packets
[i
]->md
= PKT_METADATA_INITIALIZER(u32_to_odp(data
->out_port
));
1013 netdev_gre_build_header(const struct netdev
*netdev
,
1014 struct ovs_action_push_tnl
*data
,
1015 const struct flow
*tnl_flow
)
1017 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
1018 struct netdev_tunnel_config
*tnl_cfg
;
1019 struct ip_header
*ip
;
1020 struct gre_base_hdr
*greh
;
1021 ovs_16aligned_be32
*options
;
1024 /* XXX: RCUfy tnl_cfg. */
1025 ovs_mutex_lock(&dev
->mutex
);
1026 tnl_cfg
= &dev
->tnl_cfg
;
1028 ip
= ip_hdr(data
->header
);
1029 ip
->ip_proto
= IPPROTO_GRE
;
1032 greh
->protocol
= htons(ETH_TYPE_TEB
);
1035 options
= (ovs_16aligned_be32
*) (greh
+ 1);
1036 if (tnl_cfg
->csum
) {
1037 greh
->flags
|= htons(GRE_CSUM
);
1038 put_16aligned_be32(options
, 0);
1042 if (tnl_cfg
->out_key_present
) {
1043 greh
->flags
|= htons(GRE_KEY
);
1044 put_16aligned_be32(options
, (OVS_FORCE ovs_be32
)
1045 ((OVS_FORCE
uint64_t) tnl_flow
->tunnel
.tun_id
>> 32));
1049 ovs_mutex_unlock(&dev
->mutex
);
1051 hlen
= (uint8_t *) options
- (uint8_t *) greh
;
1053 data
->header_len
= sizeof(struct eth_header
) +
1054 sizeof(struct ip_header
) + hlen
;
1055 data
->tnl_type
= OVS_VPORT_TYPE_GRE
;
1060 vxlan_extract_md(struct dp_packet
*packet
)
1062 struct pkt_metadata
*md
= &packet
->md
;
1063 struct flow_tnl
*tnl
= &md
->tunnel
;
1064 struct udp_header
*udp
;
1065 struct vxlanhdr
*vxh
;
1067 memset(md
, 0, sizeof *md
);
1068 if (VXLAN_HLEN
> dp_packet_size(packet
)) {
1072 udp
= ip_extract_tnl_md(packet
, tnl
);
1076 vxh
= (struct vxlanhdr
*) (udp
+ 1);
1078 if (get_16aligned_be32(&vxh
->vx_flags
) != htonl(VXLAN_FLAGS
) ||
1079 (get_16aligned_be32(&vxh
->vx_vni
) & htonl(0xff))) {
1080 VLOG_WARN_RL(&err_rl
, "invalid vxlan flags=%#x vni=%#x\n",
1081 ntohl(get_16aligned_be32(&vxh
->vx_flags
)),
1082 ntohl(get_16aligned_be32(&vxh
->vx_vni
)));
1086 tnl
->tp_src
= udp
->udp_src
;
1087 tnl
->tp_dst
= udp
->udp_dst
;
1088 tnl
->tun_id
= htonll(ntohl(get_16aligned_be32(&vxh
->vx_vni
)) >> 8);
1090 dp_packet_reset_packet(packet
, VXLAN_HLEN
);
1094 netdev_vxlan_pop_header(struct netdev
*netdev_ OVS_UNUSED
,
1095 struct dp_packet
**pkt
, int cnt
)
1099 for (i
= 0; i
< cnt
; i
++) {
1100 vxlan_extract_md(pkt
[i
]);
1106 netdev_vxlan_build_header(const struct netdev
*netdev
,
1107 struct ovs_action_push_tnl
*data
,
1108 const struct flow
*tnl_flow
)
1110 struct netdev_vport
*dev
= netdev_vport_cast(netdev
);
1111 struct netdev_tunnel_config
*tnl_cfg
;
1112 struct ip_header
*ip
;
1113 struct udp_header
*udp
;
1114 struct vxlanhdr
*vxh
;
1116 /* XXX: RCUfy tnl_cfg. */
1117 ovs_mutex_lock(&dev
->mutex
);
1118 tnl_cfg
= &dev
->tnl_cfg
;
1120 ip
= ip_hdr(data
->header
);
1121 ip
->ip_proto
= IPPROTO_UDP
;
1123 udp
= (struct udp_header
*) (ip
+ 1);
1124 udp
->udp_dst
= tnl_cfg
->dst_port
;
1126 vxh
= (struct vxlanhdr
*) (udp
+ 1);
1127 put_16aligned_be32(&vxh
->vx_flags
, htonl(VXLAN_FLAGS
));
1128 put_16aligned_be32(&vxh
->vx_vni
, htonl(ntohll(tnl_flow
->tunnel
.tun_id
) << 8));
1130 ovs_mutex_unlock(&dev
->mutex
);
1131 data
->header_len
= VXLAN_HLEN
;
1132 data
->tnl_type
= OVS_VPORT_TYPE_VXLAN
;
1137 get_src_port(struct dp_packet
*packet
)
1141 hash
= dp_packet_get_dp_hash(packet
);
1143 return htons((((uint64_t) hash
* (tnl_udp_port_max
- tnl_udp_port_min
)) >> 32) +
1148 netdev_vxlan_push_header__(struct dp_packet
*packet
,
1149 const void *header
, int size
)
1151 struct udp_header
*udp
;
1154 udp
= push_ip_header(packet
, header
, size
, &ip_tot_size
);
1156 /* set udp src port */
1157 udp
->udp_src
= get_src_port(packet
);
1158 udp
->udp_len
= htons(ip_tot_size
- sizeof (struct ip_header
));
1159 /* udp_csum is zero */
1163 netdev_vxlan_push_header(const struct netdev
*netdev OVS_UNUSED
,
1164 struct dp_packet
**packets
, int cnt
,
1165 const struct ovs_action_push_tnl
*data
)
1169 for (i
= 0; i
< cnt
; i
++) {
1170 netdev_vxlan_push_header__(packets
[i
],
1171 data
->header
, VXLAN_HLEN
);
1172 packets
[i
]->md
= PKT_METADATA_INITIALIZER(u32_to_odp(data
->out_port
));
1178 netdev_vport_range(struct unixctl_conn
*conn
, int argc
,
1179 const char *argv
[], void *aux OVS_UNUSED
)
1184 struct ds ds
= DS_EMPTY_INITIALIZER
;
1186 ds_put_format(&ds
, "Tunnel UDP source port range: %"PRIu16
"-%"PRIu16
"\n",
1187 tnl_udp_port_min
, tnl_udp_port_max
);
1189 unixctl_command_reply(conn
, ds_cstr(&ds
));
1198 val1
= atoi(argv
[1]);
1199 if (val1
<= 0 || val1
> UINT16_MAX
) {
1200 unixctl_command_reply(conn
, "Invalid min.");
1203 val2
= atoi(argv
[2]);
1204 if (val2
<= 0 || val2
> UINT16_MAX
) {
1205 unixctl_command_reply(conn
, "Invalid max.");
1210 tnl_udp_port_min
= val2
;
1211 tnl_udp_port_max
= val1
;
1213 tnl_udp_port_min
= val1
;
1214 tnl_udp_port_max
= val2
;
1216 seq_change(tnl_conf_seq
);
1218 unixctl_command_reply(conn
, "OK");
1222 #define VPORT_FUNCTIONS(GET_CONFIG, SET_CONFIG, \
1223 GET_TUNNEL_CONFIG, GET_STATUS, \
1225 PUSH_HEADER, POP_HEADER) \
1228 netdev_vport_wait, \
1230 netdev_vport_alloc, \
1231 netdev_vport_construct, \
1232 netdev_vport_destruct, \
1233 netdev_vport_dealloc, \
1236 GET_TUNNEL_CONFIG, \
1240 NULL, /* get_numa_id */ \
1241 NULL, /* set_multiq */ \
1244 NULL, /* send_wait */ \
1246 netdev_vport_set_etheraddr, \
1247 netdev_vport_get_etheraddr, \
1248 NULL, /* get_mtu */ \
1249 NULL, /* set_mtu */ \
1250 NULL, /* get_ifindex */ \
1251 NULL, /* get_carrier */ \
1252 NULL, /* get_carrier_resets */ \
1253 NULL, /* get_miimon */ \
1256 NULL, /* get_features */ \
1257 NULL, /* set_advertisements */ \
1259 NULL, /* set_policing */ \
1260 NULL, /* get_qos_types */ \
1261 NULL, /* get_qos_capabilities */ \
1262 NULL, /* get_qos */ \
1263 NULL, /* set_qos */ \
1264 NULL, /* get_queue */ \
1265 NULL, /* set_queue */ \
1266 NULL, /* delete_queue */ \
1267 NULL, /* get_queue_stats */ \
1268 NULL, /* queue_dump_start */ \
1269 NULL, /* queue_dump_next */ \
1270 NULL, /* queue_dump_done */ \
1271 NULL, /* dump_queue_stats */ \
1273 NULL, /* get_in4 */ \
1274 NULL, /* set_in4 */ \
1275 NULL, /* get_in6 */ \
1276 NULL, /* add_router */ \
1277 NULL, /* get_next_hop */ \
1279 NULL, /* arp_lookup */ \
1281 netdev_vport_update_flags, \
1283 NULL, /* rx_alloc */ \
1284 NULL, /* rx_construct */ \
1285 NULL, /* rx_destruct */ \
1286 NULL, /* rx_dealloc */ \
1287 NULL, /* rx_recv */ \
1288 NULL, /* rx_wait */ \
1289 NULL, /* rx_drain */
1292 #define TUNNEL_CLASS(NAME, DPIF_PORT, BUILD_HEADER, PUSH_HEADER, POP_HEADER) \
1294 { NAME, VPORT_FUNCTIONS(get_tunnel_config, \
1295 set_tunnel_config, \
1296 get_netdev_tunnel_config, \
1297 tunnel_get_status, \
1298 BUILD_HEADER, PUSH_HEADER, POP_HEADER) }}
1301 netdev_vport_tunnel_register(void)
1303 /* The name of the dpif_port should be short enough to accomodate adding
1304 * a port number to the end if one is necessary. */
1305 static const struct vport_class vport_classes
[] = {
1306 TUNNEL_CLASS("geneve", "genev_sys", NULL
, NULL
, NULL
),
1307 TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header
,
1308 netdev_gre_push_header
,
1309 netdev_gre_pop_header
),
1310 TUNNEL_CLASS("ipsec_gre", "gre_sys", NULL
, NULL
, NULL
),
1311 TUNNEL_CLASS("gre64", "gre64_sys", NULL
, NULL
, NULL
),
1312 TUNNEL_CLASS("ipsec_gre64", "gre64_sys", NULL
, NULL
, NULL
),
1313 TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header
,
1314 netdev_vxlan_push_header
,
1315 netdev_vxlan_pop_header
),
1316 TUNNEL_CLASS("lisp", "lisp_sys", NULL
, NULL
, NULL
)
1318 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
1320 if (ovsthread_once_start(&once
)) {
1323 for (i
= 0; i
< ARRAY_SIZE(vport_classes
); i
++) {
1324 netdev_register_provider(&vport_classes
[i
].netdev_class
);
1327 unixctl_command_register("tnl/egress_port_range", "min max", 0, 2,
1328 netdev_vport_range
, NULL
);
1330 ovsthread_once_done(&once
);
1335 netdev_vport_patch_register(void)
1337 static const struct vport_class patch_class
=
1339 { "patch", VPORT_FUNCTIONS(get_patch_config
,
1342 NULL
, NULL
, NULL
, NULL
) }};
1343 netdev_register_provider(&patch_class
.netdev_class
);