/* lib/netdev-vport.c -- Open vSwitch virtual port (tunnel and patch port)
 * netdev implementation. */
1 /*
2 * Copyright (c) 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "netdev-vport.h"
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <sys/socket.h>
24 #include <net/if.h>
25 #include <sys/ioctl.h>
26
27 #include "byte-order.h"
28 #include "csum.h"
29 #include "daemon.h"
30 #include "dirs.h"
31 #include "dpif.h"
32 #include "dp-packet.h"
33 #include "dynamic-string.h"
34 #include "flow.h"
35 #include "hash.h"
36 #include "hmap.h"
37 #include "list.h"
38 #include "netdev-provider.h"
39 #include "odp-netlink.h"
40 #include "dp-packet.h"
41 #include "ovs-router.h"
42 #include "packets.h"
43 #include "poll-loop.h"
44 #include "route-table.h"
45 #include "shash.h"
46 #include "socket-util.h"
47 #include "openvswitch/vlog.h"
48 #include "unaligned.h"
49 #include "unixctl.h"
50 #include "util.h"
51
52 VLOG_DEFINE_THIS_MODULE(netdev_vport);
53 static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
54
55 #define GENEVE_DST_PORT 6081
56 #define VXLAN_DST_PORT 4789
57 #define LISP_DST_PORT 4341
58
59 #define VXLAN_HLEN (sizeof(struct eth_header) + \
60 sizeof(struct ip_header) + \
61 sizeof(struct udp_header) + \
62 sizeof(struct vxlanhdr))
63
64 #define DEFAULT_TTL 64
65
/* Per-instance state for a virtual port (tunnel or patch port) netdev. */
struct netdev_vport {
    struct netdev up;               /* Base netdev; must be first member. */

    /* Protects all members below. */
    struct ovs_mutex mutex;

    uint8_t etheraddr[ETH_ADDR_LEN];   /* MAC address (randomized at
                                        * construction time). */
    struct netdev_stats stats;         /* Rx/tx counters updated via
                                        * netdev_vport_inc_rx()/_tx(). */

    /* Tunnels. */
    struct netdev_tunnel_config tnl_cfg;   /* Parsed tunnel configuration. */
    char egress_iface[IFNAMSIZ];           /* Cached egress interface name
                                            * from the last route lookup. */
    bool carrier_status;                   /* Cached egress carrier state. */

    /* Patch Ports. */
    char *peer;                     /* Name of the peer patch port, or NULL
                                     * until set_patch_config() succeeds. */
};
83
/* Associates a netdev class with the fixed datapath port-name prefix its
 * ports use ('dpif_port' is NULL when the netdev's own name is used). */
struct vport_class {
    const char *dpif_port;
    struct netdev_class netdev_class;
};
88
89 /* Last read of the route-table's change number. */
90 static uint64_t rt_change_seqno;
91
92 static int netdev_vport_construct(struct netdev *);
93 static int get_patch_config(const struct netdev *netdev, struct smap *args);
94 static int get_tunnel_config(const struct netdev *, struct smap *args);
95 static bool tunnel_check_status_change__(struct netdev_vport *);
96
97 static uint16_t tnl_udp_port_min = 32768;
98 static uint16_t tnl_udp_port_max = 61000;
99
100 static bool
101 is_vport_class(const struct netdev_class *class)
102 {
103 return class->construct == netdev_vport_construct;
104 }
105
106 bool
107 netdev_vport_is_vport_class(const struct netdev_class *class)
108 {
109 return is_vport_class(class);
110 }
111
112 static const struct vport_class *
113 vport_class_cast(const struct netdev_class *class)
114 {
115 ovs_assert(is_vport_class(class));
116 return CONTAINER_OF(class, struct vport_class, netdev_class);
117 }
118
119 static struct netdev_vport *
120 netdev_vport_cast(const struct netdev *netdev)
121 {
122 ovs_assert(is_vport_class(netdev_get_class(netdev)));
123 return CONTAINER_OF(netdev, struct netdev_vport, up);
124 }
125
126 static const struct netdev_tunnel_config *
127 get_netdev_tunnel_config(const struct netdev *netdev)
128 {
129 return &netdev_vport_cast(netdev)->tnl_cfg;
130 }
131
132 bool
133 netdev_vport_is_patch(const struct netdev *netdev)
134 {
135 const struct netdev_class *class = netdev_get_class(netdev);
136
137 return class->get_config == get_patch_config;
138 }
139
/* Returns true for layer-3 vports; "lisp" is the only type reported as
 * layer 3 here. */
bool
netdev_vport_is_layer3(const struct netdev *dev)
{
    return !strcmp(netdev_get_type(dev), "lisp");
}
147
148 static bool
149 netdev_vport_needs_dst_port(const struct netdev *dev)
150 {
151 const struct netdev_class *class = netdev_get_class(dev);
152 const char *type = netdev_get_type(dev);
153
154 return (class->get_config == get_tunnel_config &&
155 (!strcmp("geneve", type) || !strcmp("vxlan", type) ||
156 !strcmp("lisp", type)));
157 }
158
159 const char *
160 netdev_vport_class_get_dpif_port(const struct netdev_class *class)
161 {
162 return is_vport_class(class) ? vport_class_cast(class)->dpif_port : NULL;
163 }
164
/* Returns the name the datapath should use for 'netdev': the netdev's own
 * name when its class has no fixed dpif port name; "<dpif_port>_<dst_port>"
 * rendered into 'namebuf' for tunnel types with a configurable destination
 * port; otherwise the class's fixed dpif port name.  The returned pointer
 * may alias 'namebuf', whose size must be at least
 * NETDEV_VPORT_NAME_BUFSIZE bytes. */
const char *
netdev_vport_get_dpif_port(const struct netdev *netdev,
                           char namebuf[], size_t bufsize)
{
    const struct netdev_class *class = netdev_get_class(netdev);
    const char *dpif_port = netdev_vport_class_get_dpif_port(class);

    if (!dpif_port) {
        return netdev_get_name(netdev);
    }

    if (netdev_vport_needs_dst_port(netdev)) {
        const struct netdev_vport *vport = netdev_vport_cast(netdev);

        /*
         * Note: IFNAMSIZ is 16 bytes long. Implementations should choose
         * a dpif port name that is short enough to fit including any
         * port numbers but assert just in case.
         */
        BUILD_ASSERT(NETDEV_VPORT_NAME_BUFSIZE >= IFNAMSIZ);
        /* "+ 6" leaves room for '_' plus up to five port digits. */
        ovs_assert(strlen(dpif_port) + 6 < IFNAMSIZ);
        snprintf(namebuf, bufsize, "%s_%d", dpif_port,
                 ntohs(vport->tnl_cfg.dst_port));
        return namebuf;
    } else {
        return dpif_port;
    }
}
193
194 char *
195 netdev_vport_get_dpif_port_strdup(const struct netdev *netdev)
196 {
197 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
198
199 return xstrdup(netdev_vport_get_dpif_port(netdev, namebuf,
200 sizeof namebuf));
201 }
202
203 /* Whenever the route-table change number is incremented,
204 * netdev_vport_route_changed() should be called to update
205 * the corresponding tunnel interface status. */
206 static void
207 netdev_vport_route_changed(void)
208 {
209 struct netdev **vports;
210 size_t i, n_vports;
211
212 vports = netdev_get_vports(&n_vports);
213 for (i = 0; i < n_vports; i++) {
214 struct netdev *netdev_ = vports[i];
215 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
216
217 ovs_mutex_lock(&netdev->mutex);
218 /* Finds all tunnel vports. */
219 if (netdev->tnl_cfg.ip_dst) {
220 if (tunnel_check_status_change__(netdev)) {
221 netdev_change_seq_changed(netdev_);
222 }
223 }
224 ovs_mutex_unlock(&netdev->mutex);
225
226 netdev_close(netdev_);
227 }
228
229 free(vports);
230 }
231
232 static struct netdev *
233 netdev_vport_alloc(void)
234 {
235 struct netdev_vport *netdev = xzalloc(sizeof *netdev);
236 return &netdev->up;
237 }
238
239 static int
240 netdev_vport_construct(struct netdev *netdev_)
241 {
242 struct netdev_vport *dev = netdev_vport_cast(netdev_);
243 const char *type = netdev_get_type(netdev_);
244
245 ovs_mutex_init(&dev->mutex);
246 eth_addr_random(dev->etheraddr);
247
248 /* Add a default destination port for tunnel ports if none specified. */
249 if (!strcmp(type, "geneve")) {
250 dev->tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
251 } else if (!strcmp(type, "vxlan")) {
252 dev->tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
253 } else if (!strcmp(type, "lisp")) {
254 dev->tnl_cfg.dst_port = htons(LISP_DST_PORT);
255 }
256
257 return 0;
258 }
259
260 static void
261 netdev_vport_destruct(struct netdev *netdev_)
262 {
263 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
264
265 free(netdev->peer);
266 ovs_mutex_destroy(&netdev->mutex);
267 }
268
/* netdev dealloc callback: frees the memory obtained in
 * netdev_vport_alloc(). */
static void
netdev_vport_dealloc(struct netdev *netdev_)
{
    free(netdev_vport_cast(netdev_));
}
275
276 static int
277 netdev_vport_set_etheraddr(struct netdev *netdev_,
278 const uint8_t mac[ETH_ADDR_LEN])
279 {
280 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
281
282 ovs_mutex_lock(&netdev->mutex);
283 memcpy(netdev->etheraddr, mac, ETH_ADDR_LEN);
284 ovs_mutex_unlock(&netdev->mutex);
285 netdev_change_seq_changed(netdev_);
286
287 return 0;
288 }
289
290 static int
291 netdev_vport_get_etheraddr(const struct netdev *netdev_,
292 uint8_t mac[ETH_ADDR_LEN])
293 {
294 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
295
296 ovs_mutex_lock(&netdev->mutex);
297 memcpy(mac, netdev->etheraddr, ETH_ADDR_LEN);
298 ovs_mutex_unlock(&netdev->mutex);
299
300 return 0;
301 }
302
/* Checks if the tunnel status has changed and returns a boolean.
 * Updates the tunnel status if it has changed.
 *
 * "Status" here means the pair (egress interface name, its carrier state)
 * obtained by routing the tunnel's remote IP.  If the route lookup fails or
 * the egress netdev cannot be opened, the status falls back to ("", down). */
static bool
tunnel_check_status_change__(struct netdev_vport *netdev)
    OVS_REQUIRES(netdev->mutex)
{
    char iface[IFNAMSIZ];
    bool status = false;
    ovs_be32 route;
    ovs_be32 gw;

    iface[0] = '\0';
    route = netdev->tnl_cfg.ip_dst;
    /* Resolve the egress interface for the remote IP; if it resolves, take
     * that interface's carrier as the tunnel's carrier. */
    if (ovs_router_lookup(route, iface, &gw)) {
        struct netdev *egress_netdev;

        if (!netdev_open(iface, "system", &egress_netdev)) {
            status = netdev_get_carrier(egress_netdev);
            netdev_close(egress_netdev);
        }
    }

    /* Report (and cache) a change when either the egress interface or its
     * carrier state differs from the cached values. */
    if (strcmp(netdev->egress_iface, iface)
        || netdev->carrier_status != status) {
        ovs_strlcpy(netdev->egress_iface, iface, IFNAMSIZ);
        netdev->carrier_status = status;

        return true;
    }

    return false;
}
335
336 static int
337 tunnel_get_status(const struct netdev *netdev_, struct smap *smap)
338 {
339 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
340
341 if (netdev->egress_iface[0]) {
342 smap_add(smap, "tunnel_egress_iface", netdev->egress_iface);
343
344 smap_add(smap, "tunnel_egress_iface_carrier",
345 netdev->carrier_status ? "up" : "down");
346 }
347
348 return 0;
349 }
350
351 static int
352 netdev_vport_update_flags(struct netdev *netdev OVS_UNUSED,
353 enum netdev_flags off,
354 enum netdev_flags on OVS_UNUSED,
355 enum netdev_flags *old_flagsp)
356 {
357 if (off & (NETDEV_UP | NETDEV_PROMISC)) {
358 return EOPNOTSUPP;
359 }
360
361 *old_flagsp = NETDEV_UP | NETDEV_PROMISC;
362 return 0;
363 }
364
365 static void
366 netdev_vport_run(void)
367 {
368 uint64_t seq;
369
370 route_table_run();
371 seq = route_table_get_change_seq();
372 if (rt_change_seqno != seq) {
373 rt_change_seqno = seq;
374 netdev_vport_route_changed();
375 }
376 }
377
378 static void
379 netdev_vport_wait(void)
380 {
381 uint64_t seq;
382
383 route_table_wait();
384 seq = route_table_get_change_seq();
385 if (rt_change_seqno != seq) {
386 poll_immediate_wake();
387 }
388 }
389 \f
390 /* Code specific to tunnel types. */
391
392 static ovs_be64
393 parse_key(const struct smap *args, const char *name,
394 bool *present, bool *flow)
395 {
396 const char *s;
397
398 *present = false;
399 *flow = false;
400
401 s = smap_get(args, name);
402 if (!s) {
403 s = smap_get(args, "key");
404 if (!s) {
405 return 0;
406 }
407 }
408
409 *present = true;
410
411 if (!strcmp(s, "flow")) {
412 *flow = true;
413 return 0;
414 } else {
415 return htonll(strtoull(s, NULL, 0));
416 }
417 }
418
/* netdev set_config callback for tunnel vports (gre, geneve, vxlan, lisp,
 * and their *ipsec* variants).  Parses 'args' into a fresh
 * netdev_tunnel_config, validates it, and atomically installs it on 'dev_'.
 * Returns 0 on success or EINVAL on fatal configuration errors; unknown or
 * malformed non-fatal options only produce warnings. */
static int
set_tunnel_config(struct netdev *dev_, const struct smap *args)
{
    struct netdev_vport *dev = netdev_vport_cast(dev_);
    const char *name = netdev_get_name(dev_);
    const char *type = netdev_get_type(dev_);
    bool ipsec_mech_set, needs_dst_port, has_csum;
    struct netdev_tunnel_config tnl_cfg;
    struct smap_node *node;

    /* Only these tunnel types accept the "csum" option. */
    has_csum = strstr(type, "gre") || strstr(type, "geneve") ||
               strstr(type, "vxlan");
    ipsec_mech_set = false;
    memset(&tnl_cfg, 0, sizeof tnl_cfg);

    /* Add a default destination port for tunnel ports if none specified. */
    if (!strcmp(type, "geneve")) {
        tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
    }

    if (!strcmp(type, "vxlan")) {
        tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
    }

    if (!strcmp(type, "lisp")) {
        tnl_cfg.dst_port = htons(LISP_DST_PORT);
    }

    needs_dst_port = netdev_vport_needs_dst_port(dev_);
    tnl_cfg.ipsec = strstr(type, "ipsec");
    /* DF is set on outer headers unless df_default=false below. */
    tnl_cfg.dont_fragment = true;

    SMAP_FOR_EACH (node, args) {
        if (!strcmp(node->key, "remote_ip")) {
            struct in_addr in_addr;
            if (!strcmp(node->value, "flow")) {
                tnl_cfg.ip_dst_flow = true;
                tnl_cfg.ip_dst = htonl(0);
            } else if (lookup_ip(node->value, &in_addr)) {
                VLOG_WARN("%s: bad %s 'remote_ip'", name, type);
            } else if (ip_is_multicast(in_addr.s_addr)) {
                VLOG_WARN("%s: multicast remote_ip="IP_FMT" not allowed",
                          name, IP_ARGS(in_addr.s_addr));
                return EINVAL;
            } else {
                tnl_cfg.ip_dst = in_addr.s_addr;
            }
        } else if (!strcmp(node->key, "local_ip")) {
            struct in_addr in_addr;
            if (!strcmp(node->value, "flow")) {
                tnl_cfg.ip_src_flow = true;
                tnl_cfg.ip_src = htonl(0);
            } else if (lookup_ip(node->value, &in_addr)) {
                VLOG_WARN("%s: bad %s 'local_ip'", name, type);
            } else {
                tnl_cfg.ip_src = in_addr.s_addr;
            }
        } else if (!strcmp(node->key, "tos")) {
            if (!strcmp(node->value, "inherit")) {
                tnl_cfg.tos_inherit = true;
            } else {
                char *endptr;
                int tos;
                tos = strtol(node->value, &endptr, 0);
                /* Only the DSCP bits may be set; ECN bits are rejected. */
                if (*endptr == '\0' && tos == (tos & IP_DSCP_MASK)) {
                    tnl_cfg.tos = tos;
                } else {
                    VLOG_WARN("%s: invalid TOS %s", name, node->value);
                }
            }
        } else if (!strcmp(node->key, "ttl")) {
            if (!strcmp(node->value, "inherit")) {
                tnl_cfg.ttl_inherit = true;
            } else {
                tnl_cfg.ttl = atoi(node->value);
            }
        } else if (!strcmp(node->key, "dst_port") && needs_dst_port) {
            tnl_cfg.dst_port = htons(atoi(node->value));
        } else if (!strcmp(node->key, "csum") && has_csum) {
            if (!strcmp(node->value, "true")) {
                tnl_cfg.csum = true;
            }
        } else if (!strcmp(node->key, "df_default")) {
            if (!strcmp(node->value, "false")) {
                tnl_cfg.dont_fragment = false;
            }
        } else if (!strcmp(node->key, "peer_cert") && tnl_cfg.ipsec) {
            if (smap_get(args, "certificate")) {
                ipsec_mech_set = true;
            } else {
                const char *use_ssl_cert;

                /* If the "use_ssl_cert" is true, then "certificate" and
                 * "private_key" will be pulled from the SSL table.  The
                 * use of this option is strongly discouraged, since it
                 * will likely be removed when multiple SSL configurations
                 * are supported by OVS.
                 */
                use_ssl_cert = smap_get(args, "use_ssl_cert");
                if (!use_ssl_cert || strcmp(use_ssl_cert, "true")) {
                    VLOG_ERR("%s: 'peer_cert' requires 'certificate' argument",
                             name);
                    return EINVAL;
                }
                ipsec_mech_set = true;
            }
        } else if (!strcmp(node->key, "psk") && tnl_cfg.ipsec) {
            ipsec_mech_set = true;
        } else if (tnl_cfg.ipsec
                   && (!strcmp(node->key, "certificate")
                       || !strcmp(node->key, "private_key")
                       || !strcmp(node->key, "use_ssl_cert"))) {
            /* Ignore options not used by the netdev. */
        } else if (!strcmp(node->key, "key") ||
                   !strcmp(node->key, "in_key") ||
                   !strcmp(node->key, "out_key")) {
            /* Handled separately below. */
        } else if (!strcmp(node->key, "exts")) {
            char *str = xstrdup(node->value);
            char *ext, *save_ptr = NULL;

            tnl_cfg.exts = 0;

            /* "exts" is a comma-separated list of extension names. */
            ext = strtok_r(str, ",", &save_ptr);
            while (ext) {
                if (!strcmp(type, "vxlan") && !strcmp(ext, "gbp")) {
                    tnl_cfg.exts |= (1 << OVS_VXLAN_EXT_GBP);
                } else {
                    VLOG_WARN("%s: unknown extension '%s'", name, ext);
                }

                ext = strtok_r(NULL, ",", &save_ptr);
            }

            free(str);
        } else {
            VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->key);
        }
    }

    if (tnl_cfg.ipsec) {
        static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
        static pid_t pid = 0;

#ifndef _WIN32
        /* Look up the ovs-monitor-ipsec daemon's pid once, under a lock
         * since this callback may run from multiple threads. */
        ovs_mutex_lock(&mutex);
        if (pid <= 0) {
            char *file_name = xasprintf("%s/%s", ovs_rundir(),
                                        "ovs-monitor-ipsec.pid");
            pid = read_pidfile(file_name);
            free(file_name);
        }
        ovs_mutex_unlock(&mutex);
#endif

        if (pid < 0) {
            VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon",
                     name);
            return EINVAL;
        }

        if (smap_get(args, "peer_cert") && smap_get(args, "psk")) {
            VLOG_ERR("%s: cannot define both 'peer_cert' and 'psk'", name);
            return EINVAL;
        }

        if (!ipsec_mech_set) {
            VLOG_ERR("%s: IPsec requires an 'peer_cert' or psk' argument",
                     name);
            return EINVAL;
        }
    }

    if (!tnl_cfg.ip_dst && !tnl_cfg.ip_dst_flow) {
        VLOG_ERR("%s: %s type requires valid 'remote_ip' argument",
                 name, type);
        return EINVAL;
    }
    if (tnl_cfg.ip_src_flow && !tnl_cfg.ip_dst_flow) {
        VLOG_ERR("%s: %s type requires 'remote_ip=flow' with 'local_ip=flow'",
                 name, type);
        return EINVAL;
    }
    if (!tnl_cfg.ttl) {
        tnl_cfg.ttl = DEFAULT_TTL;
    }

    tnl_cfg.in_key = parse_key(args, "in_key",
                               &tnl_cfg.in_key_present,
                               &tnl_cfg.in_key_flow);

    tnl_cfg.out_key = parse_key(args, "out_key",
                               &tnl_cfg.out_key_present,
                               &tnl_cfg.out_key_flow);

    /* Commit the new configuration and refresh the cached tunnel status. */
    ovs_mutex_lock(&dev->mutex);
    dev->tnl_cfg = tnl_cfg;
    tunnel_check_status_change__(dev);
    netdev_change_seq_changed(dev_);
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}
622
/* netdev get_config callback for tunnel vports: renders the committed
 * tunnel configuration back into string form in 'args', omitting values
 * that match the type's defaults so that the output round-trips through
 * set_tunnel_config().  Always returns 0. */
static int
get_tunnel_config(const struct netdev *dev, struct smap *args)
{
    struct netdev_vport *netdev = netdev_vport_cast(dev);
    struct netdev_tunnel_config tnl_cfg;

    /* Snapshot the config so 'args' can be built without holding the
     * mutex. */
    ovs_mutex_lock(&netdev->mutex);
    tnl_cfg = netdev->tnl_cfg;
    ovs_mutex_unlock(&netdev->mutex);

    if (tnl_cfg.ip_dst) {
        smap_add_format(args, "remote_ip", IP_FMT, IP_ARGS(tnl_cfg.ip_dst));
    } else if (tnl_cfg.ip_dst_flow) {
        smap_add(args, "remote_ip", "flow");
    }

    if (tnl_cfg.ip_src) {
        smap_add_format(args, "local_ip", IP_FMT, IP_ARGS(tnl_cfg.ip_src));
    } else if (tnl_cfg.ip_src_flow) {
        smap_add(args, "local_ip", "flow");
    }

    /* Collapse identical in/out keys into a single "key" entry. */
    if (tnl_cfg.in_key_flow && tnl_cfg.out_key_flow) {
        smap_add(args, "key", "flow");
    } else if (tnl_cfg.in_key_present && tnl_cfg.out_key_present
               && tnl_cfg.in_key == tnl_cfg.out_key) {
        smap_add_format(args, "key", "%"PRIu64, ntohll(tnl_cfg.in_key));
    } else {
        if (tnl_cfg.in_key_flow) {
            smap_add(args, "in_key", "flow");
        } else if (tnl_cfg.in_key_present) {
            smap_add_format(args, "in_key", "%"PRIu64,
                            ntohll(tnl_cfg.in_key));
        }

        if (tnl_cfg.out_key_flow) {
            smap_add(args, "out_key", "flow");
        } else if (tnl_cfg.out_key_present) {
            smap_add_format(args, "out_key", "%"PRIu64,
                            ntohll(tnl_cfg.out_key));
        }
    }

    if (tnl_cfg.ttl_inherit) {
        smap_add(args, "ttl", "inherit");
    } else if (tnl_cfg.ttl != DEFAULT_TTL) {
        smap_add_format(args, "ttl", "%"PRIu8, tnl_cfg.ttl);
    }

    if (tnl_cfg.tos_inherit) {
        smap_add(args, "tos", "inherit");
    } else if (tnl_cfg.tos) {
        smap_add_format(args, "tos", "0x%x", tnl_cfg.tos);
    }

    if (tnl_cfg.dst_port) {
        uint16_t dst_port = ntohs(tnl_cfg.dst_port);
        const char *type = netdev_get_type(dev);

        /* Only report dst_port when it differs from the type's default. */
        if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) ||
            (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
            (!strcmp("lisp", type) && dst_port != LISP_DST_PORT)) {
            smap_add_format(args, "dst_port", "%d", dst_port);
        }
    }

    if (tnl_cfg.csum) {
        smap_add(args, "csum", "true");
    }

    if (!tnl_cfg.dont_fragment) {
        smap_add(args, "df_default", "false");
    }

    return 0;
}
699 \f
700 /* Code specific to patch ports. */
701
702 /* If 'netdev' is a patch port, returns the name of its peer as a malloc()'d
703 * string that the caller must free.
704 *
705 * If 'netdev' is not a patch port, returns NULL. */
706 char *
707 netdev_vport_patch_peer(const struct netdev *netdev_)
708 {
709 char *peer = NULL;
710
711 if (netdev_vport_is_patch(netdev_)) {
712 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
713
714 ovs_mutex_lock(&netdev->mutex);
715 if (netdev->peer) {
716 peer = xstrdup(netdev->peer);
717 }
718 ovs_mutex_unlock(&netdev->mutex);
719 }
720
721 return peer;
722 }
723
724 void
725 netdev_vport_inc_rx(const struct netdev *netdev,
726 const struct dpif_flow_stats *stats)
727 {
728 if (is_vport_class(netdev_get_class(netdev))) {
729 struct netdev_vport *dev = netdev_vport_cast(netdev);
730
731 ovs_mutex_lock(&dev->mutex);
732 dev->stats.rx_packets += stats->n_packets;
733 dev->stats.rx_bytes += stats->n_bytes;
734 ovs_mutex_unlock(&dev->mutex);
735 }
736 }
737
738 void
739 netdev_vport_inc_tx(const struct netdev *netdev,
740 const struct dpif_flow_stats *stats)
741 {
742 if (is_vport_class(netdev_get_class(netdev))) {
743 struct netdev_vport *dev = netdev_vport_cast(netdev);
744
745 ovs_mutex_lock(&dev->mutex);
746 dev->stats.tx_packets += stats->n_packets;
747 dev->stats.tx_bytes += stats->n_bytes;
748 ovs_mutex_unlock(&dev->mutex);
749 }
750 }
751
752 static int
753 get_patch_config(const struct netdev *dev_, struct smap *args)
754 {
755 struct netdev_vport *dev = netdev_vport_cast(dev_);
756
757 ovs_mutex_lock(&dev->mutex);
758 if (dev->peer) {
759 smap_add(args, "peer", dev->peer);
760 }
761 ovs_mutex_unlock(&dev->mutex);
762
763 return 0;
764 }
765
766 static int
767 set_patch_config(struct netdev *dev_, const struct smap *args)
768 {
769 struct netdev_vport *dev = netdev_vport_cast(dev_);
770 const char *name = netdev_get_name(dev_);
771 const char *peer;
772
773 peer = smap_get(args, "peer");
774 if (!peer) {
775 VLOG_ERR("%s: patch type requires valid 'peer' argument", name);
776 return EINVAL;
777 }
778
779 if (smap_count(args) > 1) {
780 VLOG_ERR("%s: patch type takes only a 'peer' argument", name);
781 return EINVAL;
782 }
783
784 if (!strcmp(name, peer)) {
785 VLOG_ERR("%s: patch peer must not be self", name);
786 return EINVAL;
787 }
788
789 ovs_mutex_lock(&dev->mutex);
790 free(dev->peer);
791 dev->peer = xstrdup(peer);
792 netdev_change_seq_changed(dev_);
793 ovs_mutex_unlock(&dev->mutex);
794
795 return 0;
796 }
797
798 static int
799 get_stats(const struct netdev *netdev, struct netdev_stats *stats)
800 {
801 struct netdev_vport *dev = netdev_vport_cast(netdev);
802
803 ovs_mutex_lock(&dev->mutex);
804 *stats = dev->stats;
805 ovs_mutex_unlock(&dev->mutex);
806
807 return 0;
808 }
809
810 \f
811 /* Tunnel push pop ops. */
812
813 static struct ip_header *
814 ip_hdr(void *eth)
815 {
816 return (void *)((char *)eth + sizeof (struct eth_header));
817 }
818
819 static struct gre_base_hdr *
820 gre_hdr(struct ip_header *ip)
821 {
822 return (void *)((char *)ip + sizeof (struct ip_header));
823 }
824
825 static void *
826 ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl)
827 {
828 struct ip_header *nh;
829 void *l4;
830
831 nh = dp_packet_l3(packet);
832 l4 = dp_packet_l4(packet);
833
834 if (!nh || !l4) {
835 return NULL;
836 }
837
838 tnl->ip_src = get_16aligned_be32(&nh->ip_src);
839 tnl->ip_dst = get_16aligned_be32(&nh->ip_dst);
840 tnl->ip_tos = nh->ip_tos;
841
842 return l4;
843 }
844
/* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
 * reallocating the packet if necessary.  'header' should contain an Ethernet
 * header, followed by an IPv4 header (without options), and an L4 header.
 *
 * This function sets the IP header's ip_tot_len field (which should be zeroed
 * as part of 'header') and puts its value into '*ip_tot_size' as well.  Also
 * updates IP header checksum.
 *
 * Return pointer to the L4 header added to 'packet'. */
static void *
push_ip_header(struct dp_packet *packet,
               const void *header, int size, int *ip_tot_size)
{
    struct eth_header *eth;
    struct ip_header *ip;

    eth = dp_packet_push_uninit(packet, size);
    /* Total IP length = everything now in the packet minus the Ethernet
     * header. */
    *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);

    memcpy(eth, header, size);
    ip = ip_hdr(eth);
    ip->ip_tot_len = htons(*ip_tot_size);


    /* Incrementally patch the checksum for the ip_tot_len update (the
     * template's ip_tot_len was zero). */
    ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);

    return ip + 1;
}
873
874 static int
875 gre_header_len(ovs_be16 flags)
876 {
877 int hlen = sizeof(struct eth_header) +
878 sizeof(struct ip_header) + 4;
879
880 if (flags & htons(GRE_CSUM)) {
881 hlen += 4;
882 }
883 if (flags & htons(GRE_KEY)) {
884 hlen += 4;
885 }
886 if (flags & htons(GRE_SEQ)) {
887 hlen += 4;
888 }
889 return hlen;
890 }
891
/* Parses the outer Ethernet/IPv4/GRE headers of 'packet' into 'tnl'.
 * Returns the total outer header length on success, or -EINVAL if the
 * packet is malformed: missing L3/L4 offsets, unsupported GRE flag bits,
 * a packet shorter than its declared headers, or a bad GRE checksum. */
static int
parse_gre_header(struct dp_packet *packet,
                 struct flow_tnl *tnl)
{
    const struct gre_base_hdr *greh;
    ovs_16aligned_be32 *options;
    int hlen;

    greh = ip_extract_tnl_md(packet, tnl);
    if (!greh) {
        return -EINVAL;
    }

    /* Only the checksum, key, and sequence optional fields are supported. */
    if (greh->flags & ~(htons(GRE_CSUM | GRE_KEY | GRE_SEQ))) {
        return -EINVAL;
    }

    hlen = gre_header_len(greh->flags);
    if (hlen > dp_packet_size(packet)) {
        return -EINVAL;
    }

    /* Optional fields follow the GRE base header in order: csum, key, seq. */
    options = (ovs_16aligned_be32 *)(greh + 1);
    if (greh->flags & htons(GRE_CSUM)) {
        ovs_be16 pkt_csum;

        /* The GRE checksum covers the GRE header through the end of the
         * packet; a valid packet sums to zero. */
        pkt_csum = csum(greh, dp_packet_size(packet) -
                        ((const unsigned char *)greh -
                        (const unsigned char *)dp_packet_l2(packet)));
        if (pkt_csum) {
            return -EINVAL;
        }
        tnl->flags = FLOW_TNL_F_CSUM;
        options++;
    }

    if (greh->flags & htons(GRE_KEY)) {
        /* The 32-bit GRE key occupies the upper half of the 64-bit tunnel
         * ID. */
        tnl->tun_id = (OVS_FORCE ovs_be64) ((OVS_FORCE uint64_t)(get_16aligned_be32(options)) << 32);
        tnl->flags |= FLOW_TNL_F_KEY;
        options++;
    }

    if (greh->flags & htons(GRE_SEQ)) {
        /* Sequence numbers are skipped, not recorded. */
        options++;
    }

    return hlen;
}
940
941 static void
942 reset_tnl_md(struct pkt_metadata *md)
943 {
944 memset(&md->tunnel, 0, sizeof(md->tunnel));
945 }
946
947 static void
948 gre_extract_md(struct dp_packet *packet)
949 {
950 struct pkt_metadata *md = &packet->md;
951 struct flow_tnl *tnl = &md->tunnel;
952 int hlen = sizeof(struct eth_header) +
953 sizeof(struct ip_header) + 4;
954
955 memset(md, 0, sizeof *md);
956 if (hlen > dp_packet_size(packet)) {
957 return;
958 }
959
960 hlen = parse_gre_header(packet, tnl);
961 if (hlen < 0) {
962 reset_tnl_md(md);
963 }
964
965 dp_packet_reset_packet(packet, hlen);
966 }
967
968 static int
969 netdev_gre_pop_header(struct netdev *netdev_ OVS_UNUSED,
970 struct dp_packet **pkt, int cnt)
971 {
972 int i;
973
974 for (i = 0; i < cnt; i++) {
975 gre_extract_md(pkt[i]);
976 }
977 return 0;
978 }
979
/* Prepends the Ethernet/IP/GRE template 'header' (of 'size' bytes) to
 * 'packet' and, when the template carries GRE_CSUM, fills in the GRE
 * checksum over the GRE header and payload. */
static void
netdev_gre_push_header__(struct dp_packet *packet,
                         const void *header, int size)
{
    struct gre_base_hdr *greh;
    int ip_tot_size;

    greh = push_ip_header(packet, header, size, &ip_tot_size);

    if (greh->flags & htons(GRE_CSUM)) {
        /* The checksum field is the first 32-bit option word after the GRE
         * base header. */
        ovs_16aligned_be32 *options = (ovs_16aligned_be32 *) (greh + 1);

        put_16aligned_be32(options,
                           (OVS_FORCE ovs_be32) csum(greh, ip_tot_size - sizeof (struct ip_header)));
    }
}
996
997 static int
998 netdev_gre_push_header(const struct netdev *netdev OVS_UNUSED,
999 struct dp_packet **packets, int cnt,
1000 const struct ovs_action_push_tnl *data)
1001 {
1002 int i;
1003
1004 for (i = 0; i < cnt; i++) {
1005 netdev_gre_push_header__(packets[i], data->header, data->header_len);
1006 packets[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
1007 }
1008 return 0;
1009 }
1010
1011
/* netdev build_header callback for GRE: fills in the protocol-specific part
 * of the header template in 'data' (IP protocol, GRE flags/protocol, and
 * optional checksum/key words) from 'netdev''s tunnel config and
 * 'tnl_flow'.  Always returns 0. */
static int
netdev_gre_build_header(const struct netdev *netdev,
                        struct ovs_action_push_tnl *data,
                        const struct flow *tnl_flow)
{
    struct netdev_vport *dev = netdev_vport_cast(netdev);
    struct netdev_tunnel_config *tnl_cfg;
    struct ip_header *ip;
    struct gre_base_hdr *greh;
    ovs_16aligned_be32 *options;
    int hlen;

    /* XXX: RCUfy tnl_cfg. */
    ovs_mutex_lock(&dev->mutex);
    tnl_cfg = &dev->tnl_cfg;

    ip = ip_hdr(data->header);
    ip->ip_proto = IPPROTO_GRE;

    greh = gre_hdr(ip);
    /* Transparent Ethernet Bridging: the GRE payload is an Ethernet frame. */
    greh->protocol = htons(ETH_TYPE_TEB);
    greh->flags = 0;

    /* Optional words follow the base header: checksum first, then key.  The
     * checksum word is zeroed here and computed per packet at push time. */
    options = (ovs_16aligned_be32 *) (greh + 1);
    if (tnl_cfg->csum) {
        greh->flags |= htons(GRE_CSUM);
        put_16aligned_be32(options, 0);
        options++;
    }

    if (tnl_cfg->out_key_present) {
        greh->flags |= htons(GRE_KEY);
        /* The GRE key is the upper 32 bits of the 64-bit tunnel ID. */
        put_16aligned_be32(options, (OVS_FORCE ovs_be32)
                           ((OVS_FORCE uint64_t) tnl_flow->tunnel.tun_id >> 32));
        options++;
    }

    ovs_mutex_unlock(&dev->mutex);

    hlen = (uint8_t *) options - (uint8_t *) greh;

    data->header_len = sizeof(struct eth_header) +
                       sizeof(struct ip_header) + hlen;
    data->tnl_type = OVS_VPORT_TYPE_GRE;
    return 0;
}
1058
/* Extracts VXLAN tunnel metadata from 'packet' into packet->md.tunnel and
 * strips the outer Ethernet/IP/UDP/VXLAN headers.  On a short packet or an
 * invalid VXLAN header the metadata is left zeroed and the packet is not
 * stripped. */
static void
vxlan_extract_md(struct dp_packet *packet)
{
    struct pkt_metadata *md = &packet->md;
    struct flow_tnl *tnl = &md->tunnel;
    struct udp_header *udp;
    struct vxlanhdr *vxh;

    memset(md, 0, sizeof *md);
    if (VXLAN_HLEN > dp_packet_size(packet)) {
        return;
    }

    udp = ip_extract_tnl_md(packet, tnl);
    if (!udp) {
        return;
    }
    vxh = (struct vxlanhdr *) (udp + 1);

    /* The flags word must be exactly VXLAN_FLAGS and the low 8 bits of the
     * VNI word (reserved) must be zero. */
    if (get_16aligned_be32(&vxh->vx_flags) != htonl(VXLAN_FLAGS) ||
        (get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
        VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
                     ntohl(get_16aligned_be32(&vxh->vx_flags)),
                     ntohl(get_16aligned_be32(&vxh->vx_vni)));
        reset_tnl_md(md);
        return;
    }
    tnl->tp_src = udp->udp_src;
    tnl->tp_dst = udp->udp_dst;
    /* The 24-bit VNI sits in the upper bits of the VNI word. */
    tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);

    dp_packet_reset_packet(packet, VXLAN_HLEN);
}
1092
1093 static int
1094 netdev_vxlan_pop_header(struct netdev *netdev_ OVS_UNUSED,
1095 struct dp_packet **pkt, int cnt)
1096 {
1097 int i;
1098
1099 for (i = 0; i < cnt; i++) {
1100 vxlan_extract_md(pkt[i]);
1101 }
1102 return 0;
1103 }
1104
/* netdev build_header callback for VXLAN: fills in the protocol-specific
 * part of the header template in 'data' (IP protocol, UDP destination port,
 * VXLAN flags and VNI) from 'netdev''s tunnel config and 'tnl_flow'.
 * Always returns 0. */
static int
netdev_vxlan_build_header(const struct netdev *netdev,
                          struct ovs_action_push_tnl *data,
                          const struct flow *tnl_flow)
{
    struct netdev_vport *dev = netdev_vport_cast(netdev);
    struct netdev_tunnel_config *tnl_cfg;
    struct ip_header *ip;
    struct udp_header *udp;
    struct vxlanhdr *vxh;

    /* XXX: RCUfy tnl_cfg. */
    ovs_mutex_lock(&dev->mutex);
    tnl_cfg = &dev->tnl_cfg;

    ip = ip_hdr(data->header);
    ip->ip_proto = IPPROTO_UDP;

    /* The UDP source port is chosen per packet at push time. */
    udp = (struct udp_header *) (ip + 1);
    udp->udp_dst = tnl_cfg->dst_port;

    /* The 24-bit VNI occupies the upper bits of the VNI word. */
    vxh = (struct vxlanhdr *) (udp + 1);
    put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
    put_16aligned_be32(&vxh->vx_vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));

    ovs_mutex_unlock(&dev->mutex);
    data->header_len = VXLAN_HLEN;
    data->tnl_type = OVS_VPORT_TYPE_VXLAN;
    return 0;
}
1135
1136 static ovs_be16
1137 get_src_port(struct dp_packet *packet)
1138 {
1139 uint32_t hash;
1140
1141 hash = dp_packet_get_dp_hash(packet);
1142
1143 return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
1144 tnl_udp_port_min);
1145 }
1146
1147 static void
1148 netdev_vxlan_push_header__(struct dp_packet *packet,
1149 const void *header, int size)
1150 {
1151 struct udp_header *udp;
1152 int ip_tot_size;
1153
1154 udp = push_ip_header(packet, header, size, &ip_tot_size);
1155
1156 /* set udp src port */
1157 udp->udp_src = get_src_port(packet);
1158 udp->udp_len = htons(ip_tot_size - sizeof (struct ip_header));
1159 /* udp_csum is zero */
1160 }
1161
1162 static int
1163 netdev_vxlan_push_header(const struct netdev *netdev OVS_UNUSED,
1164 struct dp_packet **packets, int cnt,
1165 const struct ovs_action_push_tnl *data)
1166 {
1167 int i;
1168
1169 for (i = 0; i < cnt; i++) {
1170 netdev_vxlan_push_header__(packets[i],
1171 data->header, VXLAN_HLEN);
1172 packets[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
1173 }
1174 return 0;
1175 }
1176
1177 static void
1178 netdev_vport_range(struct unixctl_conn *conn, int argc,
1179 const char *argv[], void *aux OVS_UNUSED)
1180 {
1181 int val1, val2;
1182
1183 if (argc < 3) {
1184 struct ds ds = DS_EMPTY_INITIALIZER;
1185
1186 ds_put_format(&ds, "Tunnel UDP source port range: %"PRIu16"-%"PRIu16"\n",
1187 tnl_udp_port_min, tnl_udp_port_max);
1188
1189 unixctl_command_reply(conn, ds_cstr(&ds));
1190 ds_destroy(&ds);
1191 return;
1192 }
1193
1194 if (argc != 3) {
1195 return;
1196 }
1197
1198 val1 = atoi(argv[1]);
1199 if (val1 <= 0 || val1 > UINT16_MAX) {
1200 unixctl_command_reply(conn, "Invalid min.");
1201 return;
1202 }
1203 val2 = atoi(argv[2]);
1204 if (val2 <= 0 || val2 > UINT16_MAX) {
1205 unixctl_command_reply(conn, "Invalid max.");
1206 return;
1207 }
1208
1209 if (val1 > val2) {
1210 tnl_udp_port_min = val2;
1211 tnl_udp_port_max = val1;
1212 } else {
1213 tnl_udp_port_min = val1;
1214 tnl_udp_port_max = val2;
1215 }
1216 seq_change(tnl_conf_seq);
1217
1218 unixctl_command_reply(conn, "OK");
1219 }
1220
1221 \f
/* Expands to the function-pointer initializers shared by every vport-backed
 * netdev class.  The macro arguments fill in the callbacks that differ per
 * class (config get/set, tunnel config, status, and userspace tunnel header
 * build/push/pop); every other slot is a common vport helper or NULL for an
 * operation vports do not support.  NOTE(review): this is a positional
 * initializer, so the slot order must track the member order of
 * struct netdev_class (see netdev-provider.h) — verify when that struct
 * changes. */
#define VPORT_FUNCTIONS(GET_CONFIG, SET_CONFIG,             \
                        GET_TUNNEL_CONFIG, GET_STATUS,      \
                        BUILD_HEADER,                       \
                        PUSH_HEADER, POP_HEADER)            \
    NULL,                                                   \
    netdev_vport_run,                                       \
    netdev_vport_wait,                                      \
                                                            \
    netdev_vport_alloc,                                     \
    netdev_vport_construct,                                 \
    netdev_vport_destruct,                                  \
    netdev_vport_dealloc,                                   \
    GET_CONFIG,                                             \
    SET_CONFIG,                                             \
    GET_TUNNEL_CONFIG,                                      \
    BUILD_HEADER,                                           \
    PUSH_HEADER,                                            \
    POP_HEADER,                                             \
    NULL,                       /* get_numa_id */           \
    NULL,                       /* set_multiq */            \
                                                            \
    NULL,                       /* send */                  \
    NULL,                       /* send_wait */             \
                                                            \
    netdev_vport_set_etheraddr,                             \
    netdev_vport_get_etheraddr,                             \
    NULL,                       /* get_mtu */               \
    NULL,                       /* set_mtu */               \
    NULL,                       /* get_ifindex */           \
    NULL,                       /* get_carrier */           \
    NULL,                       /* get_carrier_resets */    \
    NULL,                       /* get_miimon */            \
    get_stats,                                              \
                                                            \
    NULL,                       /* get_features */          \
    NULL,                       /* set_advertisements */    \
                                                            \
    NULL,                       /* set_policing */          \
    NULL,                       /* get_qos_types */         \
    NULL,                       /* get_qos_capabilities */  \
    NULL,                       /* get_qos */               \
    NULL,                       /* set_qos */               \
    NULL,                       /* get_queue */             \
    NULL,                       /* set_queue */             \
    NULL,                       /* delete_queue */          \
    NULL,                       /* get_queue_stats */       \
    NULL,                       /* queue_dump_start */      \
    NULL,                       /* queue_dump_next */       \
    NULL,                       /* queue_dump_done */       \
    NULL,                       /* dump_queue_stats */      \
                                                            \
    NULL,                       /* get_in4 */               \
    NULL,                       /* set_in4 */               \
    NULL,                       /* get_in6 */               \
    NULL,                       /* add_router */            \
    NULL,                       /* get_next_hop */          \
    GET_STATUS,                                             \
    NULL,                       /* arp_lookup */            \
                                                            \
    netdev_vport_update_flags,                              \
                                                            \
    NULL,                       /* rx_alloc */              \
    NULL,                       /* rx_construct */          \
    NULL,                       /* rx_destruct */           \
    NULL,                       /* rx_dealloc */            \
    NULL,                       /* rx_recv */               \
    NULL,                       /* rx_wait */               \
    NULL,                       /* rx_drain */
1290
1291
/* Builds a struct vport_class initializer for a tunnel netdev class:
 * NAME is the netdev type (e.g. "vxlan"), DPIF_PORT the datapath device
 * name (e.g. "vxlan_sys"), and the last three arguments the userspace
 * tunnel header callbacks — NULL when userspace tunneling is not
 * implemented for that type. */
#define TUNNEL_CLASS(NAME, DPIF_PORT, BUILD_HEADER, PUSH_HEADER, POP_HEADER)   \
    { DPIF_PORT,                                                               \
        { NAME, VPORT_FUNCTIONS(get_tunnel_config,                             \
                                set_tunnel_config,                             \
                                get_netdev_tunnel_config,                      \
                                tunnel_get_status,                             \
                                BUILD_HEADER, PUSH_HEADER, POP_HEADER) }}
1299
1300 void
1301 netdev_vport_tunnel_register(void)
1302 {
1303 /* The name of the dpif_port should be short enough to accomodate adding
1304 * a port number to the end if one is necessary. */
1305 static const struct vport_class vport_classes[] = {
1306 TUNNEL_CLASS("geneve", "genev_sys", NULL, NULL, NULL),
1307 TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header,
1308 netdev_gre_push_header,
1309 netdev_gre_pop_header),
1310 TUNNEL_CLASS("ipsec_gre", "gre_sys", NULL, NULL, NULL),
1311 TUNNEL_CLASS("gre64", "gre64_sys", NULL, NULL, NULL),
1312 TUNNEL_CLASS("ipsec_gre64", "gre64_sys", NULL, NULL, NULL),
1313 TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header,
1314 netdev_vxlan_push_header,
1315 netdev_vxlan_pop_header),
1316 TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL)
1317 };
1318 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
1319
1320 if (ovsthread_once_start(&once)) {
1321 int i;
1322
1323 for (i = 0; i < ARRAY_SIZE(vport_classes); i++) {
1324 netdev_register_provider(&vport_classes[i].netdev_class);
1325 }
1326
1327 unixctl_command_register("tnl/egress_port_range", "min max", 0, 2,
1328 netdev_vport_range, NULL);
1329
1330 ovsthread_once_done(&once);
1331 }
1332 }
1333
/* Registers the "patch" netdev provider.  Patch ports are not datapath
 * tunnel devices, so dpif_port is NULL and only the patch config
 * get/set callbacks are supplied; all tunnel-related slots are NULL. */
void
netdev_vport_patch_register(void)
{
    static const struct vport_class patch_class =
        { NULL,
            { "patch", VPORT_FUNCTIONS(get_patch_config,
                                       set_patch_config,
                                       NULL,
                                       NULL, NULL, NULL, NULL) }};
    netdev_register_provider(&patch_class.netdev_class);
}