]> git.proxmox.com Git - mirror_ovs.git/blame - lib/tnl-ports.c
netdev-offload-tc: Use single 'once' variable for probing tc features
[mirror_ovs.git] / lib / tnl-ports.c
CommitLineData
a36de779 1/*
f9ac0f03 2 * Copyright (c) 2014, 2015, 2017 Nicira, Inc.
a36de779
PS
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
3655f492
TLSC
18
19#include "tnl-ports.h"
20
a36de779
PS
21#include <stddef.h>
22#include <stdint.h>
4f6e5a69 23#include <string.h>
a36de779
PS
24
25#include "classifier.h"
3e8a2ad1 26#include "openvswitch/dynamic-string.h"
a36de779 27#include "hash.h"
b19bab5b 28#include "openvswitch/list.h"
4f6e5a69 29#include "netdev.h"
64c96779 30#include "openvswitch/ofpbuf.h"
a36de779
PS
31#include "ovs-thread.h"
32#include "odp-util.h"
a36de779
PS
33#include "unixctl.h"
34#include "util.h"
35
fccd7c09 36static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
a36de779
PS
37static struct classifier cls; /* Tunnel ports. */
38
7f9b8504
PS
39struct ip_device {
40 struct netdev *dev;
41 struct eth_addr mac;
a8704b50
PS
42 struct in6_addr *addr;
43 int n_addr;
7f9b8504
PS
44 uint64_t change_seq;
45 struct ovs_list node;
46 char dev_name[IFNAMSIZ];
47};
48
49static struct ovs_list addr_list;
50
51struct tnl_port {
52 odp_port_t port;
e7c9ff0e 53 struct ovs_refcount ref_cnt;
98e3f58e
PS
54 ovs_be16 tp_port;
55 uint8_t nw_proto;
7f9b8504
PS
56 char dev_name[IFNAMSIZ];
57 struct ovs_list node;
58};
59
60static struct ovs_list port_list;
61
a36de779
PS
62struct tnl_port_in {
63 struct cls_rule cr;
64 odp_port_t portno;
65 struct ovs_refcount ref_cnt;
66 char dev_name[IFNAMSIZ];
67};
68
69static struct tnl_port_in *
70tnl_port_cast(const struct cls_rule *cr)
71{
72 BUILD_ASSERT_DECL(offsetof(struct tnl_port_in, cr) == 0);
73
74 return CONTAINER_OF(cr, struct tnl_port_in, cr);
75}
76
77static void
78tnl_port_free(struct tnl_port_in *p)
79{
80 cls_rule_destroy(&p->cr);
81 free(p);
82}
83
84static void
7f9b8504 85tnl_port_init_flow(struct flow *flow, struct eth_addr mac,
98e3f58e 86 struct in6_addr *addr, uint8_t nw_proto, ovs_be16 tp_port)
a36de779
PS
87{
88 memset(flow, 0, sizeof *flow);
7f9b8504 89
7f9b8504 90 flow->dl_dst = mac;
293a104b
TLSC
91 if (IN6_IS_ADDR_V4MAPPED(addr)) {
92 flow->dl_type = htons(ETH_TYPE_IP);
93 flow->nw_dst = in6_addr_get_mapped_ipv4(addr);
94 } else {
95 flow->dl_type = htons(ETH_TYPE_IPV6);
96 flow->ipv6_dst = *addr;
97 }
7f9b8504 98
98e3f58e
PS
99 flow->nw_proto = nw_proto;
100 flow->tp_dst = tp_port;
a36de779
PS
101}
102
7f9b8504 103static void
293a104b 104map_insert(odp_port_t port, struct eth_addr mac, struct in6_addr *addr,
98e3f58e 105 uint8_t nw_proto, ovs_be16 tp_port, const char dev_name[])
a36de779
PS
106{
107 const struct cls_rule *cr;
108 struct tnl_port_in *p;
109 struct match match;
110
111 memset(&match, 0, sizeof match);
98e3f58e 112 tnl_port_init_flow(&match.flow, mac, addr, nw_proto, tp_port);
a36de779
PS
113
114 do {
44e0c35d 115 cr = classifier_lookup(&cls, OVS_VERSION_MAX, &match.flow, NULL);
a36de779
PS
116 p = tnl_port_cast(cr);
117 /* Try again if the rule was released before we get the reference. */
118 } while (p && !ovs_refcount_try_ref_rcu(&p->ref_cnt));
119
fccd7c09
JR
120 if (!p) {
121 p = xzalloc(sizeof *p);
122 p->portno = port;
a36de779 123
fccd7c09
JR
124 match.wc.masks.dl_type = OVS_BE16_MAX;
125 match.wc.masks.nw_proto = 0xff;
5be5370d
DDP
126 /* XXX: No fragments support. */
127 match.wc.masks.nw_frag = FLOW_NW_FRAG_MASK;
128
98e3f58e 129 /* 'tp_port' is zero for GRE tunnels. In this case it
5be5370d 130 * doesn't make sense to match on UDP port numbers. */
98e3f58e 131 if (tp_port) {
5be5370d
DDP
132 match.wc.masks.tp_dst = OVS_BE16_MAX;
133 }
293a104b
TLSC
134 if (IN6_IS_ADDR_V4MAPPED(addr)) {
135 match.wc.masks.nw_dst = OVS_BE32_MAX;
136 } else {
137 match.wc.masks.ipv6_dst = in6addr_exact;
138 }
f0fb825a 139 match.wc.masks.vlans[0].tci = OVS_BE16_MAX;
7f9b8504 140 memset(&match.wc.masks.dl_dst, 0xff, sizeof (struct eth_addr));
a36de779 141
bd53aa17 142 cls_rule_init(&p->cr, &match, 0); /* Priority == 0. */
fccd7c09 143 ovs_refcount_init(&p->ref_cnt);
8742957c 144 ovs_strlcpy(p->dev_name, dev_name, sizeof p->dev_name);
a36de779 145
44e0c35d 146 classifier_insert(&cls, &p->cr, OVS_VERSION_MIN, NULL, 0);
fccd7c09 147 }
7f9b8504
PS
148}
149
a8704b50
PS
150static void
151map_insert_ipdev__(struct ip_device *ip_dev, char dev_name[],
98e3f58e 152 odp_port_t port, uint8_t nw_proto, ovs_be16 tp_port)
a8704b50
PS
153{
154 if (ip_dev->n_addr) {
155 int i;
156
157 for (i = 0; i < ip_dev->n_addr; i++) {
158 map_insert(port, ip_dev->mac, &ip_dev->addr[i],
98e3f58e 159 nw_proto, tp_port, dev_name);
a8704b50
PS
160 }
161 }
162}
163
98e3f58e
PS
/* Returns the IP protocol number that carries tunnel 'type', or 0 if 'type'
 * is not one of the tunnel types handled here.  The comparison is
 * case-sensitive. */
static uint8_t
tnl_type_to_nw_proto(const char type[])
{
    static const struct {
        const char *name;
        uint8_t nw_proto;
    } known_types[] = {
        { "geneve",    IPPROTO_UDP },
        { "stt",       IPPROTO_TCP },
        { "gre",       IPPROTO_GRE },
        { "erspan",    IPPROTO_GRE },
        { "ip6erspan", IPPROTO_GRE },
        { "ip6gre",    IPPROTO_GRE },
        { "vxlan",     IPPROTO_UDP },
        { "gtpu",      IPPROTO_UDP },
    };
    size_t i;

    for (i = 0; i < sizeof known_types / sizeof known_types[0]; i++) {
        if (!strcmp(type, known_types[i].name)) {
            return known_types[i].nw_proto;
        }
    }
    return 0;
}
185
7f9b8504 186void
98e3f58e
PS
187tnl_port_map_insert(odp_port_t port, ovs_be16 tp_port,
188 const char dev_name[], const char type[])
7f9b8504
PS
189{
190 struct tnl_port *p;
191 struct ip_device *ip_dev;
98e3f58e
PS
192 uint8_t nw_proto;
193
194 nw_proto = tnl_type_to_nw_proto(type);
195 if (!nw_proto) {
196 return;
197 }
7f9b8504
PS
198
199 ovs_mutex_lock(&mutex);
200 LIST_FOR_EACH(p, node, &port_list) {
c8025aee 201 if (p->port == port && p->nw_proto == nw_proto) {
e7c9ff0e 202 ovs_refcount_ref(&p->ref_cnt);
203 goto out;
7f9b8504
PS
204 }
205 }
206
207 p = xzalloc(sizeof *p);
208 p->port = port;
98e3f58e
PS
209 p->tp_port = tp_port;
210 p->nw_proto = nw_proto;
7f9b8504 211 ovs_strlcpy(p->dev_name, dev_name, sizeof p->dev_name);
e7c9ff0e 212 ovs_refcount_init(&p->ref_cnt);
417e7e66 213 ovs_list_insert(&port_list, &p->node);
7f9b8504
PS
214
215 LIST_FOR_EACH(ip_dev, node, &addr_list) {
98e3f58e 216 map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->nw_proto, p->tp_port);
7f9b8504
PS
217 }
218
219out:
fccd7c09 220 ovs_mutex_unlock(&mutex);
a36de779
PS
221}
222
223static void
224tnl_port_unref(const struct cls_rule *cr)
225{
226 struct tnl_port_in *p = tnl_port_cast(cr);
227
228 if (cr && ovs_refcount_unref_relaxed(&p->ref_cnt) == 1) {
46ab60bf
BP
229 classifier_remove_assert(&cls, cr);
230 ovsrcu_postpone(tnl_port_free, p);
a36de779
PS
231 }
232}
233
7f9b8504 234static void
98e3f58e
PS
235map_delete(struct eth_addr mac, struct in6_addr *addr,
236 ovs_be16 tp_port, uint8_t nw_proto)
a36de779
PS
237{
238 const struct cls_rule *cr;
239 struct flow flow;
240
98e3f58e 241 tnl_port_init_flow(&flow, mac, addr, nw_proto, tp_port);
a36de779 242
44e0c35d 243 cr = classifier_lookup(&cls, OVS_VERSION_MAX, &flow, NULL);
a36de779
PS
244 tnl_port_unref(cr);
245}
246
a8704b50 247static void
98e3f58e 248ipdev_map_delete(struct ip_device *ip_dev, ovs_be16 tp_port, uint8_t nw_proto)
a8704b50
PS
249{
250 if (ip_dev->n_addr) {
251 int i;
252
253 for (i = 0; i < ip_dev->n_addr; i++) {
98e3f58e 254 map_delete(ip_dev->mac, &ip_dev->addr[i], tp_port, nw_proto);
a8704b50
PS
255 }
256 }
257}
258
7f9b8504 259void
c8025aee 260tnl_port_map_delete(odp_port_t port, const char type[])
7f9b8504
PS
261{
262 struct tnl_port *p, *next;
263 struct ip_device *ip_dev;
98e3f58e
PS
264 uint8_t nw_proto;
265
266 nw_proto = tnl_type_to_nw_proto(type);
7f9b8504
PS
267
268 ovs_mutex_lock(&mutex);
269 LIST_FOR_EACH_SAFE(p, next, node, &port_list) {
c8025aee 270 if (p->port == port && p->nw_proto == nw_proto &&
e7c9ff0e 271 ovs_refcount_unref_relaxed(&p->ref_cnt) == 1) {
417e7e66 272 ovs_list_remove(&p->node);
e7c9ff0e 273 LIST_FOR_EACH(ip_dev, node, &addr_list) {
274 ipdev_map_delete(ip_dev, p->tp_port, p->nw_proto);
275 }
276 free(p);
7f9b8504
PS
277 break;
278 }
279 }
7f9b8504
PS
280 ovs_mutex_unlock(&mutex);
281}
282
2e0bded4
BP
283/* 'flow' is non-const to allow for temporary modifications during the lookup.
284 * Any changes are restored before returning. */
a36de779 285odp_port_t
2e0bded4 286tnl_port_map_lookup(struct flow *flow, struct flow_wildcards *wc)
a36de779 287{
44e0c35d 288 const struct cls_rule *cr = classifier_lookup(&cls, OVS_VERSION_MAX, flow,
2b7b1427 289 wc);
a36de779
PS
290
291 return (cr) ? tnl_port_cast(cr)->portno : ODPP_NONE;
292}
293
294static void
7f9b8504 295tnl_port_show_v(struct ds *ds)
a36de779 296{
a36de779
PS
297 const struct tnl_port_in *p;
298
a36de779
PS
299 CLS_FOR_EACH(p, cr, &cls) {
300 struct odputil_keybuf keybuf;
301 struct odputil_keybuf maskbuf;
302 struct flow flow;
303 const struct nlattr *key, *mask;
304 size_t key_len, mask_len;
305 struct flow_wildcards wc;
306 struct ofpbuf buf;
5262eea1
JG
307 struct odp_flow_key_parms odp_parms = {
308 .flow = &flow,
309 .mask = &wc.masks,
310 };
a36de779 311
7f9b8504 312 ds_put_format(ds, "%s (%"PRIu32") : ", p->dev_name, p->portno);
8fd47924
JR
313 minimask_expand(p->cr.match.mask, &wc);
314 miniflow_expand(p->cr.match.flow, &flow);
a36de779
PS
315
316 /* Key. */
2494ccd7 317 odp_parms.support.recirc = true;
a36de779 318 ofpbuf_use_stack(&buf, &keybuf, sizeof keybuf);
5262eea1 319 odp_flow_key_from_flow(&odp_parms, &buf);
6fd6ed71
PS
320 key = buf.data;
321 key_len = buf.size;
5262eea1 322
a36de779 323 /* mask*/
2494ccd7 324 odp_parms.support.recirc = false;
a36de779 325 ofpbuf_use_stack(&buf, &maskbuf, sizeof maskbuf);
5262eea1 326 odp_flow_key_from_mask(&odp_parms, &buf);
6fd6ed71
PS
327 mask = buf.data;
328 mask_len = buf.size;
a36de779
PS
329
330 /* build string. */
7f9b8504
PS
331 odp_flow_format(key, key_len, mask, mask_len, NULL, ds, false);
332 ds_put_format(ds, "\n");
333 }
334}
335
336static void
337tnl_port_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
338 const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
339{
340 struct ds ds = DS_EMPTY_INITIALIZER;
341 struct tnl_port *p;
342
343 ds_put_format(&ds, "Listening ports:\n");
344 ovs_mutex_lock(&mutex);
345 if (argc > 1) {
346 if (!strcasecmp(argv[1], "-v")) {
347 tnl_port_show_v(&ds);
348 goto out;
349 }
350 }
351
352 LIST_FOR_EACH(p, node, &port_list) {
c8025aee
BN
353 ds_put_format(&ds, "%s (%"PRIu32") ref_cnt=%u\n", p->dev_name, p->port,
354 ovs_refcount_read(&p->ref_cnt));
a36de779 355 }
7f9b8504
PS
356
357out:
358 ovs_mutex_unlock(&mutex);
a36de779
PS
359 unixctl_command_reply(conn, ds_cstr(&ds));
360 ds_destroy(&ds);
361}
362
7f9b8504
PS
363static void
364map_insert_ipdev(struct ip_device *ip_dev)
365{
366 struct tnl_port *p;
367
368 LIST_FOR_EACH(p, node, &port_list) {
98e3f58e 369 map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->nw_proto, p->tp_port);
7f9b8504
PS
370 }
371}
372
373static void
a8704b50
PS
374insert_ipdev__(struct netdev *dev,
375 struct in6_addr *addr, int n_addr)
7f9b8504
PS
376{
377 struct ip_device *ip_dev;
378 enum netdev_flags flags;
7f9b8504 379 int error;
7f9b8504
PS
380
381 error = netdev_get_flags(dev, &flags);
382 if (error || (flags & NETDEV_LOOPBACK)) {
a8704b50 383 goto err;
7f9b8504
PS
384 }
385
386 ip_dev = xzalloc(sizeof *ip_dev);
a8704b50 387 ip_dev->dev = netdev_ref(dev);
7f9b8504
PS
388 ip_dev->change_seq = netdev_get_change_seq(dev);
389 error = netdev_get_etheraddr(ip_dev->dev, &ip_dev->mac);
390 if (error) {
a8704b50 391 goto err_free_ipdev;
7f9b8504 392 }
a8704b50
PS
393 ip_dev->addr = addr;
394 ip_dev->n_addr = n_addr;
7f9b8504 395 ovs_strlcpy(ip_dev->dev_name, netdev_get_name(dev), sizeof ip_dev->dev_name);
417e7e66 396 ovs_list_insert(&addr_list, &ip_dev->node);
7f9b8504 397 map_insert_ipdev(ip_dev);
a8704b50
PS
398 return;
399
400err_free_ipdev:
401 netdev_close(ip_dev->dev);
402 free(ip_dev);
403err:
404 free(addr);
405}
406
/* Opens the netdev named 'dev_name', fetches its address list and hands both
 * to insert_ipdev__().  Returns silently if the device cannot be opened or
 * its addresses cannot be read.  The reference obtained by netdev_open() is
 * always released before returning. */
static void
insert_ipdev(const char dev_name[])
{
    struct in6_addr *addrs, *masks;
    struct netdev *dev;
    int n_addrs;

    if (netdev_open(dev_name, netdev_get_type_from_name(dev_name), &dev)) {
        return;
    }

    if (!netdev_get_addr_list(dev, &addrs, &masks, &n_addrs)) {
        /* Only the addresses are needed; insert_ipdev__() takes ownership of
         * 'addrs'. */
        free(masks);
        insert_ipdev__(dev, addrs, n_addrs);
    }
    netdev_close(dev);
}
428
429static void
430delete_ipdev(struct ip_device *ip_dev)
431{
432 struct tnl_port *p;
433
434 LIST_FOR_EACH(p, node, &port_list) {
98e3f58e 435 ipdev_map_delete(ip_dev, p->tp_port, p->nw_proto);
7f9b8504
PS
436 }
437
417e7e66 438 ovs_list_remove(&ip_dev->node);
7f9b8504 439 netdev_close(ip_dev->dev);
a8704b50 440 free(ip_dev->addr);
7f9b8504
PS
441 free(ip_dev);
442}
443
444void
445tnl_port_map_insert_ipdev(const char dev_name[])
446{
c465f75f 447 struct ip_device *ip_dev, *next;
7f9b8504
PS
448
449 ovs_mutex_lock(&mutex);
450
c465f75f 451 LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) {
7f9b8504
PS
452 if (!strcmp(netdev_get_name(ip_dev->dev), dev_name)) {
453 if (ip_dev->change_seq == netdev_get_change_seq(ip_dev->dev)) {
454 goto out;
455 }
456 /* Address changed. */
457 delete_ipdev(ip_dev);
7f9b8504
PS
458 }
459 }
460 insert_ipdev(dev_name);
461
462out:
463 ovs_mutex_unlock(&mutex);
464}
465
466void
467tnl_port_map_delete_ipdev(const char dev_name[])
468{
469 struct ip_device *ip_dev, *next;
470
471 ovs_mutex_lock(&mutex);
472 LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) {
473 if (!strcmp(netdev_get_name(ip_dev->dev), dev_name)) {
474 delete_ipdev(ip_dev);
475 }
476 }
477 ovs_mutex_unlock(&mutex);
478}
479
480void
481tnl_port_map_run(void)
482{
c465f75f 483 struct ip_device *ip_dev, *next;
7f9b8504
PS
484
485 ovs_mutex_lock(&mutex);
c465f75f 486 LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) {
7f9b8504
PS
487 char dev_name[IFNAMSIZ];
488
489 if (ip_dev->change_seq == netdev_get_change_seq(ip_dev->dev)) {
490 continue;
491 }
492
493 /* Address changed. */
f9ac0f03 494 ovs_strlcpy_arrays(dev_name, ip_dev->dev_name);
7f9b8504
PS
495 delete_ipdev(ip_dev);
496 insert_ipdev(dev_name);
497 }
498 ovs_mutex_unlock(&mutex);
499}
500
a36de779
PS
501void
502tnl_port_map_init(void)
503{
d70e8c28 504 classifier_init(&cls, flow_segment_u64s);
417e7e66
BW
505 ovs_list_init(&addr_list);
506 ovs_list_init(&port_list);
7f9b8504 507 unixctl_command_register("tnl/ports/show", "-v", 0, 1, tnl_port_show, NULL);
a36de779 508}