lib/tnl-ports.c
/*
 * Copyright (c) 2014, 2015, 2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "tnl-ports.h"

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "classifier.h"
#include "openvswitch/dynamic-string.h"
#include "hash.h"
#include "openvswitch/list.h"
#include "netdev.h"
#include "openvswitch/ofpbuf.h"
#include "ovs-thread.h"
#include "odp-util.h"
#include "unixctl.h"
#include "util.h"

static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
static struct classifier cls;   /* Tunnel ports. */

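/* A non-loopback network device tracked for tunnel termination, together
 * with the IP addresses currently configured on it.  A classifier entry is
 * installed per address for every registered tunnel port. */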
struct ip_device {
    struct netdev *dev;
    struct eth_addr mac;
    struct in6_addr *addr;
    int n_addr;
    uint64_t change_seq;
    struct ovs_list node;
    char dev_name[IFNAMSIZ];
};

static struct ovs_list addr_list;

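/* A tunnel listening port registered through tnl_port_map_insert(): the ODP
 * port number plus the transport port and IP protocol it listens on. */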
struct tnl_port {
    odp_port_t port;
    struct ovs_refcount ref_cnt;
    ovs_be16 tp_port;
    uint8_t nw_proto;
    char dev_name[IFNAMSIZ];
    struct ovs_list node;
};

static struct ovs_list port_list;

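/* A classifier entry mapping incoming tunnel traffic (destination MAC and
 * IP address, IP protocol and transport port) to the ODP port that should
 * receive it. */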
struct tnl_port_in {
    struct cls_rule cr;
    odp_port_t portno;
    struct ovs_refcount ref_cnt;
    char dev_name[IFNAMSIZ];
};

static struct tnl_port_in *
tnl_port_cast(const struct cls_rule *cr)
{
    BUILD_ASSERT_DECL(offsetof(struct tnl_port_in, cr) == 0);

    return CONTAINER_OF(cr, struct tnl_port_in, cr);
}

static void
tnl_port_free(struct tnl_port_in *p)
{
    cls_rule_destroy(&p->cr);
    free(p);
}

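/* Initializes 'flow' as a lookup key for tunnel traffic sent to 'mac' and
 * 'addr' with IP protocol 'nw_proto' and transport destination port
 * 'tp_port'. */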
static void
tnl_port_init_flow(struct flow *flow, struct eth_addr mac,
                   struct in6_addr *addr, uint8_t nw_proto, ovs_be16 tp_port)
{
    memset(flow, 0, sizeof *flow);

    flow->dl_dst = mac;
    if (IN6_IS_ADDR_V4MAPPED(addr)) {
        flow->dl_type = htons(ETH_TYPE_IP);
        flow->nw_dst = in6_addr_get_mapped_ipv4(addr);
    } else {
        flow->dl_type = htons(ETH_TYPE_IPV6);
        flow->ipv6_dst = *addr;
    }

    flow->nw_proto = nw_proto;
    flow->tp_dst = tp_port;
}

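/* Inserts a classifier rule that matches tunnel packets addressed to
 * 'mac'/'addr' on 'nw_proto'/'tp_port' and maps them to ODP port 'port'.
 * If an equivalent rule already exists, takes a reference to it instead. */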
static void
map_insert(odp_port_t port, struct eth_addr mac, struct in6_addr *addr,
           uint8_t nw_proto, ovs_be16 tp_port, const char dev_name[])
{
    const struct cls_rule *cr;
    struct tnl_port_in *p;
    struct match match;

    memset(&match, 0, sizeof match);
    tnl_port_init_flow(&match.flow, mac, addr, nw_proto, tp_port);

    do {
        cr = classifier_lookup(&cls, OVS_VERSION_MAX, &match.flow, NULL);
        p = tnl_port_cast(cr);
        /* Try again if the rule was released before we got the reference. */
    } while (p && !ovs_refcount_try_ref_rcu(&p->ref_cnt));

    if (!p) {
        p = xzalloc(sizeof *p);
        p->portno = port;

        match.wc.masks.dl_type = OVS_BE16_MAX;
        match.wc.masks.nw_proto = 0xff;
        /* XXX: No fragments support. */
        match.wc.masks.nw_frag = FLOW_NW_FRAG_MASK;

        /* 'tp_port' is zero for GRE tunnels.  In this case it
         * doesn't make sense to match on UDP port numbers. */
        if (tp_port) {
            match.wc.masks.tp_dst = OVS_BE16_MAX;
        }
        if (IN6_IS_ADDR_V4MAPPED(addr)) {
            match.wc.masks.nw_dst = OVS_BE32_MAX;
        } else {
            match.wc.masks.ipv6_dst = in6addr_exact;
        }
        match.wc.masks.vlans[0].tci = OVS_BE16_MAX;
        memset(&match.wc.masks.dl_dst, 0xff, sizeof (struct eth_addr));

        cls_rule_init(&p->cr, &match, 0); /* Priority == 0. */
        ovs_refcount_init(&p->ref_cnt);
        ovs_strlcpy(p->dev_name, dev_name, sizeof p->dev_name);

        classifier_insert(&cls, &p->cr, OVS_VERSION_MIN, NULL, 0);
    }
}

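/* Installs a classifier entry for each address configured on 'ip_dev'. */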
static void
map_insert_ipdev__(struct ip_device *ip_dev, char dev_name[],
                   odp_port_t port, uint8_t nw_proto, ovs_be16 tp_port)
{
    if (ip_dev->n_addr) {
        int i;

        for (i = 0; i < ip_dev->n_addr; i++) {
            map_insert(port, ip_dev->mac, &ip_dev->addr[i],
                       nw_proto, tp_port, dev_name);
        }
    }
}

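/* Returns the IP protocol carried in the outer header of tunnels of the
 * given 'type', or 0 if the type is not recognized. */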
static uint8_t
tnl_type_to_nw_proto(const char type[])
{
    if (!strcmp(type, "geneve")) {
        return IPPROTO_UDP;
    }
    if (!strcmp(type, "stt")) {
        return IPPROTO_TCP;
    }
    if (!strcmp(type, "gre") || !strcmp(type, "erspan") ||
        !strcmp(type, "ip6erspan") || !strcmp(type, "ip6gre")) {
        return IPPROTO_GRE;
    }
    if (!strcmp(type, "vxlan")) {
        return IPPROTO_UDP;
    }
    return 0;
}

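/* Registers ODP port 'port' as listening for tunnel traffic of the given
 * 'type' on transport port 'tp_port'.  A classifier entry is added for
 * every address of every known IP device.  Repeated registrations of the
 * same (port, protocol) pair only take an additional reference. */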
void
tnl_port_map_insert(odp_port_t port, ovs_be16 tp_port,
                    const char dev_name[], const char type[])
{
    struct tnl_port *p;
    struct ip_device *ip_dev;
    uint8_t nw_proto;

    nw_proto = tnl_type_to_nw_proto(type);
    if (!nw_proto) {
        return;
    }

    ovs_mutex_lock(&mutex);
    LIST_FOR_EACH(p, node, &port_list) {
        if (p->port == port && p->nw_proto == nw_proto) {
            ovs_refcount_ref(&p->ref_cnt);
            goto out;
        }
    }

    p = xzalloc(sizeof *p);
    p->port = port;
    p->tp_port = tp_port;
    p->nw_proto = nw_proto;
    ovs_strlcpy(p->dev_name, dev_name, sizeof p->dev_name);
    ovs_refcount_init(&p->ref_cnt);
    ovs_list_insert(&port_list, &p->node);

    LIST_FOR_EACH(ip_dev, node, &addr_list) {
        map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->nw_proto,
                           p->tp_port);
    }

out:
    ovs_mutex_unlock(&mutex);
}

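/* Drops one reference to the classifier entry for 'cr'.  When the last
 * reference goes away, the rule is removed from the classifier and its
 * memory is freed after an RCU grace period. */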
static void
tnl_port_unref(const struct cls_rule *cr)
{
    struct tnl_port_in *p = tnl_port_cast(cr);

    if (cr && ovs_refcount_unref_relaxed(&p->ref_cnt) == 1) {
        classifier_remove_assert(&cls, cr);
        ovsrcu_postpone(tnl_port_free, p);
    }
}

static void
map_delete(struct eth_addr mac, struct in6_addr *addr,
           ovs_be16 tp_port, uint8_t nw_proto)
{
    const struct cls_rule *cr;
    struct flow flow;

    tnl_port_init_flow(&flow, mac, addr, nw_proto, tp_port);

    cr = classifier_lookup(&cls, OVS_VERSION_MAX, &flow, NULL);
    tnl_port_unref(cr);
}

static void
ipdev_map_delete(struct ip_device *ip_dev, ovs_be16 tp_port, uint8_t nw_proto)
{
    if (ip_dev->n_addr) {
        int i;

        for (i = 0; i < ip_dev->n_addr; i++) {
            map_delete(ip_dev->mac, &ip_dev->addr[i], tp_port, nw_proto);
        }
    }
}

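/* Releases the registration of ODP port 'port' for tunnel 'type'.  When the
 * last reference is dropped, the matching classifier entries for every known
 * IP device are released as well. */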
void
tnl_port_map_delete(odp_port_t port, const char type[])
{
    struct tnl_port *p, *next;
    struct ip_device *ip_dev;
    uint8_t nw_proto;

    nw_proto = tnl_type_to_nw_proto(type);

    ovs_mutex_lock(&mutex);
    LIST_FOR_EACH_SAFE(p, next, node, &port_list) {
        if (p->port == port && p->nw_proto == nw_proto &&
            ovs_refcount_unref_relaxed(&p->ref_cnt) == 1) {
            ovs_list_remove(&p->node);
            LIST_FOR_EACH(ip_dev, node, &addr_list) {
                ipdev_map_delete(ip_dev, p->tp_port, p->nw_proto);
            }
            free(p);
            break;
        }
    }
    ovs_mutex_unlock(&mutex);
}

/* 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning. */
odp_port_t
tnl_port_map_lookup(struct flow *flow, struct flow_wildcards *wc)
{
    const struct cls_rule *cr = classifier_lookup(&cls, OVS_VERSION_MAX, flow,
                                                  wc);

    return (cr) ? tnl_port_cast(cr)->portno : ODPP_NONE;
}

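/* Appends to 'ds' a verbose ODP-formatted dump (key and mask) of every
 * classifier entry.  Helper for the "tnl/ports/show -v" unixctl command. */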
static void
tnl_port_show_v(struct ds *ds)
{
    const struct tnl_port_in *p;

    CLS_FOR_EACH(p, cr, &cls) {
        struct odputil_keybuf keybuf;
        struct odputil_keybuf maskbuf;
        struct flow flow;
        const struct nlattr *key, *mask;
        size_t key_len, mask_len;
        struct flow_wildcards wc;
        struct ofpbuf buf;
        struct odp_flow_key_parms odp_parms = {
            .flow = &flow,
            .mask = &wc.masks,
        };

        ds_put_format(ds, "%s (%"PRIu32") : ", p->dev_name, p->portno);
        minimask_expand(p->cr.match.mask, &wc);
        miniflow_expand(p->cr.match.flow, &flow);

        /* Key. */
        odp_parms.support.recirc = true;
        ofpbuf_use_stack(&buf, &keybuf, sizeof keybuf);
        odp_flow_key_from_flow(&odp_parms, &buf);
        key = buf.data;
        key_len = buf.size;

        /* Mask. */
        odp_parms.support.recirc = false;
        ofpbuf_use_stack(&buf, &maskbuf, sizeof maskbuf);
        odp_flow_key_from_mask(&odp_parms, &buf);
        mask = buf.data;
        mask_len = buf.size;

        /* Build the output string. */
        odp_flow_format(key, key_len, mask, mask_len, NULL, ds, false);
        ds_put_format(ds, "\n");
    }
}

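/* unixctl handler for "tnl/ports/show [-v]": lists the registered tunnel
 * listening ports, or the full classifier contents with "-v". */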
static void
tnl_port_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
              const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    struct tnl_port *p;

    ds_put_format(&ds, "Listening ports:\n");
    ovs_mutex_lock(&mutex);
    if (argc > 1) {
        if (!strcasecmp(argv[1], "-v")) {
            tnl_port_show_v(&ds);
            goto out;
        }
    }

    LIST_FOR_EACH(p, node, &port_list) {
        ds_put_format(&ds, "%s (%"PRIu32") ref_cnt=%u\n", p->dev_name, p->port,
                      ovs_refcount_read(&p->ref_cnt));
    }

out:
    ovs_mutex_unlock(&mutex);
    unixctl_command_reply(conn, ds_cstr(&ds));
    ds_destroy(&ds);
}

static void
map_insert_ipdev(struct ip_device *ip_dev)
{
    struct tnl_port *p;

    LIST_FOR_EACH(p, node, &port_list) {
        map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->nw_proto,
                           p->tp_port);
    }
}

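/* Records 'dev' with the 'n_addr' addresses in 'addr' and installs the
 * corresponding classifier entries for every registered tunnel port.  Takes
 * ownership of 'addr'; on failure (loopback device, unreadable flags, or
 * unreadable MAC address) the array is freed and nothing is recorded. */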
static void
insert_ipdev__(struct netdev *dev,
               struct in6_addr *addr, int n_addr)
{
    struct ip_device *ip_dev;
    enum netdev_flags flags;
    int error;

    error = netdev_get_flags(dev, &flags);
    if (error || (flags & NETDEV_LOOPBACK)) {
        goto err;
    }

    ip_dev = xzalloc(sizeof *ip_dev);
    ip_dev->dev = netdev_ref(dev);
    ip_dev->change_seq = netdev_get_change_seq(dev);
    error = netdev_get_etheraddr(ip_dev->dev, &ip_dev->mac);
    if (error) {
        goto err_free_ipdev;
    }
    ip_dev->addr = addr;
    ip_dev->n_addr = n_addr;
    ovs_strlcpy(ip_dev->dev_name, netdev_get_name(dev),
                sizeof ip_dev->dev_name);
    ovs_list_insert(&addr_list, &ip_dev->node);
    map_insert_ipdev(ip_dev);
    return;

err_free_ipdev:
    netdev_close(ip_dev->dev);
    free(ip_dev);
err:
    free(addr);
}

static void
insert_ipdev(const char dev_name[])
{
    struct in6_addr *addr, *mask;
    struct netdev *dev;
    int error, n_in6;

    error = netdev_open(dev_name, netdev_get_type_from_name(dev_name), &dev);
    if (error) {
        return;
    }

    error = netdev_get_addr_list(dev, &addr, &mask, &n_in6);
    if (error) {
        netdev_close(dev);
        return;
    }
    free(mask);
    insert_ipdev__(dev, addr, n_in6);
    netdev_close(dev);
}

static void
delete_ipdev(struct ip_device *ip_dev)
{
    struct tnl_port *p;

    LIST_FOR_EACH(p, node, &port_list) {
        ipdev_map_delete(ip_dev, p->tp_port, p->nw_proto);
    }

    ovs_list_remove(&ip_dev->node);
    netdev_close(ip_dev->dev);
    free(ip_dev->addr);
    free(ip_dev);
}

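/* Starts (or refreshes) tracking of the network device 'dev_name'.  If the
 * device is already tracked and its addresses have not changed, this is a
 * no-op; otherwise the stale entry is dropped and the device is re-read. */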
void
tnl_port_map_insert_ipdev(const char dev_name[])
{
    struct ip_device *ip_dev, *next;

    ovs_mutex_lock(&mutex);

    LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) {
        if (!strcmp(netdev_get_name(ip_dev->dev), dev_name)) {
            if (ip_dev->change_seq == netdev_get_change_seq(ip_dev->dev)) {
                goto out;
            }
            /* Address changed. */
            delete_ipdev(ip_dev);
        }
    }
    insert_ipdev(dev_name);

out:
    ovs_mutex_unlock(&mutex);
}

void
tnl_port_map_delete_ipdev(const char dev_name[])
{
    struct ip_device *ip_dev, *next;

    ovs_mutex_lock(&mutex);
    LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) {
        if (!strcmp(netdev_get_name(ip_dev->dev), dev_name)) {
            delete_ipdev(ip_dev);
        }
    }
    ovs_mutex_unlock(&mutex);
}

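/* Periodic maintenance: re-reads the addresses of any tracked device whose
 * netdev change sequence number has moved since it was last recorded. */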
void
tnl_port_map_run(void)
{
    struct ip_device *ip_dev, *next;

    ovs_mutex_lock(&mutex);
    LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) {
        char dev_name[IFNAMSIZ];

        if (ip_dev->change_seq == netdev_get_change_seq(ip_dev->dev)) {
            continue;
        }

        /* Address changed. */
        ovs_strlcpy_arrays(dev_name, ip_dev->dev_name);
        delete_ipdev(ip_dev);
        insert_ipdev(dev_name);
    }
    ovs_mutex_unlock(&mutex);
}

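/* One-time module initialization: sets up the classifier, the device and
 * port lists, and the "tnl/ports/show" unixctl command.
 *
 * A typical call sequence (a sketch only; the real call sites live elsewhere
 * in OVS, e.g. in the ofproto-dpif layer) would look roughly like:
 *
 *     tnl_port_map_init();
 *     tnl_port_map_insert_ipdev("br0");
 *     tnl_port_map_insert(odp_port, htons(4789), "vxlan_sys_4789", "vxlan");
 *     ...
 *     odp_port_t p = tnl_port_map_lookup(&flow, &wc);
 */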
void
tnl_port_map_init(void)
{
    classifier_init(&cls, flow_segment_u64s);
    ovs_list_init(&addr_list);
    ovs_list_init(&port_list);
    unixctl_command_register("tnl/ports/show", "-v", 0, 1,
                             tnl_port_show, NULL);
}