]> git.proxmox.com Git - mirror_ovs.git/blob - lib/dpif-netlink-rtnl.c
netdev-offload-tc: Use single 'once' variable for probing tc features
[mirror_ovs.git] / lib / dpif-netlink-rtnl.c
1 /*
2 * Copyright (c) 2017 Red Hat, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "dpif-netlink-rtnl.h"
20
21 #include <net/if.h>
22 #include <linux/ip.h>
23 #include <linux/rtnetlink.h>
24
25 #include "dpif-netlink.h"
26 #include "netdev-vport.h"
27 #include "netlink-socket.h"
28 #include "openvswitch/vlog.h"
29
30 VLOG_DEFINE_THIS_MODULE(dpif_netlink_rtnl);
31
/* On some older systems, these enums are not defined.  When the headers do
 * not provide IFLA_VXLAN_MAX at all it is set to 0, so that the comparison
 * below selects the fallback attribute numbers this file relies on. */
#ifndef IFLA_VXLAN_MAX
#  define IFLA_VXLAN_MAX 0
#endif
#if IFLA_VXLAN_MAX < 27
#  define IFLA_VXLAN_LEARNING 7
#  define IFLA_VXLAN_PORT 15
#  define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
#  define IFLA_VXLAN_GBP 23
#  define IFLA_VXLAN_COLLECT_METADATA 25
#  define IFLA_VXLAN_GPE 27
#endif
44
/* Fallback for headers that lack IFLA_GRE_COLLECT_METADATA: a missing
 * IFLA_GRE_MAX is treated as 0, which selects the definition below. */
#ifndef IFLA_GRE_MAX
#  define IFLA_GRE_MAX 0
#endif
#if IFLA_GRE_MAX < 18
#  define IFLA_GRE_COLLECT_METADATA 18
#endif
51
/* Fallback for headers that lack the Geneve attributes used in this file: a
 * missing IFLA_GENEVE_MAX is treated as 0, which selects the definitions
 * below. */
#ifndef IFLA_GENEVE_MAX
#  define IFLA_GENEVE_MAX 0
#endif
#if IFLA_GENEVE_MAX < 10
#  define IFLA_GENEVE_PORT 5
#  define IFLA_GENEVE_COLLECT_METADATA 6
#  define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10
#endif
60
61 static const struct nl_policy rtlink_policy[] = {
62 [IFLA_LINKINFO] = { .type = NL_A_NESTED },
63 };
64 static const struct nl_policy linkinfo_policy[] = {
65 [IFLA_INFO_KIND] = { .type = NL_A_STRING },
66 [IFLA_INFO_DATA] = { .type = NL_A_NESTED },
67 };
68 static const struct nl_policy vxlan_policy[] = {
69 [IFLA_VXLAN_COLLECT_METADATA] = { .type = NL_A_U8 },
70 [IFLA_VXLAN_LEARNING] = { .type = NL_A_U8 },
71 [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NL_A_U8 },
72 [IFLA_VXLAN_PORT] = { .type = NL_A_U16 },
73 [IFLA_VXLAN_GBP] = { .type = NL_A_FLAG, .optional = true },
74 [IFLA_VXLAN_GPE] = { .type = NL_A_FLAG, .optional = true },
75 };
76 static const struct nl_policy gre_policy[] = {
77 [IFLA_GRE_COLLECT_METADATA] = { .type = NL_A_FLAG },
78 };
79 static const struct nl_policy geneve_policy[] = {
80 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NL_A_FLAG },
81 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NL_A_U8 },
82 [IFLA_GENEVE_PORT] = { .type = NL_A_U16 },
83 };
84
85 static const char *
86 vport_type_to_kind(enum ovs_vport_type type,
87 const struct netdev_tunnel_config *tnl_cfg)
88 {
89 switch (type) {
90 case OVS_VPORT_TYPE_VXLAN:
91 return "vxlan";
92 case OVS_VPORT_TYPE_GRE:
93 if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L3) {
94 return "gre";
95 } else if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L2) {
96 return "gretap";
97 } else {
98 return NULL;
99 }
100 case OVS_VPORT_TYPE_GENEVE:
101 return "geneve";
102 case OVS_VPORT_TYPE_ERSPAN:
103 return "erspan";
104 case OVS_VPORT_TYPE_IP6ERSPAN:
105 return "ip6erspan";
106 case OVS_VPORT_TYPE_IP6GRE:
107 if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L2) {
108 return "ip6gretap";
109 } else if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L3) {
110 return NULL;
111 } else {
112 return NULL;
113 }
114 case OVS_VPORT_TYPE_GTPU:
115 return NULL;
116 case OVS_VPORT_TYPE_NETDEV:
117 case OVS_VPORT_TYPE_INTERNAL:
118 case OVS_VPORT_TYPE_LISP:
119 case OVS_VPORT_TYPE_STT:
120 case OVS_VPORT_TYPE_UNSPEC:
121 case __OVS_VPORT_TYPE_MAX:
122 default:
123 break;
124 }
125
126 return NULL;
127 }
128
129 static int
130 rtnl_transact(uint32_t type, uint32_t flags, const char *name,
131 struct ofpbuf **reply)
132 {
133 struct ofpbuf request;
134 int err;
135
136 ofpbuf_init(&request, 0);
137 nl_msg_put_nlmsghdr(&request, 0, type, flags);
138 ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
139 nl_msg_put_string(&request, IFLA_IFNAME, name);
140
141 err = nl_transact(NETLINK_ROUTE, &request, reply);
142 ofpbuf_uninit(&request);
143
144 return err;
145 }
146
147 static int
148 dpif_netlink_rtnl_destroy(const char *name)
149 {
150 return rtnl_transact(RTM_DELLINK, NLM_F_REQUEST | NLM_F_ACK, name, NULL);
151 }
152
153 static int
154 dpif_netlink_rtnl_getlink(const char *name, struct ofpbuf **reply)
155 {
156 return rtnl_transact(RTM_GETLINK, NLM_F_REQUEST, name, reply);
157 }
158
159 static int
160 rtnl_policy_parse(const char *kind, struct ofpbuf *reply,
161 const struct nl_policy *policy,
162 struct nlattr *tnl_info[],
163 size_t policy_size)
164 {
165 struct nlattr *linkinfo[ARRAY_SIZE(linkinfo_policy)];
166 struct nlattr *rtlink[ARRAY_SIZE(rtlink_policy)];
167 int error = 0;
168
169 if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
170 rtlink_policy, rtlink, ARRAY_SIZE(rtlink_policy))
171 || !nl_parse_nested(rtlink[IFLA_LINKINFO], linkinfo_policy,
172 linkinfo, ARRAY_SIZE(linkinfo_policy))
173 || strcmp(nl_attr_get_string(linkinfo[IFLA_INFO_KIND]), kind)
174 || !nl_parse_nested(linkinfo[IFLA_INFO_DATA], policy,
175 tnl_info, policy_size)) {
176 error = EINVAL;
177 }
178
179 return error;
180 }
181
182 static int
183 dpif_netlink_rtnl_vxlan_verify(const struct netdev_tunnel_config *tnl_cfg,
184 const char *kind, struct ofpbuf *reply)
185 {
186 struct nlattr *vxlan[ARRAY_SIZE(vxlan_policy)];
187 int err;
188
189 err = rtnl_policy_parse(kind, reply, vxlan_policy, vxlan,
190 ARRAY_SIZE(vxlan_policy));
191 if (!err) {
192 if (0 != nl_attr_get_u8(vxlan[IFLA_VXLAN_LEARNING])
193 || 1 != nl_attr_get_u8(vxlan[IFLA_VXLAN_COLLECT_METADATA])
194 || 1 != nl_attr_get_u8(vxlan[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])
195 || (tnl_cfg->dst_port
196 != nl_attr_get_be16(vxlan[IFLA_VXLAN_PORT]))
197 || (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)
198 && !nl_attr_get_flag(vxlan[IFLA_VXLAN_GBP]))
199 || (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)
200 && !nl_attr_get_flag(vxlan[IFLA_VXLAN_GPE]))) {
201 err = EINVAL;
202 }
203 }
204
205 return err;
206 }
207
208 static int
209 dpif_netlink_rtnl_gre_verify(const struct netdev_tunnel_config OVS_UNUSED *tnl,
210 const char *kind, struct ofpbuf *reply)
211 {
212 struct nlattr *gre[ARRAY_SIZE(gre_policy)];
213 int err;
214
215 err = rtnl_policy_parse(kind, reply, gre_policy, gre,
216 ARRAY_SIZE(gre_policy));
217 if (!err) {
218 if (!nl_attr_get_flag(gre[IFLA_GRE_COLLECT_METADATA])) {
219 err = EINVAL;
220 }
221 }
222
223 return err;
224 }
225
226 static int
227 dpif_netlink_rtnl_geneve_verify(const struct netdev_tunnel_config *tnl_cfg,
228 const char *kind, struct ofpbuf *reply)
229 {
230 struct nlattr *geneve[ARRAY_SIZE(geneve_policy)];
231 int err;
232
233 err = rtnl_policy_parse(kind, reply, geneve_policy, geneve,
234 ARRAY_SIZE(geneve_policy));
235 if (!err) {
236 if (!nl_attr_get_flag(geneve[IFLA_GENEVE_COLLECT_METADATA])
237 || 1 != nl_attr_get_u8(geneve[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])
238 || (tnl_cfg->dst_port
239 != nl_attr_get_be16(geneve[IFLA_GENEVE_PORT]))) {
240 err = EINVAL;
241 }
242 }
243
244 return err;
245 }
246
247 static int
248 dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg,
249 enum ovs_vport_type type, const char *name)
250 {
251 struct ofpbuf *reply;
252 const char *kind;
253 int err;
254
255 kind = vport_type_to_kind(type, tnl_cfg);
256 if (!kind) {
257 return EOPNOTSUPP;
258 }
259
260 err = dpif_netlink_rtnl_getlink(name, &reply);
261 if (err) {
262 return err;
263 }
264
265 switch (type) {
266 case OVS_VPORT_TYPE_VXLAN:
267 err = dpif_netlink_rtnl_vxlan_verify(tnl_cfg, kind, reply);
268 break;
269 case OVS_VPORT_TYPE_GRE:
270 case OVS_VPORT_TYPE_ERSPAN:
271 case OVS_VPORT_TYPE_IP6ERSPAN:
272 case OVS_VPORT_TYPE_IP6GRE:
273 err = dpif_netlink_rtnl_gre_verify(tnl_cfg, kind, reply);
274 break;
275 case OVS_VPORT_TYPE_GENEVE:
276 err = dpif_netlink_rtnl_geneve_verify(tnl_cfg, kind, reply);
277 break;
278 case OVS_VPORT_TYPE_NETDEV:
279 case OVS_VPORT_TYPE_INTERNAL:
280 case OVS_VPORT_TYPE_LISP:
281 case OVS_VPORT_TYPE_STT:
282 case OVS_VPORT_TYPE_GTPU:
283 case OVS_VPORT_TYPE_UNSPEC:
284 case __OVS_VPORT_TYPE_MAX:
285 default:
286 OVS_NOT_REACHED();
287 }
288
289 ofpbuf_delete(reply);
290 return err;
291 }
292
293 static int
294 rtnl_set_mtu(const char *name, uint32_t mtu, struct ofpbuf *request)
295 {
296 ofpbuf_clear(request);
297 nl_msg_put_nlmsghdr(request, 0, RTM_SETLINK,
298 NLM_F_REQUEST | NLM_F_ACK);
299 ofpbuf_put_zeros(request, sizeof(struct ifinfomsg));
300 nl_msg_put_string(request, IFLA_IFNAME, name);
301 nl_msg_put_u32(request, IFLA_MTU, mtu);
302
303 return nl_transact(NETLINK_ROUTE, request, NULL);
304 }
305
306 static int
307 dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg,
308 const char *name, enum ovs_vport_type type,
309 const char *kind, uint32_t flags)
310 {
311 enum {
312 /* For performance, we want to use the largest MTU that the system
313 * supports. Most existing tunnels will accept UINT16_MAX, treating it
314 * as the actual max MTU, but some do not. Thus, we use a slightly
315 * smaller value, that should always be safe yet does not noticeably
316 * reduce performance. */
317 MAX_MTU = 65000
318 };
319
320 size_t linkinfo_off, infodata_off;
321 struct ifinfomsg *ifinfo;
322 struct ofpbuf request;
323 int err;
324
325 ofpbuf_init(&request, 0);
326 nl_msg_put_nlmsghdr(&request, 0, RTM_NEWLINK, flags);
327 ifinfo = ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
328 ifinfo->ifi_change = ifinfo->ifi_flags = IFF_UP;
329 nl_msg_put_string(&request, IFLA_IFNAME, name);
330 nl_msg_put_u32(&request, IFLA_MTU, MAX_MTU);
331 linkinfo_off = nl_msg_start_nested(&request, IFLA_LINKINFO);
332 nl_msg_put_string(&request, IFLA_INFO_KIND, kind);
333 infodata_off = nl_msg_start_nested(&request, IFLA_INFO_DATA);
334
335 /* tunnel unique info */
336 switch (type) {
337 case OVS_VPORT_TYPE_VXLAN:
338 nl_msg_put_u8(&request, IFLA_VXLAN_LEARNING, 0);
339 nl_msg_put_u8(&request, IFLA_VXLAN_COLLECT_METADATA, 1);
340 nl_msg_put_u8(&request, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
341 if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) {
342 nl_msg_put_flag(&request, IFLA_VXLAN_GBP);
343 }
344 if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) {
345 nl_msg_put_flag(&request, IFLA_VXLAN_GPE);
346 }
347 nl_msg_put_be16(&request, IFLA_VXLAN_PORT, tnl_cfg->dst_port);
348 break;
349 case OVS_VPORT_TYPE_GRE:
350 case OVS_VPORT_TYPE_ERSPAN:
351 case OVS_VPORT_TYPE_IP6ERSPAN:
352 case OVS_VPORT_TYPE_IP6GRE:
353 nl_msg_put_flag(&request, IFLA_GRE_COLLECT_METADATA);
354 break;
355 case OVS_VPORT_TYPE_GENEVE:
356 nl_msg_put_flag(&request, IFLA_GENEVE_COLLECT_METADATA);
357 nl_msg_put_u8(&request, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1);
358 nl_msg_put_be16(&request, IFLA_GENEVE_PORT, tnl_cfg->dst_port);
359 break;
360 case OVS_VPORT_TYPE_NETDEV:
361 case OVS_VPORT_TYPE_INTERNAL:
362 case OVS_VPORT_TYPE_LISP:
363 case OVS_VPORT_TYPE_STT:
364 case OVS_VPORT_TYPE_GTPU:
365 case OVS_VPORT_TYPE_UNSPEC:
366 case __OVS_VPORT_TYPE_MAX:
367 default:
368 err = EOPNOTSUPP;
369 goto exit;
370 }
371
372 nl_msg_end_nested(&request, infodata_off);
373 nl_msg_end_nested(&request, linkinfo_off);
374
375 err = nl_transact(NETLINK_ROUTE, &request, NULL);
376 if (!err && (type == OVS_VPORT_TYPE_GRE ||
377 type == OVS_VPORT_TYPE_IP6GRE)) {
378 /* Work around a bug in kernel GRE driver, which ignores IFLA_MTU in
379 * RTM_NEWLINK, by setting the MTU again. See
380 * https://bugzilla.redhat.com/show_bug.cgi?id=1488484.
381 *
382 * In case of MAX_MTU exceeds hw max MTU, retry a smaller value. */
383 int err2 = rtnl_set_mtu(name, MAX_MTU, &request);
384 if (err2) {
385 err2 = rtnl_set_mtu(name, 1450, &request);
386 }
387 if (err2) {
388 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
389
390 VLOG_WARN_RL(&rl, "setting MTU of tunnel %s failed (%s)",
391 name, ovs_strerror(err2));
392 }
393 }
394
395 exit:
396 ofpbuf_uninit(&request);
397
398 return err;
399 }
400
401 int
402 dpif_netlink_rtnl_port_create(struct netdev *netdev)
403 {
404 const struct netdev_tunnel_config *tnl_cfg;
405 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
406 enum ovs_vport_type type;
407 const char *name;
408 const char *kind;
409 uint32_t flags;
410 int err;
411
412 type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
413 tnl_cfg = netdev_get_tunnel_config(netdev);
414 if (!tnl_cfg) {
415 return EOPNOTSUPP;
416 }
417
418 kind = vport_type_to_kind(type, tnl_cfg);
419 if (!kind) {
420 return EOPNOTSUPP;
421 }
422
423 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
424 flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL;
425
426 err = dpif_netlink_rtnl_create(tnl_cfg, name, type, kind, flags);
427
428 /* If the device exists, validate and/or attempt to recreate it. */
429 if (err == EEXIST) {
430 err = dpif_netlink_rtnl_verify(tnl_cfg, type, name);
431 if (!err) {
432 return 0;
433 }
434 err = dpif_netlink_rtnl_destroy(name);
435 if (err) {
436 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
437
438 VLOG_WARN_RL(&rl, "RTNL device %s exists and cannot be "
439 "deleted: %s", name, ovs_strerror(err));
440 return err;
441 }
442 err = dpif_netlink_rtnl_create(tnl_cfg, name, type, kind, flags);
443 }
444 if (err) {
445 return err;
446 }
447
448 err = dpif_netlink_rtnl_verify(tnl_cfg, type, name);
449 if (err) {
450 int err2 = dpif_netlink_rtnl_destroy(name);
451
452 if (err2) {
453 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
454
455 VLOG_WARN_RL(&rl, "Failed to delete device %s during rtnl port "
456 "creation: %s", name, ovs_strerror(err2));
457 }
458 }
459
460 return err;
461 }
462
463 int
464 dpif_netlink_rtnl_port_destroy(const char *name, const char *type)
465 {
466 switch (netdev_to_ovs_vport_type(type)) {
467 case OVS_VPORT_TYPE_VXLAN:
468 case OVS_VPORT_TYPE_GRE:
469 case OVS_VPORT_TYPE_GENEVE:
470 case OVS_VPORT_TYPE_ERSPAN:
471 case OVS_VPORT_TYPE_IP6ERSPAN:
472 case OVS_VPORT_TYPE_IP6GRE:
473 return dpif_netlink_rtnl_destroy(name);
474 case OVS_VPORT_TYPE_NETDEV:
475 case OVS_VPORT_TYPE_INTERNAL:
476 case OVS_VPORT_TYPE_LISP:
477 case OVS_VPORT_TYPE_STT:
478 case OVS_VPORT_TYPE_GTPU:
479 case OVS_VPORT_TYPE_UNSPEC:
480 case __OVS_VPORT_TYPE_MAX:
481 default:
482 return EOPNOTSUPP;
483 }
484 return 0;
485 }
486
487 /**
488 * Probe for whether the modules are out-of-tree (openvswitch) or in-tree
489 * (upstream kernel).
490 *
491 * We probe for "ovs_geneve" via rtnetlink. As long as this returns something
492 * other than EOPNOTSUPP we know that the module in use is the out-of-tree one.
493 * This will be used to determine which netlink interface to use when creating
494 * ports; rtnetlink or compat/genetlink.
495 *
496 * See ovs_tunnels_out_of_tree
497 */
498 bool
499 dpif_netlink_rtnl_probe_oot_tunnels(void)
500 {
501 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
502 struct netdev *netdev = NULL;
503 bool out_of_tree = false;
504 const char *name;
505 int error;
506
507 error = netdev_open("ovs-system-probe", "geneve", &netdev);
508 if (!error) {
509 struct ofpbuf *reply;
510 const struct netdev_tunnel_config *tnl_cfg;
511
512 tnl_cfg = netdev_get_tunnel_config(netdev);
513 if (!tnl_cfg) {
514 return true;
515 }
516
517 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
518
519 /* The geneve module exists when ovs-vswitchd crashes
520 * and restarts, handle the case here.
521 */
522 error = dpif_netlink_rtnl_getlink(name, &reply);
523 if (!error) {
524
525 struct nlattr *linkinfo[ARRAY_SIZE(linkinfo_policy)];
526 struct nlattr *rtlink[ARRAY_SIZE(rtlink_policy)];
527 const char *kind;
528
529 if (!nl_policy_parse(reply,
530 NLMSG_HDRLEN + sizeof(struct ifinfomsg),
531 rtlink_policy, rtlink,
532 ARRAY_SIZE(rtlink_policy))
533 || !nl_parse_nested(rtlink[IFLA_LINKINFO], linkinfo_policy,
534 linkinfo, ARRAY_SIZE(linkinfo_policy))) {
535 VLOG_ABORT("Error fetching Geneve tunnel device %s "
536 "linkinfo", name);
537 }
538
539 kind = nl_attr_get_string(linkinfo[IFLA_INFO_KIND]);
540
541 if (!strcmp(kind, "ovs_geneve")) {
542 out_of_tree = true;
543 } else if (!strcmp(kind, "geneve")) {
544 out_of_tree = false;
545 } else {
546 VLOG_ABORT("Geneve tunnel device %s with kind %s"
547 " not supported", name, kind);
548 }
549
550 ofpbuf_delete(reply);
551 netdev_close(netdev);
552
553 return out_of_tree;
554 }
555
556 error = dpif_netlink_rtnl_create(tnl_cfg, name, OVS_VPORT_TYPE_GENEVE,
557 "ovs_geneve",
558 (NLM_F_REQUEST | NLM_F_ACK
559 | NLM_F_CREATE));
560 if (error != EOPNOTSUPP) {
561 if (!error) {
562 dpif_netlink_rtnl_destroy(name);
563 }
564 out_of_tree = true;
565 }
566 netdev_close(netdev);
567 }
568
569 return out_of_tree;
570 }