]> git.proxmox.com Git - ovs.git/blob - lib/dpif-netlink-rtnl.c
dpif: Ensure ERSPAN GRE support
[ovs.git] / lib / dpif-netlink-rtnl.c
1 /*
2 * Copyright (c) 2017 Red Hat, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "dpif-netlink-rtnl.h"
20
21 #include <net/if.h>
22 #include <linux/ip.h>
23 #include <linux/rtnetlink.h>
24
25 #include "dpif-netlink.h"
26 #include "netdev-vport.h"
27 #include "netlink-socket.h"
28 #include "openvswitch/vlog.h"
29
30 VLOG_DEFINE_THIS_MODULE(dpif_netlink_rtnl);
31
32 /* On some older systems, these enums are not defined. */
33 #ifndef IFLA_VXLAN_MAX
34 #define IFLA_VXLAN_MAX 0
35 #endif
36 #if IFLA_VXLAN_MAX < 27
37 #define IFLA_VXLAN_LEARNING 7
38 #define IFLA_VXLAN_PORT 15
39 #define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
40 #define IFLA_VXLAN_GBP 23
41 #define IFLA_VXLAN_COLLECT_METADATA 25
42 #define IFLA_VXLAN_GPE 27
43 #endif
44
45 #ifndef IFLA_GRE_MAX
46 #define IFLA_GRE_MAX 0
47 #endif
48 #if IFLA_GRE_MAX < 24
49 #define IFLA_GRE_ERSPAN_HWID 24
50 #endif
51
52 #ifndef IFLA_GENEVE_MAX
53 #define IFLA_GENEVE_MAX 0
54 #endif
55 #if IFLA_GENEVE_MAX < 10
56 #define IFLA_GENEVE_PORT 5
57 #define IFLA_GENEVE_COLLECT_METADATA 6
58 #define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10
59 #endif
60
61 static const struct nl_policy rtlink_policy[] = {
62 [IFLA_LINKINFO] = { .type = NL_A_NESTED },
63 };
64 static const struct nl_policy linkinfo_policy[] = {
65 [IFLA_INFO_KIND] = { .type = NL_A_STRING },
66 [IFLA_INFO_DATA] = { .type = NL_A_NESTED },
67 };
68 static const struct nl_policy vxlan_policy[] = {
69 [IFLA_VXLAN_COLLECT_METADATA] = { .type = NL_A_U8 },
70 [IFLA_VXLAN_LEARNING] = { .type = NL_A_U8 },
71 [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NL_A_U8 },
72 [IFLA_VXLAN_PORT] = { .type = NL_A_U16 },
73 [IFLA_VXLAN_GBP] = { .type = NL_A_FLAG, .optional = true },
74 [IFLA_VXLAN_GPE] = { .type = NL_A_FLAG, .optional = true },
75 };
76 static const struct nl_policy gre_policy[] = {
77 [IFLA_GRE_ERSPAN_HWID] = { .type = NL_A_U16 },
78 };
79 static const struct nl_policy geneve_policy[] = {
80 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NL_A_FLAG },
81 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NL_A_U8 },
82 [IFLA_GENEVE_PORT] = { .type = NL_A_U16 },
83 };
84
85 static const char *
86 vport_type_to_kind(enum ovs_vport_type type,
87 const struct netdev_tunnel_config *tnl_cfg)
88 {
89 switch (type) {
90 case OVS_VPORT_TYPE_VXLAN:
91 return "vxlan";
92 case OVS_VPORT_TYPE_GRE:
93 if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L3) {
94 return "gre";
95 } else if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L2) {
96 return "gretap";
97 } else {
98 return NULL;
99 }
100 case OVS_VPORT_TYPE_GENEVE:
101 return "geneve";
102 case OVS_VPORT_TYPE_ERSPAN:
103 return "erspan";
104 case OVS_VPORT_TYPE_IP6ERSPAN:
105 return "ip6erspan";
106 case OVS_VPORT_TYPE_IP6GRE:
107 return "ip6gre";
108 case OVS_VPORT_TYPE_NETDEV:
109 case OVS_VPORT_TYPE_INTERNAL:
110 case OVS_VPORT_TYPE_LISP:
111 case OVS_VPORT_TYPE_STT:
112 case OVS_VPORT_TYPE_UNSPEC:
113 case __OVS_VPORT_TYPE_MAX:
114 default:
115 break;
116 }
117
118 return NULL;
119 }
120
121 static int
122 rtnl_transact(uint32_t type, uint32_t flags, const char *name,
123 struct ofpbuf **reply)
124 {
125 struct ofpbuf request;
126 int err;
127
128 ofpbuf_init(&request, 0);
129 nl_msg_put_nlmsghdr(&request, 0, type, flags);
130 ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
131 nl_msg_put_string(&request, IFLA_IFNAME, name);
132
133 err = nl_transact(NETLINK_ROUTE, &request, reply);
134 ofpbuf_uninit(&request);
135
136 return err;
137 }
138
139 static int
140 dpif_netlink_rtnl_destroy(const char *name)
141 {
142 return rtnl_transact(RTM_DELLINK, NLM_F_REQUEST | NLM_F_ACK, name, NULL);
143 }
144
145 static int
146 dpif_netlink_rtnl_getlink(const char *name, struct ofpbuf **reply)
147 {
148 return rtnl_transact(RTM_GETLINK, NLM_F_REQUEST, name, reply);
149 }
150
151 static int
152 rtnl_policy_parse(const char *kind, struct ofpbuf *reply,
153 const struct nl_policy *policy,
154 struct nlattr *tnl_info[],
155 size_t policy_size)
156 {
157 struct nlattr *linkinfo[ARRAY_SIZE(linkinfo_policy)];
158 struct nlattr *rtlink[ARRAY_SIZE(rtlink_policy)];
159 int error = 0;
160
161 if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
162 rtlink_policy, rtlink, ARRAY_SIZE(rtlink_policy))
163 || !nl_parse_nested(rtlink[IFLA_LINKINFO], linkinfo_policy,
164 linkinfo, ARRAY_SIZE(linkinfo_policy))
165 || strcmp(nl_attr_get_string(linkinfo[IFLA_INFO_KIND]), kind)
166 || !nl_parse_nested(linkinfo[IFLA_INFO_DATA], policy,
167 tnl_info, policy_size)) {
168 error = EINVAL;
169 }
170
171 return error;
172 }
173
174 static int
175 dpif_netlink_rtnl_vxlan_verify(const struct netdev_tunnel_config *tnl_cfg,
176 const char *kind, struct ofpbuf *reply)
177 {
178 struct nlattr *vxlan[ARRAY_SIZE(vxlan_policy)];
179 int err;
180
181 err = rtnl_policy_parse(kind, reply, vxlan_policy, vxlan,
182 ARRAY_SIZE(vxlan_policy));
183 if (!err) {
184 if (0 != nl_attr_get_u8(vxlan[IFLA_VXLAN_LEARNING])
185 || 1 != nl_attr_get_u8(vxlan[IFLA_VXLAN_COLLECT_METADATA])
186 || 1 != nl_attr_get_u8(vxlan[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])
187 || (tnl_cfg->dst_port
188 != nl_attr_get_be16(vxlan[IFLA_VXLAN_PORT]))
189 || (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)
190 && !nl_attr_get_flag(vxlan[IFLA_VXLAN_GBP]))
191 || (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)
192 && !nl_attr_get_flag(vxlan[IFLA_VXLAN_GPE]))) {
193 err = EINVAL;
194 }
195 }
196
197 return err;
198 }
199
200 static int
201 dpif_netlink_rtnl_gre_verify(const struct netdev_tunnel_config OVS_UNUSED *tnl,
202 const char *kind, struct ofpbuf *reply)
203 {
204 struct nlattr *gre[ARRAY_SIZE(gre_policy)];
205 int err;
206
207 err = rtnl_policy_parse(kind, reply, gre_policy, gre,
208 ARRAY_SIZE(gre_policy));
209 if (!err) {
210 if (!nl_attr_get_u16(gre[IFLA_GRE_ERSPAN_HWID])) {
211 err = EINVAL;
212 }
213 }
214
215 return err;
216 }
217
218 static int
219 dpif_netlink_rtnl_geneve_verify(const struct netdev_tunnel_config *tnl_cfg,
220 const char *kind, struct ofpbuf *reply)
221 {
222 struct nlattr *geneve[ARRAY_SIZE(geneve_policy)];
223 int err;
224
225 err = rtnl_policy_parse(kind, reply, geneve_policy, geneve,
226 ARRAY_SIZE(geneve_policy));
227 if (!err) {
228 if (!nl_attr_get_flag(geneve[IFLA_GENEVE_COLLECT_METADATA])
229 || 1 != nl_attr_get_u8(geneve[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])
230 || (tnl_cfg->dst_port
231 != nl_attr_get_be16(geneve[IFLA_GENEVE_PORT]))) {
232 err = EINVAL;
233 }
234 }
235
236 return err;
237 }
238
239 static int
240 dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg,
241 enum ovs_vport_type type, const char *name)
242 {
243 struct ofpbuf *reply;
244 const char *kind;
245 int err;
246
247 kind = vport_type_to_kind(type, tnl_cfg);
248 if (!kind) {
249 return EOPNOTSUPP;
250 }
251
252 err = dpif_netlink_rtnl_getlink(name, &reply);
253 if (err) {
254 return err;
255 }
256
257 switch (type) {
258 case OVS_VPORT_TYPE_VXLAN:
259 err = dpif_netlink_rtnl_vxlan_verify(tnl_cfg, kind, reply);
260 break;
261 case OVS_VPORT_TYPE_GRE:
262 case OVS_VPORT_TYPE_ERSPAN:
263 case OVS_VPORT_TYPE_IP6ERSPAN:
264 case OVS_VPORT_TYPE_IP6GRE:
265 err = dpif_netlink_rtnl_gre_verify(tnl_cfg, kind, reply);
266 break;
267 case OVS_VPORT_TYPE_GENEVE:
268 err = dpif_netlink_rtnl_geneve_verify(tnl_cfg, kind, reply);
269 break;
270 case OVS_VPORT_TYPE_NETDEV:
271 case OVS_VPORT_TYPE_INTERNAL:
272 case OVS_VPORT_TYPE_LISP:
273 case OVS_VPORT_TYPE_STT:
274 case OVS_VPORT_TYPE_UNSPEC:
275 case __OVS_VPORT_TYPE_MAX:
276 default:
277 OVS_NOT_REACHED();
278 }
279
280 ofpbuf_delete(reply);
281 return err;
282 }
283
284 static int
285 dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg,
286 const char *name, enum ovs_vport_type type,
287 const char *kind, uint32_t flags)
288 {
289 enum {
290 /* For performance, we want to use the largest MTU that the system
291 * supports. Most existing tunnels will accept UINT16_MAX, treating it
292 * as the actual max MTU, but some do not. Thus, we use a slightly
293 * smaller value, that should always be safe yet does not noticeably
294 * reduce performance. */
295 MAX_MTU = 65000
296 };
297
298 size_t linkinfo_off, infodata_off;
299 struct ifinfomsg *ifinfo;
300 struct ofpbuf request;
301 int err;
302
303 ofpbuf_init(&request, 0);
304 nl_msg_put_nlmsghdr(&request, 0, RTM_NEWLINK, flags);
305 ifinfo = ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
306 ifinfo->ifi_change = ifinfo->ifi_flags = IFF_UP;
307 nl_msg_put_string(&request, IFLA_IFNAME, name);
308 nl_msg_put_u32(&request, IFLA_MTU, MAX_MTU);
309 linkinfo_off = nl_msg_start_nested(&request, IFLA_LINKINFO);
310 nl_msg_put_string(&request, IFLA_INFO_KIND, kind);
311 infodata_off = nl_msg_start_nested(&request, IFLA_INFO_DATA);
312
313 /* tunnel unique info */
314 switch (type) {
315 case OVS_VPORT_TYPE_VXLAN:
316 nl_msg_put_u8(&request, IFLA_VXLAN_LEARNING, 0);
317 nl_msg_put_u8(&request, IFLA_VXLAN_COLLECT_METADATA, 1);
318 nl_msg_put_u8(&request, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
319 if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) {
320 nl_msg_put_flag(&request, IFLA_VXLAN_GBP);
321 }
322 if (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) {
323 nl_msg_put_flag(&request, IFLA_VXLAN_GPE);
324 }
325 nl_msg_put_be16(&request, IFLA_VXLAN_PORT, tnl_cfg->dst_port);
326 break;
327 case OVS_VPORT_TYPE_GRE:
328 case OVS_VPORT_TYPE_ERSPAN:
329 case OVS_VPORT_TYPE_IP6ERSPAN:
330 case OVS_VPORT_TYPE_IP6GRE:
331 nl_msg_put_u16(&request, IFLA_GRE_ERSPAN_HWID, 0xdead);
332 break;
333 case OVS_VPORT_TYPE_GENEVE:
334 nl_msg_put_flag(&request, IFLA_GENEVE_COLLECT_METADATA);
335 nl_msg_put_u8(&request, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1);
336 nl_msg_put_be16(&request, IFLA_GENEVE_PORT, tnl_cfg->dst_port);
337 break;
338 case OVS_VPORT_TYPE_NETDEV:
339 case OVS_VPORT_TYPE_INTERNAL:
340 case OVS_VPORT_TYPE_LISP:
341 case OVS_VPORT_TYPE_STT:
342 case OVS_VPORT_TYPE_UNSPEC:
343 case __OVS_VPORT_TYPE_MAX:
344 default:
345 err = EOPNOTSUPP;
346 goto exit;
347 }
348
349 nl_msg_end_nested(&request, infodata_off);
350 nl_msg_end_nested(&request, linkinfo_off);
351
352 err = nl_transact(NETLINK_ROUTE, &request, NULL);
353 if (!err && (type == OVS_VPORT_TYPE_GRE ||
354 type == OVS_VPORT_TYPE_IP6GRE)) {
355 /* Work around a bug in kernel GRE driver, which ignores IFLA_MTU in
356 * RTM_NEWLINK, by setting the MTU again. See
357 * https://bugzilla.redhat.com/show_bug.cgi?id=1488484. */
358 ofpbuf_clear(&request);
359 nl_msg_put_nlmsghdr(&request, 0, RTM_SETLINK,
360 NLM_F_REQUEST | NLM_F_ACK);
361 ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
362 nl_msg_put_string(&request, IFLA_IFNAME, name);
363 nl_msg_put_u32(&request, IFLA_MTU, MAX_MTU);
364
365 int err2 = nl_transact(NETLINK_ROUTE, &request, NULL);
366 if (err2) {
367 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
368
369 VLOG_WARN_RL(&rl, "setting MTU of tunnel %s failed (%s)",
370 name, ovs_strerror(err2));
371 }
372 }
373
374 exit:
375 ofpbuf_uninit(&request);
376
377 return err;
378 }
379
380 int
381 dpif_netlink_rtnl_port_create(struct netdev *netdev)
382 {
383 const struct netdev_tunnel_config *tnl_cfg;
384 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
385 enum ovs_vport_type type;
386 const char *name;
387 const char *kind;
388 uint32_t flags;
389 int err;
390
391 type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
392 tnl_cfg = netdev_get_tunnel_config(netdev);
393 if (!tnl_cfg) {
394 return EOPNOTSUPP;
395 }
396
397 kind = vport_type_to_kind(type, tnl_cfg);
398 if (!kind) {
399 return EOPNOTSUPP;
400 }
401
402 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
403 flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL;
404
405 err = dpif_netlink_rtnl_create(tnl_cfg, name, type, kind, flags);
406
407 /* If the device exists, validate and/or attempt to recreate it. */
408 if (err == EEXIST) {
409 err = dpif_netlink_rtnl_verify(tnl_cfg, type, name);
410 if (!err) {
411 return 0;
412 }
413 err = dpif_netlink_rtnl_destroy(name);
414 if (err) {
415 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
416
417 VLOG_WARN_RL(&rl, "RTNL device %s exists and cannot be "
418 "deleted: %s", name, ovs_strerror(err));
419 return err;
420 }
421 err = dpif_netlink_rtnl_create(tnl_cfg, name, type, kind, flags);
422 }
423 if (err) {
424 return err;
425 }
426
427 err = dpif_netlink_rtnl_verify(tnl_cfg, type, name);
428 if (err) {
429 int err2 = dpif_netlink_rtnl_destroy(name);
430
431 if (err2) {
432 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
433
434 VLOG_WARN_RL(&rl, "Failed to delete device %s during rtnl port "
435 "creation: %s", name, ovs_strerror(err2));
436 }
437 }
438
439 return err;
440 }
441
442 int
443 dpif_netlink_rtnl_port_destroy(const char *name, const char *type)
444 {
445 switch (netdev_to_ovs_vport_type(type)) {
446 case OVS_VPORT_TYPE_VXLAN:
447 case OVS_VPORT_TYPE_GRE:
448 case OVS_VPORT_TYPE_GENEVE:
449 case OVS_VPORT_TYPE_ERSPAN:
450 case OVS_VPORT_TYPE_IP6ERSPAN:
451 case OVS_VPORT_TYPE_IP6GRE:
452 return dpif_netlink_rtnl_destroy(name);
453 case OVS_VPORT_TYPE_NETDEV:
454 case OVS_VPORT_TYPE_INTERNAL:
455 case OVS_VPORT_TYPE_LISP:
456 case OVS_VPORT_TYPE_STT:
457 case OVS_VPORT_TYPE_UNSPEC:
458 case __OVS_VPORT_TYPE_MAX:
459 default:
460 return EOPNOTSUPP;
461 }
462 return 0;
463 }
464
465 /**
466 * Probe for whether the modules are out-of-tree (openvswitch) or in-tree
467 * (upstream kernel).
468 *
469 * We probe for "ovs_geneve" via rtnetlink. As long as this returns something
470 * other than EOPNOTSUPP we know that the module in use is the out-of-tree one.
471 * This will be used to determine which netlink interface to use when creating
472 * ports; rtnetlink or compat/genetlink.
473 *
474 * See ovs_tunnels_out_of_tree
475 */
476 bool
477 dpif_netlink_rtnl_probe_oot_tunnels(void)
478 {
479 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
480 struct netdev *netdev = NULL;
481 bool out_of_tree = false;
482 const char *name;
483 int error;
484
485 error = netdev_open("ovs-system-probe", "geneve", &netdev);
486 if (!error) {
487 struct ofpbuf *reply;
488 const struct netdev_tunnel_config *tnl_cfg;
489
490 tnl_cfg = netdev_get_tunnel_config(netdev);
491 if (!tnl_cfg) {
492 return true;
493 }
494
495 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
496
497 /* The geneve module exists when ovs-vswitchd crashes
498 * and restarts, handle the case here.
499 */
500 error = dpif_netlink_rtnl_getlink(name, &reply);
501 if (!error) {
502
503 struct nlattr *linkinfo[ARRAY_SIZE(linkinfo_policy)];
504 struct nlattr *rtlink[ARRAY_SIZE(rtlink_policy)];
505 const char *kind;
506
507 if (!nl_policy_parse(reply,
508 NLMSG_HDRLEN + sizeof(struct ifinfomsg),
509 rtlink_policy, rtlink,
510 ARRAY_SIZE(rtlink_policy))
511 || !nl_parse_nested(rtlink[IFLA_LINKINFO], linkinfo_policy,
512 linkinfo, ARRAY_SIZE(linkinfo_policy))) {
513 VLOG_ABORT("Error fetching Geneve tunnel device %s "
514 "linkinfo", name);
515 }
516
517 kind = nl_attr_get_string(linkinfo[IFLA_INFO_KIND]);
518
519 if (!strcmp(kind, "ovs_geneve")) {
520 out_of_tree = true;
521 } else if (!strcmp(kind, "geneve")) {
522 out_of_tree = false;
523 } else {
524 VLOG_ABORT("Geneve tunnel device %s with kind %s"
525 " not supported", name, kind);
526 }
527
528 ofpbuf_delete(reply);
529 netdev_close(netdev);
530
531 return out_of_tree;
532 }
533
534 error = dpif_netlink_rtnl_create(tnl_cfg, name, OVS_VPORT_TYPE_GENEVE,
535 "ovs_geneve",
536 (NLM_F_REQUEST | NLM_F_ACK
537 | NLM_F_CREATE));
538 if (error != EOPNOTSUPP) {
539 if (!error) {
540 dpif_netlink_rtnl_destroy(name);
541 }
542 out_of_tree = true;
543 }
544 netdev_close(netdev);
545 }
546
547 return out_of_tree;
548 }