2 * Copyright (c) 2017 Red Hat, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "dpif-netlink-rtnl.h"
23 #include <linux/rtnetlink.h>
25 #include "dpif-netlink.h"
26 #include "netdev-vport.h"
27 #include "netlink-socket.h"
28 #include "openvswitch/vlog.h"
30 VLOG_DEFINE_THIS_MODULE(dpif_netlink_rtnl
);
32 /* On some older systems, these enums are not defined. */
33 #ifndef IFLA_VXLAN_MAX
34 #define IFLA_VXLAN_MAX 0
36 #if IFLA_VXLAN_MAX < 27
37 #define IFLA_VXLAN_LEARNING 7
38 #define IFLA_VXLAN_PORT 15
39 #define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
40 #define IFLA_VXLAN_GBP 23
41 #define IFLA_VXLAN_COLLECT_METADATA 25
42 #define IFLA_VXLAN_GPE 27
46 #define IFLA_GRE_MAX 0
49 #define IFLA_GRE_ERSPAN_HWID 24
52 #ifndef IFLA_GENEVE_MAX
53 #define IFLA_GENEVE_MAX 0
55 #if IFLA_GENEVE_MAX < 10
56 #define IFLA_GENEVE_PORT 5
57 #define IFLA_GENEVE_COLLECT_METADATA 6
58 #define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10
61 static const struct nl_policy rtlink_policy
[] = {
62 [IFLA_LINKINFO
] = { .type
= NL_A_NESTED
},
64 static const struct nl_policy linkinfo_policy
[] = {
65 [IFLA_INFO_KIND
] = { .type
= NL_A_STRING
},
66 [IFLA_INFO_DATA
] = { .type
= NL_A_NESTED
},
68 static const struct nl_policy vxlan_policy
[] = {
69 [IFLA_VXLAN_COLLECT_METADATA
] = { .type
= NL_A_U8
},
70 [IFLA_VXLAN_LEARNING
] = { .type
= NL_A_U8
},
71 [IFLA_VXLAN_UDP_ZERO_CSUM6_RX
] = { .type
= NL_A_U8
},
72 [IFLA_VXLAN_PORT
] = { .type
= NL_A_U16
},
73 [IFLA_VXLAN_GBP
] = { .type
= NL_A_FLAG
, .optional
= true },
74 [IFLA_VXLAN_GPE
] = { .type
= NL_A_FLAG
, .optional
= true },
76 static const struct nl_policy gre_policy
[] = {
77 [IFLA_GRE_ERSPAN_HWID
] = { .type
= NL_A_U16
},
79 static const struct nl_policy geneve_policy
[] = {
80 [IFLA_GENEVE_COLLECT_METADATA
] = { .type
= NL_A_FLAG
},
81 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX
] = { .type
= NL_A_U8
},
82 [IFLA_GENEVE_PORT
] = { .type
= NL_A_U16
},
86 vport_type_to_kind(enum ovs_vport_type type
,
87 const struct netdev_tunnel_config
*tnl_cfg
)
90 case OVS_VPORT_TYPE_VXLAN
:
92 case OVS_VPORT_TYPE_GRE
:
93 if (tnl_cfg
->pt_mode
== NETDEV_PT_LEGACY_L3
) {
95 } else if (tnl_cfg
->pt_mode
== NETDEV_PT_LEGACY_L2
) {
100 case OVS_VPORT_TYPE_GENEVE
:
102 case OVS_VPORT_TYPE_ERSPAN
:
104 case OVS_VPORT_TYPE_IP6ERSPAN
:
106 case OVS_VPORT_TYPE_IP6GRE
:
108 case OVS_VPORT_TYPE_NETDEV
:
109 case OVS_VPORT_TYPE_INTERNAL
:
110 case OVS_VPORT_TYPE_LISP
:
111 case OVS_VPORT_TYPE_STT
:
112 case OVS_VPORT_TYPE_UNSPEC
:
113 case __OVS_VPORT_TYPE_MAX
:
122 rtnl_transact(uint32_t type
, uint32_t flags
, const char *name
,
123 struct ofpbuf
**reply
)
125 struct ofpbuf request
;
128 ofpbuf_init(&request
, 0);
129 nl_msg_put_nlmsghdr(&request
, 0, type
, flags
);
130 ofpbuf_put_zeros(&request
, sizeof(struct ifinfomsg
));
131 nl_msg_put_string(&request
, IFLA_IFNAME
, name
);
133 err
= nl_transact(NETLINK_ROUTE
, &request
, reply
);
134 ofpbuf_uninit(&request
);
140 dpif_netlink_rtnl_destroy(const char *name
)
142 return rtnl_transact(RTM_DELLINK
, NLM_F_REQUEST
| NLM_F_ACK
, name
, NULL
);
146 dpif_netlink_rtnl_getlink(const char *name
, struct ofpbuf
**reply
)
148 return rtnl_transact(RTM_GETLINK
, NLM_F_REQUEST
, name
, reply
);
152 rtnl_policy_parse(const char *kind
, struct ofpbuf
*reply
,
153 const struct nl_policy
*policy
,
154 struct nlattr
*tnl_info
[],
157 struct nlattr
*linkinfo
[ARRAY_SIZE(linkinfo_policy
)];
158 struct nlattr
*rtlink
[ARRAY_SIZE(rtlink_policy
)];
161 if (!nl_policy_parse(reply
, NLMSG_HDRLEN
+ sizeof(struct ifinfomsg
),
162 rtlink_policy
, rtlink
, ARRAY_SIZE(rtlink_policy
))
163 || !nl_parse_nested(rtlink
[IFLA_LINKINFO
], linkinfo_policy
,
164 linkinfo
, ARRAY_SIZE(linkinfo_policy
))
165 || strcmp(nl_attr_get_string(linkinfo
[IFLA_INFO_KIND
]), kind
)
166 || !nl_parse_nested(linkinfo
[IFLA_INFO_DATA
], policy
,
167 tnl_info
, policy_size
)) {
175 dpif_netlink_rtnl_vxlan_verify(const struct netdev_tunnel_config
*tnl_cfg
,
176 const char *kind
, struct ofpbuf
*reply
)
178 struct nlattr
*vxlan
[ARRAY_SIZE(vxlan_policy
)];
181 err
= rtnl_policy_parse(kind
, reply
, vxlan_policy
, vxlan
,
182 ARRAY_SIZE(vxlan_policy
));
184 if (0 != nl_attr_get_u8(vxlan
[IFLA_VXLAN_LEARNING
])
185 || 1 != nl_attr_get_u8(vxlan
[IFLA_VXLAN_COLLECT_METADATA
])
186 || 1 != nl_attr_get_u8(vxlan
[IFLA_VXLAN_UDP_ZERO_CSUM6_RX
])
187 || (tnl_cfg
->dst_port
188 != nl_attr_get_be16(vxlan
[IFLA_VXLAN_PORT
]))
189 || (tnl_cfg
->exts
& (1 << OVS_VXLAN_EXT_GBP
)
190 && !nl_attr_get_flag(vxlan
[IFLA_VXLAN_GBP
]))
191 || (tnl_cfg
->exts
& (1 << OVS_VXLAN_EXT_GPE
)
192 && !nl_attr_get_flag(vxlan
[IFLA_VXLAN_GPE
]))) {
201 dpif_netlink_rtnl_gre_verify(const struct netdev_tunnel_config OVS_UNUSED
*tnl
,
202 const char *kind
, struct ofpbuf
*reply
)
204 struct nlattr
*gre
[ARRAY_SIZE(gre_policy
)];
207 err
= rtnl_policy_parse(kind
, reply
, gre_policy
, gre
,
208 ARRAY_SIZE(gre_policy
));
210 if (!nl_attr_get_u16(gre
[IFLA_GRE_ERSPAN_HWID
])) {
219 dpif_netlink_rtnl_geneve_verify(const struct netdev_tunnel_config
*tnl_cfg
,
220 const char *kind
, struct ofpbuf
*reply
)
222 struct nlattr
*geneve
[ARRAY_SIZE(geneve_policy
)];
225 err
= rtnl_policy_parse(kind
, reply
, geneve_policy
, geneve
,
226 ARRAY_SIZE(geneve_policy
));
228 if (!nl_attr_get_flag(geneve
[IFLA_GENEVE_COLLECT_METADATA
])
229 || 1 != nl_attr_get_u8(geneve
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
])
230 || (tnl_cfg
->dst_port
231 != nl_attr_get_be16(geneve
[IFLA_GENEVE_PORT
]))) {
240 dpif_netlink_rtnl_verify(const struct netdev_tunnel_config
*tnl_cfg
,
241 enum ovs_vport_type type
, const char *name
)
243 struct ofpbuf
*reply
;
247 kind
= vport_type_to_kind(type
, tnl_cfg
);
252 err
= dpif_netlink_rtnl_getlink(name
, &reply
);
258 case OVS_VPORT_TYPE_VXLAN
:
259 err
= dpif_netlink_rtnl_vxlan_verify(tnl_cfg
, kind
, reply
);
261 case OVS_VPORT_TYPE_GRE
:
262 case OVS_VPORT_TYPE_ERSPAN
:
263 case OVS_VPORT_TYPE_IP6ERSPAN
:
264 case OVS_VPORT_TYPE_IP6GRE
:
265 err
= dpif_netlink_rtnl_gre_verify(tnl_cfg
, kind
, reply
);
267 case OVS_VPORT_TYPE_GENEVE
:
268 err
= dpif_netlink_rtnl_geneve_verify(tnl_cfg
, kind
, reply
);
270 case OVS_VPORT_TYPE_NETDEV
:
271 case OVS_VPORT_TYPE_INTERNAL
:
272 case OVS_VPORT_TYPE_LISP
:
273 case OVS_VPORT_TYPE_STT
:
274 case OVS_VPORT_TYPE_UNSPEC
:
275 case __OVS_VPORT_TYPE_MAX
:
280 ofpbuf_delete(reply
);
285 dpif_netlink_rtnl_create(const struct netdev_tunnel_config
*tnl_cfg
,
286 const char *name
, enum ovs_vport_type type
,
287 const char *kind
, uint32_t flags
)
290 /* For performance, we want to use the largest MTU that the system
291 * supports. Most existing tunnels will accept UINT16_MAX, treating it
292 * as the actual max MTU, but some do not. Thus, we use a slightly
293 * smaller value, that should always be safe yet does not noticeably
294 * reduce performance. */
298 size_t linkinfo_off
, infodata_off
;
299 struct ifinfomsg
*ifinfo
;
300 struct ofpbuf request
;
303 ofpbuf_init(&request
, 0);
304 nl_msg_put_nlmsghdr(&request
, 0, RTM_NEWLINK
, flags
);
305 ifinfo
= ofpbuf_put_zeros(&request
, sizeof(struct ifinfomsg
));
306 ifinfo
->ifi_change
= ifinfo
->ifi_flags
= IFF_UP
;
307 nl_msg_put_string(&request
, IFLA_IFNAME
, name
);
308 nl_msg_put_u32(&request
, IFLA_MTU
, MAX_MTU
);
309 linkinfo_off
= nl_msg_start_nested(&request
, IFLA_LINKINFO
);
310 nl_msg_put_string(&request
, IFLA_INFO_KIND
, kind
);
311 infodata_off
= nl_msg_start_nested(&request
, IFLA_INFO_DATA
);
313 /* tunnel unique info */
315 case OVS_VPORT_TYPE_VXLAN
:
316 nl_msg_put_u8(&request
, IFLA_VXLAN_LEARNING
, 0);
317 nl_msg_put_u8(&request
, IFLA_VXLAN_COLLECT_METADATA
, 1);
318 nl_msg_put_u8(&request
, IFLA_VXLAN_UDP_ZERO_CSUM6_RX
, 1);
319 if (tnl_cfg
->exts
& (1 << OVS_VXLAN_EXT_GBP
)) {
320 nl_msg_put_flag(&request
, IFLA_VXLAN_GBP
);
322 if (tnl_cfg
->exts
& (1 << OVS_VXLAN_EXT_GPE
)) {
323 nl_msg_put_flag(&request
, IFLA_VXLAN_GPE
);
325 nl_msg_put_be16(&request
, IFLA_VXLAN_PORT
, tnl_cfg
->dst_port
);
327 case OVS_VPORT_TYPE_GRE
:
328 case OVS_VPORT_TYPE_ERSPAN
:
329 case OVS_VPORT_TYPE_IP6ERSPAN
:
330 case OVS_VPORT_TYPE_IP6GRE
:
331 nl_msg_put_u16(&request
, IFLA_GRE_ERSPAN_HWID
, 0xdead);
333 case OVS_VPORT_TYPE_GENEVE
:
334 nl_msg_put_flag(&request
, IFLA_GENEVE_COLLECT_METADATA
);
335 nl_msg_put_u8(&request
, IFLA_GENEVE_UDP_ZERO_CSUM6_RX
, 1);
336 nl_msg_put_be16(&request
, IFLA_GENEVE_PORT
, tnl_cfg
->dst_port
);
338 case OVS_VPORT_TYPE_NETDEV
:
339 case OVS_VPORT_TYPE_INTERNAL
:
340 case OVS_VPORT_TYPE_LISP
:
341 case OVS_VPORT_TYPE_STT
:
342 case OVS_VPORT_TYPE_UNSPEC
:
343 case __OVS_VPORT_TYPE_MAX
:
349 nl_msg_end_nested(&request
, infodata_off
);
350 nl_msg_end_nested(&request
, linkinfo_off
);
352 err
= nl_transact(NETLINK_ROUTE
, &request
, NULL
);
353 if (!err
&& (type
== OVS_VPORT_TYPE_GRE
||
354 type
== OVS_VPORT_TYPE_IP6GRE
)) {
355 /* Work around a bug in kernel GRE driver, which ignores IFLA_MTU in
356 * RTM_NEWLINK, by setting the MTU again. See
357 * https://bugzilla.redhat.com/show_bug.cgi?id=1488484. */
358 ofpbuf_clear(&request
);
359 nl_msg_put_nlmsghdr(&request
, 0, RTM_SETLINK
,
360 NLM_F_REQUEST
| NLM_F_ACK
);
361 ofpbuf_put_zeros(&request
, sizeof(struct ifinfomsg
));
362 nl_msg_put_string(&request
, IFLA_IFNAME
, name
);
363 nl_msg_put_u32(&request
, IFLA_MTU
, MAX_MTU
);
365 int err2
= nl_transact(NETLINK_ROUTE
, &request
, NULL
);
367 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
369 VLOG_WARN_RL(&rl
, "setting MTU of tunnel %s failed (%s)",
370 name
, ovs_strerror(err2
));
375 ofpbuf_uninit(&request
);
381 dpif_netlink_rtnl_port_create(struct netdev
*netdev
)
383 const struct netdev_tunnel_config
*tnl_cfg
;
384 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
385 enum ovs_vport_type type
;
391 type
= netdev_to_ovs_vport_type(netdev_get_type(netdev
));
392 tnl_cfg
= netdev_get_tunnel_config(netdev
);
397 kind
= vport_type_to_kind(type
, tnl_cfg
);
402 name
= netdev_vport_get_dpif_port(netdev
, namebuf
, sizeof namebuf
);
403 flags
= NLM_F_REQUEST
| NLM_F_ACK
| NLM_F_CREATE
| NLM_F_EXCL
;
405 err
= dpif_netlink_rtnl_create(tnl_cfg
, name
, type
, kind
, flags
);
407 /* If the device exists, validate and/or attempt to recreate it. */
409 err
= dpif_netlink_rtnl_verify(tnl_cfg
, type
, name
);
413 err
= dpif_netlink_rtnl_destroy(name
);
415 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
417 VLOG_WARN_RL(&rl
, "RTNL device %s exists and cannot be "
418 "deleted: %s", name
, ovs_strerror(err
));
421 err
= dpif_netlink_rtnl_create(tnl_cfg
, name
, type
, kind
, flags
);
427 err
= dpif_netlink_rtnl_verify(tnl_cfg
, type
, name
);
429 int err2
= dpif_netlink_rtnl_destroy(name
);
432 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
434 VLOG_WARN_RL(&rl
, "Failed to delete device %s during rtnl port "
435 "creation: %s", name
, ovs_strerror(err2
));
443 dpif_netlink_rtnl_port_destroy(const char *name
, const char *type
)
445 switch (netdev_to_ovs_vport_type(type
)) {
446 case OVS_VPORT_TYPE_VXLAN
:
447 case OVS_VPORT_TYPE_GRE
:
448 case OVS_VPORT_TYPE_GENEVE
:
449 case OVS_VPORT_TYPE_ERSPAN
:
450 case OVS_VPORT_TYPE_IP6ERSPAN
:
451 case OVS_VPORT_TYPE_IP6GRE
:
452 return dpif_netlink_rtnl_destroy(name
);
453 case OVS_VPORT_TYPE_NETDEV
:
454 case OVS_VPORT_TYPE_INTERNAL
:
455 case OVS_VPORT_TYPE_LISP
:
456 case OVS_VPORT_TYPE_STT
:
457 case OVS_VPORT_TYPE_UNSPEC
:
458 case __OVS_VPORT_TYPE_MAX
:
466 * Probe for whether the modules are out-of-tree (openvswitch) or in-tree
469 * We probe for "ovs_geneve" via rtnetlink. As long as this returns something
470 * other than EOPNOTSUPP we know that the module in use is the out-of-tree one.
471 * This will be used to determine which netlink interface to use when creating
472 * ports; rtnetlink or compat/genetlink.
474 * See ovs_tunnels_out_of_tree
477 dpif_netlink_rtnl_probe_oot_tunnels(void)
479 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
480 struct netdev
*netdev
= NULL
;
481 bool out_of_tree
= false;
485 error
= netdev_open("ovs-system-probe", "geneve", &netdev
);
487 struct ofpbuf
*reply
;
488 const struct netdev_tunnel_config
*tnl_cfg
;
490 tnl_cfg
= netdev_get_tunnel_config(netdev
);
495 name
= netdev_vport_get_dpif_port(netdev
, namebuf
, sizeof namebuf
);
497 /* The geneve module exists when ovs-vswitchd crashes
498 * and restarts, handle the case here.
500 error
= dpif_netlink_rtnl_getlink(name
, &reply
);
503 struct nlattr
*linkinfo
[ARRAY_SIZE(linkinfo_policy
)];
504 struct nlattr
*rtlink
[ARRAY_SIZE(rtlink_policy
)];
507 if (!nl_policy_parse(reply
,
508 NLMSG_HDRLEN
+ sizeof(struct ifinfomsg
),
509 rtlink_policy
, rtlink
,
510 ARRAY_SIZE(rtlink_policy
))
511 || !nl_parse_nested(rtlink
[IFLA_LINKINFO
], linkinfo_policy
,
512 linkinfo
, ARRAY_SIZE(linkinfo_policy
))) {
513 VLOG_ABORT("Error fetching Geneve tunnel device %s "
517 kind
= nl_attr_get_string(linkinfo
[IFLA_INFO_KIND
]);
519 if (!strcmp(kind
, "ovs_geneve")) {
521 } else if (!strcmp(kind
, "geneve")) {
524 VLOG_ABORT("Geneve tunnel device %s with kind %s"
525 " not supported", name
, kind
);
528 ofpbuf_delete(reply
);
529 netdev_close(netdev
);
534 error
= dpif_netlink_rtnl_create(tnl_cfg
, name
, OVS_VPORT_TYPE_GENEVE
,
536 (NLM_F_REQUEST
| NLM_F_ACK
538 if (error
!= EOPNOTSUPP
) {
540 dpif_netlink_rtnl_destroy(name
);
544 netdev_close(netdev
);