2 * Copyright (c) 2017 Red Hat, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "dpif-netlink-rtnl.h"
23 #include <linux/rtnetlink.h>
25 #include "dpif-netlink.h"
26 #include "netdev-vport.h"
27 #include "netlink-socket.h"
28 #include "openvswitch/vlog.h"
/* Names this file's logging module "dpif_netlink_rtnl".  Presumably the
 * VLOG_WARN_RL() and VLOG_ABORT() calls in this file log under that name;
 * confirm against openvswitch/vlog.h. */
30 VLOG_DEFINE_THIS_MODULE(dpif_netlink_rtnl
);
32 /* On some older systems, these enums are not defined. */
/* Fallback numeric IDs for the vxlan, gre and geneve link attributes that
 * this file reads and writes.  IFLA_VXLAN_MAX and IFLA_GENEVE_MAX are given
 * the value 0 when they are not already defined, so the "#if ... < N" tests
 * that follow them select the fallback IDs in that case.
 *
 * NOTE(review): the #endif lines and the conditionals around the IFLA_GRE_*
 * defines are not visible in this view of the file. */
33 #ifndef IFLA_VXLAN_MAX
34 #define IFLA_VXLAN_MAX 0
36 #if IFLA_VXLAN_MAX < 27
37 #define IFLA_VXLAN_LEARNING 7
38 #define IFLA_VXLAN_PORT 15
39 #define IFLA_VXLAN_UDP_ZERO_CSUM6_RX 20
40 #define IFLA_VXLAN_GBP 23
41 #define IFLA_VXLAN_COLLECT_METADATA 25
42 #define IFLA_VXLAN_GPE 27
46 #define IFLA_GRE_MAX 0
49 #define IFLA_GRE_COLLECT_METADATA 18
52 #ifndef IFLA_GENEVE_MAX
53 #define IFLA_GENEVE_MAX 0
55 #if IFLA_GENEVE_MAX < 10
56 #define IFLA_GENEVE_PORT 5
57 #define IFLA_GENEVE_COLLECT_METADATA 6
58 #define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10
/* Parse policy for the attributes that follow the struct ifinfomsg header of
 * an rtnetlink link message (see its use in rtnl_policy_parse()).  Only
 * IFLA_LINKINFO is listed; it must be a nested attribute and is not marked
 * optional. */
61 static const struct nl_policy rtlink_policy
[] = {
62 [IFLA_LINKINFO
] = { .type
= NL_A_NESTED
},
/* Parse policy for the attributes nested inside IFLA_LINKINFO: the link kind
 * as a string (IFLA_INFO_KIND) and the kind-specific settings as a further
 * nested attribute (IFLA_INFO_DATA).  Neither entry is marked optional. */
64 static const struct nl_policy linkinfo_policy
[] = {
65 [IFLA_INFO_KIND
] = { .type
= NL_A_STRING
},
66 [IFLA_INFO_DATA
] = { .type
= NL_A_NESTED
},
/* Parse policy for the vxlan settings that dpif_netlink_rtnl_vxlan_verify()
 * extracts from IFLA_INFO_DATA.  Collect-metadata, learning and UDP zero
 * csum6 rx are 8-bit values and the port is a 16-bit value.  The GBP and GPE
 * flags are the only entries marked optional. */
68 static const struct nl_policy vxlan_policy
[] = {
69 [IFLA_VXLAN_COLLECT_METADATA
] = { .type
= NL_A_U8
},
70 [IFLA_VXLAN_LEARNING
] = { .type
= NL_A_U8
},
71 [IFLA_VXLAN_UDP_ZERO_CSUM6_RX
] = { .type
= NL_A_U8
},
72 [IFLA_VXLAN_PORT
] = { .type
= NL_A_U16
},
73 [IFLA_VXLAN_GBP
] = { .type
= NL_A_FLAG
, .optional
= true },
74 [IFLA_VXLAN_GPE
] = { .type
= NL_A_FLAG
, .optional
= true },
/* Parse policy for the gre settings that dpif_netlink_rtnl_gre_verify()
 * extracts from IFLA_INFO_DATA: a single collect-metadata flag, not marked
 * optional. */
76 static const struct nl_policy gre_policy
[] = {
77 [IFLA_GRE_COLLECT_METADATA
] = { .type
= NL_A_FLAG
},
/* Parse policy for the geneve settings that
 * dpif_netlink_rtnl_geneve_verify() extracts from IFLA_INFO_DATA: the
 * collect-metadata flag, the 8-bit UDP zero csum6 rx value and the 16-bit
 * port.  None of the entries is marked optional. */
79 static const struct nl_policy geneve_policy
[] = {
80 [IFLA_GENEVE_COLLECT_METADATA
] = { .type
= NL_A_FLAG
},
81 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX
] = { .type
= NL_A_U8
},
82 [IFLA_GENEVE_PORT
] = { .type
= NL_A_U16
},
/* Chooses the rtnetlink link kind for vport 'type'.  Callers in this file
 * store the result in 'kind' and pass it on as the IFLA_INFO_KIND string.
 *
 * VXLAN, GENEVE, ERSPAN, IP6ERSPAN and GTPU each have their own case.  GRE
 * and IP6GRE additionally branch on 'tnl_cfg->pt_mode', distinguishing
 * NETDEV_PT_LEGACY_L3 from NETDEV_PT_LEGACY_L2.  NETDEV, INTERNAL, LISP, STT,
 * UNSPEC and __OVS_VPORT_TYPE_MAX share one group of case labels.
 *
 * NOTE(review): the switch header, every return statement and the function's
 * return type are not visible in this view, so the value produced for each
 * case cannot be stated here. */
86 vport_type_to_kind(enum ovs_vport_type type
,
87 const struct netdev_tunnel_config
*tnl_cfg
)
90 case OVS_VPORT_TYPE_VXLAN
:
92 case OVS_VPORT_TYPE_GRE
:
93 if (tnl_cfg
->pt_mode
== NETDEV_PT_LEGACY_L3
) {
95 } else if (tnl_cfg
->pt_mode
== NETDEV_PT_LEGACY_L2
) {
100 case OVS_VPORT_TYPE_GENEVE
:
102 case OVS_VPORT_TYPE_ERSPAN
:
104 case OVS_VPORT_TYPE_IP6ERSPAN
:
106 case OVS_VPORT_TYPE_IP6GRE
:
107 if (tnl_cfg
->pt_mode
== NETDEV_PT_LEGACY_L2
) {
109 } else if (tnl_cfg
->pt_mode
== NETDEV_PT_LEGACY_L3
) {
114 case OVS_VPORT_TYPE_GTPU
:
116 case OVS_VPORT_TYPE_NETDEV
:
117 case OVS_VPORT_TYPE_INTERNAL
:
118 case OVS_VPORT_TYPE_LISP
:
119 case OVS_VPORT_TYPE_STT
:
120 case OVS_VPORT_TYPE_UNSPEC
:
121 case __OVS_VPORT_TYPE_MAX
:
/* Sends a single rtnetlink request that refers to the interface 'name'.
 *
 * The request consists of a netlink header carrying message 'type' and
 * 'flags', a zero-filled struct ifinfomsg, and an IFLA_IFNAME string
 * attribute holding 'name'.  It is passed to nl_transact() on NETLINK_ROUTE
 * together with 'reply', and the outcome is stored in 'err'.  The request
 * buffer is released with ofpbuf_uninit() once the transaction is done.
 *
 * NOTE(review): the declaration of 'err', the return statement and the
 * return type are not visible in this view. */
130 rtnl_transact(uint32_t type
, uint32_t flags
, const char *name
,
131 struct ofpbuf
**reply
)
133 struct ofpbuf request
;
136 ofpbuf_init(&request
, 0);
137 nl_msg_put_nlmsghdr(&request
, 0, type
, flags
);
138 ofpbuf_put_zeros(&request
, sizeof(struct ifinfomsg
));
139 nl_msg_put_string(&request
, IFLA_IFNAME
, name
);
141 err
= nl_transact(NETLINK_ROUTE
, &request
, reply
);
142 ofpbuf_uninit(&request
);
/* Asks rtnetlink to delete the link called 'name'.
 *
 * Issues RTM_DELLINK with NLM_F_REQUEST | NLM_F_ACK through rtnl_transact(),
 * passing NULL for the reply, and returns whatever rtnl_transact()
 * returns. */
148 dpif_netlink_rtnl_destroy(const char *name
)
150 return rtnl_transact(RTM_DELLINK
, NLM_F_REQUEST
| NLM_F_ACK
, name
, NULL
);
/* Queries rtnetlink for the link called 'name'.
 *
 * Issues RTM_GETLINK with NLM_F_REQUEST through rtnl_transact(), forwarding
 * 'reply' so that the answer can be handed back to the caller, and returns
 * whatever rtnl_transact() returns.  The callers in this file dispose of the
 * reply with ofpbuf_delete(). */
154 dpif_netlink_rtnl_getlink(const char *name
, struct ofpbuf
**reply
)
156 return rtnl_transact(RTM_GETLINK
, NLM_F_REQUEST
, name
, reply
);
/* Extracts the kind-specific tunnel attributes from the link message in
 * 'reply' into 'tnl_info'.
 *
 * The parse happens in four steps, and the condition below is true as soon
 * as one of them fails:
 *
 *   1. The attributes after the netlink header and struct ifinfomsg are
 *      parsed with rtlink_policy.
 *   2. IFLA_LINKINFO is parsed as a nested attribute with linkinfo_policy.
 *   3. The IFLA_INFO_KIND string is compared with 'kind'; a non-zero
 *      strcmp() result, i.e. a different kind, counts as failure.
 *   4. IFLA_INFO_DATA is parsed as a nested attribute with the caller's
 *      'policy', using 'policy_size' as the number of entries.
 *
 * NOTE(review): the declaration of the 'policy_size' parameter, the body of
 * the failure branch and the return statements are not visible in this
 * view. */
160 rtnl_policy_parse(const char *kind
, struct ofpbuf
*reply
,
161 const struct nl_policy
*policy
,
162 struct nlattr
*tnl_info
[],
165 struct nlattr
*linkinfo
[ARRAY_SIZE(linkinfo_policy
)];
166 struct nlattr
*rtlink
[ARRAY_SIZE(rtlink_policy
)];
169 if (!nl_policy_parse(reply
, NLMSG_HDRLEN
+ sizeof(struct ifinfomsg
),
170 rtlink_policy
, rtlink
, ARRAY_SIZE(rtlink_policy
))
171 || !nl_parse_nested(rtlink
[IFLA_LINKINFO
], linkinfo_policy
,
172 linkinfo
, ARRAY_SIZE(linkinfo_policy
))
173 || strcmp(nl_attr_get_string(linkinfo
[IFLA_INFO_KIND
]), kind
)
174 || !nl_parse_nested(linkinfo
[IFLA_INFO_DATA
], policy
,
175 tnl_info
, policy_size
)) {
/* Checks whether the vxlan link described by 'reply' carries the settings
 * that dpif_netlink_rtnl_create() requests for a vxlan device.
 *
 * The attributes are first extracted with rtnl_policy_parse() using
 * vxlan_policy.  The condition below is true, meaning a mismatch, when any
 * of these holds:
 *
 *   - learning is not 0,
 *   - collect-metadata is not 1,
 *   - UDP zero csum6 rx is not 1,
 *   - the port differs from 'tnl_cfg->dst_port',
 *   - the GBP bit is set in 'tnl_cfg->exts' but the GBP flag is absent,
 *   - the GPE bit is set in 'tnl_cfg->exts' but the GPE flag is absent.
 *
 * NOTE(review): the check on 'err' after parsing, the body of the mismatch
 * branch and the return statements are not visible in this view. */
183 dpif_netlink_rtnl_vxlan_verify(const struct netdev_tunnel_config
*tnl_cfg
,
184 const char *kind
, struct ofpbuf
*reply
)
186 struct nlattr
*vxlan
[ARRAY_SIZE(vxlan_policy
)];
189 err
= rtnl_policy_parse(kind
, reply
, vxlan_policy
, vxlan
,
190 ARRAY_SIZE(vxlan_policy
));
192 if (0 != nl_attr_get_u8(vxlan
[IFLA_VXLAN_LEARNING
])
193 || 1 != nl_attr_get_u8(vxlan
[IFLA_VXLAN_COLLECT_METADATA
])
194 || 1 != nl_attr_get_u8(vxlan
[IFLA_VXLAN_UDP_ZERO_CSUM6_RX
])
195 || (tnl_cfg
->dst_port
196 != nl_attr_get_be16(vxlan
[IFLA_VXLAN_PORT
]))
197 || (tnl_cfg
->exts
& (1 << OVS_VXLAN_EXT_GBP
)
198 && !nl_attr_get_flag(vxlan
[IFLA_VXLAN_GBP
]))
199 || (tnl_cfg
->exts
& (1 << OVS_VXLAN_EXT_GPE
)
200 && !nl_attr_get_flag(vxlan
[IFLA_VXLAN_GPE
]))) {
/* Checks whether the gre-family link described by 'reply' has the
 * collect-metadata flag set.
 *
 * The attributes are extracted with rtnl_policy_parse() using gre_policy;
 * the condition below is true, meaning a mismatch, when
 * IFLA_GRE_COLLECT_METADATA is absent.  The tunnel configuration argument is
 * marked OVS_UNUSED and is not consulted.
 *
 * NOTE(review): the check on 'err' after parsing, the body of the mismatch
 * branch and the return statements are not visible in this view. */
209 dpif_netlink_rtnl_gre_verify(const struct netdev_tunnel_config OVS_UNUSED
*tnl
,
210 const char *kind
, struct ofpbuf
*reply
)
212 struct nlattr
*gre
[ARRAY_SIZE(gre_policy
)];
215 err
= rtnl_policy_parse(kind
, reply
, gre_policy
, gre
,
216 ARRAY_SIZE(gre_policy
));
218 if (!nl_attr_get_flag(gre
[IFLA_GRE_COLLECT_METADATA
])) {
/* Checks whether the geneve link described by 'reply' carries the settings
 * that dpif_netlink_rtnl_create() requests for a geneve device.
 *
 * The attributes are extracted with rtnl_policy_parse() using
 * geneve_policy.  The condition below is true, meaning a mismatch, when the
 * collect-metadata flag is absent, when UDP zero csum6 rx is not 1, or when
 * the port differs from 'tnl_cfg->dst_port'.
 *
 * NOTE(review): the check on 'err' after parsing, the body of the mismatch
 * branch and the return statements are not visible in this view. */
227 dpif_netlink_rtnl_geneve_verify(const struct netdev_tunnel_config
*tnl_cfg
,
228 const char *kind
, struct ofpbuf
*reply
)
230 struct nlattr
*geneve
[ARRAY_SIZE(geneve_policy
)];
233 err
= rtnl_policy_parse(kind
, reply
, geneve_policy
, geneve
,
234 ARRAY_SIZE(geneve_policy
));
236 if (!nl_attr_get_flag(geneve
[IFLA_GENEVE_COLLECT_METADATA
])
237 || 1 != nl_attr_get_u8(geneve
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX
])
238 || (tnl_cfg
->dst_port
239 != nl_attr_get_be16(geneve
[IFLA_GENEVE_PORT
]))) {
/* Verifies the existing link 'name' against the tunnel configuration
 * 'tnl_cfg' for a vport of the given 'type'.
 *
 * The link kind is looked up with vport_type_to_kind(), the link is fetched
 * with dpif_netlink_rtnl_getlink(), and the reply is handed to the verifier
 * for the type:
 *
 *   - VXLAN                           -> dpif_netlink_rtnl_vxlan_verify()
 *   - GRE, ERSPAN, IP6ERSPAN, IP6GRE  -> dpif_netlink_rtnl_gre_verify()
 *   - GENEVE                          -> dpif_netlink_rtnl_geneve_verify()
 *
 * NETDEV, INTERNAL, LISP, STT, GTPU, UNSPEC and __OVS_VPORT_TYPE_MAX share a
 * final group of case labels.  The reply buffer is freed with
 * ofpbuf_delete() at the end.
 *
 * NOTE(review): the checks on 'kind' and 'err' between the steps, the switch
 * header, the handling of the final case group and the return statements are
 * not visible in this view. */
248 dpif_netlink_rtnl_verify(const struct netdev_tunnel_config
*tnl_cfg
,
249 enum ovs_vport_type type
, const char *name
)
251 struct ofpbuf
*reply
;
255 kind
= vport_type_to_kind(type
, tnl_cfg
);
260 err
= dpif_netlink_rtnl_getlink(name
, &reply
);
266 case OVS_VPORT_TYPE_VXLAN
:
267 err
= dpif_netlink_rtnl_vxlan_verify(tnl_cfg
, kind
, reply
);
269 case OVS_VPORT_TYPE_GRE
:
270 case OVS_VPORT_TYPE_ERSPAN
:
271 case OVS_VPORT_TYPE_IP6ERSPAN
:
272 case OVS_VPORT_TYPE_IP6GRE
:
273 err
= dpif_netlink_rtnl_gre_verify(tnl_cfg
, kind
, reply
);
275 case OVS_VPORT_TYPE_GENEVE
:
276 err
= dpif_netlink_rtnl_geneve_verify(tnl_cfg
, kind
, reply
);
278 case OVS_VPORT_TYPE_NETDEV
:
279 case OVS_VPORT_TYPE_INTERNAL
:
280 case OVS_VPORT_TYPE_LISP
:
281 case OVS_VPORT_TYPE_STT
:
282 case OVS_VPORT_TYPE_GTPU
:
283 case OVS_VPORT_TYPE_UNSPEC
:
284 case __OVS_VPORT_TYPE_MAX
:
289 ofpbuf_delete(reply
);
/* Sets the MTU of interface 'name' to 'mtu' through rtnetlink.
 *
 * 'request' is a caller-owned buffer that is cleared and reused here.  It is
 * filled with an RTM_SETLINK message (NLM_F_REQUEST | NLM_F_ACK) made of a
 * zero-filled struct ifinfomsg, an IFLA_IFNAME string and an IFLA_MTU
 * 32-bit value.  The message is sent on NETLINK_ROUTE with NULL for the
 * reply, and the result of nl_transact() is returned. */
294 rtnl_set_mtu(const char *name
, uint32_t mtu
, struct ofpbuf
*request
)
296 ofpbuf_clear(request
);
297 nl_msg_put_nlmsghdr(request
, 0, RTM_SETLINK
,
298 NLM_F_REQUEST
| NLM_F_ACK
);
299 ofpbuf_put_zeros(request
, sizeof(struct ifinfomsg
));
300 nl_msg_put_string(request
, IFLA_IFNAME
, name
);
301 nl_msg_put_u32(request
, IFLA_MTU
, mtu
);
303 return nl_transact(NETLINK_ROUTE
, request
, NULL
);
/* Creates the tunnel device 'name' of rtnetlink kind 'kind' by sending an
 * RTM_NEWLINK message with the caller's netlink 'flags'.
 *
 * The message contains:
 *
 *   - a struct ifinfomsg with IFF_UP set in both ifi_flags and ifi_change,
 *   - IFLA_IFNAME = 'name' and IFLA_MTU = MAX_MTU,
 *   - a nested IFLA_LINKINFO holding IFLA_INFO_KIND = 'kind' and a nested
 *     IFLA_INFO_DATA with the attributes for 'type':
 *       VXLAN:  learning 0, collect-metadata 1, UDP zero csum6 rx 1, the GBP
 *               and GPE flags when the matching bit is set in
 *               'tnl_cfg->exts', and the port from 'tnl_cfg->dst_port';
 *       GRE, ERSPAN, IP6ERSPAN, IP6GRE:  the collect-metadata flag;
 *       GENEVE: the collect-metadata flag, UDP zero csum6 rx 1 and the port
 *               from 'tnl_cfg->dst_port'.
 *
 * When the transaction succeeds for a GRE or IP6GRE device, the MTU is set a
 * second time with rtnl_set_mtu(); a further call with 1450 and a
 * rate-limited warning follow.  The request buffer is released at the end.
 *
 * NOTE(review): the definition of MAX_MTU, the switch header, the handling
 * of the remaining vport types, the conditions guarding the 1450 retry and
 * the warning, and the return statements are not visible in this view. */
307 dpif_netlink_rtnl_create(const struct netdev_tunnel_config
*tnl_cfg
,
308 const char *name
, enum ovs_vport_type type
,
309 const char *kind
, uint32_t flags
)
312 /* For performance, we want to use the largest MTU that the system
313 * supports. Most existing tunnels will accept UINT16_MAX, treating it
314 * as the actual max MTU, but some do not. Thus, we use a slightly
315 * smaller value, that should always be safe yet does not noticeably
316 * reduce performance. */
320 size_t linkinfo_off
, infodata_off
;
321 struct ifinfomsg
*ifinfo
;
322 struct ofpbuf request
;
325 ofpbuf_init(&request
, 0);
326 nl_msg_put_nlmsghdr(&request
, 0, RTM_NEWLINK
, flags
);
327 ifinfo
= ofpbuf_put_zeros(&request
, sizeof(struct ifinfomsg
));
328 ifinfo
->ifi_change
= ifinfo
->ifi_flags
= IFF_UP
;
329 nl_msg_put_string(&request
, IFLA_IFNAME
, name
);
330 nl_msg_put_u32(&request
, IFLA_MTU
, MAX_MTU
);
331 linkinfo_off
= nl_msg_start_nested(&request
, IFLA_LINKINFO
);
332 nl_msg_put_string(&request
, IFLA_INFO_KIND
, kind
);
333 infodata_off
= nl_msg_start_nested(&request
, IFLA_INFO_DATA
);
335 /* tunnel unique info */
337 case OVS_VPORT_TYPE_VXLAN
:
338 nl_msg_put_u8(&request
, IFLA_VXLAN_LEARNING
, 0);
339 nl_msg_put_u8(&request
, IFLA_VXLAN_COLLECT_METADATA
, 1);
340 nl_msg_put_u8(&request
, IFLA_VXLAN_UDP_ZERO_CSUM6_RX
, 1);
341 if (tnl_cfg
->exts
& (1 << OVS_VXLAN_EXT_GBP
)) {
342 nl_msg_put_flag(&request
, IFLA_VXLAN_GBP
);
344 if (tnl_cfg
->exts
& (1 << OVS_VXLAN_EXT_GPE
)) {
345 nl_msg_put_flag(&request
, IFLA_VXLAN_GPE
);
347 nl_msg_put_be16(&request
, IFLA_VXLAN_PORT
, tnl_cfg
->dst_port
);
349 case OVS_VPORT_TYPE_GRE
:
350 case OVS_VPORT_TYPE_ERSPAN
:
351 case OVS_VPORT_TYPE_IP6ERSPAN
:
352 case OVS_VPORT_TYPE_IP6GRE
:
353 nl_msg_put_flag(&request
, IFLA_GRE_COLLECT_METADATA
);
355 case OVS_VPORT_TYPE_GENEVE
:
356 nl_msg_put_flag(&request
, IFLA_GENEVE_COLLECT_METADATA
);
357 nl_msg_put_u8(&request
, IFLA_GENEVE_UDP_ZERO_CSUM6_RX
, 1);
358 nl_msg_put_be16(&request
, IFLA_GENEVE_PORT
, tnl_cfg
->dst_port
);
360 case OVS_VPORT_TYPE_NETDEV
:
361 case OVS_VPORT_TYPE_INTERNAL
:
362 case OVS_VPORT_TYPE_LISP
:
363 case OVS_VPORT_TYPE_STT
:
364 case OVS_VPORT_TYPE_GTPU
:
365 case OVS_VPORT_TYPE_UNSPEC
:
366 case __OVS_VPORT_TYPE_MAX
:
372 nl_msg_end_nested(&request
, infodata_off
);
373 nl_msg_end_nested(&request
, linkinfo_off
);
375 err
= nl_transact(NETLINK_ROUTE
, &request
, NULL
);
376 if (!err
&& (type
== OVS_VPORT_TYPE_GRE
||
377 type
== OVS_VPORT_TYPE_IP6GRE
)) {
378 /* Work around a bug in kernel GRE driver, which ignores IFLA_MTU in
379 * RTM_NEWLINK, by setting the MTU again. See
380 * https://bugzilla.redhat.com/show_bug.cgi?id=1488484.
382 * In case MAX_MTU exceeds the hardware max MTU, retry with a smaller value. */
383 int err2
= rtnl_set_mtu(name
, MAX_MTU
, &request
);
385 err2
= rtnl_set_mtu(name
, 1450, &request
);
388 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
390 VLOG_WARN_RL(&rl
, "setting MTU of tunnel %s failed (%s)",
391 name
, ovs_strerror(err2
));
396 ofpbuf_uninit(&request
);
/* Creates, through rtnetlink, the tunnel device that backs 'netdev'.
 *
 * The vport type is derived from the netdev's type string, the link kind
 * comes from vport_type_to_kind(), and the device name comes from
 * netdev_vport_get_dpif_port().  The first creation attempt uses
 * NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL.
 *
 * As the comment below says, an already-existing device is validated and/or
 * recreated: the visible calls are dpif_netlink_rtnl_verify(),
 * dpif_netlink_rtnl_destroy() (with a rate-limited warning about a device
 * that cannot be deleted) and a second dpif_netlink_rtnl_create().  The
 * device is then verified again, and a further destroy call logs a
 * rate-limited warning of its own.
 *
 * NOTE(review): the conditions that select each of these steps, the
 * declarations of 'kind', 'name', 'flags' and 'err', and the return
 * statements are not visible in this view. */
402 dpif_netlink_rtnl_port_create(struct netdev
*netdev
)
404 const struct netdev_tunnel_config
*tnl_cfg
;
405 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
406 enum ovs_vport_type type
;
412 type
= netdev_to_ovs_vport_type(netdev_get_type(netdev
));
413 tnl_cfg
= netdev_get_tunnel_config(netdev
);
418 kind
= vport_type_to_kind(type
, tnl_cfg
);
423 name
= netdev_vport_get_dpif_port(netdev
, namebuf
, sizeof namebuf
);
424 flags
= NLM_F_REQUEST
| NLM_F_ACK
| NLM_F_CREATE
| NLM_F_EXCL
;
426 err
= dpif_netlink_rtnl_create(tnl_cfg
, name
, type
, kind
, flags
);
428 /* If the device exists, validate and/or attempt to recreate it. */
430 err
= dpif_netlink_rtnl_verify(tnl_cfg
, type
, name
);
434 err
= dpif_netlink_rtnl_destroy(name
);
436 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
438 VLOG_WARN_RL(&rl
, "RTNL device %s exists and cannot be "
439 "deleted: %s", name
, ovs_strerror(err
));
442 err
= dpif_netlink_rtnl_create(tnl_cfg
, name
, type
, kind
, flags
);
448 err
= dpif_netlink_rtnl_verify(tnl_cfg
, type
, name
);
450 int err2
= dpif_netlink_rtnl_destroy(name
);
453 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
455 VLOG_WARN_RL(&rl
, "Failed to delete device %s during rtnl port "
456 "creation: %s", name
, ovs_strerror(err2
));
/* Removes the rtnetlink device 'name' that backs a port whose netdev type
 * string is 'type'.
 *
 * 'type' is converted with netdev_to_ovs_vport_type().  For VXLAN, GRE,
 * GENEVE, ERSPAN, IP6ERSPAN and IP6GRE the link is deleted with
 * dpif_netlink_rtnl_destroy() and its result is returned.  NETDEV, INTERNAL,
 * LISP, STT, GTPU, UNSPEC and __OVS_VPORT_TYPE_MAX share a second group of
 * case labels.
 *
 * NOTE(review): what the second group does, and what it returns, is not
 * visible in this view. */
464 dpif_netlink_rtnl_port_destroy(const char *name
, const char *type
)
466 switch (netdev_to_ovs_vport_type(type
)) {
467 case OVS_VPORT_TYPE_VXLAN
:
468 case OVS_VPORT_TYPE_GRE
:
469 case OVS_VPORT_TYPE_GENEVE
:
470 case OVS_VPORT_TYPE_ERSPAN
:
471 case OVS_VPORT_TYPE_IP6ERSPAN
:
472 case OVS_VPORT_TYPE_IP6GRE
:
473 return dpif_netlink_rtnl_destroy(name
);
474 case OVS_VPORT_TYPE_NETDEV
:
475 case OVS_VPORT_TYPE_INTERNAL
:
476 case OVS_VPORT_TYPE_LISP
:
477 case OVS_VPORT_TYPE_STT
:
478 case OVS_VPORT_TYPE_GTPU
:
479 case OVS_VPORT_TYPE_UNSPEC
:
480 case __OVS_VPORT_TYPE_MAX
:
488 * Probe for whether the modules are out-of-tree (openvswitch) or in-tree
491 * We probe for "ovs_geneve" via rtnetlink. As long as this returns something
492 * other than EOPNOTSUPP we know that the module in use is the out-of-tree one.
493 * This will be used to determine which netlink interface to use when creating
494 * ports; rtnetlink or compat/genetlink.
496 * See ovs_tunnels_out_of_tree
/* Implementation outline, based on the calls visible below:
 *
 *   1. A netdev named "ovs-system-probe" of type "geneve" is opened, and its
 *      tunnel configuration and dpif port name are fetched.
 *   2. The link is looked up with dpif_netlink_rtnl_getlink().  If a reply
 *      is parsed, its IFLA_INFO_KIND string is compared with "ovs_geneve"
 *      and with "geneve".  VLOG_ABORT() is called when the reply cannot be
 *      parsed or the kind is neither of the two.  The reply is deleted and
 *      the netdev closed on this path.
 *   3. Otherwise a geneve device is created with dpif_netlink_rtnl_create(),
 *      the error is compared with EOPNOTSUPP, the device is removed again
 *      with dpif_netlink_rtnl_destroy(), and the netdev is closed.
 *
 * 'out_of_tree' starts out false.
 *
 * NOTE(review): the conditions selecting steps 2 and 3, the assignments to
 * 'out_of_tree', the 'kind' argument passed to dpif_netlink_rtnl_create()
 * and the return statements are not visible in this view. */
499 dpif_netlink_rtnl_probe_oot_tunnels(void)
501 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
502 struct netdev
*netdev
= NULL
;
503 bool out_of_tree
= false;
507 error
= netdev_open("ovs-system-probe", "geneve", &netdev
);
509 struct ofpbuf
*reply
;
510 const struct netdev_tunnel_config
*tnl_cfg
;
512 tnl_cfg
= netdev_get_tunnel_config(netdev
);
517 name
= netdev_vport_get_dpif_port(netdev
, namebuf
, sizeof namebuf
);
519 /* The geneve module exists when ovs-vswitchd crashes
520 * and restarts, handle the case here.
522 error
= dpif_netlink_rtnl_getlink(name
, &reply
);
525 struct nlattr
*linkinfo
[ARRAY_SIZE(linkinfo_policy
)];
526 struct nlattr
*rtlink
[ARRAY_SIZE(rtlink_policy
)];
529 if (!nl_policy_parse(reply
,
530 NLMSG_HDRLEN
+ sizeof(struct ifinfomsg
),
531 rtlink_policy
, rtlink
,
532 ARRAY_SIZE(rtlink_policy
))
533 || !nl_parse_nested(rtlink
[IFLA_LINKINFO
], linkinfo_policy
,
534 linkinfo
, ARRAY_SIZE(linkinfo_policy
))) {
535 VLOG_ABORT("Error fetching Geneve tunnel device %s "
539 kind
= nl_attr_get_string(linkinfo
[IFLA_INFO_KIND
]);
541 if (!strcmp(kind
, "ovs_geneve")) {
543 } else if (!strcmp(kind
, "geneve")) {
546 VLOG_ABORT("Geneve tunnel device %s with kind %s"
547 " not supported", name
, kind
);
550 ofpbuf_delete(reply
);
551 netdev_close(netdev
);
556 error
= dpif_netlink_rtnl_create(tnl_cfg
, name
, OVS_VPORT_TYPE_GENEVE
,
558 (NLM_F_REQUEST
| NLM_F_ACK
560 if (error
!= EOPNOTSUPP
) {
562 dpif_netlink_rtnl_destroy(name
);
566 netdev_close(netdev
);