]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* Copyright (c) 2008, 2009 Nicira Networks |
064af421 | 2 | * |
a14bc59f BP |
3 | * Licensed under the Apache License, Version 2.0 (the "License"); |
4 | * you may not use this file except in compliance with the License. | |
5 | * You may obtain a copy of the License at: | |
064af421 | 6 | * |
a14bc59f | 7 | * http://www.apache.org/licenses/LICENSE-2.0 |
064af421 | 8 | * |
a14bc59f BP |
9 | * Unless required by applicable law or agreed to in writing, software |
10 | * distributed under the License is distributed on an "AS IS" BASIS, | |
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | * See the License for the specific language governing permissions and | |
13 | * limitations under the License. | |
064af421 BP |
14 | */ |
15 | ||
16 | #include <config.h> | |
17 | ||
18 | #include <assert.h> | |
19 | #include <errno.h> | |
20 | #include <getopt.h> | |
21 | #include <inttypes.h> | |
22 | #include <limits.h> | |
23 | #include <net/if.h> | |
24 | #include <linux/genetlink.h> | |
25 | #include <linux/rtnetlink.h> | |
26 | #include <signal.h> | |
27 | #include <stdlib.h> | |
28 | #include <string.h> | |
29 | #include <sys/types.h> | |
30 | #include <sys/stat.h> | |
31 | #include <fcntl.h> | |
32 | #include <unistd.h> | |
33 | ||
34 | #include "cfg.h" | |
35 | #include "command-line.h" | |
36 | #include "coverage.h" | |
37 | #include "daemon.h" | |
38 | #include "dirs.h" | |
39 | #include "dpif.h" | |
40 | #include "fatal-signal.h" | |
41 | #include "fault.h" | |
42 | #include "leak-checker.h" | |
43 | #include "netdev.h" | |
44 | #include "netlink.h" | |
45 | #include "ofpbuf.h" | |
46 | #include "openvswitch/brcompat-netlink.h" | |
47 | #include "poll-loop.h" | |
48 | #include "process.h" | |
49 | #include "signals.h" | |
50 | #include "svec.h" | |
51 | #include "timeval.h" | |
52 | #include "unixctl.h" | |
53 | #include "util.h" | |
54 | ||
55 | #include "vlog.h" | |
56 | #define THIS_MODULE VLM_brcompatd | |
57 | ||
58 | ||
59 | /* xxx Just hangs if datapath is rmmod/insmod. Learn to reconnect? */ | |
60 | ||
/* Actions to modify bridge compatibility configuration.
 *
 * NOTE(review): no use of these enumerators is visible in this file; the
 * per-member meanings are presumably "add/delete a datapath (bridge)" and
 * "add/delete a port" -- confirm before relying on them. */
enum bmc_action {
    BMC_ADD_DP = 0,
    BMC_DEL_DP,
    BMC_ADD_PORT,
    BMC_DEL_PORT
};
68 | ||
69 | static void parse_options(int argc, char *argv[]); | |
70 | static void usage(void) NO_RETURN; | |
71 | ||
72 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); | |
73 | ||
74 | /* Maximum number of milliseconds to wait for the config file to be | |
75 | * unlocked. If set to zero, no waiting will occur. */ | |
76 | static int lock_timeout = 500; | |
77 | ||
78 | /* Maximum number of milliseconds to wait before pruning port entries that | |
79 | * no longer exist. If set to zero, ports are never pruned. */ | |
80 | static int prune_timeout = 5000; | |
81 | ||
82 | /* Config file shared with ovs-vswitchd (usually ovs-vswitchd.conf). */ | |
83 | static char *config_file; | |
84 | ||
85 | /* Command to run (via system()) to reload the ovs-vswitchd configuration | |
86 | * file. */ | |
87 | static char *reload_command; | |
88 | ||
89 | /* Netlink socket to listen for interface changes. */ | |
90 | static struct nl_sock *rtnl_sock; | |
91 | ||
92 | /* Netlink socket to bridge compatibility kernel module. */ | |
93 | static struct nl_sock *brc_sock; | |
94 | ||
95 | /* The Generic Netlink family number used for bridge compatibility. */ | |
96 | static int brc_family; | |
97 | ||
98 | static const struct nl_policy brc_multicast_policy[] = { | |
99 | [BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 } | |
100 | }; | |
101 | ||
102 | static const struct nl_policy rtnlgrp_link_policy[] = { | |
103 | [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, | |
104 | [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, | |
105 | }; | |
106 | ||
107 | static int | |
108 | lookup_brc_multicast_group(int *multicast_group) | |
109 | { | |
110 | struct nl_sock *sock; | |
111 | struct ofpbuf request, *reply; | |
112 | struct nlattr *attrs[ARRAY_SIZE(brc_multicast_policy)]; | |
113 | int retval; | |
114 | ||
115 | retval = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock); | |
116 | if (retval) { | |
117 | return retval; | |
118 | } | |
119 | ofpbuf_init(&request, 0); | |
120 | nl_msg_put_genlmsghdr(&request, sock, 0, brc_family, | |
121 | NLM_F_REQUEST, BRC_GENL_C_QUERY_MC, 1); | |
122 | retval = nl_sock_transact(sock, &request, &reply); | |
123 | ofpbuf_uninit(&request); | |
124 | if (retval) { | |
125 | nl_sock_destroy(sock); | |
126 | return retval; | |
127 | } | |
128 | if (!nl_policy_parse(reply, NLMSG_HDRLEN + GENL_HDRLEN, | |
129 | brc_multicast_policy, attrs, | |
130 | ARRAY_SIZE(brc_multicast_policy))) { | |
131 | nl_sock_destroy(sock); | |
132 | ofpbuf_delete(reply); | |
133 | return EPROTO; | |
134 | } | |
135 | *multicast_group = nl_attr_get_u32(attrs[BRC_GENL_A_MC_GROUP]); | |
136 | nl_sock_destroy(sock); | |
137 | ofpbuf_delete(reply); | |
138 | ||
139 | return 0; | |
140 | } | |
141 | ||
142 | /* Opens a socket for brcompat notifications. Returns 0 if successful, | |
143 | * otherwise a positive errno value. */ | |
144 | static int | |
145 | brc_open(struct nl_sock **sock) | |
146 | { | |
147 | int multicast_group = 0; | |
148 | int retval; | |
149 | ||
150 | retval = nl_lookup_genl_family(BRC_GENL_FAMILY_NAME, &brc_family); | |
151 | if (retval) { | |
152 | return retval; | |
153 | } | |
154 | ||
155 | retval = lookup_brc_multicast_group(&multicast_group); | |
156 | if (retval) { | |
157 | return retval; | |
158 | } | |
159 | ||
160 | retval = nl_sock_create(NETLINK_GENERIC, multicast_group, 0, 0, sock); | |
161 | if (retval) { | |
162 | return retval; | |
163 | } | |
164 | ||
165 | return 0; | |
166 | } | |
167 | ||
168 | static const struct nl_policy brc_dp_policy[] = { | |
169 | [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, | |
170 | }; | |
171 | ||
/* Returns true if the loaded configuration has a "bridge.<name>" section,
 * false otherwise. */
static bool
bridge_exists(const char *name)
{
    bool has_section = cfg_has_section("bridge.%s", name);

    return has_section;
}
177 | ||
178 | static int | |
179 | rewrite_and_reload_config(void) | |
180 | { | |
181 | if (cfg_is_dirty()) { | |
182 | int error1 = cfg_write(); | |
183 | int error2 = cfg_read(); | |
184 | long long int reload_start = time_msec(); | |
185 | int error3 = system(reload_command); | |
186 | long long int elapsed = time_msec() - reload_start; | |
187 | COVERAGE_INC(brcompatd_reload); | |
188 | if (elapsed > 0) { | |
189 | VLOG_INFO("reload command executed in %lld ms", elapsed); | |
190 | } | |
191 | if (error3 == -1) { | |
192 | VLOG_ERR("failed to execute reload command: %s", strerror(errno)); | |
193 | } else if (error3 != 0) { | |
194 | char *msg = process_status_msg(error3); | |
195 | VLOG_ERR("reload command exited with error (%s)", msg); | |
196 | free(msg); | |
197 | } | |
198 | return error1 ? error1 : error2 ? error2 : error3 ? ECHILD : 0; | |
199 | } | |
200 | return 0; | |
201 | } | |
202 | ||
203 | /* Go through the configuration file and remove any ports that no longer | |
204 | * exist associated with a bridge. */ | |
205 | static void | |
206 | prune_ports(void) | |
207 | { | |
208 | int i, j; | |
209 | int error; | |
210 | struct svec bridges, delete; | |
211 | ||
212 | if (cfg_lock(NULL, 0)) { | |
213 | /* Couldn't lock config file. */ | |
214 | return; | |
215 | } | |
216 | ||
217 | svec_init(&bridges); | |
218 | svec_init(&delete); | |
219 | cfg_get_subsections(&bridges, "bridge"); | |
220 | for (i=0; i<bridges.n; i++) { | |
221 | const char *br_name = bridges.names[i]; | |
222 | struct svec ports, ifaces; | |
223 | ||
224 | svec_init(&ports); | |
225 | ||
226 | /* Get all the interfaces for the given bridge, breaking bonded | |
227 | * interfaces down into their constituent parts. */ | |
228 | svec_init(&ifaces); | |
229 | cfg_get_all_keys(&ports, "bridge.%s.port", br_name); | |
230 | for (j=0; j<ports.n; j++) { | |
231 | const char *port_name = ports.names[j]; | |
232 | if (cfg_has_section("bonding.%s", port_name)) { | |
233 | struct svec slaves; | |
234 | svec_init(&slaves); | |
235 | cfg_get_all_keys(&slaves, "bonding.%s.slave", port_name); | |
236 | svec_append(&ifaces, &slaves); | |
237 | svec_destroy(&slaves); | |
238 | } else { | |
239 | svec_add(&ifaces, port_name); | |
240 | } | |
241 | } | |
242 | svec_destroy(&ports); | |
243 | ||
244 | /* Check that the interfaces exist. */ | |
245 | for (j = 0; j < ifaces.n; j++) { | |
246 | const char *iface_name = ifaces.names[j]; | |
247 | enum netdev_flags flags; | |
248 | ||
249 | /* The local port and internal ports are created and destroyed by | |
250 | * ovs-vswitchd itself, so don't bother checking for them at all. | |
251 | * In practice, they might not exist if ovs-vswitchd hasn't | |
252 | * finished reloading since the configuration file was updated. */ | |
253 | if (!strcmp(iface_name, br_name) | |
254 | || cfg_get_bool(0, "iface.%s.internal", iface_name)) { | |
255 | continue; | |
256 | } | |
257 | ||
258 | error = netdev_nodev_get_flags(iface_name, &flags); | |
259 | if (error == ENODEV) { | |
f06b2aa9 BP |
260 | VLOG_INFO_RL(&rl, "removing dead interface %s from %s", |
261 | iface_name, br_name); | |
064af421 BP |
262 | svec_add(&delete, iface_name); |
263 | } else if (error) { | |
f06b2aa9 BP |
264 | VLOG_INFO_RL(&rl, "unknown error %d on interface %s from %s", |
265 | error, iface_name, br_name); | |
064af421 BP |
266 | } |
267 | } | |
268 | svec_destroy(&ifaces); | |
269 | } | |
270 | svec_destroy(&bridges); | |
271 | ||
272 | if (delete.n) { | |
273 | size_t i; | |
274 | ||
275 | for (i = 0; i < delete.n; i++) { | |
276 | cfg_del_match("bridge.*.port=%s", delete.names[i]); | |
277 | cfg_del_match("bonding.*.slave=%s", delete.names[i]); | |
278 | } | |
279 | rewrite_and_reload_config(); | |
280 | cfg_unlock(); | |
281 | } else { | |
282 | cfg_unlock(); | |
283 | } | |
284 | svec_destroy(&delete); | |
285 | } | |
286 | ||
287 | ||
/* Returns true if a network device named 'name' exists, false otherwise.
 * Existence is decided by whether stat() succeeds on "/sys/class/net/<name>".
 *
 * XXX it is possible that this doesn't entirely accomplish what we want in
 * context, since ovs-vswitchd.conf may cause vswitchd to create or destroy
 * network devices based on iface.*.internal settings.
 *
 * XXX may want to move this to lib/netdev.
 *
 * XXX why not just use netdev_nodev_get_flags() or similar function? */
static bool
netdev_exists(const char *name)
{
    char *sysfs_path = xasprintf("/sys/class/net/%s", name);
    struct stat st;
    bool exists = stat(sysfs_path, &st) == 0;

    free(sysfs_path);
    return exists;
}
310 | ||
/* Adds a bridge named 'br_name' to the in-memory configuration by creating
 * its "bridge.<br_name>.port=<br_name>" entry.
 *
 * Returns 0 on success, and also when a network device of that name already
 * exists and is configured as a fake bridge (nothing is added in that case).
 * Returns EEXIST if the bridge is already configured or if an ordinary
 * network device with that name exists. */
static int
add_bridge(const char *br_name)
{
    if (bridge_exists(br_name)) {
        VLOG_WARN("addbr %s: bridge %s exists", br_name, br_name);
        return EEXIST;
    }

    if (netdev_exists(br_name)) {
        if (!cfg_get_bool(0, "iface.%s.fake-bridge", br_name)) {
            VLOG_WARN("addbr %s: cannot create bridge %s because a network "
                      "device named %s already exists",
                      br_name, br_name, br_name);
            return EEXIST;
        }

        VLOG_WARN("addbr %s: %s exists as a fake bridge",
                  br_name, br_name);
        return 0;
    }

    cfg_add_entry("bridge.%s.port=%s", br_name, br_name);
    VLOG_INFO("addbr %s: success", br_name);
    return 0;
}
335 | ||
/* Removes the "bridge.<br_name>" section from the in-memory configuration.
 *
 * Returns 0 on success, or ENXIO if no such bridge is configured. */
static int
del_bridge(const char *br_name)
{
    if (bridge_exists(br_name)) {
        cfg_del_section("bridge.%s", br_name);
        VLOG_INFO("delbr %s: success", br_name);
        return 0;
    }

    VLOG_WARN("delbr %s: no bridge named %s", br_name, br_name);
    return ENXIO;
}
349 | ||
350 | static int | |
351 | parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, | |
352 | const char **port_name) | |
353 | { | |
354 | static const struct nl_policy policy[] = { | |
355 | [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, | |
356 | [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING, .optional = true }, | |
357 | }; | |
358 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
359 | ||
360 | if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN, policy, | |
361 | attrs, ARRAY_SIZE(policy)) | |
362 | || (port_name && !attrs[BRC_GENL_A_PORT_NAME])) { | |
363 | return EINVAL; | |
364 | } | |
365 | ||
366 | *seq = ((struct nlmsghdr *) buffer->data)->nlmsg_seq; | |
367 | *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); | |
368 | if (port_name) { | |
369 | *port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]); | |
370 | } | |
371 | return 0; | |
372 | } | |
373 | ||
374 | static void | |
375 | send_reply(uint32_t seq, int error) | |
376 | { | |
377 | struct ofpbuf msg; | |
378 | int retval; | |
379 | ||
380 | /* Compose reply. */ | |
381 | ofpbuf_init(&msg, 0); | |
382 | nl_msg_put_genlmsghdr(&msg, brc_sock, 32, brc_family, NLM_F_REQUEST, | |
383 | BRC_GENL_C_DP_RESULT, 1); | |
384 | ((struct nlmsghdr *) msg.data)->nlmsg_seq = seq; | |
385 | nl_msg_put_u32(&msg, BRC_GENL_A_ERR_CODE, error); | |
386 | ||
387 | /* Send reply. */ | |
388 | retval = nl_sock_send(brc_sock, &msg, false); | |
389 | if (retval) { | |
390 | VLOG_WARN_RL(&rl, "replying to brcompat request: %s", | |
391 | strerror(retval)); | |
392 | } | |
393 | ofpbuf_uninit(&msg); | |
394 | } | |
395 | ||
/* Handles a brcompat request to add ('add' true) or delete ('add' false) the
 * bridge named in 'buffer'.
 *
 * If the request parses, the bridge is added or deleted, the configuration
 * is rewritten and reloaded on success, and a reply carrying the resulting
 * error code is sent.  No reply is sent for a request that fails to parse.
 *
 * Returns 0 on success, otherwise a positive errno value. */
static int
handle_bridge_cmd(struct ofpbuf *buffer, bool add)
{
    const char *br_name;
    uint32_t seq;
    int error;

    error = parse_command(buffer, &seq, &br_name, NULL);
    if (error) {
        return error;
    }

    if (add) {
        error = add_bridge(br_name);
    } else {
        error = del_bridge(br_name);
    }
    if (!error) {
        error = rewrite_and_reload_config();
    }
    send_reply(seq, error);
    return error;
}
413 | ||
414 | static const struct nl_policy brc_port_policy[] = { | |
415 | [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, | |
416 | [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING }, | |
417 | }; | |
418 | ||
/* Removes 'port_name' from bridge 'br_name' in the in-memory configuration.
 * Also deletes any "bonding.*.slave" entry whose value is 'port_name' and
 * every "vlan.<port_name>.*" key. */
static void
del_port(const char *br_name, const char *port_name)
{
    /* The bridge's own reference to the port. */
    cfg_del_entry("bridge.%s.port=%s", br_name, port_name);

    /* Settings elsewhere in the configuration that mention the port. */
    cfg_del_match("bonding.*.slave=%s", port_name);
    cfg_del_match("vlan.%s.*", port_name);
}
426 | ||
/* Handles a brcompat request to add ('add' true) or remove ('add' false) a
 * port on a bridge, both named in 'buffer'.
 *
 * The bridge must be configured and the port's network device must exist;
 * otherwise the request fails with EINVAL.  On success the configuration is
 * rewritten and reloaded.  A reply carrying the resulting error code is sent
 * for every request that parses; none is sent for one that does not.
 *
 * Returns 0 on success, otherwise a positive errno value. */
static int
handle_port_cmd(struct ofpbuf *buffer, bool add)
{
    const char *cmd_name = add ? "add-if" : "del-if";
    const char *br_name, *port_name;
    uint32_t seq;
    int error;

    error = parse_command(buffer, &seq, &br_name, &port_name);
    if (error) {
        return error;
    }

    if (!bridge_exists(br_name)) {
        VLOG_WARN("%s %s %s: no bridge named %s",
                  cmd_name, br_name, port_name, br_name);
        error = EINVAL;
    } else if (!netdev_exists(port_name)) {
        VLOG_WARN("%s %s %s: no network device named %s",
                  cmd_name, br_name, port_name, port_name);
        error = EINVAL;
    } else {
        if (!add) {
            del_port(br_name, port_name);
        } else {
            cfg_add_entry("bridge.%s.port=%s", br_name, port_name);
        }
        VLOG_INFO("%s %s %s: success", cmd_name, br_name, port_name);
        error = rewrite_and_reload_config();
    }
    send_reply(seq, error);

    return error;
}
459 | ||
460 | static int | |
461 | brc_recv_update(void) | |
462 | { | |
463 | int retval; | |
464 | struct ofpbuf *buffer; | |
465 | struct genlmsghdr *genlmsghdr; | |
466 | ||
467 | ||
468 | buffer = NULL; | |
469 | do { | |
470 | ofpbuf_delete(buffer); | |
471 | retval = nl_sock_recv(brc_sock, &buffer, false); | |
472 | } while (retval == ENOBUFS | |
473 | || (!retval | |
474 | && (nl_msg_nlmsgerr(buffer, NULL) | |
475 | || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE))); | |
476 | if (retval) { | |
477 | if (retval != EAGAIN) { | |
478 | VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval)); | |
479 | } | |
480 | return retval; | |
481 | } | |
482 | ||
483 | genlmsghdr = nl_msg_genlmsghdr(buffer); | |
484 | if (!genlmsghdr) { | |
485 | VLOG_WARN_RL(&rl, "received packet too short for generic NetLink"); | |
486 | goto error; | |
487 | } | |
488 | ||
489 | if (nl_msg_nlmsghdr(buffer)->nlmsg_type != brc_family) { | |
490 | VLOG_DBG_RL(&rl, "received type (%"PRIu16") != brcompat family (%d)", | |
491 | nl_msg_nlmsghdr(buffer)->nlmsg_type, brc_family); | |
492 | goto error; | |
493 | } | |
494 | ||
495 | if (cfg_lock(NULL, lock_timeout)) { | |
496 | /* Couldn't lock config file. */ | |
497 | retval = EAGAIN; | |
498 | goto error; | |
499 | } | |
500 | ||
501 | switch (genlmsghdr->cmd) { | |
502 | case BRC_GENL_C_DP_ADD: | |
503 | retval = handle_bridge_cmd(buffer, true); | |
504 | break; | |
505 | ||
506 | case BRC_GENL_C_DP_DEL: | |
507 | retval = handle_bridge_cmd(buffer, false); | |
508 | break; | |
509 | ||
510 | case BRC_GENL_C_PORT_ADD: | |
511 | retval = handle_port_cmd(buffer, true); | |
512 | break; | |
513 | ||
514 | case BRC_GENL_C_PORT_DEL: | |
515 | retval = handle_port_cmd(buffer, false); | |
516 | break; | |
517 | ||
518 | default: | |
519 | retval = EPROTO; | |
520 | } | |
521 | ||
522 | cfg_unlock(); | |
523 | ||
524 | error: | |
525 | ofpbuf_delete(buffer); | |
526 | return retval; | |
527 | } | |
528 | ||
529 | /* Check for interface configuration changes announced through RTNL. */ | |
530 | static void | |
531 | rtnl_recv_update(void) | |
532 | { | |
533 | struct ofpbuf *buf; | |
534 | ||
535 | int error = nl_sock_recv(rtnl_sock, &buf, false); | |
536 | if (error == EAGAIN) { | |
537 | /* Nothing to do. */ | |
538 | } else if (error == ENOBUFS) { | |
539 | VLOG_WARN_RL(&rl, "network monitor socket overflowed"); | |
540 | } else if (error) { | |
541 | VLOG_WARN_RL(&rl, "error on network monitor socket: %s", | |
542 | strerror(error)); | |
543 | } else { | |
544 | struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; | |
545 | struct nlmsghdr *nlh; | |
546 | struct ifinfomsg *iim; | |
547 | ||
548 | nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN); | |
549 | iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim); | |
550 | if (!iim) { | |
551 | VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)"); | |
552 | ofpbuf_delete(buf); | |
553 | return; | |
554 | } | |
555 | ||
556 | if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), | |
557 | rtnlgrp_link_policy, | |
558 | attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { | |
559 | VLOG_WARN_RL(&rl,"received bad rtnl message (policy)"); | |
560 | ofpbuf_delete(buf); | |
561 | return; | |
562 | } | |
563 | if (nlh->nlmsg_type == RTM_DELLINK && attrs[IFLA_MASTER]) { | |
564 | const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]); | |
565 | char br_name[IFNAMSIZ]; | |
566 | uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]); | |
567 | struct svec ports; | |
38553381 | 568 | enum netdev_flags flags; |
064af421 BP |
569 | |
570 | if (!if_indextoname(br_idx, br_name)) { | |
571 | ofpbuf_delete(buf); | |
572 | return; | |
573 | } | |
574 | ||
575 | if (cfg_lock(NULL, lock_timeout)) { | |
576 | /* Couldn't lock config file. */ | |
577 | /* xxx this should try again and print error msg. */ | |
578 | ofpbuf_delete(buf); | |
579 | return; | |
580 | } | |
581 | ||
38553381 BP |
582 | if (netdev_nodev_get_flags(port_name, &flags) == ENODEV) { |
583 | /* Network device is really gone. */ | |
584 | VLOG_INFO("network device %s destroyed, " | |
585 | "removing from bridge %s", port_name, br_name); | |
586 | svec_init(&ports); | |
587 | cfg_get_all_keys(&ports, "bridge.%s.port", br_name); | |
588 | svec_sort(&ports); | |
589 | if (svec_contains(&ports, port_name)) { | |
590 | del_port(br_name, port_name); | |
591 | rewrite_and_reload_config(); | |
592 | } | |
593 | } else { | |
594 | /* A network device by that name exists even though the kernel | |
595 | * told us it had disappeared. Probably, what happened was | |
596 | * this: | |
597 | * | |
598 | * 1. Device destroyed. | |
599 | * 2. Notification sent to us. | |
600 | * 3. New device created with same name as old one. | |
601 | * 4. ovs-brcompatd notified, removes device from bridge. | |
602 | * | |
603 | * There's no a priori reason that in this situation that the | |
604 | * new device with the same name should remain in the bridge; | |
605 | * on the contrary, that would be unexpected. *But* there is | |
606 | * one important situation where, if we do this, bad things | |
607 | * happen. This is the case of XenServer Tools version 5.0.0, | |
608 | * which on boot of a Windows VM cause something like this to | |
609 | * happen on the Xen host: | |
610 | * | |
611 | * i. Create tap1.0 and vif1.0. | |
612 | * ii. Delete tap1.0. | |
613 | * iii. Delete vif1.0. | |
614 | * iv. Re-create vif1.0. | |
615 | * | |
616 | * (XenServer Tools 5.5.0 does not exhibit this behavior, and | |
617 | * neither does a VM without Tools installed at all.@.) | |
618 | * | |
619 | * Steps iii and iv happen within a few seconds of each other. | |
620 | * Step iv causes /etc/xensource/scripts/vif to run, which in | |
621 | * turn calls ovs-cfg-mod to add the new device to the bridge. | |
622 | * If step iv happens after step 4 (in our first list of | |
623 | * steps), then all is well, but if it happens between 3 and 4 | |
624 | * (which can easily happen if ovs-brcompatd has to wait to | |
625 | * lock the configuration file), then we will remove the new | |
626 | * incarnation from the bridge instead of the old one! | |
627 | * | |
628 | * So, to avoid this problem, we do nothing here. This is | |
629 | * strictly incorrect except for this one particular case, and | |
630 | * perhaps that will bite us someday. If that happens, then we | |
631 | * will have to somehow track network devices by ifindex, since | |
632 | * a new device will have a new ifindex even if it has the same | |
633 | * name as an old device. | |
634 | */ | |
635 | VLOG_INFO("kernel reported network device %s removed but " | |
636 | "a device by that name exists (XS Tools 5.0.0?)", | |
637 | port_name); | |
064af421 BP |
638 | } |
639 | cfg_unlock(); | |
640 | } | |
641 | ofpbuf_delete(buf); | |
642 | } | |
643 | } | |
644 | ||
645 | int | |
646 | main(int argc, char *argv[]) | |
647 | { | |
648 | struct unixctl_server *unixctl; | |
649 | int retval; | |
650 | ||
651 | set_program_name(argv[0]); | |
652 | register_fault_handlers(); | |
653 | time_init(); | |
654 | vlog_init(); | |
655 | parse_options(argc, argv); | |
656 | signal(SIGPIPE, SIG_IGN); | |
657 | process_init(); | |
658 | ||
659 | die_if_already_running(); | |
660 | daemonize(); | |
661 | ||
662 | retval = unixctl_server_create(NULL, &unixctl); | |
663 | if (retval) { | |
664 | ovs_fatal(retval, "could not listen for vlog connections"); | |
665 | } | |
666 | ||
667 | if (brc_open(&brc_sock)) { | |
668 | ovs_fatal(0, "could not open brcompat socket. Check " | |
669 | "\"brcompat\" kernel module."); | |
670 | } | |
671 | ||
672 | if (prune_timeout) { | |
673 | if (nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &rtnl_sock)) { | |
674 | ovs_fatal(0, "could not create rtnetlink socket"); | |
675 | } | |
676 | } | |
677 | ||
678 | cfg_read(); | |
679 | ||
680 | for (;;) { | |
681 | unixctl_server_run(unixctl); | |
682 | brc_recv_update(); | |
683 | ||
684 | /* If 'prune_timeout' is non-zero, we actively prune from the | |
685 | * config file any 'bridge.<br_name>.port' entries that are no | |
686 | * longer valid. We use two methods: | |
687 | * | |
688 | * 1) The kernel explicitly notifies us of removed ports | |
689 | * through the RTNL messages. | |
690 | * | |
691 | * 2) We periodically check all ports associated with bridges | |
692 | * to see if they no longer exist. | |
693 | */ | |
694 | if (prune_timeout) { | |
695 | rtnl_recv_update(); | |
696 | prune_ports(); | |
697 | ||
698 | nl_sock_wait(rtnl_sock, POLLIN); | |
699 | poll_timer_wait(prune_timeout); | |
700 | } | |
701 | ||
702 | nl_sock_wait(brc_sock, POLLIN); | |
703 | unixctl_server_wait(unixctl); | |
704 | poll_block(); | |
705 | } | |
706 | ||
707 | return 0; | |
708 | } | |
709 | ||
710 | static void | |
711 | parse_options(int argc, char *argv[]) | |
712 | { | |
713 | enum { | |
714 | OPT_LOCK_TIMEOUT = UCHAR_MAX + 1, | |
715 | OPT_PRUNE_TIMEOUT, | |
716 | OPT_RELOAD_COMMAND, | |
717 | VLOG_OPTION_ENUMS, | |
718 | LEAK_CHECKER_OPTION_ENUMS | |
719 | }; | |
720 | static struct option long_options[] = { | |
721 | {"help", no_argument, 0, 'h'}, | |
722 | {"version", no_argument, 0, 'V'}, | |
723 | {"lock-timeout", required_argument, 0, OPT_LOCK_TIMEOUT}, | |
724 | {"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT}, | |
725 | {"reload-command", required_argument, 0, OPT_RELOAD_COMMAND}, | |
726 | DAEMON_LONG_OPTIONS, | |
727 | VLOG_LONG_OPTIONS, | |
728 | LEAK_CHECKER_LONG_OPTIONS, | |
729 | {0, 0, 0, 0}, | |
730 | }; | |
731 | char *short_options = long_options_to_short_options(long_options); | |
732 | int error; | |
733 | ||
734 | reload_command = xasprintf("%s/ovs-appctl -t " | |
735 | "%s/ovs-vswitchd.`cat %s/ovs-vswitchd.pid`.ctl " | |
736 | "-e vswitchd/reload 2>&1 " | |
737 | "| /usr/bin/logger -t brcompatd-reload", | |
738 | ovs_bindir, ovs_rundir, ovs_rundir); | |
739 | for (;;) { | |
740 | int c; | |
741 | ||
742 | c = getopt_long(argc, argv, short_options, long_options, NULL); | |
743 | if (c == -1) { | |
744 | break; | |
745 | } | |
746 | ||
747 | switch (c) { | |
748 | case 'H': | |
749 | case 'h': | |
750 | usage(); | |
751 | ||
752 | case 'V': | |
753 | OVS_PRINT_VERSION(0, 0); | |
754 | exit(EXIT_SUCCESS); | |
755 | ||
756 | case OPT_LOCK_TIMEOUT: | |
757 | lock_timeout = atoi(optarg); | |
758 | break; | |
759 | ||
760 | case OPT_PRUNE_TIMEOUT: | |
761 | prune_timeout = atoi(optarg) * 1000; | |
762 | break; | |
763 | ||
764 | case OPT_RELOAD_COMMAND: | |
765 | reload_command = optarg; | |
766 | break; | |
767 | ||
768 | VLOG_OPTION_HANDLERS | |
769 | DAEMON_OPTION_HANDLERS | |
770 | LEAK_CHECKER_OPTION_HANDLERS | |
771 | ||
772 | case '?': | |
773 | exit(EXIT_FAILURE); | |
774 | ||
775 | default: | |
776 | abort(); | |
777 | } | |
778 | } | |
779 | free(short_options); | |
780 | ||
781 | argc -= optind; | |
782 | argv += optind; | |
783 | ||
784 | if (argc != 1) { | |
785 | ovs_fatal(0, "exactly one non-option argument required; " | |
786 | "use --help for usage"); | |
787 | } | |
788 | ||
789 | config_file = argv[0]; | |
790 | error = cfg_set_file(config_file); | |
791 | if (error) { | |
792 | ovs_fatal(error, "failed to add configuration file \"%s\"", | |
793 | config_file); | |
794 | } | |
795 | } | |
796 | ||
797 | static void | |
798 | usage(void) | |
799 | { | |
800 | printf("%s: bridge compatibility front-end for ovs-vswitchd\n" | |
801 | "usage: %s [OPTIONS] CONFIG\n" | |
802 | "CONFIG is the configuration file used by ovs-vswitchd.\n", | |
803 | program_name, program_name); | |
804 | printf("\nConfiguration options:\n" | |
805 | " --reload-command=COMMAND shell command to reload ovs-vswitchd\n" | |
806 | " --prune-timeout=SECS wait at most SECS before pruning ports\n" | |
807 | " --lock-timeout=MSECS wait at most MSECS for CONFIG to unlock\n" | |
808 | ); | |
809 | daemon_usage(); | |
810 | vlog_usage(); | |
811 | printf("\nOther options:\n" | |
812 | " -h, --help display this help message\n" | |
813 | " -V, --version display version information\n"); | |
814 | leak_checker_usage(); | |
815 | printf("\nThe default reload command is:\n%s\n", reload_command); | |
816 | exit(EXIT_SUCCESS); | |
817 | } |