]> git.proxmox.com Git - ovs.git/blame - vswitchd/ovs-brcompatd.c
Consistently write null pointer constants as NULL instead of 0.
[ovs.git] / vswitchd / ovs-brcompatd.c
CommitLineData
db5ce514 1/* Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks
064af421 2 *
a14bc59f
BP
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
064af421 6 *
a14bc59f 7 * http://www.apache.org/licenses/LICENSE-2.0
064af421 8 *
a14bc59f
BP
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
064af421
BP
14 */
15
16#include <config.h>
17
3c303e5f 18#include <asm/param.h>
064af421
BP
19#include <assert.h>
20#include <errno.h>
21#include <getopt.h>
22#include <inttypes.h>
23#include <limits.h>
24#include <net/if.h>
25#include <linux/genetlink.h>
26#include <linux/rtnetlink.h>
27#include <signal.h>
28#include <stdlib.h>
29#include <string.h>
30#include <sys/types.h>
31#include <sys/stat.h>
3c303e5f 32#include <time.h>
064af421
BP
33#include <fcntl.h>
34#include <unistd.h>
35
064af421
BP
36#include "command-line.h"
37#include "coverage.h"
38#include "daemon.h"
39#include "dirs.h"
3c303e5f 40#include "dynamic-string.h"
064af421 41#include "fatal-signal.h"
1e86ae6f 42#include "json.h"
064af421
BP
43#include "leak-checker.h"
44#include "netdev.h"
45#include "netlink.h"
2fe27d5a 46#include "netlink-socket.h"
064af421
BP
47#include "ofpbuf.h"
48#include "openvswitch/brcompat-netlink.h"
9852694f 49#include "ovsdb-idl.h"
3c303e5f 50#include "packets.h"
064af421
BP
51#include "poll-loop.h"
52#include "process.h"
53#include "signals.h"
96ca8c29 54#include "sset.h"
064af421
BP
55#include "timeval.h"
56#include "unixctl.h"
57#include "util.h"
5136ce49 58#include "vlog.h"
9852694f 59#include "vswitchd/vswitch-idl.h"
064af421 60
d98e6007 61VLOG_DEFINE_THIS_MODULE(brcompatd);
064af421
BP
62
63
64/* xxx Just hangs if datapath is rmmod/insmod. Learn to reconnect? */
65
/* The kinds of change that can be made to the bridge compatibility
 * configuration. */
enum bmc_action {
    BMC_ADD_DP,
    BMC_DEL_DP,
    BMC_ADD_PORT,
    BMC_DEL_PORT
};
73
9852694f 74static const char *parse_options(int argc, char *argv[]);
064af421
BP
75static void usage(void) NO_RETURN;
76
77static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60);
78
d295e8e9 79/* Maximum number of milliseconds to wait before pruning port entries that
064af421
BP
80 * no longer exist. If set to zero, ports are never pruned. */
81static int prune_timeout = 5000;
82
3c303e5f
BP
83/* Shell command to execute (via popen()) to send a control command to the
84 * running ovs-vswitchd process. The string must contain one instance of %s,
85 * which is replaced by the control command. */
86static char *appctl_command;
064af421
BP
87
88/* Netlink socket to listen for interface changes. */
89static struct nl_sock *rtnl_sock;
90
91/* Netlink socket to bridge compatibility kernel module. */
92static struct nl_sock *brc_sock;
93
94/* The Generic Netlink family number used for bridge compatibility. */
95static int brc_family;
96
97static const struct nl_policy brc_multicast_policy[] = {
98 [BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 }
99};
100
101static const struct nl_policy rtnlgrp_link_policy[] = {
102 [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false },
103 [IFLA_MASTER] = { .type = NL_A_U32, .optional = true },
104};
105
106static int
107lookup_brc_multicast_group(int *multicast_group)
108{
109 struct nl_sock *sock;
110 struct ofpbuf request, *reply;
111 struct nlattr *attrs[ARRAY_SIZE(brc_multicast_policy)];
112 int retval;
113
cceb11f5 114 retval = nl_sock_create(NETLINK_GENERIC, &sock);
064af421
BP
115 if (retval) {
116 return retval;
117 }
118 ofpbuf_init(&request, 0);
69123704 119 nl_msg_put_genlmsghdr(&request, 0, brc_family,
064af421
BP
120 NLM_F_REQUEST, BRC_GENL_C_QUERY_MC, 1);
121 retval = nl_sock_transact(sock, &request, &reply);
122 ofpbuf_uninit(&request);
123 if (retval) {
124 nl_sock_destroy(sock);
125 return retval;
126 }
127 if (!nl_policy_parse(reply, NLMSG_HDRLEN + GENL_HDRLEN,
128 brc_multicast_policy, attrs,
129 ARRAY_SIZE(brc_multicast_policy))) {
130 nl_sock_destroy(sock);
131 ofpbuf_delete(reply);
132 return EPROTO;
133 }
134 *multicast_group = nl_attr_get_u32(attrs[BRC_GENL_A_MC_GROUP]);
135 nl_sock_destroy(sock);
136 ofpbuf_delete(reply);
137
138 return 0;
139}
140
141/* Opens a socket for brcompat notifications. Returns 0 if successful,
142 * otherwise a positive errno value. */
143static int
144brc_open(struct nl_sock **sock)
145{
146 int multicast_group = 0;
147 int retval;
148
149 retval = nl_lookup_genl_family(BRC_GENL_FAMILY_NAME, &brc_family);
150 if (retval) {
151 return retval;
152 }
153
154 retval = lookup_brc_multicast_group(&multicast_group);
155 if (retval) {
156 return retval;
157 }
158
cceb11f5 159 retval = nl_sock_create(NETLINK_GENERIC, sock);
064af421
BP
160 if (retval) {
161 return retval;
162 }
163
cceb11f5
BP
164 retval = nl_sock_join_mcgroup(*sock, multicast_group);
165 if (retval) {
166 nl_sock_destroy(*sock);
167 *sock = NULL;
168 }
169 return retval;
064af421
BP
170}
171
172static const struct nl_policy brc_dp_policy[] = {
173 [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING },
174};
175
9852694f
JP
176static struct ovsrec_bridge *
177find_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name)
064af421 178{
9852694f
JP
179 size_t i;
180
181 for (i = 0; i < ovs->n_bridges; i++) {
182 if (!strcmp(br_name, ovs->bridges[i]->name)) {
183 return ovs->bridges[i];
184 }
185 }
186
187 return NULL;
064af421
BP
188}
189
3c303e5f
BP
190static int
191execute_appctl_command(const char *unixctl_command, char **output)
192{
193 char *stdout_log, *stderr_log;
194 int error, status;
195 char *argv[5];
196
197 argv[0] = "/bin/sh";
198 argv[1] = "-c";
199 argv[2] = xasprintf(appctl_command, unixctl_command);
200 argv[3] = NULL;
201
202 /* Run process and log status. */
203 error = process_run_capture(argv, &stdout_log, &stderr_log, &status);
204 if (error) {
205 VLOG_ERR("failed to execute %s command via ovs-appctl: %s",
206 unixctl_command, strerror(error));
207 } else if (status) {
208 char *msg = process_status_msg(status);
209 VLOG_ERR("ovs-appctl exited with error (%s)", msg);
210 free(msg);
211 error = ECHILD;
212 }
213
214 /* Deal with stdout_log. */
215 if (output) {
216 *output = stdout_log;
217 } else {
218 free(stdout_log);
219 }
220
221 /* Deal with stderr_log */
222 if (stderr_log && *stderr_log) {
223 VLOG_INFO("ovs-appctl wrote to stderr:\n%s", stderr_log);
224 }
225 free(stderr_log);
226
227 free(argv[2]);
228
229 return error;
230}
231
14551cea 232static void
96ca8c29 233do_get_bridge_parts(const struct ovsrec_bridge *br, struct sset *parts,
9852694f 234 int vlan, bool break_down_bonds)
14551cea 235{
9852694f 236 size_t i, j;
14551cea 237
9852694f
JP
238 for (i = 0; i < br->n_ports; i++) {
239 const struct ovsrec_port *port = br->ports[i];
240
c735214e 241 if (vlan >= 0) {
9852694f 242 int port_vlan = port->n_tag ? *port->tag : 0;
c735214e
BP
243 if (vlan != port_vlan) {
244 continue;
245 }
246 }
9852694f
JP
247 if (break_down_bonds) {
248 for (j = 0; j < port->n_interfaces; j++) {
249 const struct ovsrec_interface *iface = port->interfaces[j];
96ca8c29 250 sset_add(parts, iface->name);
9852694f 251 }
14551cea 252 } else {
96ca8c29 253 sset_add(parts, port->name);
14551cea
BP
254 }
255 }
14551cea
BP
256}
257
db322751
BP
/* Adds the name of every interface on 'br' to 'ifaces'.  Bonded ports are
 * broken down into their constituent interfaces.
 *
 * If 'vlan' < 0, all interfaces on 'br' are reported.  If 'vlan' == 0, only
 * interfaces of ports with no tag or with tag 0 are reported.  If
 * 'vlan' > 0, only interfaces of ports tagged with 'vlan' are reported. */
static void
get_bridge_ifaces(const struct ovsrec_bridge *br, struct sset *ifaces,
                  int vlan)
{
    const bool break_down_bonds = true;

    do_get_bridge_parts(br, ifaces, vlan, break_down_bonds);
}
271
/* Adds the name of every port on 'br' to 'ports'.  A bonded port is reported
 * once, under the bond's name, rather than as its constituent interfaces.
 *
 * If 'vlan' < 0, all ports on 'br' are reported.  If 'vlan' == 0, only ports
 * with no tag or with tag 0 are reported.  If 'vlan' > 0, only ports tagged
 * with 'vlan' are reported. */
static void
get_bridge_ports(const struct ovsrec_bridge *br, struct sset *ports,
                 int vlan)
{
    const bool break_down_bonds = false;

    do_get_bridge_parts(br, ports, vlan, break_down_bonds);
}
284
9852694f
JP
285static struct ovsdb_idl_txn *
286txn_from_openvswitch(const struct ovsrec_open_vswitch *ovs)
287{
288 return ovsdb_idl_txn_get(&ovs->header_);
289}
290
291static bool
292port_is_fake_bridge(const struct ovsrec_port *port)
293{
294 return (port->fake_bridge
295 && port->tag
296 && *port->tag >= 1 && *port->tag <= 4095);
297}
298
299static void
300ovs_insert_bridge(const struct ovsrec_open_vswitch *ovs,
301 struct ovsrec_bridge *bridge)
302{
303 struct ovsrec_bridge **bridges;
d295e8e9 304 size_t i;
9852694f
JP
305
306 bridges = xmalloc(sizeof *ovs->bridges * (ovs->n_bridges + 1));
307 for (i = 0; i < ovs->n_bridges; i++) {
308 bridges[i] = ovs->bridges[i];
309 }
310 bridges[ovs->n_bridges] = bridge;
311 ovsrec_open_vswitch_set_bridges(ovs, bridges, ovs->n_bridges + 1);
312 free(bridges);
d295e8e9 313}
064af421 314
1e86ae6f
BP
315static struct json *
316where_uuid_equals(const struct uuid *uuid)
317{
318 return
319 json_array_create_1(
320 json_array_create_3(
321 json_string_create("_uuid"),
322 json_string_create("=="),
323 json_array_create_2(
324 json_string_create("uuid"),
325 json_string_create_nocopy(
326 xasprintf(UUID_FMT, UUID_ARGS(uuid))))));
327}
328
329/* Commits 'txn'. If 'wait_for_reload' is true, also waits for Open vSwitch to
330 reload the configuration before returning.
331
332 Returns EAGAIN if the caller should try the operation again, 0 on success,
333 otherwise a positive errno value. */
334static int
335commit_txn(struct ovsdb_idl_txn *txn, bool wait_for_reload)
336{
337 struct ovsdb_idl *idl = ovsdb_idl_txn_get_idl (txn);
338 enum ovsdb_idl_txn_status status;
339 int64_t next_cfg = 0;
340
341 if (wait_for_reload) {
342 const struct ovsrec_open_vswitch *ovs = ovsrec_open_vswitch_first(idl);
343 struct json *where = where_uuid_equals(&ovs->header_.uuid);
344 ovsdb_idl_txn_increment(txn, "Open_vSwitch", "next_cfg", where);
345 json_destroy(where);
346 }
347 status = ovsdb_idl_txn_commit_block(txn);
348 if (wait_for_reload && status == TXN_SUCCESS) {
349 next_cfg = ovsdb_idl_txn_get_increment_new_value(txn);
350 }
351 ovsdb_idl_txn_destroy(txn);
352
353 switch (status) {
354 case TXN_INCOMPLETE:
355 NOT_REACHED();
356
357 case TXN_ABORTED:
358 VLOG_ERR_RL(&rl, "OVSDB transaction unexpectedly aborted");
359 return ECONNABORTED;
360
361 case TXN_UNCHANGED:
362 return 0;
363
364 case TXN_SUCCESS:
365 if (wait_for_reload) {
366 for (;;) {
367 /* We can't use 'ovs' any longer because ovsdb_idl_run() can
368 * destroy it. */
369 const struct ovsrec_open_vswitch *ovs2;
370
371 ovsdb_idl_run(idl);
372 OVSREC_OPEN_VSWITCH_FOR_EACH (ovs2, idl) {
373 if (ovs2->cur_cfg >= next_cfg) {
374 goto done;
375 }
376 }
377 ovsdb_idl_wait(idl);
378 poll_block();
379 }
380 done: ;
381 }
382 return 0;
383
384 case TXN_TRY_AGAIN:
385 VLOG_ERR_RL(&rl, "OVSDB transaction needs retry");
386 return EAGAIN;
387
388 case TXN_ERROR:
389 VLOG_ERR_RL(&rl, "OVSDB transaction failed: %s",
390 ovsdb_idl_txn_get_error(txn));
391 return EBUSY;
392
393 default:
394 NOT_REACHED();
395 }
396}
397
064af421 398static int
1e86ae6f
BP
399add_bridge(struct ovsdb_idl *idl, const struct ovsrec_open_vswitch *ovs,
400 const char *br_name)
064af421 401{
9852694f
JP
402 struct ovsrec_bridge *br;
403 struct ovsrec_port *port;
404 struct ovsrec_interface *iface;
1e86ae6f 405 struct ovsdb_idl_txn *txn;
9852694f
JP
406
407 if (find_bridge(ovs, br_name)) {
064af421
BP
408 VLOG_WARN("addbr %s: bridge %s exists", br_name, br_name);
409 return EEXIST;
410 } else if (netdev_exists(br_name)) {
9852694f
JP
411 size_t i;
412
413 for (i = 0; i < ovs->n_bridges; i++) {
414 size_t j;
415 struct ovsrec_bridge *br_cfg = ovs->bridges[i];
416
417 for (j = 0; j < br_cfg->n_ports; j++) {
418 if (port_is_fake_bridge(br_cfg->ports[j])) {
419 VLOG_WARN("addbr %s: %s exists as a fake bridge",
420 br_name, br_name);
421 return 0;
422 }
423 }
064af421 424 }
9852694f
JP
425
426 VLOG_WARN("addbr %s: cannot create bridge %s because a network "
427 "device named %s already exists",
428 br_name, br_name, br_name);
429 return EEXIST;
064af421
BP
430 }
431
1e86ae6f
BP
432 txn = ovsdb_idl_txn_create(idl);
433
e1c0e2d1 434 ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: addbr %s", br_name);
b959290b 435
9852694f
JP
436 iface = ovsrec_interface_insert(txn_from_openvswitch(ovs));
437 ovsrec_interface_set_name(iface, br_name);
438
439 port = ovsrec_port_insert(txn_from_openvswitch(ovs));
440 ovsrec_port_set_name(port, br_name);
441 ovsrec_port_set_interfaces(port, &iface, 1);
d295e8e9 442
9852694f
JP
443 br = ovsrec_bridge_insert(txn_from_openvswitch(ovs));
444 ovsrec_bridge_set_name(br, br_name);
445 ovsrec_bridge_set_ports(br, &port, 1);
d295e8e9 446
9852694f
JP
447 ovs_insert_bridge(ovs, br);
448
1e86ae6f 449 return commit_txn(txn, true);
064af421
BP
450}
451
9852694f 452static void
d295e8e9 453add_port(const struct ovsrec_open_vswitch *ovs,
9852694f
JP
454 const struct ovsrec_bridge *br, const char *port_name)
455{
456 struct ovsrec_interface *iface;
457 struct ovsrec_port *port;
458 struct ovsrec_port **ports;
459 size_t i;
460
461 /* xxx Check conflicts? */
462 iface = ovsrec_interface_insert(txn_from_openvswitch(ovs));
463 ovsrec_interface_set_name(iface, port_name);
464
465 port = ovsrec_port_insert(txn_from_openvswitch(ovs));
466 ovsrec_port_set_name(port, port_name);
467 ovsrec_port_set_interfaces(port, &iface, 1);
468
469 ports = xmalloc(sizeof *br->ports * (br->n_ports + 1));
470 for (i = 0; i < br->n_ports; i++) {
471 ports[i] = br->ports[i];
472 }
473 ports[br->n_ports] = port;
474 ovsrec_bridge_set_ports(br, ports, br->n_ports + 1);
475 free(ports);
476}
477
4b3a0009
BP
478/* Deletes 'port' from 'br'.
479 *
480 * After calling this function, 'port' must not be referenced again. */
9852694f 481static void
4b3a0009 482del_port(const struct ovsrec_bridge *br, const struct ovsrec_port *port)
9852694f 483{
4b3a0009
BP
484 struct ovsrec_port **ports;
485 size_t i, n;
486
487 /* Remove 'port' from the bridge's list of ports. */
488 ports = xmalloc(sizeof *br->ports * br->n_ports);
489 for (i = n = 0; i < br->n_ports; i++) {
490 if (br->ports[i] != port) {
491 ports[n++] = br->ports[i];
492 }
493 }
494 ovsrec_bridge_set_ports(br, ports, n);
495 free(ports);
4b3a0009
BP
496}
497
498/* Delete 'iface' from 'port' (which must be within 'br'). If 'iface' was
499 * 'port''s only interface, delete 'port' from 'br' also.
500 *
501 * After calling this function, 'iface' must not be referenced again. */
502static void
503del_interface(const struct ovsrec_bridge *br,
504 const struct ovsrec_port *port,
505 const struct ovsrec_interface *iface)
506{
507 if (port->n_interfaces == 1) {
508 del_port(br, port);
509 } else {
510 struct ovsrec_interface **ifaces;
511 size_t i, n;
512
513 ifaces = xmalloc(sizeof *port->interfaces * port->n_interfaces);
514 for (i = n = 0; i < port->n_interfaces; i++) {
515 if (port->interfaces[i] != iface) {
516 ifaces[n++] = port->interfaces[i];
517 }
518 }
519 ovsrec_port_set_interfaces(port, ifaces, n);
520 free(ifaces);
4b3a0009
BP
521 }
522}
523
524/* Find and return a port within 'br' named 'port_name'. */
525static const struct ovsrec_port *
526find_port(const struct ovsrec_bridge *br, const char *port_name)
527{
528 size_t i;
9852694f
JP
529
530 for (i = 0; i < br->n_ports; i++) {
531 struct ovsrec_port *port = br->ports[i];
532 if (!strcmp(port_name, port->name)) {
4b3a0009 533 return port;
9852694f
JP
534 }
535 }
4b3a0009
BP
536 return NULL;
537}
9852694f 538
4b3a0009
BP
539/* Find and return an interface within 'br' named 'iface_name'. */
540static const struct ovsrec_interface *
541find_interface(const struct ovsrec_bridge *br, const char *iface_name,
542 struct ovsrec_port **portp)
543{
544 size_t i;
9852694f 545
4b3a0009
BP
546 for (i = 0; i < br->n_ports; i++) {
547 struct ovsrec_port *port = br->ports[i];
548 size_t j;
549
550 for (j = 0; j < port->n_interfaces; j++) {
551 struct ovsrec_interface *iface = port->interfaces[j];
552 if (!strcmp(iface->name, iface_name)) {
553 *portp = port;
554 return iface;
9852694f
JP
555 }
556 }
9852694f 557 }
4b3a0009
BP
558
559 *portp = NULL;
560 return NULL;
9852694f
JP
561}
562
1e86ae6f
BP
563static int
564del_bridge(struct ovsdb_idl *idl,
565 const struct ovsrec_open_vswitch *ovs, const char *br_name)
064af421 566{
9852694f
JP
567 struct ovsrec_bridge *br = find_bridge(ovs, br_name);
568 struct ovsrec_bridge **bridges;
1e86ae6f 569 struct ovsdb_idl_txn *txn;
9852694f
JP
570 size_t i, n;
571
572 if (!br) {
064af421
BP
573 VLOG_WARN("delbr %s: no bridge named %s", br_name, br_name);
574 return ENXIO;
575 }
576
1e86ae6f
BP
577 txn = ovsdb_idl_txn_create(idl);
578
e1c0e2d1 579 ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: delbr %s", br_name);
b959290b 580
4b3a0009 581 /* Remove 'br' from the vswitch's list of bridges. */
9852694f
JP
582 bridges = xmalloc(sizeof *ovs->bridges * ovs->n_bridges);
583 for (i = n = 0; i < ovs->n_bridges; i++) {
584 if (ovs->bridges[i] != br) {
585 bridges[n++] = ovs->bridges[i];
586 }
587 }
588 ovsrec_open_vswitch_set_bridges(ovs, bridges, n);
589 free(bridges);
590
1e86ae6f 591 return commit_txn(txn, true);
064af421
BP
592}
593
594static int
595parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name,
3c303e5f 596 const char **port_name, uint64_t *count, uint64_t *skip)
064af421
BP
597{
598 static const struct nl_policy policy[] = {
7f42c1d7 599 [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING, .optional = true },
064af421 600 [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING, .optional = true },
3c303e5f
BP
601 [BRC_GENL_A_FDB_COUNT] = { .type = NL_A_U64, .optional = true },
602 [BRC_GENL_A_FDB_SKIP] = { .type = NL_A_U64, .optional = true },
064af421
BP
603 };
604 struct nlattr *attrs[ARRAY_SIZE(policy)];
605
606 if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN, policy,
607 attrs, ARRAY_SIZE(policy))
7f42c1d7 608 || (br_name && !attrs[BRC_GENL_A_DP_NAME])
3c303e5f
BP
609 || (port_name && !attrs[BRC_GENL_A_PORT_NAME])
610 || (count && !attrs[BRC_GENL_A_FDB_COUNT])
611 || (skip && !attrs[BRC_GENL_A_FDB_SKIP])) {
064af421
BP
612 return EINVAL;
613 }
614
615 *seq = ((struct nlmsghdr *) buffer->data)->nlmsg_seq;
7f42c1d7
BP
616 if (br_name) {
617 *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]);
618 }
064af421
BP
619 if (port_name) {
620 *port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]);
621 }
3c303e5f
BP
622 if (count) {
623 *count = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_COUNT]);
624 }
625 if (skip) {
626 *skip = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_SKIP]);
627 }
064af421
BP
628 return 0;
629}
630
41e754bc
BP
631/* Composes and returns a reply to a request made by the datapath with Netlink
632 * sequence number 'seq' and error code 'error'. The caller may add additional
633 * attributes to the message, then it may send it with send_reply(). */
634static struct ofpbuf *
635compose_reply(uint32_t seq, int error)
064af421 636{
41e754bc 637 struct ofpbuf *reply = ofpbuf_new(4096);
69123704 638 nl_msg_put_genlmsghdr(reply, 32, brc_family, NLM_F_REQUEST,
064af421 639 BRC_GENL_C_DP_RESULT, 1);
41e754bc
BP
640 ((struct nlmsghdr *) reply->data)->nlmsg_seq = seq;
641 nl_msg_put_u32(reply, BRC_GENL_A_ERR_CODE, error);
642 return reply;
643}
064af421 644
41e754bc
BP
645/* Sends 'reply' to the datapath and frees it. */
646static void
647send_reply(struct ofpbuf *reply)
648{
649 int retval = nl_sock_send(brc_sock, reply, false);
064af421
BP
650 if (retval) {
651 VLOG_WARN_RL(&rl, "replying to brcompat request: %s",
652 strerror(retval));
653 }
41e754bc
BP
654 ofpbuf_delete(reply);
655}
656
/* Sends a reply that carries nothing but the error code 'error', in answer
 * to the datapath request with Netlink sequence number 'seq'. */
static void
send_simple_reply(uint32_t seq, int error)
{
    struct ofpbuf *reply = compose_reply(seq, error);

    send_reply(reply);
}
664
665static int
1e86ae6f 666handle_bridge_cmd(struct ovsdb_idl *idl,
d295e8e9 667 const struct ovsrec_open_vswitch *ovs,
9852694f 668 struct ofpbuf *buffer, bool add)
064af421
BP
669{
670 const char *br_name;
671 uint32_t seq;
672 int error;
673
3c303e5f 674 error = parse_command(buffer, &seq, &br_name, NULL, NULL, NULL);
064af421 675 if (!error) {
1e86ae6f
BP
676 int retval;
677
678 do {
679 retval = (add ? add_bridge : del_bridge)(idl, ovs, br_name);
680 VLOG_INFO_RL(&rl, "%sbr %s: %s",
681 add ? "add" : "del", br_name, strerror(retval));
682 } while (retval == EAGAIN);
683
41e754bc 684 send_simple_reply(seq, error);
064af421
BP
685 }
686 return error;
687}
688
689static const struct nl_policy brc_port_policy[] = {
690 [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING },
691 [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING },
692};
693
064af421 694static int
1e86ae6f
BP
695handle_port_cmd(struct ovsdb_idl *idl,
696 const struct ovsrec_open_vswitch *ovs,
9852694f 697 struct ofpbuf *buffer, bool add)
064af421
BP
698{
699 const char *cmd_name = add ? "add-if" : "del-if";
700 const char *br_name, *port_name;
701 uint32_t seq;
702 int error;
703
3c303e5f 704 error = parse_command(buffer, &seq, &br_name, &port_name, NULL, NULL);
064af421 705 if (!error) {
9852694f
JP
706 struct ovsrec_bridge *br = find_bridge(ovs, br_name);
707
708 if (!br) {
064af421
BP
709 VLOG_WARN("%s %s %s: no bridge named %s",
710 cmd_name, br_name, port_name, br_name);
711 error = EINVAL;
712 } else if (!netdev_exists(port_name)) {
713 VLOG_WARN("%s %s %s: no network device named %s",
714 cmd_name, br_name, port_name, port_name);
715 error = EINVAL;
716 } else {
1e86ae6f
BP
717 do {
718 struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(idl);
b959290b 719
1e86ae6f 720 if (add) {
e1c0e2d1
BP
721 ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: add-if %s",
722 port_name);
1e86ae6f
BP
723 add_port(ovs, br, port_name);
724 } else {
4b3a0009
BP
725 const struct ovsrec_port *port = find_port(br, port_name);
726 if (port) {
727 ovsdb_idl_txn_add_comment(txn,
728 "ovs-brcompatd: del-if %s",
729 port_name);
9ad73195 730 del_port(br, port);
4b3a0009 731 }
1e86ae6f 732 }
b959290b 733
1e86ae6f 734 error = commit_txn(txn, true);
e569fae6 735 VLOG_INFO_RL(&rl, "%s %s %s: %s",
1e86ae6f
BP
736 cmd_name, br_name, port_name, strerror(error));
737 } while (error == EAGAIN);
064af421 738 }
41e754bc 739 send_simple_reply(seq, error);
064af421
BP
740 }
741
742 return error;
743}
744
9852694f
JP
745/* The caller is responsible for freeing '*ovs_name' if the call is
746 * successful. */
ae1281cf 747static int
9852694f
JP
748linux_bridge_to_ovs_bridge(const struct ovsrec_open_vswitch *ovs,
749 const char *linux_name,
750 const struct ovsrec_bridge **ovs_bridge,
751 int *br_vlan)
ae1281cf 752{
9852694f
JP
753 *ovs_bridge = find_bridge(ovs, linux_name);
754 if (*ovs_bridge) {
ae1281cf 755 /* Bridge name is the same. We are interested in VLAN 0. */
ae1281cf
BP
756 *br_vlan = 0;
757 return 0;
758 } else {
9852694f
JP
759 /* No such Open vSwitch bridge 'linux_name', but there might be an
760 * internal port named 'linux_name' on some other bridge
ae1281cf 761 * 'ovs_bridge'. If so then we are interested in the VLAN assigned to
9852694f
JP
762 * port 'linux_name' on the bridge named 'ovs_bridge'. */
763 size_t i, j;
764
765 for (i = 0; i < ovs->n_bridges; i++) {
766 const struct ovsrec_bridge *br = ovs->bridges[i];
767
768 for (j = 0; j < br->n_ports; j++) {
769 const struct ovsrec_port *port = br->ports[j];
770
771 if (!strcmp(port->name, linux_name)) {
772 *ovs_bridge = br;
773 *br_vlan = port->n_tag ? *port->tag : -1;
774 return 0;
775 }
776 }
ae1281cf 777
ae1281cf 778 }
9852694f 779 return ENODEV;
ae1281cf
BP
780 }
781}
782
3c303e5f 783static int
9852694f
JP
784handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs,
785 struct ofpbuf *buffer)
3c303e5f
BP
786{
787 /* This structure is copied directly from the Linux 2.6.30 header files.
788 * It would be more straightforward to #include <linux/if_bridge.h>, but
789 * the 'port_hi' member was only introduced in Linux 2.6.26 and so systems
790 * with old header files won't have it. */
791 struct __fdb_entry {
792 __u8 mac_addr[6];
793 __u8 port_no;
794 __u8 is_local;
795 __u32 ageing_timer_value;
796 __u8 port_hi;
797 __u8 pad0;
798 __u16 unused;
799 };
800
801 struct mac {
802 uint8_t addr[6];
803 };
804 struct mac *local_macs;
805 int n_local_macs;
806 int i;
807
c735214e
BP
808 /* Impedance matching between the vswitchd and Linux kernel notions of what
809 * a bridge is. The kernel only handles a single VLAN per bridge, but
810 * vswitchd can deal with all the VLANs on a single bridge. We have to
811 * pretend that the former is the case even though the latter is the
812 * implementation. */
9852694f
JP
813 const char *linux_name; /* Name used by brctl. */
814 const struct ovsrec_bridge *ovs_bridge; /* Bridge used by ovs-vswitchd. */
c735214e 815 int br_vlan; /* VLAN tag. */
96ca8c29 816 struct sset ifaces;
c735214e 817
3c303e5f 818 struct ofpbuf query_data;
96ca8c29 819 const char *iface_name;
41e754bc 820 struct ofpbuf *reply;
3c303e5f
BP
821 char *unixctl_command;
822 uint64_t count, skip;
3c303e5f
BP
823 char *output;
824 char *save_ptr;
825 uint32_t seq;
826 int error;
827
828 /* Parse the command received from brcompat_mod. */
9852694f 829 error = parse_command(buffer, &seq, &linux_name, NULL, &count, &skip);
3c303e5f
BP
830 if (error) {
831 return error;
832 }
833
c735214e 834 /* Figure out vswitchd bridge and VLAN. */
d295e8e9 835 error = linux_bridge_to_ovs_bridge(ovs, linux_name,
9852694f 836 &ovs_bridge, &br_vlan);
ae1281cf
BP
837 if (error) {
838 send_simple_reply(seq, error);
839 return error;
c735214e
BP
840 }
841
3c303e5f 842 /* Fetch the forwarding database using ovs-appctl. */
9852694f 843 unixctl_command = xasprintf("fdb/show %s", ovs_bridge->name);
3c303e5f
BP
844 error = execute_appctl_command(unixctl_command, &output);
845 free(unixctl_command);
846 if (error) {
41e754bc 847 send_simple_reply(seq, error);
3c303e5f
BP
848 return error;
849 }
850
851 /* Fetch the MAC address for each interface on the bridge, so that we can
852 * fill in the is_local field in the response. */
96ca8c29 853 sset_init(&ifaces);
c735214e 854 get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan);
96ca8c29 855 local_macs = xmalloc(sset_count(&ifaces) * sizeof *local_macs);
3c303e5f 856 n_local_macs = 0;
96ca8c29 857 SSET_FOR_EACH (iface_name, &ifaces) {
3c303e5f 858 struct mac *mac = &local_macs[n_local_macs];
07c318f4
BP
859 struct netdev *netdev;
860
149f577a 861 error = netdev_open_default(iface_name, &netdev);
4869f1b1 862 if (!error) {
07c318f4
BP
863 if (!netdev_get_etheraddr(netdev, mac->addr)) {
864 n_local_macs++;
865 }
866 netdev_close(netdev);
3c303e5f
BP
867 }
868 }
96ca8c29 869 sset_destroy(&ifaces);
3c303e5f
BP
870
871 /* Parse the response from ovs-appctl and convert it to binary format to
872 * pass back to the kernel. */
873 ofpbuf_init(&query_data, sizeof(struct __fdb_entry) * 8);
874 save_ptr = NULL;
875 strtok_r(output, "\n", &save_ptr); /* Skip header line. */
876 while (count > 0) {
877 struct __fdb_entry *entry;
878 int port, vlan, age;
879 uint8_t mac[ETH_ADDR_LEN];
880 char *line;
881 bool is_local;
882
883 line = strtok_r(NULL, "\n", &save_ptr);
884 if (!line) {
885 break;
886 }
887
888 if (sscanf(line, "%d %d "ETH_ADDR_SCAN_FMT" %d",
889 &port, &vlan, ETH_ADDR_SCAN_ARGS(mac), &age)
890 != 2 + ETH_ADDR_SCAN_COUNT + 1) {
db5ce514 891 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3c303e5f
BP
892 VLOG_INFO_RL(&rl, "fdb/show output has invalid format: %s", line);
893 continue;
894 }
895
c735214e
BP
896 if (vlan != br_vlan) {
897 continue;
898 }
899
3c303e5f
BP
900 if (skip > 0) {
901 skip--;
902 continue;
903 }
904
905 /* Is this the MAC address of an interface on the bridge? */
906 is_local = false;
907 for (i = 0; i < n_local_macs; i++) {
908 if (eth_addr_equals(local_macs[i].addr, mac)) {
909 is_local = true;
910 break;
911 }
912 }
913
914 entry = ofpbuf_put_uninit(&query_data, sizeof *entry);
915 memcpy(entry->mac_addr, mac, ETH_ADDR_LEN);
916 entry->port_no = port & 0xff;
917 entry->is_local = is_local;
918 entry->ageing_timer_value = age * HZ;
919 entry->port_hi = (port & 0xff00) >> 8;
920 entry->pad0 = 0;
921 entry->unused = 0;
922 count--;
923 }
924 free(output);
925
41e754bc
BP
926 /* Compose and send reply to datapath. */
927 reply = compose_reply(seq, 0);
928 nl_msg_put_unspec(reply, BRC_GENL_A_FDB_DATA,
929 query_data.data, query_data.size);
930 send_reply(reply);
931
932 /* Free memory. */
3c303e5f 933 ofpbuf_uninit(&query_data);
05edc34c 934 free(local_macs);
3c303e5f
BP
935
936 return 0;
937}
938
db322751 939static void
96ca8c29 940send_ifindex_reply(uint32_t seq, struct sset *ifaces)
db322751
BP
941{
942 struct ofpbuf *reply;
943 const char *iface;
944 size_t n_indices;
945 int *indices;
db322751
BP
946
947 /* Convert 'ifaces' into ifindexes. */
948 n_indices = 0;
96ca8c29
BP
949 indices = xmalloc(sset_count(ifaces) * sizeof *indices);
950 SSET_FOR_EACH (iface, ifaces) {
db322751
BP
951 int ifindex = if_nametoindex(iface);
952 if (ifindex) {
953 indices[n_indices++] = ifindex;
954 }
955 }
956
957 /* Compose and send reply. */
958 reply = compose_reply(seq, 0);
959 nl_msg_put_unspec(reply, BRC_GENL_A_IFINDEXES,
960 indices, n_indices * sizeof *indices);
961 send_reply(reply);
962
963 /* Free memory. */
964 free(indices);
965}
966
967static int
9852694f
JP
968handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs,
969 struct ofpbuf *buffer)
db322751 970{
96ca8c29 971 struct sset bridges;
9852694f 972 size_t i, j;
db322751
BP
973
974 uint32_t seq;
975
976 int error;
977
978 /* Parse Netlink command.
979 *
980 * The command doesn't actually have any arguments, but we need the
981 * sequence number to send the reply. */
982 error = parse_command(buffer, &seq, NULL, NULL, NULL, NULL);
983 if (error) {
984 return error;
985 }
986
987 /* Get all the real bridges and all the fake ones. */
96ca8c29 988 sset_init(&bridges);
9852694f
JP
989 for (i = 0; i < ovs->n_bridges; i++) {
990 const struct ovsrec_bridge *br = ovs->bridges[i];
db322751 991
96ca8c29 992 sset_add(&bridges, br->name);
9852694f
JP
993 for (j = 0; j < br->n_ports; j++) {
994 const struct ovsrec_port *port = br->ports[j];
995
996 if (port->fake_bridge) {
96ca8c29 997 sset_add(&bridges, port->name);
db322751
BP
998 }
999 }
db322751
BP
1000 }
1001
1002 send_ifindex_reply(seq, &bridges);
96ca8c29 1003 sset_destroy(&bridges);
db322751
BP
1004
1005 return 0;
1006}
1007
1008static int
9852694f
JP
1009handle_get_ports_cmd(const struct ovsrec_open_vswitch *ovs,
1010 struct ofpbuf *buffer)
db322751
BP
1011{
1012 uint32_t seq;
1013
9852694f
JP
1014 const char *linux_name;
1015 const struct ovsrec_bridge *ovs_bridge;
db322751
BP
1016 int br_vlan;
1017
96ca8c29 1018 struct sset ports;
db322751
BP
1019
1020 int error;
1021
1022 /* Parse Netlink command. */
9852694f 1023 error = parse_command(buffer, &seq, &linux_name, NULL, NULL, NULL);
db322751
BP
1024 if (error) {
1025 return error;
1026 }
1027
d295e8e9 1028 error = linux_bridge_to_ovs_bridge(ovs, linux_name,
9852694f 1029 &ovs_bridge, &br_vlan);
db322751
BP
1030 if (error) {
1031 send_simple_reply(seq, error);
1032 return error;
1033 }
1034
96ca8c29 1035 sset_init(&ports);
db322751 1036 get_bridge_ports(ovs_bridge, &ports, br_vlan);
96ca8c29 1037 sset_find_and_delete(&ports, linux_name);
db322751 1038 send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */
96ca8c29 1039 sset_destroy(&ports);
db322751 1040
db322751
BP
1041 return 0;
1042}
1043
1cec7ca1
BP
1044static struct ofpbuf *
1045brc_recv_update__(void)
1046{
1047 for (;;) {
1048 struct ofpbuf *buffer;
1049 int retval;
1050
1051 retval = nl_sock_recv(brc_sock, &buffer, false);
1052 switch (retval) {
1053 case 0:
1054 if (nl_msg_nlmsgerr(buffer, NULL)
1055 || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE) {
1056 break;
1057 }
1058 return buffer;
1059
1060 case ENOBUFS:
1061 break;
1062
1063 case EAGAIN:
1064 return NULL;
1065
1066 default:
1067 VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval));
1068 return NULL;
1069 }
1070 ofpbuf_delete(buffer);
1071 }
1072}
1073
9852694f 1074static void
1e86ae6f 1075brc_recv_update(struct ovsdb_idl *idl)
064af421 1076{
064af421
BP
1077 struct ofpbuf *buffer;
1078 struct genlmsghdr *genlmsghdr;
1e86ae6f 1079 const struct ovsrec_open_vswitch *ovs;
064af421 1080
1cec7ca1
BP
1081 buffer = brc_recv_update__();
1082 if (!buffer) {
9852694f 1083 return;
064af421
BP
1084 }
1085
1086 genlmsghdr = nl_msg_genlmsghdr(buffer);
1087 if (!genlmsghdr) {
1088 VLOG_WARN_RL(&rl, "received packet too short for generic NetLink");
1089 goto error;
1090 }
1091
1092 if (nl_msg_nlmsghdr(buffer)->nlmsg_type != brc_family) {
1093 VLOG_DBG_RL(&rl, "received type (%"PRIu16") != brcompat family (%d)",
1094 nl_msg_nlmsghdr(buffer)->nlmsg_type, brc_family);
1095 goto error;
1096 }
1097
1e86ae6f
BP
1098 /* Get the Open vSwitch configuration. Just drop the request on the floor
1099 * if a valid configuration doesn't exist. (We could check this earlier,
1100 * but we want to drain pending Netlink messages even when there is no Open
1101 * vSwitch configuration.) */
1102 ovs = ovsrec_open_vswitch_first(idl);
5ff22a06
JP
1103 if (!ovs) {
1104 VLOG_WARN_RL(&rl, "could not find valid configuration to update");
1105 goto error;
1106 }
1107
064af421
BP
1108 switch (genlmsghdr->cmd) {
1109 case BRC_GENL_C_DP_ADD:
1e86ae6f 1110 handle_bridge_cmd(idl, ovs, buffer, true);
064af421
BP
1111 break;
1112
1113 case BRC_GENL_C_DP_DEL:
1e86ae6f 1114 handle_bridge_cmd(idl, ovs, buffer, false);
064af421
BP
1115 break;
1116
1117 case BRC_GENL_C_PORT_ADD:
1e86ae6f 1118 handle_port_cmd(idl, ovs, buffer, true);
064af421
BP
1119 break;
1120
1121 case BRC_GENL_C_PORT_DEL:
1e86ae6f 1122 handle_port_cmd(idl, ovs, buffer, false);
064af421
BP
1123 break;
1124
3c303e5f 1125 case BRC_GENL_C_FDB_QUERY:
9852694f 1126 handle_fdb_query_cmd(ovs, buffer);
3c303e5f
BP
1127 break;
1128
db322751 1129 case BRC_GENL_C_GET_BRIDGES:
9852694f 1130 handle_get_bridges_cmd(ovs, buffer);
db322751
BP
1131 break;
1132
1133 case BRC_GENL_C_GET_PORTS:
9852694f 1134 handle_get_ports_cmd(ovs, buffer);
db322751
BP
1135 break;
1136
064af421 1137 default:
9852694f 1138 VLOG_WARN_RL(&rl, "received unknown brc netlink command: %d\n",
1e86ae6f 1139 genlmsghdr->cmd);
9852694f 1140 break;
064af421
BP
1141 }
1142
064af421
BP
1143error:
1144 ofpbuf_delete(buffer);
9852694f 1145 return;
064af421
BP
1146}
1147
1148/* Check for interface configuration changes announced through RTNL. */
1149static void
1e86ae6f
BP
1150rtnl_recv_update(struct ovsdb_idl *idl,
1151 const struct ovsrec_open_vswitch *ovs)
064af421
BP
1152{
1153 struct ofpbuf *buf;
1154
1155 int error = nl_sock_recv(rtnl_sock, &buf, false);
1156 if (error == EAGAIN) {
1157 /* Nothing to do. */
1158 } else if (error == ENOBUFS) {
1159 VLOG_WARN_RL(&rl, "network monitor socket overflowed");
1160 } else if (error) {
d295e8e9 1161 VLOG_WARN_RL(&rl, "error on network monitor socket: %s",
064af421
BP
1162 strerror(error));
1163 } else {
1164 struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
1165 struct nlmsghdr *nlh;
1166 struct ifinfomsg *iim;
1167
1168 nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
1169 iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim);
1170 if (!iim) {
1171 VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)");
1172 ofpbuf_delete(buf);
1173 return;
d295e8e9
JP
1174 }
1175
064af421
BP
1176 if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
1177 rtnlgrp_link_policy,
1178 attrs, ARRAY_SIZE(rtnlgrp_link_policy))) {
1179 VLOG_WARN_RL(&rl,"received bad rtnl message (policy)");
1180 ofpbuf_delete(buf);
1181 return;
1182 }
1183 if (nlh->nlmsg_type == RTM_DELLINK && attrs[IFLA_MASTER]) {
1184 const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]);
1185 char br_name[IFNAMSIZ];
1186 uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]);
064af421
BP
1187
1188 if (!if_indextoname(br_idx, br_name)) {
1189 ofpbuf_delete(buf);
1190 return;
1191 }
1192
5bfc0cd3 1193 if (!netdev_exists(port_name)) {
38553381 1194 /* Network device is really gone. */
1e86ae6f 1195 struct ovsdb_idl_txn *txn;
4b3a0009
BP
1196 const struct ovsrec_interface *iface;
1197 struct ovsrec_port *port;
1e86ae6f 1198 struct ovsrec_bridge *br;
694f2679 1199
38553381
BP
1200 VLOG_INFO("network device %s destroyed, "
1201 "removing from bridge %s", port_name, br_name);
694f2679 1202
1e86ae6f 1203 br = find_bridge(ovs, br_name);
9c8149dc 1204 if (!br) {
d295e8e9 1205 VLOG_WARN("no bridge named %s from which to remove %s",
9c8149dc
JP
1206 br_name, port_name);
1207 ofpbuf_delete(buf);
1208 return;
38553381 1209 }
9c8149dc 1210
1e86ae6f 1211 txn = ovsdb_idl_txn_create(idl);
b959290b 1212
4b3a0009
BP
1213 iface = find_interface(br, port_name, &port);
1214 if (iface) {
1215 del_interface(br, port, iface);
1216 ovsdb_idl_txn_add_comment(txn,
1217 "ovs-brcompatd: destroy port %s",
1218 port_name);
1219 }
1220
1e86ae6f 1221 commit_txn(txn, false);
38553381
BP
1222 } else {
1223 /* A network device by that name exists even though the kernel
1224 * told us it had disappeared. Probably, what happened was
1225 * this:
1226 *
1227 * 1. Device destroyed.
1228 * 2. Notification sent to us.
1229 * 3. New device created with same name as old one.
1230 * 4. ovs-brcompatd notified, removes device from bridge.
1231 *
1232 * There's no a priori reason that in this situation that the
1233 * new device with the same name should remain in the bridge;
1234 * on the contrary, that would be unexpected. *But* there is
1235 * one important situation where, if we do this, bad things
1236 * happen. This is the case of XenServer Tools version 5.0.0,
1237 * which on boot of a Windows VM cause something like this to
1238 * happen on the Xen host:
1239 *
1240 * i. Create tap1.0 and vif1.0.
1241 * ii. Delete tap1.0.
1242 * iii. Delete vif1.0.
1243 * iv. Re-create vif1.0.
1244 *
1245 * (XenServer Tools 5.5.0 does not exhibit this behavior, and
1246 * neither does a VM without Tools installed at all.@.)
1247 *
1248 * Steps iii and iv happen within a few seconds of each other.
1249 * Step iv causes /etc/xensource/scripts/vif to run, which in
1250 * turn calls ovs-cfg-mod to add the new device to the bridge.
1251 * If step iv happens after step 4 (in our first list of
1252 * steps), then all is well, but if it happens between 3 and 4
1253 * (which can easily happen if ovs-brcompatd has to wait to
1254 * lock the configuration file), then we will remove the new
1255 * incarnation from the bridge instead of the old one!
1256 *
1257 * So, to avoid this problem, we do nothing here. This is
1258 * strictly incorrect except for this one particular case, and
1259 * perhaps that will bite us someday. If that happens, then we
1260 * will have to somehow track network devices by ifindex, since
1261 * a new device will have a new ifindex even if it has the same
1262 * name as an old device.
1263 */
1264 VLOG_INFO("kernel reported network device %s removed but "
1265 "a device by that name exists (XS Tools 5.0.0?)",
1266 port_name);
064af421 1267 }
064af421
BP
1268 }
1269 ofpbuf_delete(buf);
1270 }
1271}
1272
1273int
1274main(int argc, char *argv[])
1275{
480ce8ab 1276 extern struct vlog_module VLM_reconnect;
064af421 1277 struct unixctl_server *unixctl;
9852694f
JP
1278 const char *remote;
1279 struct ovsdb_idl *idl;
064af421
BP
1280 int retval;
1281
40f0707c 1282 proctitle_init(argc, argv);
064af421 1283 set_program_name(argv[0]);
480ce8ab
BP
1284 vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
1285 vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN);
9852694f
JP
1286
1287 remote = parse_options(argc, argv);
064af421
BP
1288 signal(SIGPIPE, SIG_IGN);
1289 process_init();
bd76d25d 1290 ovsrec_init();
064af421 1291
95440284 1292 daemonize_start();
064af421
BP
1293
1294 retval = unixctl_server_create(NULL, &unixctl);
1295 if (retval) {
4d12270a 1296 exit(EXIT_FAILURE);
064af421
BP
1297 }
1298
1299 if (brc_open(&brc_sock)) {
279c9e03
BP
1300 VLOG_FATAL("could not open brcompat socket. Check "
1301 "\"brcompat\" kernel module.");
064af421
BP
1302 }
1303
1304 if (prune_timeout) {
cceb11f5
BP
1305 int error;
1306
1307 error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock);
1308 if (error) {
279c9e03
BP
1309 VLOG_FATAL("could not create rtnetlink socket (%s)",
1310 strerror(error));
cceb11f5
BP
1311 }
1312
1313 error = nl_sock_join_mcgroup(rtnl_sock, RTNLGRP_LINK);
1314 if (error) {
279c9e03
BP
1315 VLOG_FATAL("could not join RTNLGRP_LINK multicast group (%s)",
1316 strerror(error));
064af421
BP
1317 }
1318 }
1319
95440284
BP
1320 daemonize_complete();
1321
ef73f86c 1322 idl = ovsdb_idl_create(remote, &ovsrec_idl_class, true);
064af421
BP
1323
1324 for (;;) {
9852694f 1325 const struct ovsrec_open_vswitch *ovs;
9852694f
JP
1326
1327 ovsdb_idl_run(idl);
1328
064af421 1329 unixctl_server_run(unixctl);
1e86ae6f 1330 brc_recv_update(idl);
5ff22a06 1331
1e86ae6f 1332 ovs = ovsrec_open_vswitch_first(idl);
f3d64521 1333 if (!ovs && ovsdb_idl_has_ever_connected(idl)) {
058fd2a2
BP
1334 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1335 VLOG_WARN_RL(&rl, "%s: database does not contain any Open vSwitch "
1336 "configuration", remote);
058fd2a2 1337 }
8b61709d 1338 netdev_run();
064af421
BP
1339
1340 /* If 'prune_timeout' is non-zero, we actively prune from the
d295e8e9
JP
1341 * configuration of port entries that are no longer valid. We
1342 * use two methods:
064af421
BP
1343 *
1344 * 1) The kernel explicitly notifies us of removed ports
1345 * through the RTNL messages.
1346 *
1347 * 2) We periodically check all ports associated with bridges
1348 * to see if they no longer exist.
1349 */
9c8149dc 1350 if (ovs && prune_timeout) {
1e86ae6f 1351 rtnl_recv_update(idl, ovs);
064af421
BP
1352 nl_sock_wait(rtnl_sock, POLLIN);
1353 poll_timer_wait(prune_timeout);
9852694f 1354 }
9852694f 1355
064af421
BP
1356
1357 nl_sock_wait(brc_sock, POLLIN);
9852694f 1358 ovsdb_idl_wait(idl);
064af421 1359 unixctl_server_wait(unixctl);
8b61709d 1360 netdev_wait();
064af421
BP
1361 poll_block();
1362 }
1363
9852694f
JP
1364 ovsdb_idl_destroy(idl);
1365
064af421
BP
1366 return 0;
1367}
1368
3c303e5f
BP
1369static void
1370validate_appctl_command(void)
1371{
1372 const char *p;
1373 int n;
1374
1375 n = 0;
1376 for (p = strchr(appctl_command, '%'); p; p = strchr(p + 2, '%')) {
1377 if (p[1] == '%') {
1378 /* Nothing to do. */
1379 } else if (p[1] == 's') {
1380 n++;
1381 } else {
279c9e03 1382 VLOG_FATAL("only '%%s' and '%%%%' allowed in --appctl-command");
3c303e5f
BP
1383 }
1384 }
1385 if (n != 1) {
279c9e03 1386 VLOG_FATAL("'%%s' must appear exactly once in --appctl-command");
3c303e5f
BP
1387 }
1388}
1389
9852694f 1390static const char *
064af421
BP
1391parse_options(int argc, char *argv[])
1392{
1393 enum {
064af421 1394 OPT_PRUNE_TIMEOUT,
3c303e5f 1395 OPT_APPCTL_COMMAND,
064af421 1396 VLOG_OPTION_ENUMS,
8274ae95
BP
1397 LEAK_CHECKER_OPTION_ENUMS,
1398 DAEMON_OPTION_ENUMS
064af421
BP
1399 };
1400 static struct option long_options[] = {
e3c17733
BP
1401 {"help", no_argument, NULL, 'h'},
1402 {"version", no_argument, NULL, 'V'},
1403 {"prune-timeout", required_argument, NULL, OPT_PRUNE_TIMEOUT},
1404 {"appctl-command", required_argument, NULL, OPT_APPCTL_COMMAND},
064af421
BP
1405 DAEMON_LONG_OPTIONS,
1406 VLOG_LONG_OPTIONS,
1407 LEAK_CHECKER_LONG_OPTIONS,
e3c17733 1408 {NULL, 0, NULL, 0},
064af421
BP
1409 };
1410 char *short_options = long_options_to_short_options(long_options);
064af421 1411
b43c6fe2 1412 appctl_command = xasprintf("%s/ovs-appctl %%s", ovs_bindir());
064af421
BP
1413 for (;;) {
1414 int c;
1415
1416 c = getopt_long(argc, argv, short_options, long_options, NULL);
1417 if (c == -1) {
1418 break;
1419 }
1420
1421 switch (c) {
1422 case 'H':
1423 case 'h':
1424 usage();
1425
1426 case 'V':
1427 OVS_PRINT_VERSION(0, 0);
1428 exit(EXIT_SUCCESS);
1429
064af421
BP
1430 case OPT_PRUNE_TIMEOUT:
1431 prune_timeout = atoi(optarg) * 1000;
1432 break;
1433
3c303e5f
BP
1434 case OPT_APPCTL_COMMAND:
1435 appctl_command = optarg;
064af421
BP
1436 break;
1437
1438 VLOG_OPTION_HANDLERS
1439 DAEMON_OPTION_HANDLERS
1440 LEAK_CHECKER_OPTION_HANDLERS
1441
1442 case '?':
1443 exit(EXIT_FAILURE);
1444
1445 default:
1446 abort();
1447 }
1448 }
1449 free(short_options);
1450
3c303e5f
BP
1451 validate_appctl_command();
1452
064af421
BP
1453 argc -= optind;
1454 argv += optind;
1455
1456 if (argc != 1) {
279c9e03
BP
1457 VLOG_FATAL("database socket is non-option argument; "
1458 "use --help for usage");
064af421
BP
1459 }
1460
9852694f 1461 return argv[0];
064af421
BP
1462}
1463
1464static void
1465usage(void)
1466{
1467 printf("%s: bridge compatibility front-end for ovs-vswitchd\n"
1468 "usage: %s [OPTIONS] CONFIG\n"
1469 "CONFIG is the configuration file used by ovs-vswitchd.\n",
1470 program_name, program_name);
1471 printf("\nConfiguration options:\n"
3c303e5f 1472 " --appctl-command=COMMAND shell command to run ovs-appctl\n"
064af421 1473 " --prune-timeout=SECS wait at most SECS before pruning ports\n"
064af421
BP
1474 );
1475 daemon_usage();
1476 vlog_usage();
1477 printf("\nOther options:\n"
1478 " -h, --help display this help message\n"
1479 " -V, --version display version information\n");
1480 leak_checker_usage();
3c303e5f 1481 printf("\nThe default appctl command is:\n%s\n", appctl_command);
064af421
BP
1482 exit(EXIT_SUCCESS);
1483}