]>
Commit | Line | Data |
---|---|---|
db5ce514 | 1 | /* Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks |
064af421 | 2 | * |
a14bc59f BP |
3 | * Licensed under the Apache License, Version 2.0 (the "License"); |
4 | * you may not use this file except in compliance with the License. | |
5 | * You may obtain a copy of the License at: | |
064af421 | 6 | * |
a14bc59f | 7 | * http://www.apache.org/licenses/LICENSE-2.0 |
064af421 | 8 | * |
a14bc59f BP |
9 | * Unless required by applicable law or agreed to in writing, software |
10 | * distributed under the License is distributed on an "AS IS" BASIS, | |
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | * See the License for the specific language governing permissions and | |
13 | * limitations under the License. | |
064af421 BP |
14 | */ |
15 | ||
16 | #include <config.h> | |
17 | ||
3c303e5f | 18 | #include <asm/param.h> |
064af421 BP |
19 | #include <assert.h> |
20 | #include <errno.h> | |
21 | #include <getopt.h> | |
22 | #include <inttypes.h> | |
23 | #include <limits.h> | |
24 | #include <net/if.h> | |
25 | #include <linux/genetlink.h> | |
26 | #include <linux/rtnetlink.h> | |
27 | #include <signal.h> | |
28 | #include <stdlib.h> | |
29 | #include <string.h> | |
30 | #include <sys/types.h> | |
31 | #include <sys/stat.h> | |
3c303e5f | 32 | #include <time.h> |
064af421 BP |
33 | #include <fcntl.h> |
34 | #include <unistd.h> | |
35 | ||
064af421 BP |
36 | #include "command-line.h" |
37 | #include "coverage.h" | |
38 | #include "daemon.h" | |
39 | #include "dirs.h" | |
3c303e5f | 40 | #include "dynamic-string.h" |
064af421 | 41 | #include "fatal-signal.h" |
1e86ae6f | 42 | #include "json.h" |
064af421 BP |
43 | #include "leak-checker.h" |
44 | #include "netdev.h" | |
45 | #include "netlink.h" | |
2fe27d5a | 46 | #include "netlink-socket.h" |
064af421 BP |
47 | #include "ofpbuf.h" |
48 | #include "openvswitch/brcompat-netlink.h" | |
9852694f | 49 | #include "ovsdb-idl.h" |
3c303e5f | 50 | #include "packets.h" |
064af421 BP |
51 | #include "poll-loop.h" |
52 | #include "process.h" | |
53 | #include "signals.h" | |
96ca8c29 | 54 | #include "sset.h" |
064af421 BP |
55 | #include "timeval.h" |
56 | #include "unixctl.h" | |
57 | #include "util.h" | |
5136ce49 | 58 | #include "vlog.h" |
9852694f | 59 | #include "vswitchd/vswitch-idl.h" |
064af421 | 60 | |
d98e6007 | 61 | VLOG_DEFINE_THIS_MODULE(brcompatd); |
064af421 BP |
62 | |
63 | ||
64 | /* XXX This daemon just hangs if the datapath module is unloaded and reloaded (rmmod/insmod).  Should it learn to reconnect? */ | |
65 | ||
66 | /* Actions to modify bridge compatibility configuration. */ | |
67 | enum bmc_action { | |
68 | BMC_ADD_DP, | |
69 | BMC_DEL_DP, | |
70 | BMC_ADD_PORT, | |
71 | BMC_DEL_PORT | |
72 | }; | |
73 | ||
9852694f | 74 | static const char *parse_options(int argc, char *argv[]); |
064af421 BP |
75 | static void usage(void) NO_RETURN; |
76 | ||
77 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); | |
78 | ||
d295e8e9 | 79 | /* Maximum number of milliseconds to wait before pruning port entries that |
064af421 BP |
80 | * no longer exist. If set to zero, ports are never pruned. */ |
81 | static int prune_timeout = 5000; | |
82 | ||
3c303e5f BP |
83 | /* Shell command to execute (via popen()) to send a control command to the |
84 | * running ovs-vswitchd process. The string must contain one instance of %s, | |
85 | * which is replaced by the control command. */ | |
86 | static char *appctl_command; | |
064af421 BP |
87 | |
88 | /* Netlink socket to listen for interface changes. */ | |
89 | static struct nl_sock *rtnl_sock; | |
90 | ||
91 | /* Netlink socket to bridge compatibility kernel module. */ | |
92 | static struct nl_sock *brc_sock; | |
93 | ||
94 | /* The Generic Netlink family number used for bridge compatibility. */ | |
95 | static int brc_family; | |
96 | ||
97 | static const struct nl_policy brc_multicast_policy[] = { | |
98 | [BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 } | |
99 | }; | |
100 | ||
101 | static const struct nl_policy rtnlgrp_link_policy[] = { | |
102 | [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, | |
103 | [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, | |
104 | }; | |
105 | ||
106 | static int | |
107 | lookup_brc_multicast_group(int *multicast_group) | |
108 | { | |
109 | struct nl_sock *sock; | |
110 | struct ofpbuf request, *reply; | |
111 | struct nlattr *attrs[ARRAY_SIZE(brc_multicast_policy)]; | |
112 | int retval; | |
113 | ||
cceb11f5 | 114 | retval = nl_sock_create(NETLINK_GENERIC, &sock); |
064af421 BP |
115 | if (retval) { |
116 | return retval; | |
117 | } | |
118 | ofpbuf_init(&request, 0); | |
69123704 | 119 | nl_msg_put_genlmsghdr(&request, 0, brc_family, |
064af421 BP |
120 | NLM_F_REQUEST, BRC_GENL_C_QUERY_MC, 1); |
121 | retval = nl_sock_transact(sock, &request, &reply); | |
122 | ofpbuf_uninit(&request); | |
123 | if (retval) { | |
124 | nl_sock_destroy(sock); | |
125 | return retval; | |
126 | } | |
127 | if (!nl_policy_parse(reply, NLMSG_HDRLEN + GENL_HDRLEN, | |
128 | brc_multicast_policy, attrs, | |
129 | ARRAY_SIZE(brc_multicast_policy))) { | |
130 | nl_sock_destroy(sock); | |
131 | ofpbuf_delete(reply); | |
132 | return EPROTO; | |
133 | } | |
134 | *multicast_group = nl_attr_get_u32(attrs[BRC_GENL_A_MC_GROUP]); | |
135 | nl_sock_destroy(sock); | |
136 | ofpbuf_delete(reply); | |
137 | ||
138 | return 0; | |
139 | } | |
140 | ||
141 | /* Opens a socket for brcompat notifications. Returns 0 if successful, | |
142 | * otherwise a positive errno value. */ | |
143 | static int | |
144 | brc_open(struct nl_sock **sock) | |
145 | { | |
146 | int multicast_group = 0; | |
147 | int retval; | |
148 | ||
149 | retval = nl_lookup_genl_family(BRC_GENL_FAMILY_NAME, &brc_family); | |
150 | if (retval) { | |
151 | return retval; | |
152 | } | |
153 | ||
154 | retval = lookup_brc_multicast_group(&multicast_group); | |
155 | if (retval) { | |
156 | return retval; | |
157 | } | |
158 | ||
cceb11f5 | 159 | retval = nl_sock_create(NETLINK_GENERIC, sock); |
064af421 BP |
160 | if (retval) { |
161 | return retval; | |
162 | } | |
163 | ||
cceb11f5 BP |
164 | retval = nl_sock_join_mcgroup(*sock, multicast_group); |
165 | if (retval) { | |
166 | nl_sock_destroy(*sock); | |
167 | *sock = NULL; | |
168 | } | |
169 | return retval; | |
064af421 BP |
170 | } |
171 | ||
172 | static const struct nl_policy brc_dp_policy[] = { | |
173 | [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, | |
174 | }; | |
175 | ||
9852694f JP |
176 | static struct ovsrec_bridge * |
177 | find_bridge(const struct ovsrec_open_vswitch *ovs, const char *br_name) | |
064af421 | 178 | { |
9852694f JP |
179 | size_t i; |
180 | ||
181 | for (i = 0; i < ovs->n_bridges; i++) { | |
182 | if (!strcmp(br_name, ovs->bridges[i]->name)) { | |
183 | return ovs->bridges[i]; | |
184 | } | |
185 | } | |
186 | ||
187 | return NULL; | |
064af421 BP |
188 | } |
189 | ||
3c303e5f BP |
190 | static int |
191 | execute_appctl_command(const char *unixctl_command, char **output) | |
192 | { | |
193 | char *stdout_log, *stderr_log; | |
194 | int error, status; | |
195 | char *argv[5]; | |
196 | ||
197 | argv[0] = "/bin/sh"; | |
198 | argv[1] = "-c"; | |
199 | argv[2] = xasprintf(appctl_command, unixctl_command); | |
200 | argv[3] = NULL; | |
201 | ||
202 | /* Run process and log status. */ | |
203 | error = process_run_capture(argv, &stdout_log, &stderr_log, &status); | |
204 | if (error) { | |
205 | VLOG_ERR("failed to execute %s command via ovs-appctl: %s", | |
206 | unixctl_command, strerror(error)); | |
207 | } else if (status) { | |
208 | char *msg = process_status_msg(status); | |
209 | VLOG_ERR("ovs-appctl exited with error (%s)", msg); | |
210 | free(msg); | |
211 | error = ECHILD; | |
212 | } | |
213 | ||
214 | /* Deal with stdout_log. */ | |
215 | if (output) { | |
216 | *output = stdout_log; | |
217 | } else { | |
218 | free(stdout_log); | |
219 | } | |
220 | ||
221 | /* Deal with stderr_log */ | |
222 | if (stderr_log && *stderr_log) { | |
223 | VLOG_INFO("ovs-appctl wrote to stderr:\n%s", stderr_log); | |
224 | } | |
225 | free(stderr_log); | |
226 | ||
227 | free(argv[2]); | |
228 | ||
229 | return error; | |
230 | } | |
231 | ||
14551cea | 232 | static void |
96ca8c29 | 233 | do_get_bridge_parts(const struct ovsrec_bridge *br, struct sset *parts, |
9852694f | 234 | int vlan, bool break_down_bonds) |
14551cea | 235 | { |
9852694f | 236 | size_t i, j; |
14551cea | 237 | |
9852694f JP |
238 | for (i = 0; i < br->n_ports; i++) { |
239 | const struct ovsrec_port *port = br->ports[i]; | |
240 | ||
c735214e | 241 | if (vlan >= 0) { |
9852694f | 242 | int port_vlan = port->n_tag ? *port->tag : 0; |
c735214e BP |
243 | if (vlan != port_vlan) { |
244 | continue; | |
245 | } | |
246 | } | |
9852694f JP |
247 | if (break_down_bonds) { |
248 | for (j = 0; j < port->n_interfaces; j++) { | |
249 | const struct ovsrec_interface *iface = port->interfaces[j]; | |
96ca8c29 | 250 | sset_add(parts, iface->name); |
9852694f | 251 | } |
14551cea | 252 | } else { |
96ca8c29 | 253 | sset_add(parts, port->name); |
14551cea BP |
254 | } |
255 | } | |
14551cea BP |
256 | } |
257 | ||
db322751 BP |
/* Adds to 'ifaces' the name of every interface on 'br', so that a port with
 * several interfaces contributes each interface rather than its own name.
 *
 * 'vlan' selects which ports are examined: a negative value means all ports,
 * otherwise only ports whose VLAN equals 'vlan' (a port with no tag counts as
 * VLAN 0). */
static void
get_bridge_ifaces(const struct ovsrec_bridge *br, struct sset *ifaces,
                  int vlan)
{
    const bool break_down_bonds = true;

    do_get_bridge_parts(br, ifaces, vlan, break_down_bonds);
}
271 | ||
/* Adds to 'ports' the name of every port on 'br'.  A port is reported under
 * its own name even if it has several interfaces.
 *
 * 'vlan' selects which ports are reported: a negative value means all ports,
 * otherwise only ports whose VLAN equals 'vlan' (a port with no tag counts as
 * VLAN 0). */
static void
get_bridge_ports(const struct ovsrec_bridge *br, struct sset *ports,
                 int vlan)
{
    const bool break_down_bonds = false;

    do_get_bridge_parts(br, ports, vlan, break_down_bonds);
}
284 | ||
9852694f JP |
285 | static struct ovsdb_idl_txn * |
286 | txn_from_openvswitch(const struct ovsrec_open_vswitch *ovs) | |
287 | { | |
288 | return ovsdb_idl_txn_get(&ovs->header_); | |
289 | } | |
290 | ||
291 | static bool | |
292 | port_is_fake_bridge(const struct ovsrec_port *port) | |
293 | { | |
294 | return (port->fake_bridge | |
295 | && port->tag | |
296 | && *port->tag >= 1 && *port->tag <= 4095); | |
297 | } | |
298 | ||
299 | static void | |
300 | ovs_insert_bridge(const struct ovsrec_open_vswitch *ovs, | |
301 | struct ovsrec_bridge *bridge) | |
302 | { | |
303 | struct ovsrec_bridge **bridges; | |
d295e8e9 | 304 | size_t i; |
9852694f JP |
305 | |
306 | bridges = xmalloc(sizeof *ovs->bridges * (ovs->n_bridges + 1)); | |
307 | for (i = 0; i < ovs->n_bridges; i++) { | |
308 | bridges[i] = ovs->bridges[i]; | |
309 | } | |
310 | bridges[ovs->n_bridges] = bridge; | |
311 | ovsrec_open_vswitch_set_bridges(ovs, bridges, ovs->n_bridges + 1); | |
312 | free(bridges); | |
d295e8e9 | 313 | } |
064af421 | 314 | |
1e86ae6f BP |
315 | static struct json * |
316 | where_uuid_equals(const struct uuid *uuid) | |
317 | { | |
318 | return | |
319 | json_array_create_1( | |
320 | json_array_create_3( | |
321 | json_string_create("_uuid"), | |
322 | json_string_create("=="), | |
323 | json_array_create_2( | |
324 | json_string_create("uuid"), | |
325 | json_string_create_nocopy( | |
326 | xasprintf(UUID_FMT, UUID_ARGS(uuid)))))); | |
327 | } | |
328 | ||
329 | /* Commits 'txn'. If 'wait_for_reload' is true, also waits for Open vSwitch to | |
330 | reload the configuration before returning. | |
331 | ||
332 | Returns EAGAIN if the caller should try the operation again, 0 on success, | |
333 | otherwise a positive errno value. */ | |
334 | static int | |
335 | commit_txn(struct ovsdb_idl_txn *txn, bool wait_for_reload) | |
336 | { | |
337 | struct ovsdb_idl *idl = ovsdb_idl_txn_get_idl (txn); | |
338 | enum ovsdb_idl_txn_status status; | |
339 | int64_t next_cfg = 0; | |
340 | ||
341 | if (wait_for_reload) { | |
342 | const struct ovsrec_open_vswitch *ovs = ovsrec_open_vswitch_first(idl); | |
343 | struct json *where = where_uuid_equals(&ovs->header_.uuid); | |
344 | ovsdb_idl_txn_increment(txn, "Open_vSwitch", "next_cfg", where); | |
345 | json_destroy(where); | |
346 | } | |
347 | status = ovsdb_idl_txn_commit_block(txn); | |
348 | if (wait_for_reload && status == TXN_SUCCESS) { | |
349 | next_cfg = ovsdb_idl_txn_get_increment_new_value(txn); | |
350 | } | |
351 | ovsdb_idl_txn_destroy(txn); | |
352 | ||
353 | switch (status) { | |
354 | case TXN_INCOMPLETE: | |
355 | NOT_REACHED(); | |
356 | ||
357 | case TXN_ABORTED: | |
358 | VLOG_ERR_RL(&rl, "OVSDB transaction unexpectedly aborted"); | |
359 | return ECONNABORTED; | |
360 | ||
361 | case TXN_UNCHANGED: | |
362 | return 0; | |
363 | ||
364 | case TXN_SUCCESS: | |
365 | if (wait_for_reload) { | |
366 | for (;;) { | |
367 | /* We can't use 'ovs' any longer because ovsdb_idl_run() can | |
368 | * destroy it. */ | |
369 | const struct ovsrec_open_vswitch *ovs2; | |
370 | ||
371 | ovsdb_idl_run(idl); | |
372 | OVSREC_OPEN_VSWITCH_FOR_EACH (ovs2, idl) { | |
373 | if (ovs2->cur_cfg >= next_cfg) { | |
374 | goto done; | |
375 | } | |
376 | } | |
377 | ovsdb_idl_wait(idl); | |
378 | poll_block(); | |
379 | } | |
380 | done: ; | |
381 | } | |
382 | return 0; | |
383 | ||
384 | case TXN_TRY_AGAIN: | |
385 | VLOG_ERR_RL(&rl, "OVSDB transaction needs retry"); | |
386 | return EAGAIN; | |
387 | ||
388 | case TXN_ERROR: | |
389 | VLOG_ERR_RL(&rl, "OVSDB transaction failed: %s", | |
390 | ovsdb_idl_txn_get_error(txn)); | |
391 | return EBUSY; | |
392 | ||
393 | default: | |
394 | NOT_REACHED(); | |
395 | } | |
396 | } | |
397 | ||
064af421 | 398 | static int |
1e86ae6f BP |
399 | add_bridge(struct ovsdb_idl *idl, const struct ovsrec_open_vswitch *ovs, |
400 | const char *br_name) | |
064af421 | 401 | { |
9852694f JP |
402 | struct ovsrec_bridge *br; |
403 | struct ovsrec_port *port; | |
404 | struct ovsrec_interface *iface; | |
1e86ae6f | 405 | struct ovsdb_idl_txn *txn; |
9852694f JP |
406 | |
407 | if (find_bridge(ovs, br_name)) { | |
064af421 BP |
408 | VLOG_WARN("addbr %s: bridge %s exists", br_name, br_name); |
409 | return EEXIST; | |
410 | } else if (netdev_exists(br_name)) { | |
9852694f JP |
411 | size_t i; |
412 | ||
413 | for (i = 0; i < ovs->n_bridges; i++) { | |
414 | size_t j; | |
415 | struct ovsrec_bridge *br_cfg = ovs->bridges[i]; | |
416 | ||
417 | for (j = 0; j < br_cfg->n_ports; j++) { | |
418 | if (port_is_fake_bridge(br_cfg->ports[j])) { | |
419 | VLOG_WARN("addbr %s: %s exists as a fake bridge", | |
420 | br_name, br_name); | |
421 | return 0; | |
422 | } | |
423 | } | |
064af421 | 424 | } |
9852694f JP |
425 | |
426 | VLOG_WARN("addbr %s: cannot create bridge %s because a network " | |
427 | "device named %s already exists", | |
428 | br_name, br_name, br_name); | |
429 | return EEXIST; | |
064af421 BP |
430 | } |
431 | ||
1e86ae6f BP |
432 | txn = ovsdb_idl_txn_create(idl); |
433 | ||
e1c0e2d1 | 434 | ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: addbr %s", br_name); |
b959290b | 435 | |
9852694f JP |
436 | iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); |
437 | ovsrec_interface_set_name(iface, br_name); | |
438 | ||
439 | port = ovsrec_port_insert(txn_from_openvswitch(ovs)); | |
440 | ovsrec_port_set_name(port, br_name); | |
441 | ovsrec_port_set_interfaces(port, &iface, 1); | |
d295e8e9 | 442 | |
9852694f JP |
443 | br = ovsrec_bridge_insert(txn_from_openvswitch(ovs)); |
444 | ovsrec_bridge_set_name(br, br_name); | |
445 | ovsrec_bridge_set_ports(br, &port, 1); | |
d295e8e9 | 446 | |
9852694f JP |
447 | ovs_insert_bridge(ovs, br); |
448 | ||
1e86ae6f | 449 | return commit_txn(txn, true); |
064af421 BP |
450 | } |
451 | ||
9852694f | 452 | static void |
d295e8e9 | 453 | add_port(const struct ovsrec_open_vswitch *ovs, |
9852694f JP |
454 | const struct ovsrec_bridge *br, const char *port_name) |
455 | { | |
456 | struct ovsrec_interface *iface; | |
457 | struct ovsrec_port *port; | |
458 | struct ovsrec_port **ports; | |
459 | size_t i; | |
460 | ||
461 | /* xxx Check conflicts? */ | |
462 | iface = ovsrec_interface_insert(txn_from_openvswitch(ovs)); | |
463 | ovsrec_interface_set_name(iface, port_name); | |
464 | ||
465 | port = ovsrec_port_insert(txn_from_openvswitch(ovs)); | |
466 | ovsrec_port_set_name(port, port_name); | |
467 | ovsrec_port_set_interfaces(port, &iface, 1); | |
468 | ||
469 | ports = xmalloc(sizeof *br->ports * (br->n_ports + 1)); | |
470 | for (i = 0; i < br->n_ports; i++) { | |
471 | ports[i] = br->ports[i]; | |
472 | } | |
473 | ports[br->n_ports] = port; | |
474 | ovsrec_bridge_set_ports(br, ports, br->n_ports + 1); | |
475 | free(ports); | |
476 | } | |
477 | ||
4b3a0009 BP |
478 | /* Deletes 'port' from 'br'. |
479 | * | |
480 | * After calling this function, 'port' must not be referenced again. */ | |
9852694f | 481 | static void |
4b3a0009 | 482 | del_port(const struct ovsrec_bridge *br, const struct ovsrec_port *port) |
9852694f | 483 | { |
4b3a0009 BP |
484 | struct ovsrec_port **ports; |
485 | size_t i, n; | |
486 | ||
487 | /* Remove 'port' from the bridge's list of ports. */ | |
488 | ports = xmalloc(sizeof *br->ports * br->n_ports); | |
489 | for (i = n = 0; i < br->n_ports; i++) { | |
490 | if (br->ports[i] != port) { | |
491 | ports[n++] = br->ports[i]; | |
492 | } | |
493 | } | |
494 | ovsrec_bridge_set_ports(br, ports, n); | |
495 | free(ports); | |
4b3a0009 BP |
496 | } |
497 | ||
498 | /* Delete 'iface' from 'port' (which must be within 'br'). If 'iface' was | |
499 | * 'port''s only interface, delete 'port' from 'br' also. | |
500 | * | |
501 | * After calling this function, 'iface' must not be referenced again. */ | |
502 | static void | |
503 | del_interface(const struct ovsrec_bridge *br, | |
504 | const struct ovsrec_port *port, | |
505 | const struct ovsrec_interface *iface) | |
506 | { | |
507 | if (port->n_interfaces == 1) { | |
508 | del_port(br, port); | |
509 | } else { | |
510 | struct ovsrec_interface **ifaces; | |
511 | size_t i, n; | |
512 | ||
513 | ifaces = xmalloc(sizeof *port->interfaces * port->n_interfaces); | |
514 | for (i = n = 0; i < port->n_interfaces; i++) { | |
515 | if (port->interfaces[i] != iface) { | |
516 | ifaces[n++] = port->interfaces[i]; | |
517 | } | |
518 | } | |
519 | ovsrec_port_set_interfaces(port, ifaces, n); | |
520 | free(ifaces); | |
4b3a0009 BP |
521 | } |
522 | } | |
523 | ||
524 | /* Find and return a port within 'br' named 'port_name'. */ | |
525 | static const struct ovsrec_port * | |
526 | find_port(const struct ovsrec_bridge *br, const char *port_name) | |
527 | { | |
528 | size_t i; | |
9852694f JP |
529 | |
530 | for (i = 0; i < br->n_ports; i++) { | |
531 | struct ovsrec_port *port = br->ports[i]; | |
532 | if (!strcmp(port_name, port->name)) { | |
4b3a0009 | 533 | return port; |
9852694f JP |
534 | } |
535 | } | |
4b3a0009 BP |
536 | return NULL; |
537 | } | |
9852694f | 538 | |
4b3a0009 BP |
539 | /* Find and return an interface within 'br' named 'iface_name'. */ |
540 | static const struct ovsrec_interface * | |
541 | find_interface(const struct ovsrec_bridge *br, const char *iface_name, | |
542 | struct ovsrec_port **portp) | |
543 | { | |
544 | size_t i; | |
9852694f | 545 | |
4b3a0009 BP |
546 | for (i = 0; i < br->n_ports; i++) { |
547 | struct ovsrec_port *port = br->ports[i]; | |
548 | size_t j; | |
549 | ||
550 | for (j = 0; j < port->n_interfaces; j++) { | |
551 | struct ovsrec_interface *iface = port->interfaces[j]; | |
552 | if (!strcmp(iface->name, iface_name)) { | |
553 | *portp = port; | |
554 | return iface; | |
9852694f JP |
555 | } |
556 | } | |
9852694f | 557 | } |
4b3a0009 BP |
558 | |
559 | *portp = NULL; | |
560 | return NULL; | |
9852694f JP |
561 | } |
562 | ||
1e86ae6f BP |
563 | static int |
564 | del_bridge(struct ovsdb_idl *idl, | |
565 | const struct ovsrec_open_vswitch *ovs, const char *br_name) | |
064af421 | 566 | { |
9852694f JP |
567 | struct ovsrec_bridge *br = find_bridge(ovs, br_name); |
568 | struct ovsrec_bridge **bridges; | |
1e86ae6f | 569 | struct ovsdb_idl_txn *txn; |
9852694f JP |
570 | size_t i, n; |
571 | ||
572 | if (!br) { | |
064af421 BP |
573 | VLOG_WARN("delbr %s: no bridge named %s", br_name, br_name); |
574 | return ENXIO; | |
575 | } | |
576 | ||
1e86ae6f BP |
577 | txn = ovsdb_idl_txn_create(idl); |
578 | ||
e1c0e2d1 | 579 | ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: delbr %s", br_name); |
b959290b | 580 | |
4b3a0009 | 581 | /* Remove 'br' from the vswitch's list of bridges. */ |
9852694f JP |
582 | bridges = xmalloc(sizeof *ovs->bridges * ovs->n_bridges); |
583 | for (i = n = 0; i < ovs->n_bridges; i++) { | |
584 | if (ovs->bridges[i] != br) { | |
585 | bridges[n++] = ovs->bridges[i]; | |
586 | } | |
587 | } | |
588 | ovsrec_open_vswitch_set_bridges(ovs, bridges, n); | |
589 | free(bridges); | |
590 | ||
1e86ae6f | 591 | return commit_txn(txn, true); |
064af421 BP |
592 | } |
593 | ||
594 | static int | |
595 | parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name, | |
3c303e5f | 596 | const char **port_name, uint64_t *count, uint64_t *skip) |
064af421 BP |
597 | { |
598 | static const struct nl_policy policy[] = { | |
7f42c1d7 | 599 | [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING, .optional = true }, |
064af421 | 600 | [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING, .optional = true }, |
3c303e5f BP |
601 | [BRC_GENL_A_FDB_COUNT] = { .type = NL_A_U64, .optional = true }, |
602 | [BRC_GENL_A_FDB_SKIP] = { .type = NL_A_U64, .optional = true }, | |
064af421 BP |
603 | }; |
604 | struct nlattr *attrs[ARRAY_SIZE(policy)]; | |
605 | ||
606 | if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN, policy, | |
607 | attrs, ARRAY_SIZE(policy)) | |
7f42c1d7 | 608 | || (br_name && !attrs[BRC_GENL_A_DP_NAME]) |
3c303e5f BP |
609 | || (port_name && !attrs[BRC_GENL_A_PORT_NAME]) |
610 | || (count && !attrs[BRC_GENL_A_FDB_COUNT]) | |
611 | || (skip && !attrs[BRC_GENL_A_FDB_SKIP])) { | |
064af421 BP |
612 | return EINVAL; |
613 | } | |
614 | ||
615 | *seq = ((struct nlmsghdr *) buffer->data)->nlmsg_seq; | |
7f42c1d7 BP |
616 | if (br_name) { |
617 | *br_name = nl_attr_get_string(attrs[BRC_GENL_A_DP_NAME]); | |
618 | } | |
064af421 BP |
619 | if (port_name) { |
620 | *port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]); | |
621 | } | |
3c303e5f BP |
622 | if (count) { |
623 | *count = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_COUNT]); | |
624 | } | |
625 | if (skip) { | |
626 | *skip = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_SKIP]); | |
627 | } | |
064af421 BP |
628 | return 0; |
629 | } | |
630 | ||
41e754bc BP |
631 | /* Composes and returns a reply to a request made by the datapath with Netlink |
632 | * sequence number 'seq' and error code 'error'. The caller may add additional | |
633 | * attributes to the message, then it may send it with send_reply(). */ | |
634 | static struct ofpbuf * | |
635 | compose_reply(uint32_t seq, int error) | |
064af421 | 636 | { |
41e754bc | 637 | struct ofpbuf *reply = ofpbuf_new(4096); |
69123704 | 638 | nl_msg_put_genlmsghdr(reply, 32, brc_family, NLM_F_REQUEST, |
064af421 | 639 | BRC_GENL_C_DP_RESULT, 1); |
41e754bc BP |
640 | ((struct nlmsghdr *) reply->data)->nlmsg_seq = seq; |
641 | nl_msg_put_u32(reply, BRC_GENL_A_ERR_CODE, error); | |
642 | return reply; | |
643 | } | |
064af421 | 644 | |
41e754bc BP |
645 | /* Sends 'reply' to the datapath and frees it. */ |
646 | static void | |
647 | send_reply(struct ofpbuf *reply) | |
648 | { | |
649 | int retval = nl_sock_send(brc_sock, reply, false); | |
064af421 BP |
650 | if (retval) { |
651 | VLOG_WARN_RL(&rl, "replying to brcompat request: %s", | |
652 | strerror(retval)); | |
653 | } | |
41e754bc BP |
654 | ofpbuf_delete(reply); |
655 | } | |
656 | ||
/* Answers the datapath request with Netlink sequence number 'seq' with a
 * reply that carries nothing but the error code 'error'. */
static void
send_simple_reply(uint32_t seq, int error)
{
    struct ofpbuf *reply = compose_reply(seq, error);

    send_reply(reply);
}
664 | ||
665 | static int | |
1e86ae6f | 666 | handle_bridge_cmd(struct ovsdb_idl *idl, |
d295e8e9 | 667 | const struct ovsrec_open_vswitch *ovs, |
9852694f | 668 | struct ofpbuf *buffer, bool add) |
064af421 BP |
669 | { |
670 | const char *br_name; | |
671 | uint32_t seq; | |
672 | int error; | |
673 | ||
3c303e5f | 674 | error = parse_command(buffer, &seq, &br_name, NULL, NULL, NULL); |
064af421 | 675 | if (!error) { |
1e86ae6f BP |
676 | int retval; |
677 | ||
678 | do { | |
679 | retval = (add ? add_bridge : del_bridge)(idl, ovs, br_name); | |
680 | VLOG_INFO_RL(&rl, "%sbr %s: %s", | |
681 | add ? "add" : "del", br_name, strerror(retval)); | |
682 | } while (retval == EAGAIN); | |
683 | ||
41e754bc | 684 | send_simple_reply(seq, error); |
064af421 BP |
685 | } |
686 | return error; | |
687 | } | |
688 | ||
689 | static const struct nl_policy brc_port_policy[] = { | |
690 | [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, | |
691 | [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING }, | |
692 | }; | |
693 | ||
064af421 | 694 | static int |
1e86ae6f BP |
695 | handle_port_cmd(struct ovsdb_idl *idl, |
696 | const struct ovsrec_open_vswitch *ovs, | |
9852694f | 697 | struct ofpbuf *buffer, bool add) |
064af421 BP |
698 | { |
699 | const char *cmd_name = add ? "add-if" : "del-if"; | |
700 | const char *br_name, *port_name; | |
701 | uint32_t seq; | |
702 | int error; | |
703 | ||
3c303e5f | 704 | error = parse_command(buffer, &seq, &br_name, &port_name, NULL, NULL); |
064af421 | 705 | if (!error) { |
9852694f JP |
706 | struct ovsrec_bridge *br = find_bridge(ovs, br_name); |
707 | ||
708 | if (!br) { | |
064af421 BP |
709 | VLOG_WARN("%s %s %s: no bridge named %s", |
710 | cmd_name, br_name, port_name, br_name); | |
711 | error = EINVAL; | |
712 | } else if (!netdev_exists(port_name)) { | |
713 | VLOG_WARN("%s %s %s: no network device named %s", | |
714 | cmd_name, br_name, port_name, port_name); | |
715 | error = EINVAL; | |
716 | } else { | |
1e86ae6f BP |
717 | do { |
718 | struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(idl); | |
b959290b | 719 | |
1e86ae6f | 720 | if (add) { |
e1c0e2d1 BP |
721 | ovsdb_idl_txn_add_comment(txn, "ovs-brcompatd: add-if %s", |
722 | port_name); | |
1e86ae6f BP |
723 | add_port(ovs, br, port_name); |
724 | } else { | |
4b3a0009 BP |
725 | const struct ovsrec_port *port = find_port(br, port_name); |
726 | if (port) { | |
727 | ovsdb_idl_txn_add_comment(txn, | |
728 | "ovs-brcompatd: del-if %s", | |
729 | port_name); | |
9ad73195 | 730 | del_port(br, port); |
4b3a0009 | 731 | } |
1e86ae6f | 732 | } |
b959290b | 733 | |
1e86ae6f | 734 | error = commit_txn(txn, true); |
e569fae6 | 735 | VLOG_INFO_RL(&rl, "%s %s %s: %s", |
1e86ae6f BP |
736 | cmd_name, br_name, port_name, strerror(error)); |
737 | } while (error == EAGAIN); | |
064af421 | 738 | } |
41e754bc | 739 | send_simple_reply(seq, error); |
064af421 BP |
740 | } |
741 | ||
742 | return error; | |
743 | } | |
744 | ||
9852694f JP |
745 | /* The caller is responsible for freeing '*ovs_name' if the call is |
746 | * successful. */ | |
ae1281cf | 747 | static int |
9852694f JP |
748 | linux_bridge_to_ovs_bridge(const struct ovsrec_open_vswitch *ovs, |
749 | const char *linux_name, | |
750 | const struct ovsrec_bridge **ovs_bridge, | |
751 | int *br_vlan) | |
ae1281cf | 752 | { |
9852694f JP |
753 | *ovs_bridge = find_bridge(ovs, linux_name); |
754 | if (*ovs_bridge) { | |
ae1281cf | 755 | /* Bridge name is the same. We are interested in VLAN 0. */ |
ae1281cf BP |
756 | *br_vlan = 0; |
757 | return 0; | |
758 | } else { | |
9852694f JP |
759 | /* No such Open vSwitch bridge 'linux_name', but there might be an |
760 | * internal port named 'linux_name' on some other bridge | |
ae1281cf | 761 | * 'ovs_bridge'. If so then we are interested in the VLAN assigned to |
9852694f JP |
762 | * port 'linux_name' on the bridge named 'ovs_bridge'. */ |
763 | size_t i, j; | |
764 | ||
765 | for (i = 0; i < ovs->n_bridges; i++) { | |
766 | const struct ovsrec_bridge *br = ovs->bridges[i]; | |
767 | ||
768 | for (j = 0; j < br->n_ports; j++) { | |
769 | const struct ovsrec_port *port = br->ports[j]; | |
770 | ||
771 | if (!strcmp(port->name, linux_name)) { | |
772 | *ovs_bridge = br; | |
773 | *br_vlan = port->n_tag ? *port->tag : -1; | |
774 | return 0; | |
775 | } | |
776 | } | |
ae1281cf | 777 | |
ae1281cf | 778 | } |
9852694f | 779 | return ENODEV; |
ae1281cf BP |
780 | } |
781 | } | |
782 | ||
3c303e5f | 783 | static int |
9852694f JP |
784 | handle_fdb_query_cmd(const struct ovsrec_open_vswitch *ovs, |
785 | struct ofpbuf *buffer) | |
3c303e5f BP |
786 | { |
787 | /* This structure is copied directly from the Linux 2.6.30 header files. | |
788 | * It would be more straightforward to #include <linux/if_bridge.h>, but | |
789 | * the 'port_hi' member was only introduced in Linux 2.6.26 and so systems | |
790 | * with old header files won't have it. */ | |
791 | struct __fdb_entry { | |
792 | __u8 mac_addr[6]; | |
793 | __u8 port_no; | |
794 | __u8 is_local; | |
795 | __u32 ageing_timer_value; | |
796 | __u8 port_hi; | |
797 | __u8 pad0; | |
798 | __u16 unused; | |
799 | }; | |
800 | ||
801 | struct mac { | |
802 | uint8_t addr[6]; | |
803 | }; | |
804 | struct mac *local_macs; | |
805 | int n_local_macs; | |
806 | int i; | |
807 | ||
c735214e BP |
808 | /* Impedance matching between the vswitchd and Linux kernel notions of what |
809 | * a bridge is. The kernel only handles a single VLAN per bridge, but | |
810 | * vswitchd can deal with all the VLANs on a single bridge. We have to | |
811 | * pretend that the former is the case even though the latter is the | |
812 | * implementation. */ | |
9852694f JP |
813 | const char *linux_name; /* Name used by brctl. */ |
814 | const struct ovsrec_bridge *ovs_bridge; /* Bridge used by ovs-vswitchd. */ | |
c735214e | 815 | int br_vlan; /* VLAN tag. */ |
96ca8c29 | 816 | struct sset ifaces; |
c735214e | 817 | |
3c303e5f | 818 | struct ofpbuf query_data; |
96ca8c29 | 819 | const char *iface_name; |
41e754bc | 820 | struct ofpbuf *reply; |
3c303e5f BP |
821 | char *unixctl_command; |
822 | uint64_t count, skip; | |
3c303e5f BP |
823 | char *output; |
824 | char *save_ptr; | |
825 | uint32_t seq; | |
826 | int error; | |
827 | ||
828 | /* Parse the command received from brcompat_mod. */ | |
9852694f | 829 | error = parse_command(buffer, &seq, &linux_name, NULL, &count, &skip); |
3c303e5f BP |
830 | if (error) { |
831 | return error; | |
832 | } | |
833 | ||
c735214e | 834 | /* Figure out vswitchd bridge and VLAN. */ |
d295e8e9 | 835 | error = linux_bridge_to_ovs_bridge(ovs, linux_name, |
9852694f | 836 | &ovs_bridge, &br_vlan); |
ae1281cf BP |
837 | if (error) { |
838 | send_simple_reply(seq, error); | |
839 | return error; | |
c735214e BP |
840 | } |
841 | ||
3c303e5f | 842 | /* Fetch the forwarding database using ovs-appctl. */ |
9852694f | 843 | unixctl_command = xasprintf("fdb/show %s", ovs_bridge->name); |
3c303e5f BP |
844 | error = execute_appctl_command(unixctl_command, &output); |
845 | free(unixctl_command); | |
846 | if (error) { | |
41e754bc | 847 | send_simple_reply(seq, error); |
3c303e5f BP |
848 | return error; |
849 | } | |
850 | ||
851 | /* Fetch the MAC address for each interface on the bridge, so that we can | |
852 | * fill in the is_local field in the response. */ | |
96ca8c29 | 853 | sset_init(&ifaces); |
c735214e | 854 | get_bridge_ifaces(ovs_bridge, &ifaces, br_vlan); |
96ca8c29 | 855 | local_macs = xmalloc(sset_count(&ifaces) * sizeof *local_macs); |
3c303e5f | 856 | n_local_macs = 0; |
96ca8c29 | 857 | SSET_FOR_EACH (iface_name, &ifaces) { |
3c303e5f | 858 | struct mac *mac = &local_macs[n_local_macs]; |
07c318f4 BP |
859 | struct netdev *netdev; |
860 | ||
149f577a | 861 | error = netdev_open_default(iface_name, &netdev); |
4869f1b1 | 862 | if (!error) { |
07c318f4 BP |
863 | if (!netdev_get_etheraddr(netdev, mac->addr)) { |
864 | n_local_macs++; | |
865 | } | |
866 | netdev_close(netdev); | |
3c303e5f BP |
867 | } |
868 | } | |
96ca8c29 | 869 | sset_destroy(&ifaces); |
3c303e5f BP |
870 | |
871 | /* Parse the response from ovs-appctl and convert it to binary format to | |
872 | * pass back to the kernel. */ | |
873 | ofpbuf_init(&query_data, sizeof(struct __fdb_entry) * 8); | |
874 | save_ptr = NULL; | |
875 | strtok_r(output, "\n", &save_ptr); /* Skip header line. */ | |
876 | while (count > 0) { | |
877 | struct __fdb_entry *entry; | |
878 | int port, vlan, age; | |
879 | uint8_t mac[ETH_ADDR_LEN]; | |
880 | char *line; | |
881 | bool is_local; | |
882 | ||
883 | line = strtok_r(NULL, "\n", &save_ptr); | |
884 | if (!line) { | |
885 | break; | |
886 | } | |
887 | ||
888 | if (sscanf(line, "%d %d "ETH_ADDR_SCAN_FMT" %d", | |
889 | &port, &vlan, ETH_ADDR_SCAN_ARGS(mac), &age) | |
890 | != 2 + ETH_ADDR_SCAN_COUNT + 1) { | |
db5ce514 | 891 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); |
3c303e5f BP |
892 | VLOG_INFO_RL(&rl, "fdb/show output has invalid format: %s", line); |
893 | continue; | |
894 | } | |
895 | ||
c735214e BP |
896 | if (vlan != br_vlan) { |
897 | continue; | |
898 | } | |
899 | ||
3c303e5f BP |
900 | if (skip > 0) { |
901 | skip--; | |
902 | continue; | |
903 | } | |
904 | ||
905 | /* Is this the MAC address of an interface on the bridge? */ | |
906 | is_local = false; | |
907 | for (i = 0; i < n_local_macs; i++) { | |
908 | if (eth_addr_equals(local_macs[i].addr, mac)) { | |
909 | is_local = true; | |
910 | break; | |
911 | } | |
912 | } | |
913 | ||
914 | entry = ofpbuf_put_uninit(&query_data, sizeof *entry); | |
915 | memcpy(entry->mac_addr, mac, ETH_ADDR_LEN); | |
916 | entry->port_no = port & 0xff; | |
917 | entry->is_local = is_local; | |
918 | entry->ageing_timer_value = age * HZ; | |
919 | entry->port_hi = (port & 0xff00) >> 8; | |
920 | entry->pad0 = 0; | |
921 | entry->unused = 0; | |
922 | count--; | |
923 | } | |
924 | free(output); | |
925 | ||
41e754bc BP |
926 | /* Compose and send reply to datapath. */ |
927 | reply = compose_reply(seq, 0); | |
928 | nl_msg_put_unspec(reply, BRC_GENL_A_FDB_DATA, | |
929 | query_data.data, query_data.size); | |
930 | send_reply(reply); | |
931 | ||
932 | /* Free memory. */ | |
3c303e5f | 933 | ofpbuf_uninit(&query_data); |
05edc34c | 934 | free(local_macs); |
3c303e5f BP |
935 | |
936 | return 0; | |
937 | } | |
938 | ||
db322751 | 939 | static void |
96ca8c29 | 940 | send_ifindex_reply(uint32_t seq, struct sset *ifaces) |
db322751 BP |
941 | { |
942 | struct ofpbuf *reply; | |
943 | const char *iface; | |
944 | size_t n_indices; | |
945 | int *indices; | |
db322751 BP |
946 | |
947 | /* Convert 'ifaces' into ifindexes. */ | |
948 | n_indices = 0; | |
96ca8c29 BP |
949 | indices = xmalloc(sset_count(ifaces) * sizeof *indices); |
950 | SSET_FOR_EACH (iface, ifaces) { | |
db322751 BP |
951 | int ifindex = if_nametoindex(iface); |
952 | if (ifindex) { | |
953 | indices[n_indices++] = ifindex; | |
954 | } | |
955 | } | |
956 | ||
957 | /* Compose and send reply. */ | |
958 | reply = compose_reply(seq, 0); | |
959 | nl_msg_put_unspec(reply, BRC_GENL_A_IFINDEXES, | |
960 | indices, n_indices * sizeof *indices); | |
961 | send_reply(reply); | |
962 | ||
963 | /* Free memory. */ | |
964 | free(indices); | |
965 | } | |
966 | ||
967 | static int | |
9852694f JP |
968 | handle_get_bridges_cmd(const struct ovsrec_open_vswitch *ovs, |
969 | struct ofpbuf *buffer) | |
db322751 | 970 | { |
96ca8c29 | 971 | struct sset bridges; |
9852694f | 972 | size_t i, j; |
db322751 BP |
973 | |
974 | uint32_t seq; | |
975 | ||
976 | int error; | |
977 | ||
978 | /* Parse Netlink command. | |
979 | * | |
980 | * The command doesn't actually have any arguments, but we need the | |
981 | * sequence number to send the reply. */ | |
982 | error = parse_command(buffer, &seq, NULL, NULL, NULL, NULL); | |
983 | if (error) { | |
984 | return error; | |
985 | } | |
986 | ||
987 | /* Get all the real bridges and all the fake ones. */ | |
96ca8c29 | 988 | sset_init(&bridges); |
9852694f JP |
989 | for (i = 0; i < ovs->n_bridges; i++) { |
990 | const struct ovsrec_bridge *br = ovs->bridges[i]; | |
db322751 | 991 | |
96ca8c29 | 992 | sset_add(&bridges, br->name); |
9852694f JP |
993 | for (j = 0; j < br->n_ports; j++) { |
994 | const struct ovsrec_port *port = br->ports[j]; | |
995 | ||
996 | if (port->fake_bridge) { | |
96ca8c29 | 997 | sset_add(&bridges, port->name); |
db322751 BP |
998 | } |
999 | } | |
db322751 BP |
1000 | } |
1001 | ||
1002 | send_ifindex_reply(seq, &bridges); | |
96ca8c29 | 1003 | sset_destroy(&bridges); |
db322751 BP |
1004 | |
1005 | return 0; | |
1006 | } | |
1007 | ||
1008 | static int | |
9852694f JP |
1009 | handle_get_ports_cmd(const struct ovsrec_open_vswitch *ovs, |
1010 | struct ofpbuf *buffer) | |
db322751 BP |
1011 | { |
1012 | uint32_t seq; | |
1013 | ||
9852694f JP |
1014 | const char *linux_name; |
1015 | const struct ovsrec_bridge *ovs_bridge; | |
db322751 BP |
1016 | int br_vlan; |
1017 | ||
96ca8c29 | 1018 | struct sset ports; |
db322751 BP |
1019 | |
1020 | int error; | |
1021 | ||
1022 | /* Parse Netlink command. */ | |
9852694f | 1023 | error = parse_command(buffer, &seq, &linux_name, NULL, NULL, NULL); |
db322751 BP |
1024 | if (error) { |
1025 | return error; | |
1026 | } | |
1027 | ||
d295e8e9 | 1028 | error = linux_bridge_to_ovs_bridge(ovs, linux_name, |
9852694f | 1029 | &ovs_bridge, &br_vlan); |
db322751 BP |
1030 | if (error) { |
1031 | send_simple_reply(seq, error); | |
1032 | return error; | |
1033 | } | |
1034 | ||
96ca8c29 | 1035 | sset_init(&ports); |
db322751 | 1036 | get_bridge_ports(ovs_bridge, &ports, br_vlan); |
96ca8c29 | 1037 | sset_find_and_delete(&ports, linux_name); |
db322751 | 1038 | send_ifindex_reply(seq, &ports); /* XXX bonds won't show up */ |
96ca8c29 | 1039 | sset_destroy(&ports); |
db322751 | 1040 | |
db322751 BP |
1041 | return 0; |
1042 | } | |
1043 | ||
1cec7ca1 BP |
1044 | static struct ofpbuf * |
1045 | brc_recv_update__(void) | |
1046 | { | |
1047 | for (;;) { | |
1048 | struct ofpbuf *buffer; | |
1049 | int retval; | |
1050 | ||
1051 | retval = nl_sock_recv(brc_sock, &buffer, false); | |
1052 | switch (retval) { | |
1053 | case 0: | |
1054 | if (nl_msg_nlmsgerr(buffer, NULL) | |
1055 | || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE) { | |
1056 | break; | |
1057 | } | |
1058 | return buffer; | |
1059 | ||
1060 | case ENOBUFS: | |
1061 | break; | |
1062 | ||
1063 | case EAGAIN: | |
1064 | return NULL; | |
1065 | ||
1066 | default: | |
1067 | VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval)); | |
1068 | return NULL; | |
1069 | } | |
1070 | ofpbuf_delete(buffer); | |
1071 | } | |
1072 | } | |
1073 | ||
9852694f | 1074 | static void |
1e86ae6f | 1075 | brc_recv_update(struct ovsdb_idl *idl) |
064af421 | 1076 | { |
064af421 BP |
1077 | struct ofpbuf *buffer; |
1078 | struct genlmsghdr *genlmsghdr; | |
1e86ae6f | 1079 | const struct ovsrec_open_vswitch *ovs; |
064af421 | 1080 | |
1cec7ca1 BP |
1081 | buffer = brc_recv_update__(); |
1082 | if (!buffer) { | |
9852694f | 1083 | return; |
064af421 BP |
1084 | } |
1085 | ||
1086 | genlmsghdr = nl_msg_genlmsghdr(buffer); | |
1087 | if (!genlmsghdr) { | |
1088 | VLOG_WARN_RL(&rl, "received packet too short for generic NetLink"); | |
1089 | goto error; | |
1090 | } | |
1091 | ||
1092 | if (nl_msg_nlmsghdr(buffer)->nlmsg_type != brc_family) { | |
1093 | VLOG_DBG_RL(&rl, "received type (%"PRIu16") != brcompat family (%d)", | |
1094 | nl_msg_nlmsghdr(buffer)->nlmsg_type, brc_family); | |
1095 | goto error; | |
1096 | } | |
1097 | ||
1e86ae6f BP |
1098 | /* Get the Open vSwitch configuration. Just drop the request on the floor |
1099 | * if a valid configuration doesn't exist. (We could check this earlier, | |
1100 | * but we want to drain pending Netlink messages even when there is no Open | |
1101 | * vSwitch configuration.) */ | |
1102 | ovs = ovsrec_open_vswitch_first(idl); | |
5ff22a06 JP |
1103 | if (!ovs) { |
1104 | VLOG_WARN_RL(&rl, "could not find valid configuration to update"); | |
1105 | goto error; | |
1106 | } | |
1107 | ||
064af421 BP |
1108 | switch (genlmsghdr->cmd) { |
1109 | case BRC_GENL_C_DP_ADD: | |
1e86ae6f | 1110 | handle_bridge_cmd(idl, ovs, buffer, true); |
064af421 BP |
1111 | break; |
1112 | ||
1113 | case BRC_GENL_C_DP_DEL: | |
1e86ae6f | 1114 | handle_bridge_cmd(idl, ovs, buffer, false); |
064af421 BP |
1115 | break; |
1116 | ||
1117 | case BRC_GENL_C_PORT_ADD: | |
1e86ae6f | 1118 | handle_port_cmd(idl, ovs, buffer, true); |
064af421 BP |
1119 | break; |
1120 | ||
1121 | case BRC_GENL_C_PORT_DEL: | |
1e86ae6f | 1122 | handle_port_cmd(idl, ovs, buffer, false); |
064af421 BP |
1123 | break; |
1124 | ||
3c303e5f | 1125 | case BRC_GENL_C_FDB_QUERY: |
9852694f | 1126 | handle_fdb_query_cmd(ovs, buffer); |
3c303e5f BP |
1127 | break; |
1128 | ||
db322751 | 1129 | case BRC_GENL_C_GET_BRIDGES: |
9852694f | 1130 | handle_get_bridges_cmd(ovs, buffer); |
db322751 BP |
1131 | break; |
1132 | ||
1133 | case BRC_GENL_C_GET_PORTS: | |
9852694f | 1134 | handle_get_ports_cmd(ovs, buffer); |
db322751 BP |
1135 | break; |
1136 | ||
064af421 | 1137 | default: |
9852694f | 1138 | VLOG_WARN_RL(&rl, "received unknown brc netlink command: %d\n", |
1e86ae6f | 1139 | genlmsghdr->cmd); |
9852694f | 1140 | break; |
064af421 BP |
1141 | } |
1142 | ||
064af421 BP |
1143 | error: |
1144 | ofpbuf_delete(buffer); | |
9852694f | 1145 | return; |
064af421 BP |
1146 | } |
1147 | ||
1148 | /* Check for interface configuration changes announced through RTNL. */ | |
1149 | static void | |
1e86ae6f BP |
1150 | rtnl_recv_update(struct ovsdb_idl *idl, |
1151 | const struct ovsrec_open_vswitch *ovs) | |
064af421 BP |
1152 | { |
1153 | struct ofpbuf *buf; | |
1154 | ||
1155 | int error = nl_sock_recv(rtnl_sock, &buf, false); | |
1156 | if (error == EAGAIN) { | |
1157 | /* Nothing to do. */ | |
1158 | } else if (error == ENOBUFS) { | |
1159 | VLOG_WARN_RL(&rl, "network monitor socket overflowed"); | |
1160 | } else if (error) { | |
d295e8e9 | 1161 | VLOG_WARN_RL(&rl, "error on network monitor socket: %s", |
064af421 BP |
1162 | strerror(error)); |
1163 | } else { | |
1164 | struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; | |
1165 | struct nlmsghdr *nlh; | |
1166 | struct ifinfomsg *iim; | |
1167 | ||
1168 | nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN); | |
1169 | iim = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *iim); | |
1170 | if (!iim) { | |
1171 | VLOG_WARN_RL(&rl, "received bad rtnl message (no ifinfomsg)"); | |
1172 | ofpbuf_delete(buf); | |
1173 | return; | |
d295e8e9 JP |
1174 | } |
1175 | ||
064af421 BP |
1176 | if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), |
1177 | rtnlgrp_link_policy, | |
1178 | attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { | |
1179 | VLOG_WARN_RL(&rl,"received bad rtnl message (policy)"); | |
1180 | ofpbuf_delete(buf); | |
1181 | return; | |
1182 | } | |
1183 | if (nlh->nlmsg_type == RTM_DELLINK && attrs[IFLA_MASTER]) { | |
1184 | const char *port_name = nl_attr_get_string(attrs[IFLA_IFNAME]); | |
1185 | char br_name[IFNAMSIZ]; | |
1186 | uint32_t br_idx = nl_attr_get_u32(attrs[IFLA_MASTER]); | |
064af421 BP |
1187 | |
1188 | if (!if_indextoname(br_idx, br_name)) { | |
1189 | ofpbuf_delete(buf); | |
1190 | return; | |
1191 | } | |
1192 | ||
5bfc0cd3 | 1193 | if (!netdev_exists(port_name)) { |
38553381 | 1194 | /* Network device is really gone. */ |
1e86ae6f | 1195 | struct ovsdb_idl_txn *txn; |
4b3a0009 BP |
1196 | const struct ovsrec_interface *iface; |
1197 | struct ovsrec_port *port; | |
1e86ae6f | 1198 | struct ovsrec_bridge *br; |
694f2679 | 1199 | |
38553381 BP |
1200 | VLOG_INFO("network device %s destroyed, " |
1201 | "removing from bridge %s", port_name, br_name); | |
694f2679 | 1202 | |
1e86ae6f | 1203 | br = find_bridge(ovs, br_name); |
9c8149dc | 1204 | if (!br) { |
d295e8e9 | 1205 | VLOG_WARN("no bridge named %s from which to remove %s", |
9c8149dc JP |
1206 | br_name, port_name); |
1207 | ofpbuf_delete(buf); | |
1208 | return; | |
38553381 | 1209 | } |
9c8149dc | 1210 | |
1e86ae6f | 1211 | txn = ovsdb_idl_txn_create(idl); |
b959290b | 1212 | |
4b3a0009 BP |
1213 | iface = find_interface(br, port_name, &port); |
1214 | if (iface) { | |
1215 | del_interface(br, port, iface); | |
1216 | ovsdb_idl_txn_add_comment(txn, | |
1217 | "ovs-brcompatd: destroy port %s", | |
1218 | port_name); | |
1219 | } | |
1220 | ||
1e86ae6f | 1221 | commit_txn(txn, false); |
38553381 BP |
1222 | } else { |
1223 | /* A network device by that name exists even though the kernel | |
1224 | * told us it had disappeared. Probably, what happened was | |
1225 | * this: | |
1226 | * | |
1227 | * 1. Device destroyed. | |
1228 | * 2. Notification sent to us. | |
1229 | * 3. New device created with same name as old one. | |
1230 | * 4. ovs-brcompatd notified, removes device from bridge. | |
1231 | * | |
1232 | * There's no a priori reason that in this situation that the | |
1233 | * new device with the same name should remain in the bridge; | |
1234 | * on the contrary, that would be unexpected. *But* there is | |
1235 | * one important situation where, if we do this, bad things | |
1236 | * happen. This is the case of XenServer Tools version 5.0.0, | |
1237 | * which on boot of a Windows VM cause something like this to | |
1238 | * happen on the Xen host: | |
1239 | * | |
1240 | * i. Create tap1.0 and vif1.0. | |
1241 | * ii. Delete tap1.0. | |
1242 | * iii. Delete vif1.0. | |
1243 | * iv. Re-create vif1.0. | |
1244 | * | |
1245 | * (XenServer Tools 5.5.0 does not exhibit this behavior, and | |
1246 | * neither does a VM without Tools installed at all.@.) | |
1247 | * | |
1248 | * Steps iii and iv happen within a few seconds of each other. | |
1249 | * Step iv causes /etc/xensource/scripts/vif to run, which in | |
1250 | * turn calls ovs-cfg-mod to add the new device to the bridge. | |
1251 | * If step iv happens after step 4 (in our first list of | |
1252 | * steps), then all is well, but if it happens between 3 and 4 | |
1253 | * (which can easily happen if ovs-brcompatd has to wait to | |
1254 | * lock the configuration file), then we will remove the new | |
1255 | * incarnation from the bridge instead of the old one! | |
1256 | * | |
1257 | * So, to avoid this problem, we do nothing here. This is | |
1258 | * strictly incorrect except for this one particular case, and | |
1259 | * perhaps that will bite us someday. If that happens, then we | |
1260 | * will have to somehow track network devices by ifindex, since | |
1261 | * a new device will have a new ifindex even if it has the same | |
1262 | * name as an old device. | |
1263 | */ | |
1264 | VLOG_INFO("kernel reported network device %s removed but " | |
1265 | "a device by that name exists (XS Tools 5.0.0?)", | |
1266 | port_name); | |
064af421 | 1267 | } |
064af421 BP |
1268 | } |
1269 | ofpbuf_delete(buf); | |
1270 | } | |
1271 | } | |
1272 | ||
1273 | int | |
1274 | main(int argc, char *argv[]) | |
1275 | { | |
480ce8ab | 1276 | extern struct vlog_module VLM_reconnect; |
064af421 | 1277 | struct unixctl_server *unixctl; |
9852694f JP |
1278 | const char *remote; |
1279 | struct ovsdb_idl *idl; | |
064af421 BP |
1280 | int retval; |
1281 | ||
40f0707c | 1282 | proctitle_init(argc, argv); |
064af421 | 1283 | set_program_name(argv[0]); |
480ce8ab BP |
1284 | vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN); |
1285 | vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); | |
9852694f JP |
1286 | |
1287 | remote = parse_options(argc, argv); | |
064af421 BP |
1288 | signal(SIGPIPE, SIG_IGN); |
1289 | process_init(); | |
bd76d25d | 1290 | ovsrec_init(); |
064af421 | 1291 | |
95440284 | 1292 | daemonize_start(); |
064af421 BP |
1293 | |
1294 | retval = unixctl_server_create(NULL, &unixctl); | |
1295 | if (retval) { | |
4d12270a | 1296 | exit(EXIT_FAILURE); |
064af421 BP |
1297 | } |
1298 | ||
1299 | if (brc_open(&brc_sock)) { | |
279c9e03 BP |
1300 | VLOG_FATAL("could not open brcompat socket. Check " |
1301 | "\"brcompat\" kernel module."); | |
064af421 BP |
1302 | } |
1303 | ||
1304 | if (prune_timeout) { | |
cceb11f5 BP |
1305 | int error; |
1306 | ||
1307 | error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); | |
1308 | if (error) { | |
279c9e03 BP |
1309 | VLOG_FATAL("could not create rtnetlink socket (%s)", |
1310 | strerror(error)); | |
cceb11f5 BP |
1311 | } |
1312 | ||
1313 | error = nl_sock_join_mcgroup(rtnl_sock, RTNLGRP_LINK); | |
1314 | if (error) { | |
279c9e03 BP |
1315 | VLOG_FATAL("could not join RTNLGRP_LINK multicast group (%s)", |
1316 | strerror(error)); | |
064af421 BP |
1317 | } |
1318 | } | |
1319 | ||
95440284 BP |
1320 | daemonize_complete(); |
1321 | ||
ef73f86c | 1322 | idl = ovsdb_idl_create(remote, &ovsrec_idl_class, true); |
064af421 BP |
1323 | |
1324 | for (;;) { | |
9852694f | 1325 | const struct ovsrec_open_vswitch *ovs; |
9852694f JP |
1326 | |
1327 | ovsdb_idl_run(idl); | |
1328 | ||
064af421 | 1329 | unixctl_server_run(unixctl); |
1e86ae6f | 1330 | brc_recv_update(idl); |
5ff22a06 | 1331 | |
1e86ae6f | 1332 | ovs = ovsrec_open_vswitch_first(idl); |
f3d64521 | 1333 | if (!ovs && ovsdb_idl_has_ever_connected(idl)) { |
058fd2a2 BP |
1334 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); |
1335 | VLOG_WARN_RL(&rl, "%s: database does not contain any Open vSwitch " | |
1336 | "configuration", remote); | |
058fd2a2 | 1337 | } |
8b61709d | 1338 | netdev_run(); |
064af421 BP |
1339 | |
1340 | /* If 'prune_timeout' is non-zero, we actively prune from the | |
d295e8e9 JP |
1341 | * configuration of port entries that are no longer valid. We |
1342 | * use two methods: | |
064af421 BP |
1343 | * |
1344 | * 1) The kernel explicitly notifies us of removed ports | |
1345 | * through the RTNL messages. | |
1346 | * | |
1347 | * 2) We periodically check all ports associated with bridges | |
1348 | * to see if they no longer exist. | |
1349 | */ | |
9c8149dc | 1350 | if (ovs && prune_timeout) { |
1e86ae6f | 1351 | rtnl_recv_update(idl, ovs); |
064af421 BP |
1352 | nl_sock_wait(rtnl_sock, POLLIN); |
1353 | poll_timer_wait(prune_timeout); | |
9852694f | 1354 | } |
9852694f | 1355 | |
064af421 BP |
1356 | |
1357 | nl_sock_wait(brc_sock, POLLIN); | |
9852694f | 1358 | ovsdb_idl_wait(idl); |
064af421 | 1359 | unixctl_server_wait(unixctl); |
8b61709d | 1360 | netdev_wait(); |
064af421 BP |
1361 | poll_block(); |
1362 | } | |
1363 | ||
9852694f JP |
1364 | ovsdb_idl_destroy(idl); |
1365 | ||
064af421 BP |
1366 | return 0; |
1367 | } | |
1368 | ||
3c303e5f BP |
1369 | static void |
1370 | validate_appctl_command(void) | |
1371 | { | |
1372 | const char *p; | |
1373 | int n; | |
1374 | ||
1375 | n = 0; | |
1376 | for (p = strchr(appctl_command, '%'); p; p = strchr(p + 2, '%')) { | |
1377 | if (p[1] == '%') { | |
1378 | /* Nothing to do. */ | |
1379 | } else if (p[1] == 's') { | |
1380 | n++; | |
1381 | } else { | |
279c9e03 | 1382 | VLOG_FATAL("only '%%s' and '%%%%' allowed in --appctl-command"); |
3c303e5f BP |
1383 | } |
1384 | } | |
1385 | if (n != 1) { | |
279c9e03 | 1386 | VLOG_FATAL("'%%s' must appear exactly once in --appctl-command"); |
3c303e5f BP |
1387 | } |
1388 | } | |
1389 | ||
9852694f | 1390 | static const char * |
064af421 BP |
1391 | parse_options(int argc, char *argv[]) |
1392 | { | |
1393 | enum { | |
064af421 | 1394 | OPT_PRUNE_TIMEOUT, |
3c303e5f | 1395 | OPT_APPCTL_COMMAND, |
064af421 | 1396 | VLOG_OPTION_ENUMS, |
8274ae95 BP |
1397 | LEAK_CHECKER_OPTION_ENUMS, |
1398 | DAEMON_OPTION_ENUMS | |
064af421 BP |
1399 | }; |
1400 | static struct option long_options[] = { | |
e3c17733 BP |
1401 | {"help", no_argument, NULL, 'h'}, |
1402 | {"version", no_argument, NULL, 'V'}, | |
1403 | {"prune-timeout", required_argument, NULL, OPT_PRUNE_TIMEOUT}, | |
1404 | {"appctl-command", required_argument, NULL, OPT_APPCTL_COMMAND}, | |
064af421 BP |
1405 | DAEMON_LONG_OPTIONS, |
1406 | VLOG_LONG_OPTIONS, | |
1407 | LEAK_CHECKER_LONG_OPTIONS, | |
e3c17733 | 1408 | {NULL, 0, NULL, 0}, |
064af421 BP |
1409 | }; |
1410 | char *short_options = long_options_to_short_options(long_options); | |
064af421 | 1411 | |
b43c6fe2 | 1412 | appctl_command = xasprintf("%s/ovs-appctl %%s", ovs_bindir()); |
064af421 BP |
1413 | for (;;) { |
1414 | int c; | |
1415 | ||
1416 | c = getopt_long(argc, argv, short_options, long_options, NULL); | |
1417 | if (c == -1) { | |
1418 | break; | |
1419 | } | |
1420 | ||
1421 | switch (c) { | |
1422 | case 'H': | |
1423 | case 'h': | |
1424 | usage(); | |
1425 | ||
1426 | case 'V': | |
1427 | OVS_PRINT_VERSION(0, 0); | |
1428 | exit(EXIT_SUCCESS); | |
1429 | ||
064af421 BP |
1430 | case OPT_PRUNE_TIMEOUT: |
1431 | prune_timeout = atoi(optarg) * 1000; | |
1432 | break; | |
1433 | ||
3c303e5f BP |
1434 | case OPT_APPCTL_COMMAND: |
1435 | appctl_command = optarg; | |
064af421 BP |
1436 | break; |
1437 | ||
1438 | VLOG_OPTION_HANDLERS | |
1439 | DAEMON_OPTION_HANDLERS | |
1440 | LEAK_CHECKER_OPTION_HANDLERS | |
1441 | ||
1442 | case '?': | |
1443 | exit(EXIT_FAILURE); | |
1444 | ||
1445 | default: | |
1446 | abort(); | |
1447 | } | |
1448 | } | |
1449 | free(short_options); | |
1450 | ||
3c303e5f BP |
1451 | validate_appctl_command(); |
1452 | ||
064af421 BP |
1453 | argc -= optind; |
1454 | argv += optind; | |
1455 | ||
1456 | if (argc != 1) { | |
279c9e03 BP |
1457 | VLOG_FATAL("database socket is non-option argument; " |
1458 | "use --help for usage"); | |
064af421 BP |
1459 | } |
1460 | ||
9852694f | 1461 | return argv[0]; |
064af421 BP |
1462 | } |
1463 | ||
1464 | static void | |
1465 | usage(void) | |
1466 | { | |
1467 | printf("%s: bridge compatibility front-end for ovs-vswitchd\n" | |
1468 | "usage: %s [OPTIONS] CONFIG\n" | |
1469 | "CONFIG is the configuration file used by ovs-vswitchd.\n", | |
1470 | program_name, program_name); | |
1471 | printf("\nConfiguration options:\n" | |
3c303e5f | 1472 | " --appctl-command=COMMAND shell command to run ovs-appctl\n" |
064af421 | 1473 | " --prune-timeout=SECS wait at most SECS before pruning ports\n" |
064af421 BP |
1474 | ); |
1475 | daemon_usage(); | |
1476 | vlog_usage(); | |
1477 | printf("\nOther options:\n" | |
1478 | " -h, --help display this help message\n" | |
1479 | " -V, --version display version information\n"); | |
1480 | leak_checker_usage(); | |
3c303e5f | 1481 | printf("\nThe default appctl command is:\n%s\n", appctl_command); |
064af421 BP |
1482 | exit(EXIT_SUCCESS); |
1483 | } |