git.proxmox.com Git - mirror_ovs.git/blob - ovn/controller/binding.c
ovn: Support chassis hostname in requested-chassis.
[mirror_ovs.git] / ovn / controller / binding.c
1 /* Copyright (c) 2015, 2016, 2017 Nicira, Inc.
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
#include <config.h>
#include "binding.h"

#include <stdlib.h>

#include "gchassis.h"
#include "lflow.h"
#include "lport.h"

#include "lib/bitmap.h"
#include "lib/hmapx.h"
#include "lib/poll-loop.h"
#include "lib/sset.h"
#include "lib/util.h"
#include "lib/netdev.h"
#include "lib/vswitch-idl.h"
#include "openvswitch/hmap.h"
#include "openvswitch/vlog.h"
#include "ovn/lib/chassis-index.h"
#include "ovn/lib/ovn-sb-idl.h"
#include "ovn-controller.h"
33
/* Log messages emitted from this file belong to the "binding" vlog module. */
VLOG_DEFINE_THIS_MODULE(binding);

/* qdisc type that setup_qos() installs on egress interfaces when queues are
 * requested, and that it clears again when no queue is requested. */
#define OVN_QOS_TYPE "linux-htb"
37
38 struct qos_queue {
39 struct hmap_node node;
40 uint32_t queue_id;
41 uint32_t max_rate;
42 uint32_t burst;
43 };
44
45 void
46 binding_register_ovs_idl(struct ovsdb_idl *ovs_idl)
47 {
48 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_open_vswitch);
49 ovsdb_idl_add_column(ovs_idl, &ovsrec_open_vswitch_col_bridges);
50
51 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_bridge);
52 ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_name);
53 ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_ports);
54
55 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_port);
56 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_name);
57 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_interfaces);
58 ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_qos);
59
60 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_interface);
61 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_name);
62 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_external_ids);
63 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_bfd);
64 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_bfd_status);
65 ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_status);
66
67 ovsdb_idl_add_table(ovs_idl, &ovsrec_table_qos);
68 ovsdb_idl_add_column(ovs_idl, &ovsrec_qos_col_type);
69 }
70
71 static void
72 get_local_iface_ids(const struct ovsrec_bridge *br_int,
73 struct shash *lport_to_iface,
74 struct sset *local_lports,
75 struct sset *egress_ifaces)
76 {
77 int i;
78
79 for (i = 0; i < br_int->n_ports; i++) {
80 const struct ovsrec_port *port_rec = br_int->ports[i];
81 const char *iface_id;
82 int j;
83
84 if (!strcmp(port_rec->name, br_int->name)) {
85 continue;
86 }
87
88 for (j = 0; j < port_rec->n_interfaces; j++) {
89 const struct ovsrec_interface *iface_rec;
90
91 iface_rec = port_rec->interfaces[j];
92 iface_id = smap_get(&iface_rec->external_ids, "iface-id");
93 int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport : 0;
94
95 if (iface_id && ofport > 0) {
96 shash_add(lport_to_iface, iface_id, iface_rec);
97 sset_add(local_lports, iface_id);
98 }
99
100 /* Check if this is a tunnel interface. */
101 if (smap_get(&iface_rec->options, "remote_ip")) {
102 const char *tunnel_iface
103 = smap_get(&iface_rec->status, "tunnel_egress_iface");
104 if (tunnel_iface) {
105 sset_add(egress_ifaces, tunnel_iface);
106 }
107 }
108 }
109 }
110 }
111
112 static void
113 add_local_datapath__(struct controller_ctx *ctx,
114 const struct sbrec_datapath_binding *datapath,
115 bool has_local_l3gateway, int depth,
116 struct hmap *local_datapaths)
117 {
118 uint32_t dp_key = datapath->tunnel_key;
119 const struct sbrec_port_binding *pb;
120 struct ovsdb_idl_index_cursor cursor;
121 struct sbrec_port_binding *lpval;
122
123 struct local_datapath *ld = get_local_datapath(local_datapaths, dp_key);
124 if (ld) {
125 if (has_local_l3gateway) {
126 ld->has_local_l3gateway = true;
127 }
128 return;
129 }
130
131 ld = xzalloc(sizeof *ld);
132 hmap_insert(local_datapaths, &ld->hmap_node, dp_key);
133 ld->datapath = datapath;
134 ld->localnet_port = NULL;
135 ld->has_local_l3gateway = has_local_l3gateway;
136
137 if (depth >= 100) {
138 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
139 VLOG_WARN_RL(&rl, "datapaths nested too deep");
140 return;
141 }
142
143 /* Recursively add logical datapaths to which this one patches. */
144 lpval = sbrec_port_binding_index_init_row(ctx->ovnsb_idl,
145 &sbrec_table_port_binding);
146 sbrec_port_binding_index_set_datapath(lpval, datapath);
147 ovsdb_idl_initialize_cursor(ctx->ovnsb_idl, &sbrec_table_port_binding,
148 "lport-by-datapath", &cursor);
149
150 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
151 if (!strcmp(pb->type, "patch")) {
152 const char *peer_name = smap_get(&pb->options, "peer");
153 if (peer_name) {
154 const struct sbrec_port_binding *peer;
155
156 peer = lport_lookup_by_name( ctx->ovnsb_idl, peer_name);
157
158 if (peer && peer->datapath) {
159 add_local_datapath__(ctx, peer->datapath,
160 false, depth + 1, local_datapaths);
161 ld->n_peer_dps++;
162 ld->peer_dps = xrealloc(
163 ld->peer_dps,
164 ld->n_peer_dps * sizeof *ld->peer_dps);
165 ld->peer_dps[ld->n_peer_dps - 1] = datapath_lookup_by_key(
166 ctx->ovnsb_idl, peer->datapath->tunnel_key);
167 }
168 }
169 }
170 }
171 sbrec_port_binding_index_destroy_row(lpval);
172 }
173
/* Adds 'datapath', and every datapath reachable from it through patch
 * ports, to 'local_datapaths'.  This is the entry point for the recursive
 * add_local_datapath__(), which it starts at depth 0. */
static void
add_local_datapath(struct controller_ctx *ctx,
                   const struct sbrec_datapath_binding *datapath,
                   bool has_local_l3gateway, struct hmap *local_datapaths)
{
    const int initial_depth = 0;

    add_local_datapath__(ctx, datapath, has_local_l3gateway, initial_depth,
                         local_datapaths);
}
182
183 static void
184 get_qos_params(const struct sbrec_port_binding *pb, struct hmap *queue_map)
185 {
186 uint32_t max_rate = smap_get_int(&pb->options, "qos_max_rate", 0);
187 uint32_t burst = smap_get_int(&pb->options, "qos_burst", 0);
188 uint32_t queue_id = smap_get_int(&pb->options, "qdisc_queue_id", 0);
189
190 if ((!max_rate && !burst) || !queue_id) {
191 /* Qos is not configured for this port. */
192 return;
193 }
194
195 struct qos_queue *node = xzalloc(sizeof *node);
196 hmap_insert(queue_map, &node->node, hash_int(queue_id, 0));
197 node->max_rate = max_rate;
198 node->burst = burst;
199 node->queue_id = queue_id;
200 }
201
202 static const struct ovsrec_qos *
203 get_noop_qos(struct controller_ctx *ctx)
204 {
205 const struct ovsrec_qos *qos;
206 OVSREC_QOS_FOR_EACH (qos, ctx->ovs_idl) {
207 if (!strcmp(qos->type, "linux-noop")) {
208 return qos;
209 }
210 }
211
212 if (!ctx->ovs_idl_txn) {
213 return NULL;
214 }
215 qos = ovsrec_qos_insert(ctx->ovs_idl_txn);
216 ovsrec_qos_set_type(qos, "linux-noop");
217 return qos;
218 }
219
220 static bool
221 set_noop_qos(struct controller_ctx *ctx, struct sset *egress_ifaces)
222 {
223 if (!ctx->ovs_idl_txn) {
224 return false;
225 }
226
227 const struct ovsrec_qos *noop_qos = get_noop_qos(ctx);
228 if (!noop_qos) {
229 return false;
230 }
231
232 const struct ovsrec_port *port;
233 size_t count = 0;
234
235 OVSREC_PORT_FOR_EACH (port, ctx->ovs_idl) {
236 if (sset_contains(egress_ifaces, port->name)) {
237 ovsrec_port_set_qos(port, noop_qos);
238 count++;
239 }
240 if (sset_count(egress_ifaces) == count) {
241 break;
242 }
243 }
244 return true;
245 }
246
247 static void
248 set_qos_type(struct netdev *netdev, const char *type)
249 {
250 int error = netdev_set_qos(netdev, type, NULL);
251 if (error) {
252 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
253 VLOG_WARN_RL(&rl, "%s: could not set qdisc type \"%s\" (%s)",
254 netdev_get_name(netdev), type, ovs_strerror(error));
255 }
256 }
257
258 static void
259 setup_qos(const char *egress_iface, struct hmap *queue_map)
260 {
261 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
262 struct netdev *netdev_phy;
263
264 if (!egress_iface) {
265 /* Queues cannot be configured. */
266 return;
267 }
268
269 int error = netdev_open(egress_iface, NULL, &netdev_phy);
270 if (error) {
271 VLOG_WARN_RL(&rl, "%s: could not open netdev (%s)",
272 egress_iface, ovs_strerror(error));
273 return;
274 }
275
276 /* Check current qdisc. */
277 const char *qdisc_type;
278 struct smap qdisc_details;
279
280 smap_init(&qdisc_details);
281 if (netdev_get_qos(netdev_phy, &qdisc_type, &qdisc_details) != 0 ||
282 qdisc_type[0] == '\0') {
283 smap_destroy(&qdisc_details);
284 netdev_close(netdev_phy);
285 /* Qos is not supported. */
286 return;
287 }
288 smap_destroy(&qdisc_details);
289
290 /* If we're not actually being requested to do any QoS:
291 *
292 * - If the current qdisc type is OVN_QOS_TYPE, then we clear the qdisc
293 * type to "". Otherwise, it's possible that our own leftover qdisc
294 * settings could cause strange behavior on egress. Also, QoS is
295 * expensive and may waste CPU time even if it's not really in use.
296 *
297 * OVN isn't the only software that can configure qdiscs, and
298 * physical interfaces are shared resources, so there is some risk in
299 * this strategy: we could disrupt some other program's QoS.
300 * Probably, to entirely avoid this possibility we would need to add
301 * a configuration setting.
302 *
303 * - Otherwise leave the qdisc alone. */
304 if (hmap_is_empty(queue_map)) {
305 if (!strcmp(qdisc_type, OVN_QOS_TYPE)) {
306 set_qos_type(netdev_phy, "");
307 }
308 netdev_close(netdev_phy);
309 return;
310 }
311
312 /* Configure qdisc. */
313 if (strcmp(qdisc_type, OVN_QOS_TYPE)) {
314 set_qos_type(netdev_phy, OVN_QOS_TYPE);
315 }
316
317 /* Check and delete if needed. */
318 struct netdev_queue_dump dump;
319 unsigned int queue_id;
320 struct smap queue_details;
321 struct qos_queue *sb_info;
322 struct hmap consistent_queues;
323
324 smap_init(&queue_details);
325 hmap_init(&consistent_queues);
326 NETDEV_QUEUE_FOR_EACH (&queue_id, &queue_details, &dump, netdev_phy) {
327 bool is_queue_needed = false;
328
329 HMAP_FOR_EACH_WITH_HASH (sb_info, node, hash_int(queue_id, 0),
330 queue_map) {
331 is_queue_needed = true;
332 if (sb_info->max_rate ==
333 smap_get_int(&queue_details, "max-rate", 0)
334 && sb_info->burst == smap_get_int(&queue_details, "burst", 0)) {
335 /* This queue is consistent. */
336 hmap_insert(&consistent_queues, &sb_info->node,
337 hash_int(queue_id, 0));
338 break;
339 }
340 }
341
342 if (!is_queue_needed) {
343 error = netdev_delete_queue(netdev_phy, queue_id);
344 if (error) {
345 VLOG_WARN_RL(&rl, "%s: could not delete queue %u (%s)",
346 egress_iface, queue_id, ovs_strerror(error));
347 }
348 }
349 }
350
351 /* Create/Update queues. */
352 HMAP_FOR_EACH (sb_info, node, queue_map) {
353 if (hmap_contains(&consistent_queues, &sb_info->node)) {
354 hmap_remove(&consistent_queues, &sb_info->node);
355 continue;
356 }
357
358 smap_clear(&queue_details);
359 smap_add_format(&queue_details, "max-rate", "%d", sb_info->max_rate);
360 smap_add_format(&queue_details, "burst", "%d", sb_info->burst);
361 error = netdev_set_queue(netdev_phy, sb_info->queue_id,
362 &queue_details);
363 if (error) {
364 VLOG_WARN_RL(&rl, "%s: could not configure queue %u (%s)",
365 egress_iface, sb_info->queue_id, ovs_strerror(error));
366 }
367 }
368 smap_destroy(&queue_details);
369 hmap_destroy(&consistent_queues);
370 netdev_close(netdev_phy);
371 }
372
373 static void
374 consider_local_datapath(struct controller_ctx *ctx,
375 const struct chassis_index *chassis_index,
376 struct sset *active_tunnels,
377 const struct sbrec_chassis *chassis_rec,
378 const struct sbrec_port_binding *binding_rec,
379 struct hmap *qos_map,
380 struct hmap *local_datapaths,
381 struct shash *lport_to_iface,
382 struct sset *local_lports)
383 {
384 const struct ovsrec_interface *iface_rec
385 = shash_find_data(lport_to_iface, binding_rec->logical_port);
386 struct ovs_list *gateway_chassis = NULL;
387
388 bool our_chassis = false;
389 if (iface_rec
390 || (binding_rec->parent_port && binding_rec->parent_port[0] &&
391 sset_contains(local_lports, binding_rec->parent_port))) {
392 if (binding_rec->parent_port && binding_rec->parent_port[0]) {
393 /* Add child logical port to the set of all local ports. */
394 sset_add(local_lports, binding_rec->logical_port);
395 }
396 add_local_datapath(ctx, binding_rec->datapath,
397 false, local_datapaths);
398 if (iface_rec && qos_map && ctx->ovs_idl_txn) {
399 get_qos_params(binding_rec, qos_map);
400 }
401 /* This port is in our chassis unless it is a localport. */
402 if (strcmp(binding_rec->type, "localport")) {
403 our_chassis = true;
404 }
405 } else if (!strcmp(binding_rec->type, "l2gateway")) {
406 const char *chassis_id = smap_get(&binding_rec->options,
407 "l2gateway-chassis");
408 our_chassis = chassis_id && !strcmp(chassis_id, chassis_rec->name);
409 if (our_chassis) {
410 sset_add(local_lports, binding_rec->logical_port);
411 add_local_datapath(ctx, binding_rec->datapath,
412 false, local_datapaths);
413 }
414 } else if (!strcmp(binding_rec->type, "chassisredirect")) {
415 gateway_chassis = gateway_chassis_get_ordered(binding_rec,
416 chassis_index);
417 if (gateway_chassis &&
418 gateway_chassis_contains(gateway_chassis, chassis_rec)) {
419
420 our_chassis = gateway_chassis_is_active(
421 gateway_chassis, chassis_rec, active_tunnels);
422
423 add_local_datapath(ctx, binding_rec->datapath,
424 false, local_datapaths);
425 }
426 gateway_chassis_destroy(gateway_chassis);
427 } else if (!strcmp(binding_rec->type, "l3gateway")) {
428 const char *chassis_id = smap_get(&binding_rec->options,
429 "l3gateway-chassis");
430 our_chassis = chassis_id && !strcmp(chassis_id, chassis_rec->name);
431 if (our_chassis) {
432 add_local_datapath(ctx, binding_rec->datapath,
433 true, local_datapaths);
434 }
435 } else if (!strcmp(binding_rec->type, "localnet")) {
436 /* Add all localnet ports to local_lports so that we allocate ct zones
437 * for them. */
438 sset_add(local_lports, binding_rec->logical_port);
439 our_chassis = false;
440 }
441
442 if (ctx->ovnsb_idl_txn) {
443 const char *vif_chassis = smap_get(&binding_rec->options,
444 "requested-chassis");
445 bool can_bind = !vif_chassis || !vif_chassis[0]
446 || !strcmp(vif_chassis, chassis_rec->name)
447 || !strcmp(vif_chassis, chassis_rec->hostname);
448
449 if (can_bind && our_chassis) {
450 if (binding_rec->chassis != chassis_rec) {
451 if (binding_rec->chassis) {
452 VLOG_INFO("Changing chassis for lport %s from %s to %s.",
453 binding_rec->logical_port,
454 binding_rec->chassis->name,
455 chassis_rec->name);
456 } else {
457 VLOG_INFO("Claiming lport %s for this chassis.",
458 binding_rec->logical_port);
459 }
460 for (int i = 0; i < binding_rec->n_mac; i++) {
461 VLOG_INFO("%s: Claiming %s",
462 binding_rec->logical_port, binding_rec->mac[i]);
463 }
464 sbrec_port_binding_set_chassis(binding_rec, chassis_rec);
465 }
466 } else if (binding_rec->chassis == chassis_rec) {
467 VLOG_INFO("Releasing lport %s from this chassis.",
468 binding_rec->logical_port);
469 sbrec_port_binding_set_chassis(binding_rec, NULL);
470 } else if (our_chassis) {
471 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
472 VLOG_INFO_RL(&rl,
473 "Not claiming lport %s, chassis %s "
474 "requested-chassis %s",
475 binding_rec->logical_port,
476 chassis_rec->name,
477 vif_chassis);
478 }
479 }
480 }
481
482 static void
483 consider_localnet_port(const struct sbrec_port_binding *binding_rec,
484 struct hmap *local_datapaths)
485 {
486 struct local_datapath *ld
487 = get_local_datapath(local_datapaths,
488 binding_rec->datapath->tunnel_key);
489 if (!ld) {
490 return;
491 }
492
493 if (ld->localnet_port && strcmp(ld->localnet_port->logical_port,
494 binding_rec->logical_port)) {
495 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
496 VLOG_WARN_RL(&rl, "localnet port '%s' already set for datapath "
497 "'%"PRId64"', skipping the new port '%s'.",
498 ld->localnet_port->logical_port,
499 binding_rec->datapath->tunnel_key,
500 binding_rec->logical_port);
501 return;
502 }
503 ld->localnet_port = binding_rec;
504 }
505
506 void
507 binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int,
508 const struct sbrec_chassis *chassis_rec,
509 const struct chassis_index *chassis_index,
510 struct sset *active_tunnels,
511 struct hmap *local_datapaths, struct sset *local_lports)
512 {
513 if (!chassis_rec) {
514 return;
515 }
516
517 const struct sbrec_port_binding *binding_rec;
518 struct shash lport_to_iface = SHASH_INITIALIZER(&lport_to_iface);
519 struct sset egress_ifaces = SSET_INITIALIZER(&egress_ifaces);
520 struct hmap qos_map;
521
522 hmap_init(&qos_map);
523 if (br_int) {
524 get_local_iface_ids(br_int, &lport_to_iface, local_lports,
525 &egress_ifaces);
526 }
527
528 /* Run through each binding record to see if it is resident on this
529 * chassis and update the binding accordingly. This includes both
530 * directly connected logical ports and children of those ports. */
531 SBREC_PORT_BINDING_FOR_EACH(binding_rec, ctx->ovnsb_idl) {
532 consider_local_datapath(ctx, chassis_index,
533 active_tunnels, chassis_rec, binding_rec,
534 sset_is_empty(&egress_ifaces) ? NULL :
535 &qos_map, local_datapaths, &lport_to_iface,
536 local_lports);
537
538 }
539
540 /* Run through each binding record to see if it is a localnet port
541 * on local datapaths discovered from above loop, and update the
542 * corresponding local datapath accordingly. */
543 SBREC_PORT_BINDING_FOR_EACH (binding_rec, ctx->ovnsb_idl) {
544 if (!strcmp(binding_rec->type, "localnet")) {
545 consider_localnet_port(binding_rec, local_datapaths);
546 }
547 }
548
549 if (!sset_is_empty(&egress_ifaces)
550 && set_noop_qos(ctx, &egress_ifaces)) {
551 const char *entry;
552 SSET_FOR_EACH (entry, &egress_ifaces) {
553 setup_qos(entry, &qos_map);
554 }
555 }
556
557 shash_destroy(&lport_to_iface);
558 sset_destroy(&egress_ifaces);
559 hmap_destroy(&qos_map);
560 }
561
562 /* Returns true if the database is all cleaned up, false if more work is
563 * required. */
564 bool
565 binding_cleanup(struct controller_ctx *ctx,
566 const struct sbrec_chassis *chassis_rec)
567 {
568 if (!ctx->ovnsb_idl_txn) {
569 return false;
570 }
571 if (!chassis_rec) {
572 return true;
573 }
574
575 ovsdb_idl_txn_add_comment(
576 ctx->ovnsb_idl_txn,
577 "ovn-controller: removing all port bindings for '%s'",
578 chassis_rec->name);
579
580 const struct sbrec_port_binding *binding_rec;
581 bool any_changes = false;
582 SBREC_PORT_BINDING_FOR_EACH(binding_rec, ctx->ovnsb_idl) {
583 if (binding_rec->chassis == chassis_rec) {
584 sbrec_port_binding_set_chassis(binding_rec, NULL);
585 any_changes = true;
586 }
587 }
588 return !any_changes;
589 }