/* Copyright (c) 2015, 2016, 2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <stdlib.h>

#include "lib/bitmap.h"
#include "lib/poll-loop.h"
#include "lib/netdev.h"
#include "lib/vswitch-idl.h"
#include "openvswitch/hmap.h"
#include "openvswitch/vlog.h"
#include "ovn/lib/chassis-index.h"
#include "ovn/lib/ovn-sb-idl.h"
#include "ovn-controller.h"
VLOG_DEFINE_THIS_MODULE(binding);

/* Name of the qdisc type that setup_qos() configures on an egress interface
 * when queues are requested, and clears again when none are. */
#define OVN_QOS_TYPE "linux-htb"
39 struct hmap_node node
;
46 binding_register_ovs_idl(struct ovsdb_idl
*ovs_idl
)
48 ovsdb_idl_add_table(ovs_idl
, &ovsrec_table_open_vswitch
);
49 ovsdb_idl_add_column(ovs_idl
, &ovsrec_open_vswitch_col_bridges
);
51 ovsdb_idl_add_table(ovs_idl
, &ovsrec_table_bridge
);
52 ovsdb_idl_add_column(ovs_idl
, &ovsrec_bridge_col_name
);
53 ovsdb_idl_add_column(ovs_idl
, &ovsrec_bridge_col_ports
);
55 ovsdb_idl_add_table(ovs_idl
, &ovsrec_table_port
);
56 ovsdb_idl_add_column(ovs_idl
, &ovsrec_port_col_name
);
57 ovsdb_idl_add_column(ovs_idl
, &ovsrec_port_col_interfaces
);
58 ovsdb_idl_add_column(ovs_idl
, &ovsrec_port_col_qos
);
60 ovsdb_idl_add_table(ovs_idl
, &ovsrec_table_interface
);
61 ovsdb_idl_add_column(ovs_idl
, &ovsrec_interface_col_name
);
62 ovsdb_idl_add_column(ovs_idl
, &ovsrec_interface_col_external_ids
);
63 ovsdb_idl_add_column(ovs_idl
, &ovsrec_interface_col_bfd
);
64 ovsdb_idl_add_column(ovs_idl
, &ovsrec_interface_col_bfd_status
);
65 ovsdb_idl_add_column(ovs_idl
, &ovsrec_interface_col_status
);
67 ovsdb_idl_add_table(ovs_idl
, &ovsrec_table_qos
);
68 ovsdb_idl_add_column(ovs_idl
, &ovsrec_qos_col_type
);
72 get_local_iface_ids(const struct ovsrec_bridge
*br_int
,
73 struct shash
*lport_to_iface
,
74 struct sset
*local_lports
,
75 struct sset
*egress_ifaces
)
79 for (i
= 0; i
< br_int
->n_ports
; i
++) {
80 const struct ovsrec_port
*port_rec
= br_int
->ports
[i
];
84 if (!strcmp(port_rec
->name
, br_int
->name
)) {
88 for (j
= 0; j
< port_rec
->n_interfaces
; j
++) {
89 const struct ovsrec_interface
*iface_rec
;
91 iface_rec
= port_rec
->interfaces
[j
];
92 iface_id
= smap_get(&iface_rec
->external_ids
, "iface-id");
93 int64_t ofport
= iface_rec
->n_ofport
? *iface_rec
->ofport
: 0;
95 if (iface_id
&& ofport
> 0) {
96 shash_add(lport_to_iface
, iface_id
, iface_rec
);
97 sset_add(local_lports
, iface_id
);
100 /* Check if this is a tunnel interface. */
101 if (smap_get(&iface_rec
->options
, "remote_ip")) {
102 const char *tunnel_iface
103 = smap_get(&iface_rec
->status
, "tunnel_egress_iface");
105 sset_add(egress_ifaces
, tunnel_iface
);
113 add_local_datapath__(struct controller_ctx
*ctx
,
114 const struct sbrec_datapath_binding
*datapath
,
115 bool has_local_l3gateway
, int depth
,
116 struct hmap
*local_datapaths
)
118 uint32_t dp_key
= datapath
->tunnel_key
;
119 const struct sbrec_port_binding
*pb
;
120 struct ovsdb_idl_index_cursor cursor
;
121 struct sbrec_port_binding
*lpval
;
123 struct local_datapath
*ld
= get_local_datapath(local_datapaths
, dp_key
);
125 if (has_local_l3gateway
) {
126 ld
->has_local_l3gateway
= true;
131 ld
= xzalloc(sizeof *ld
);
132 hmap_insert(local_datapaths
, &ld
->hmap_node
, dp_key
);
133 ld
->datapath
= datapath
;
134 ld
->localnet_port
= NULL
;
135 ld
->has_local_l3gateway
= has_local_l3gateway
;
138 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
139 VLOG_WARN_RL(&rl
, "datapaths nested too deep");
143 /* Recursively add logical datapaths to which this one patches. */
144 lpval
= sbrec_port_binding_index_init_row(ctx
->ovnsb_idl
,
145 &sbrec_table_port_binding
);
146 sbrec_port_binding_index_set_datapath(lpval
, datapath
);
147 ovsdb_idl_initialize_cursor(ctx
->ovnsb_idl
, &sbrec_table_port_binding
,
148 "lport-by-datapath", &cursor
);
150 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb
, &cursor
, lpval
) {
151 if (!strcmp(pb
->type
, "patch")) {
152 const char *peer_name
= smap_get(&pb
->options
, "peer");
154 const struct sbrec_port_binding
*peer
;
156 peer
= lport_lookup_by_name( ctx
->ovnsb_idl
, peer_name
);
158 if (peer
&& peer
->datapath
) {
159 add_local_datapath__(ctx
, peer
->datapath
,
160 false, depth
+ 1, local_datapaths
);
162 ld
->peer_dps
= xrealloc(
164 ld
->n_peer_dps
* sizeof *ld
->peer_dps
);
165 ld
->peer_dps
[ld
->n_peer_dps
- 1] = datapath_lookup_by_key(
166 ctx
->ovnsb_idl
, peer
->datapath
->tunnel_key
);
171 sbrec_port_binding_index_destroy_row(lpval
);
/* Adds 'datapath', and every datapath it patches to, to 'local_datapaths'.
 * This is the entry point for add_local_datapath__(), which it calls with a
 * starting recursion depth of 0. */
static void
add_local_datapath(struct controller_ctx *ctx,
                   const struct sbrec_datapath_binding *datapath,
                   bool has_local_l3gateway, struct hmap *local_datapaths)
{
    const int initial_depth = 0;

    add_local_datapath__(ctx, datapath, has_local_l3gateway, initial_depth,
                         local_datapaths);
}
184 get_qos_params(const struct sbrec_port_binding
*pb
, struct hmap
*queue_map
)
186 uint32_t max_rate
= smap_get_int(&pb
->options
, "qos_max_rate", 0);
187 uint32_t burst
= smap_get_int(&pb
->options
, "qos_burst", 0);
188 uint32_t queue_id
= smap_get_int(&pb
->options
, "qdisc_queue_id", 0);
190 if ((!max_rate
&& !burst
) || !queue_id
) {
191 /* Qos is not configured for this port. */
195 struct qos_queue
*node
= xzalloc(sizeof *node
);
196 hmap_insert(queue_map
, &node
->node
, hash_int(queue_id
, 0));
197 node
->max_rate
= max_rate
;
199 node
->queue_id
= queue_id
;
202 static const struct ovsrec_qos
*
203 get_noop_qos(struct controller_ctx
*ctx
)
205 const struct ovsrec_qos
*qos
;
206 OVSREC_QOS_FOR_EACH (qos
, ctx
->ovs_idl
) {
207 if (!strcmp(qos
->type
, "linux-noop")) {
212 if (!ctx
->ovs_idl_txn
) {
215 qos
= ovsrec_qos_insert(ctx
->ovs_idl_txn
);
216 ovsrec_qos_set_type(qos
, "linux-noop");
221 set_noop_qos(struct controller_ctx
*ctx
, struct sset
*egress_ifaces
)
223 if (!ctx
->ovs_idl_txn
) {
227 const struct ovsrec_qos
*noop_qos
= get_noop_qos(ctx
);
232 const struct ovsrec_port
*port
;
235 OVSREC_PORT_FOR_EACH (port
, ctx
->ovs_idl
) {
236 if (sset_contains(egress_ifaces
, port
->name
)) {
237 ovsrec_port_set_qos(port
, noop_qos
);
240 if (sset_count(egress_ifaces
) == count
) {
248 set_qos_type(struct netdev
*netdev
, const char *type
)
250 int error
= netdev_set_qos(netdev
, type
, NULL
);
252 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
253 VLOG_WARN_RL(&rl
, "%s: could not set qdisc type \"%s\" (%s)",
254 netdev_get_name(netdev
), type
, ovs_strerror(error
));
259 setup_qos(const char *egress_iface
, struct hmap
*queue_map
)
261 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 5);
262 struct netdev
*netdev_phy
;
265 /* Queues cannot be configured. */
269 int error
= netdev_open(egress_iface
, NULL
, &netdev_phy
);
271 VLOG_WARN_RL(&rl
, "%s: could not open netdev (%s)",
272 egress_iface
, ovs_strerror(error
));
276 /* Check current qdisc. */
277 const char *qdisc_type
;
278 struct smap qdisc_details
;
280 smap_init(&qdisc_details
);
281 if (netdev_get_qos(netdev_phy
, &qdisc_type
, &qdisc_details
) != 0 ||
282 qdisc_type
[0] == '\0') {
283 smap_destroy(&qdisc_details
);
284 netdev_close(netdev_phy
);
285 /* Qos is not supported. */
288 smap_destroy(&qdisc_details
);
290 /* If we're not actually being requested to do any QoS:
292 * - If the current qdisc type is OVN_QOS_TYPE, then we clear the qdisc
293 * type to "". Otherwise, it's possible that our own leftover qdisc
294 * settings could cause strange behavior on egress. Also, QoS is
295 * expensive and may waste CPU time even if it's not really in use.
297 * OVN isn't the only software that can configure qdiscs, and
298 * physical interfaces are shared resources, so there is some risk in
299 * this strategy: we could disrupt some other program's QoS.
300 * Probably, to entirely avoid this possibility we would need to add
301 * a configuration setting.
303 * - Otherwise leave the qdisc alone. */
304 if (hmap_is_empty(queue_map
)) {
305 if (!strcmp(qdisc_type
, OVN_QOS_TYPE
)) {
306 set_qos_type(netdev_phy
, "");
308 netdev_close(netdev_phy
);
312 /* Configure qdisc. */
313 if (strcmp(qdisc_type
, OVN_QOS_TYPE
)) {
314 set_qos_type(netdev_phy
, OVN_QOS_TYPE
);
317 /* Check and delete if needed. */
318 struct netdev_queue_dump dump
;
319 unsigned int queue_id
;
320 struct smap queue_details
;
321 struct qos_queue
*sb_info
;
322 struct hmap consistent_queues
;
324 smap_init(&queue_details
);
325 hmap_init(&consistent_queues
);
326 NETDEV_QUEUE_FOR_EACH (&queue_id
, &queue_details
, &dump
, netdev_phy
) {
327 bool is_queue_needed
= false;
329 HMAP_FOR_EACH_WITH_HASH (sb_info
, node
, hash_int(queue_id
, 0),
331 is_queue_needed
= true;
332 if (sb_info
->max_rate
==
333 smap_get_int(&queue_details
, "max-rate", 0)
334 && sb_info
->burst
== smap_get_int(&queue_details
, "burst", 0)) {
335 /* This queue is consistent. */
336 hmap_insert(&consistent_queues
, &sb_info
->node
,
337 hash_int(queue_id
, 0));
342 if (!is_queue_needed
) {
343 error
= netdev_delete_queue(netdev_phy
, queue_id
);
345 VLOG_WARN_RL(&rl
, "%s: could not delete queue %u (%s)",
346 egress_iface
, queue_id
, ovs_strerror(error
));
351 /* Create/Update queues. */
352 HMAP_FOR_EACH (sb_info
, node
, queue_map
) {
353 if (hmap_contains(&consistent_queues
, &sb_info
->node
)) {
354 hmap_remove(&consistent_queues
, &sb_info
->node
);
358 smap_clear(&queue_details
);
359 smap_add_format(&queue_details
, "max-rate", "%d", sb_info
->max_rate
);
360 smap_add_format(&queue_details
, "burst", "%d", sb_info
->burst
);
361 error
= netdev_set_queue(netdev_phy
, sb_info
->queue_id
,
364 VLOG_WARN_RL(&rl
, "%s: could not configure queue %u (%s)",
365 egress_iface
, sb_info
->queue_id
, ovs_strerror(error
));
368 smap_destroy(&queue_details
);
369 hmap_destroy(&consistent_queues
);
370 netdev_close(netdev_phy
);
374 consider_local_datapath(struct controller_ctx
*ctx
,
375 const struct chassis_index
*chassis_index
,
376 struct sset
*active_tunnels
,
377 const struct sbrec_chassis
*chassis_rec
,
378 const struct sbrec_port_binding
*binding_rec
,
379 struct hmap
*qos_map
,
380 struct hmap
*local_datapaths
,
381 struct shash
*lport_to_iface
,
382 struct sset
*local_lports
)
384 const struct ovsrec_interface
*iface_rec
385 = shash_find_data(lport_to_iface
, binding_rec
->logical_port
);
386 struct ovs_list
*gateway_chassis
= NULL
;
388 bool our_chassis
= false;
390 || (binding_rec
->parent_port
&& binding_rec
->parent_port
[0] &&
391 sset_contains(local_lports
, binding_rec
->parent_port
))) {
392 if (binding_rec
->parent_port
&& binding_rec
->parent_port
[0]) {
393 /* Add child logical port to the set of all local ports. */
394 sset_add(local_lports
, binding_rec
->logical_port
);
396 add_local_datapath(ctx
, binding_rec
->datapath
,
397 false, local_datapaths
);
398 if (iface_rec
&& qos_map
&& ctx
->ovs_idl_txn
) {
399 get_qos_params(binding_rec
, qos_map
);
401 /* This port is in our chassis unless it is a localport. */
402 if (strcmp(binding_rec
->type
, "localport")) {
405 } else if (!strcmp(binding_rec
->type
, "l2gateway")) {
406 const char *chassis_id
= smap_get(&binding_rec
->options
,
407 "l2gateway-chassis");
408 our_chassis
= chassis_id
&& !strcmp(chassis_id
, chassis_rec
->name
);
410 sset_add(local_lports
, binding_rec
->logical_port
);
411 add_local_datapath(ctx
, binding_rec
->datapath
,
412 false, local_datapaths
);
414 } else if (!strcmp(binding_rec
->type
, "chassisredirect")) {
415 gateway_chassis
= gateway_chassis_get_ordered(binding_rec
,
417 if (gateway_chassis
&&
418 gateway_chassis_contains(gateway_chassis
, chassis_rec
)) {
420 our_chassis
= gateway_chassis_is_active(
421 gateway_chassis
, chassis_rec
, active_tunnels
);
423 add_local_datapath(ctx
, binding_rec
->datapath
,
424 false, local_datapaths
);
426 gateway_chassis_destroy(gateway_chassis
);
427 } else if (!strcmp(binding_rec
->type
, "l3gateway")) {
428 const char *chassis_id
= smap_get(&binding_rec
->options
,
429 "l3gateway-chassis");
430 our_chassis
= chassis_id
&& !strcmp(chassis_id
, chassis_rec
->name
);
432 add_local_datapath(ctx
, binding_rec
->datapath
,
433 true, local_datapaths
);
435 } else if (!strcmp(binding_rec
->type
, "localnet")) {
436 /* Add all localnet ports to local_lports so that we allocate ct zones
438 sset_add(local_lports
, binding_rec
->logical_port
);
442 if (ctx
->ovnsb_idl_txn
) {
443 const char *vif_chassis
= smap_get(&binding_rec
->options
,
444 "requested-chassis");
445 bool can_bind
= !vif_chassis
|| !vif_chassis
[0]
446 || !strcmp(vif_chassis
, chassis_rec
->name
)
447 || !strcmp(vif_chassis
, chassis_rec
->hostname
);
449 if (can_bind
&& our_chassis
) {
450 if (binding_rec
->chassis
!= chassis_rec
) {
451 if (binding_rec
->chassis
) {
452 VLOG_INFO("Changing chassis for lport %s from %s to %s.",
453 binding_rec
->logical_port
,
454 binding_rec
->chassis
->name
,
457 VLOG_INFO("Claiming lport %s for this chassis.",
458 binding_rec
->logical_port
);
460 for (int i
= 0; i
< binding_rec
->n_mac
; i
++) {
461 VLOG_INFO("%s: Claiming %s",
462 binding_rec
->logical_port
, binding_rec
->mac
[i
]);
464 sbrec_port_binding_set_chassis(binding_rec
, chassis_rec
);
466 } else if (binding_rec
->chassis
== chassis_rec
) {
467 VLOG_INFO("Releasing lport %s from this chassis.",
468 binding_rec
->logical_port
);
469 sbrec_port_binding_set_chassis(binding_rec
, NULL
);
470 } else if (our_chassis
) {
471 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
473 "Not claiming lport %s, chassis %s "
474 "requested-chassis %s",
475 binding_rec
->logical_port
,
483 consider_localnet_port(const struct sbrec_port_binding
*binding_rec
,
484 struct hmap
*local_datapaths
)
486 struct local_datapath
*ld
487 = get_local_datapath(local_datapaths
,
488 binding_rec
->datapath
->tunnel_key
);
493 if (ld
->localnet_port
&& strcmp(ld
->localnet_port
->logical_port
,
494 binding_rec
->logical_port
)) {
495 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
496 VLOG_WARN_RL(&rl
, "localnet port '%s' already set for datapath "
497 "'%"PRId64
"', skipping the new port '%s'.",
498 ld
->localnet_port
->logical_port
,
499 binding_rec
->datapath
->tunnel_key
,
500 binding_rec
->logical_port
);
503 ld
->localnet_port
= binding_rec
;
507 binding_run(struct controller_ctx
*ctx
, const struct ovsrec_bridge
*br_int
,
508 const struct sbrec_chassis
*chassis_rec
,
509 const struct chassis_index
*chassis_index
,
510 struct sset
*active_tunnels
,
511 struct hmap
*local_datapaths
, struct sset
*local_lports
)
517 const struct sbrec_port_binding
*binding_rec
;
518 struct shash lport_to_iface
= SHASH_INITIALIZER(&lport_to_iface
);
519 struct sset egress_ifaces
= SSET_INITIALIZER(&egress_ifaces
);
524 get_local_iface_ids(br_int
, &lport_to_iface
, local_lports
,
528 /* Run through each binding record to see if it is resident on this
529 * chassis and update the binding accordingly. This includes both
530 * directly connected logical ports and children of those ports. */
531 SBREC_PORT_BINDING_FOR_EACH(binding_rec
, ctx
->ovnsb_idl
) {
532 consider_local_datapath(ctx
, chassis_index
,
533 active_tunnels
, chassis_rec
, binding_rec
,
534 sset_is_empty(&egress_ifaces
) ? NULL
:
535 &qos_map
, local_datapaths
, &lport_to_iface
,
540 /* Run through each binding record to see if it is a localnet port
541 * on local datapaths discovered from above loop, and update the
542 * corresponding local datapath accordingly. */
543 SBREC_PORT_BINDING_FOR_EACH (binding_rec
, ctx
->ovnsb_idl
) {
544 if (!strcmp(binding_rec
->type
, "localnet")) {
545 consider_localnet_port(binding_rec
, local_datapaths
);
549 if (!sset_is_empty(&egress_ifaces
)
550 && set_noop_qos(ctx
, &egress_ifaces
)) {
552 SSET_FOR_EACH (entry
, &egress_ifaces
) {
553 setup_qos(entry
, &qos_map
);
557 shash_destroy(&lport_to_iface
);
558 sset_destroy(&egress_ifaces
);
559 hmap_destroy(&qos_map
);
562 /* Returns true if the database is all cleaned up, false if more work is
565 binding_cleanup(struct controller_ctx
*ctx
,
566 const struct sbrec_chassis
*chassis_rec
)
568 if (!ctx
->ovnsb_idl_txn
) {
575 ovsdb_idl_txn_add_comment(
577 "ovn-controller: removing all port bindings for '%s'",
580 const struct sbrec_port_binding
*binding_rec
;
581 bool any_changes
= false;
582 SBREC_PORT_BINDING_FOR_EACH(binding_rec
, ctx
->ovnsb_idl
) {
583 if (binding_rec
->chassis
== chassis_rec
) {
584 sbrec_port_binding_set_chassis(binding_rec
, NULL
);