/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
+ * Copyright (c) 2008-2017 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <stdlib.h>
#include <math.h>
-#include "ofp-util.h"
-#include "ofp-actions.h"
-#include "ofpbuf.h"
-#include "ofproto/ofproto-provider.h"
-#include "ofproto/ofproto-dpif.h"
-#include "ofproto/ofproto-dpif-rid.h"
#include "connectivity.h"
#include "coverage.h"
-#include "dynamic-string.h"
+#include "dp-packet.h"
#include "flow.h"
-#include "hmap.h"
+#include "openvswitch/hmap.h"
#include "lacp.h"
-#include "list.h"
#include "netdev.h"
#include "odp-util.h"
-#include "ofpbuf.h"
+#include "ofproto/ofproto-dpif.h"
+#include "ofproto/ofproto-dpif-rid.h"
+#include "ofproto/ofproto-provider.h"
+#include "openvswitch/dynamic-string.h"
+#include "openvswitch/list.h"
+#include "openvswitch/match.h"
+#include "openvswitch/ofp-actions.h"
+#include "openvswitch/ofp-util.h"
+#include "openvswitch/ofpbuf.h"
+#include "openvswitch/vlog.h"
#include "packets.h"
-#include "dp-packet.h"
#include "poll-loop.h"
#include "seq.h"
-#include "match.h"
-#include "shash.h"
+#include "openvswitch/shash.h"
#include "timeval.h"
#include "unixctl.h"
-#include "openvswitch/vlog.h"
+#include "util.h"
VLOG_DEFINE_THIS_MODULE(bond);
#define BOND_MASK 0xff
#define BOND_BUCKETS (BOND_MASK + 1)
+/* Priority for internal rules created to handle recirculation */
+#define RECIRC_RULE_PRIORITY 20
+
/* A hash bucket for mapping a flow to a slave.
* "struct bond" has an array of BOND_BUCKETS of these. */
struct bond_entry {
void *aux; /* Client-provided handle for this slave. */
struct netdev *netdev; /* Network device, owned by the client. */
- unsigned int change_seq; /* Tracks changes in 'netdev'. */
+ uint64_t change_seq; /* Tracks changes in 'netdev'. */
ofp_port_t ofp_port; /* OpenFlow port number. */
char *name; /* Name (a copy of netdev_get_name(netdev)). */
OVS_REQ_WRLOCK(rwlock);
static void bond_choose_active_slave(struct bond *)
OVS_REQ_WRLOCK(rwlock);
-static unsigned int bond_hash_src(const struct eth_addr mac,
- uint16_t vlan, uint32_t basis);
-static unsigned int bond_hash_tcp(const struct flow *, uint16_t vlan,
- uint32_t basis);
static struct bond_entry *lookup_bond_entry(const struct bond *,
const struct flow *,
uint16_t vlan)
struct flow_wildcards *,
uint16_t vlan)
OVS_REQ_RDLOCK(rwlock);
+static void update_recirc_rules__(struct bond *bond);
/* Attempts to parse 's' as the name of a bond balancing mode. If successful,
* stores the mode in '*balance' and returns true. Otherwise returns false
bond = xzalloc(sizeof *bond);
bond->ofproto = ofproto;
hmap_init(&bond->slaves);
- list_init(&bond->enabled_slaves);
+ ovs_list_init(&bond->enabled_slaves);
ovs_mutex_init(&bond->mutex);
ovs_refcount_init(&bond->ref_cnt);
-
- bond->recirc_id = 0;
hmap_init(&bond->pr_rule_ops);
+ bond->active_slave_mac = eth_addr_zero;
+ bond->active_slave_changed = false;
+
bond_reconfigure(bond, s);
return bond;
}
void
bond_unref(struct bond *bond)
{
- struct bond_slave *slave, *next_slave;
- struct bond_pr_rule_op *pr_op, *next_op;
+ struct bond_slave *slave;
if (!bond || ovs_refcount_unref_relaxed(&bond->ref_cnt) != 1) {
return;
hmap_remove(all_bonds, &bond->hmap_node);
ovs_rwlock_unlock(&rwlock);
- HMAP_FOR_EACH_SAFE (slave, next_slave, hmap_node, &bond->slaves) {
- hmap_remove(&bond->slaves, &slave->hmap_node);
+ HMAP_FOR_EACH_POP (slave, hmap_node, &bond->slaves) {
/* Client owns 'slave->netdev'. */
free(slave->name);
free(slave);
hmap_destroy(&bond->slaves);
ovs_mutex_destroy(&bond->mutex);
- free(bond->hash);
- free(bond->name);
-
- HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
- hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
- free(pr_op);
- }
- hmap_destroy(&bond->pr_rule_ops);
+ /* Free bond resources. Remove existing post recirc rules. */
if (bond->recirc_id) {
recirc_free_id(bond->recirc_id);
+ bond->recirc_id = 0;
}
+ free(bond->hash);
+ bond->hash = NULL;
+ update_recirc_rules__(bond);
+ hmap_destroy(&bond->pr_rule_ops);
+ free(bond->name);
free(bond);
}
hmap_insert(&bond->pr_rule_ops, &pr_op->hmap_node, hash);
}
+/* This function should almost never be called directly.
+ * 'update_recirc_rules()' should be called instead. Since
+ * this function modifies 'bond->pr_rule_ops', it is only
+ * safe when 'rwlock' is held.
+ *
+ * However, when the 'bond' is the only reference in the system,
+ * calling this function avoid acquiring lock only to satisfy
+ * lock annotation. Currently, only 'bond_unref()' calls
+ * this function directly. */
static void
-update_recirc_rules(struct bond *bond)
- OVS_REQ_WRLOCK(rwlock)
+update_recirc_rules__(struct bond *bond)
{
struct match match;
struct bond_pr_rule_op *pr_op, *next_op;
RECIRC_RULE_PRIORITY, 0,
&ofpacts, pr_op->pr_rule);
if (error) {
- char *err_s = match_to_string(&pr_op->match,
+ char *err_s = match_to_string(&pr_op->match, NULL,
RECIRC_RULE_PRIORITY);
VLOG_ERR("failed to add post recirculation flow %s", err_s);
&pr_op->match,
RECIRC_RULE_PRIORITY);
if (error) {
- char *err_s = match_to_string(&pr_op->match,
+ char *err_s = match_to_string(&pr_op->match, NULL,
RECIRC_RULE_PRIORITY);
VLOG_ERR("failed to remove post recirculation flow %s", err_s);
ofpbuf_uninit(&ofpacts);
}
+static void
+update_recirc_rules(struct bond *bond)
+ OVS_REQ_RDLOCK(rwlock)
+{
+ update_recirc_rules__(bond);
+}
/* Updates 'bond''s overall configuration to 's'.
*
bond_entry_reset(bond);
}
- bond->active_slave_mac = s->active_slave_mac;
- bond->active_slave_changed = false;
-
ovs_rwlock_unlock(&rwlock);
return revalidate;
}
static void
bond_active_slave_changed(struct bond *bond)
{
- struct eth_addr mac;
-
- netdev_get_etheraddr(bond->active_slave->netdev, &mac);
- bond->active_slave_mac = mac;
+ if (bond->active_slave) {
+ struct eth_addr mac;
+ netdev_get_etheraddr(bond->active_slave->netdev, &mac);
+ bond->active_slave_mac = mac;
+ } else {
+ bond->active_slave_mac = eth_addr_zero;
+ }
bond->active_slave_changed = true;
seq_change(connectivity_seq_get());
}
if (!bond->lacp_fallback_ab) {
goto out;
}
+ break;
case LACP_DISABLED:
+ if (bond->balance == BM_TCP) {
+ goto out;
+ }
break;
}
if (!bond->lacp_fallback_ab) {
goto out;
}
+ /* fall through */
case BM_AB:
/* Drop all packets which arrive on backup slaves. This is similar to
}
}
-bool
-bond_may_recirc(const struct bond *bond, uint32_t *recirc_id,
- uint32_t *hash_bias)
+static bool
+bond_may_recirc(const struct bond *bond)
{
- if (bond->balance == BM_TCP && bond->recirc_id) {
- if (recirc_id) {
- *recirc_id = bond->recirc_id;
- }
- if (hash_bias) {
- *hash_bias = bond->basis;
- }
- return true;
- } else {
- return false;
- }
+ return bond->balance == BM_TCP && bond->recirc_id;
}
static void
}
void
-bond_update_post_recirc_rules(struct bond* bond, const bool force)
+bond_update_post_recirc_rules(struct bond *bond, uint32_t *recirc_id,
+ uint32_t *hash_basis)
{
- ovs_rwlock_wrlock(&rwlock);
- bond_update_post_recirc_rules__(bond, force);
- ovs_rwlock_unlock(&rwlock);
+ bool may_recirc = bond_may_recirc(bond);
+
+ if (may_recirc) {
+ /* To avoid unnecessary locking, bond_may_recirc() is first
+ * called outside of the 'rwlock'. After acquiring the lock,
+ * check again to make sure bond configuration has not been changed. */
+ ovs_rwlock_wrlock(&rwlock);
+ may_recirc = bond_may_recirc(bond);
+ if (may_recirc) {
+ *recirc_id = bond->recirc_id;
+ *hash_basis = bond->basis;
+ bond_update_post_recirc_rules__(bond, false);
+ }
+ ovs_rwlock_unlock(&rwlock);
+ }
+
+ if (!may_recirc) {
+ *recirc_id = *hash_basis = 0;
+ }
}
+
\f
/* Rebalancing. */
if (!slave->enabled) {
ds_put_cstr(&ds, " (disabled)");
}
- if (!list_is_empty(&slave->entries)) {
+ if (!ovs_list_is_empty(&slave->entries)) {
struct bond_entry *e;
ds_put_cstr(&ds, " (");
LIST_FOR_EACH (e, list_node, &slave->entries) {
- if (&e->list_node != list_front(&slave->entries)) {
+ if (&e->list_node != ovs_list_front(&slave->entries)) {
ds_put_cstr(&ds, " + ");
}
ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
{
struct bond_entry *e;
- if (list_is_short(&from->entries)) {
+ if (ovs_list_is_short(&from->entries)) {
/* 'from' carries no more than one MAC hash, so shifting load away from
* it would be pointless. */
return NULL;
}
LIST_FOR_EACH (e, list_node, &from->entries) {
- double old_ratio, new_ratio;
- uint64_t delta;
-
- if (to_tx_bytes == 0) {
- /* Nothing on the new slave, move it. */
- return e;
- }
-
- delta = e->tx_bytes;
- old_ratio = (double)from->tx_bytes / to_tx_bytes;
- new_ratio = (double)(from->tx_bytes - delta) / (to_tx_bytes + delta);
- if (old_ratio - new_ratio > 0.1
- && fabs(new_ratio - 1.0) < fabs(old_ratio - 1.0)) {
- /* We're aiming for an ideal ratio of 1, meaning both the 'from'
- and 'to' slave have the same load. Therefore, we only move an
- entry if it decreases the load on 'from', and brings us closer
- to equal traffic load. */
+ uint64_t delta = e->tx_bytes; /* The amount to rebalance. */
+ uint64_t ideal_tx_bytes = (from->tx_bytes + to_tx_bytes)/2;
+ /* Note, the ideal traffic is the mid point
+ * between 'from' and 'to'. This value does
+ * not change by rebalancing. */
+ uint64_t new_low; /* The lower bandwidth between 'to' and 'from'
+ after rebalancing. */
+
+ new_low = MIN(from->tx_bytes - delta, to_tx_bytes + delta);
+
+ if ((new_low > to_tx_bytes) &&
+ (new_low - to_tx_bytes >= (ideal_tx_bytes - to_tx_bytes) / 10)) {
+ /* Only rebalance if the new 'low' is closer to to the mid point,
+ * and the improvement exceeds 10% of current traffic
+ * deviation from the ideal split.
+ *
+ * The improvement on the 'high' side is always the same as the
+ * 'low' side. Thus consider 'low' side is sufficient. */
return e;
}
}
break;
}
}
- list_insert(&pos->bal_node, &slave->bal_node);
+ ovs_list_insert(&pos->bal_node, &slave->bal_node);
}
/* Removes 'slave' from its current list and then inserts it into 'bals' so
static void
reinsert_bal(struct ovs_list *bals, struct bond_slave *slave)
{
- list_remove(&slave->bal_node);
+ ovs_list_remove(&slave->bal_node);
insert_bal(bals, slave);
}
}
bond->next_rebalance = time_msec() + bond->rebalance_interval;
- use_recirc = ofproto_dpif_get_support(bond->ofproto)->odp.recirc &&
- bond_may_recirc(bond, NULL, NULL);
+ use_recirc = bond->ofproto->backer->support.odp.recirc &&
+ bond_may_recirc(bond);
if (use_recirc) {
bond_recirculation_account(bond);
* Compute each slave's tx_bytes as the sum of its entries' tx_bytes. */
HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
slave->tx_bytes = 0;
- list_init(&slave->entries);
+ ovs_list_init(&slave->entries);
}
for (e = &bond->hash[0]; e <= &bond->hash[BOND_MASK]; e++) {
if (e->slave && e->tx_bytes) {
e->slave->tx_bytes += e->tx_bytes;
- list_push_back(&e->slave->entries, &e->list_node);
+ ovs_list_push_back(&e->slave->entries, &e->list_node);
}
}
*
* XXX This is O(n**2) in the number of slaves but it could be O(n lg n)
* with a proper list sort algorithm. */
- list_init(&bals);
+ ovs_list_init(&bals);
HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
if (slave->enabled) {
insert_bal(&bals, slave);
log_bals(bond, &bals);
/* Shift load from the most-loaded slaves to the least-loaded slaves. */
- while (!list_is_short(&bals)) {
- struct bond_slave *from = bond_slave_from_bal_node(list_front(&bals));
- struct bond_slave *to = bond_slave_from_bal_node(list_back(&bals));
+ while (!ovs_list_is_short(&bals)) {
+ struct bond_slave *from = bond_slave_from_bal_node(ovs_list_front(&bals));
+ struct bond_slave *to = bond_slave_from_bal_node(ovs_list_back(&bals));
uint64_t overload;
overload = from->tx_bytes - to->tx_bytes;
* We don't add the element to to->hashes. That would only allow
* 'e' to be migrated to another slave in this rebalancing run, and
* there is no point in doing that. */
- list_remove(&e->list_node);
+ ovs_list_remove(&e->list_node);
/* Re-sort 'bals'. */
reinsert_bal(&bals, from);
} else {
/* Can't usefully migrate anything away from 'from'.
* Don't reconsider it. */
- list_remove(&from->bal_node);
+ ovs_list_remove(&from->bal_node);
}
}
ds_put_format(ds, "bond_mode: %s\n",
bond_mode_to_string(bond->balance));
- may_recirc = bond_may_recirc(bond, &recirc_id, NULL);
+ may_recirc = bond_may_recirc(bond);
+ recirc_id = bond->recirc_id;
ds_put_format(ds, "bond may use recirculation: %s, Recirc-ID : %d\n",
may_recirc ? "yes" : "no", may_recirc ? recirc_id: -1);
break;
}
+ ds_put_format(ds, "lacp_fallback_ab: %s\n",
+ bond->lacp_fallback_ab ? "true" : "false");
+
ds_put_cstr(ds, "active slave mac: ");
ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(bond->active_slave_mac));
slave = bond_find_slave_by_mac(bond, bond->active_slave_mac);
}
if (ovs_scan(mac_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
- hash = bond_hash_src(mac, vlan, basis) & BOND_MASK;
+ hash = hash_mac(mac, vlan, basis) & BOND_MASK;
hash_cstr = xasprintf("%u", hash);
unixctl_command_reply(conn, hash_cstr);
} else {
free(bond->hash);
bond->hash = NULL;
+ /* Remove existing post recirc rules. */
+ update_recirc_rules(bond);
}
}
ovs_mutex_lock(&slave->bond->mutex);
if (enable) {
- list_insert(&slave->bond->enabled_slaves, &slave->list_node);
+ ovs_list_insert(&slave->bond->enabled_slaves, &slave->list_node);
} else {
- list_remove(&slave->list_node);
+ ovs_list_remove(&slave->list_node);
}
ovs_mutex_unlock(&slave->bond->mutex);
}
}
-static unsigned int
-bond_hash_src(const struct eth_addr mac, uint16_t vlan, uint32_t basis)
-{
- return hash_mac(mac, vlan, basis);
-}
-
-static unsigned int
-bond_hash_tcp(const struct flow *flow, uint16_t vlan, uint32_t basis)
-{
- struct flow hash_flow = *flow;
- hash_flow.vlan_tci = htons(vlan);
-
- /* The symmetric quality of this hash function is not required, but
- * flow_hash_symmetric_l4 already exists, and is sufficient for our
- * purposes, so we use it out of convenience. */
- return flow_hash_symmetric_l4(&hash_flow, basis);
-}
-
static unsigned int
bond_hash(const struct bond *bond, const struct flow *flow, uint16_t vlan)
{
ovs_assert(bond->balance == BM_TCP || bond->balance == BM_SLB);
return (bond->balance == BM_TCP
- ? bond_hash_tcp(flow, vlan, bond->basis)
- : bond_hash_src(flow->dl_src, vlan, bond->basis));
+ ? flow_hash_5tuple(flow, bond->basis)
+ : hash_mac(flow->dl_src, vlan, bond->basis));
}
static struct bond_entry *
struct ovs_list *node;
ovs_mutex_lock(&bond->mutex);
- if (list_is_empty(&bond->enabled_slaves)) {
+ if (ovs_list_is_empty(&bond->enabled_slaves)) {
ovs_mutex_unlock(&bond->mutex);
return NULL;
}
- node = list_pop_front(&bond->enabled_slaves);
- list_push_back(&bond->enabled_slaves, node);
+ node = ovs_list_pop_front(&bond->enabled_slaves);
+ ovs_list_push_back(&bond->enabled_slaves, node);
ovs_mutex_unlock(&bond->mutex);
return CONTAINER_OF(node, struct bond_slave, list_node);
bond_active_slave_changed(bond);
}
} else if (old_active_slave) {
+ bond_active_slave_changed(bond);
VLOG_INFO_RL(&rl, "bond %s: all interfaces disabled", bond->name);
}
}