/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
+ * Copyright (c) 2008-2017 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "coverage.h"
#include "dp-packet.h"
#include "flow.h"
-#include "hmap.h"
+#include "openvswitch/hmap.h"
#include "lacp.h"
#include "netdev.h"
#include "odp-util.h"
#include "openvswitch/list.h"
#include "openvswitch/match.h"
#include "openvswitch/ofp-actions.h"
-#include "openvswitch/ofp-util.h"
#include "openvswitch/ofpbuf.h"
#include "openvswitch/vlog.h"
#include "packets.h"
-#include "poll-loop.h"
+#include "openvswitch/poll-loop.h"
#include "seq.h"
-#include "shash.h"
+#include "openvswitch/shash.h"
#include "timeval.h"
#include "unixctl.h"
+#include "util.h"
VLOG_DEFINE_THIS_MODULE(bond);
#define BOND_MASK 0xff
#define BOND_BUCKETS (BOND_MASK + 1)
+/* Priority for internal rules created to handle recirculation. */
+#define RECIRC_RULE_PRIORITY 20
+
/* A hash bucket for mapping a flow to a slave.
* "struct bond" has an array of BOND_BUCKETS of these. */
struct bond_entry {
struct netdev *netdev; /* Network device, owned by the client. */
uint64_t change_seq; /* Tracks changes in 'netdev'. */
- ofp_port_t ofp_port; /* OpenFlow port number. */
char *name; /* Name (a copy of netdev_get_name(netdev)). */
+ ofp_port_t ofp_port; /* OpenFlow port number. */
/* Link status. */
- long long delay_expires; /* Time after which 'enabled' may change. */
bool enabled; /* May be chosen for flows? */
bool may_enable; /* Client considers this slave bondable. */
+ long long delay_expires; /* Time after which 'enabled' may change. */
/* Rebalancing info. Used only by bond_rebalance(). */
struct ovs_list bal_node; /* In bond_rebalance()'s 'bals' list. */
OVS_REQ_WRLOCK(rwlock);
static void bond_choose_active_slave(struct bond *)
OVS_REQ_WRLOCK(rwlock);
-static unsigned int bond_hash_src(const struct eth_addr mac,
- uint16_t vlan, uint32_t basis);
-static unsigned int bond_hash_tcp(const struct flow *, uint16_t vlan,
- uint32_t basis);
static struct bond_entry *lookup_bond_entry(const struct bond *,
const struct flow *,
uint16_t vlan)
struct flow_wildcards *,
uint16_t vlan)
OVS_REQ_RDLOCK(rwlock);
+static void update_recirc_rules__(struct bond *bond);
+static bool bond_is_falling_back_to_ab(const struct bond *);
/* Attempts to parse 's' as the name of a bond balancing mode. If successful,
* stores the mode in '*balance' and returns true. Otherwise returns false
ovs_refcount_init(&bond->ref_cnt);
hmap_init(&bond->pr_rule_ops);
+ bond->active_slave_mac = eth_addr_zero;
+ bond->active_slave_changed = false;
+
bond_reconfigure(bond, s);
return bond;
}
void
bond_unref(struct bond *bond)
{
- struct bond_slave *slave, *next_slave;
- struct bond_pr_rule_op *pr_op, *next_op;
+ struct bond_slave *slave;
if (!bond || ovs_refcount_unref_relaxed(&bond->ref_cnt) != 1) {
return;
hmap_remove(all_bonds, &bond->hmap_node);
ovs_rwlock_unlock(&rwlock);
- HMAP_FOR_EACH_SAFE (slave, next_slave, hmap_node, &bond->slaves) {
- hmap_remove(&bond->slaves, &slave->hmap_node);
+ HMAP_FOR_EACH_POP (slave, hmap_node, &bond->slaves) {
/* Client owns 'slave->netdev'. */
free(slave->name);
free(slave);
hmap_destroy(&bond->slaves);
ovs_mutex_destroy(&bond->mutex);
- free(bond->hash);
- free(bond->name);
-
- HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
- hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
- free(pr_op);
- }
- hmap_destroy(&bond->pr_rule_ops);
+ /* Free bond resources. Remove existing post recirc rules. */
if (bond->recirc_id) {
recirc_free_id(bond->recirc_id);
+ bond->recirc_id = 0;
}
+ free(bond->hash);
+ bond->hash = NULL;
+ update_recirc_rules__(bond);
+ hmap_destroy(&bond->pr_rule_ops);
+ free(bond->name);
free(bond);
}
hmap_insert(&bond->pr_rule_ops, &pr_op->hmap_node, hash);
}
+/* This function should almost never be called directly;
+ * 'update_recirc_rules()' should be called instead.  Since
+ * this function modifies 'bond->pr_rule_ops', it is only
+ * safe to call while 'rwlock' is held.
+ *
+ * However, when this is the only remaining reference to 'bond'
+ * in the system, calling this function directly avoids acquiring
+ * the lock merely to satisfy the lock annotation.  Currently, only
+ * 'bond_unref()' calls this function directly. */
static void
-update_recirc_rules(struct bond *bond)
- OVS_REQ_WRLOCK(rwlock)
+update_recirc_rules__(struct bond *bond)
{
struct match match;
struct bond_pr_rule_op *pr_op, *next_op;
RECIRC_RULE_PRIORITY, 0,
&ofpacts, pr_op->pr_rule);
if (error) {
- char *err_s = match_to_string(&pr_op->match,
+ char *err_s = match_to_string(&pr_op->match, NULL,
RECIRC_RULE_PRIORITY);
VLOG_ERR("failed to add post recirculation flow %s", err_s);
&pr_op->match,
RECIRC_RULE_PRIORITY);
if (error) {
- char *err_s = match_to_string(&pr_op->match,
+ char *err_s = match_to_string(&pr_op->match, NULL,
RECIRC_RULE_PRIORITY);
VLOG_ERR("failed to remove post recirculation flow %s", err_s);
}
hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
- *pr_op->pr_rule = NULL;
+ if (bond->hash) {
+ *pr_op->pr_rule = NULL;
+ }
free(pr_op);
break;
}
ofpbuf_uninit(&ofpacts);
}
+static void
+update_recirc_rules(struct bond *bond)
+ OVS_REQ_RDLOCK(rwlock)
+{ /* Lock-annotated entry point: callers are expected to hold 'rwlock'
+   * (see the annotation above).  All of the work is delegated to
+   * update_recirc_rules__(), which carries no lock annotation. */
+ update_recirc_rules__(bond);
+}
/* Updates 'bond''s overall configuration to 's'.
*
bond_entry_reset(bond);
}
- bond->active_slave_mac = s->active_slave_mac;
- bond->active_slave_changed = false;
-
ovs_rwlock_unlock(&rwlock);
return revalidate;
}
static void
bond_active_slave_changed(struct bond *bond)
{
- struct eth_addr mac;
-
- netdev_get_etheraddr(bond->active_slave->netdev, &mac);
- bond->active_slave_mac = mac;
+ if (bond->active_slave) {
+ struct eth_addr mac;
+ netdev_get_etheraddr(bond->active_slave->netdev, &mac);
+ bond->active_slave_mac = mac;
+ } else {
+ bond->active_slave_mac = eth_addr_zero;
+ }
bond->active_slave_changed = true;
seq_change(connectivity_seq_get());
}
if (bond->lacp_status != lacp_status) {
bond->lacp_status = lacp_status;
bond->bond_revalidate = true;
+
+ /* Change in LACP status can affect whether the bond is falling back to
+ * active-backup. Make sure to create or destroy buckets if
+ * necessary. */
+ if (bond_is_falling_back_to_ab(bond) || !bond->hash) {
+ bond_entry_reset(bond);
+ }
}
/* Enable slaves based on link status and LACP feedback. */
return packet;
}
\f
+
+/* Reports whether 'bond' is falling back to active-backup.  That is the case
+ * only when the 'lacp_fallback_ab' option is set, the LACP status is
+ * LACP_CONFIGURED, and the configured balancing mode is SLB or TCP.  Returns
+ * false in every other situation. */
+static bool
+bond_is_falling_back_to_ab(const struct bond *bond)
+{
+    if (!bond->lacp_fallback_ab || bond->lacp_status != LACP_CONFIGURED) {
+        return false;
+    }
+
+    return bond->balance == BM_SLB || bond->balance == BM_TCP;
+}
+
/* Checks whether a packet that arrived on 'slave_' within 'bond', with an
* Ethernet destination address of 'eth_dst', should be admitted.
*
if (!bond->lacp_fallback_ab) {
goto out;
}
+ break;
case LACP_DISABLED:
+ if (bond->balance == BM_TCP) {
+ goto out;
+ }
break;
}
if (!bond->lacp_fallback_ab) {
goto out;
}
+ /* fall through */
case BM_AB:
/* Drop all packets which arrive on backup slaves. This is similar to
}
}
-bool
-bond_may_recirc(const struct bond *bond, uint32_t *recirc_id,
- uint32_t *hash_bias)
+static bool
+bond_may_recirc(const struct bond *bond)
{
- if (bond->balance == BM_TCP && bond->recirc_id) {
- if (recirc_id) {
- *recirc_id = bond->recirc_id;
- }
- if (hash_bias) {
- *hash_bias = bond->basis;
- }
- return true;
- } else {
- return false;
- }
+ return (bond->balance == BM_TCP && bond->recirc_id
+ && !bond_is_falling_back_to_ab(bond));
}
static void
}
void
-bond_update_post_recirc_rules(struct bond* bond, const bool force)
+bond_update_post_recirc_rules(struct bond *bond, uint32_t *recirc_id,
+ uint32_t *hash_basis)
{
- ovs_rwlock_wrlock(&rwlock);
- bond_update_post_recirc_rules__(bond, force);
- ovs_rwlock_unlock(&rwlock);
+ bool may_recirc = bond_may_recirc(bond);
+
+ if (may_recirc) {
+ /* To avoid unnecessary locking, bond_may_recirc() is first
+ * called outside of the 'rwlock'. After acquiring the lock,
+ * check again to make sure bond configuration has not been changed. */
+ ovs_rwlock_wrlock(&rwlock);
+ may_recirc = bond_may_recirc(bond);
+ if (may_recirc) {
+ *recirc_id = bond->recirc_id;
+ *hash_basis = bond->basis;
+ bond_update_post_recirc_rules__(bond, false);
+ }
+ ovs_rwlock_unlock(&rwlock);
+ }
+
+ if (!may_recirc) {
+ *recirc_id = *hash_basis = 0;
+ }
}
+
\f
/* Rebalancing. */
bond_is_balanced(const struct bond *bond) OVS_REQ_RDLOCK(rwlock)
{
return bond->rebalance_interval
- && (bond->balance == BM_SLB || bond->balance == BM_TCP);
+ && (bond->balance == BM_SLB || bond->balance == BM_TCP)
+ && !(bond->lacp_fallback_ab && bond->lacp_status == LACP_CONFIGURED);
}
/* Notifies 'bond' that 'n_bytes' bytes were sent in 'flow' within 'vlan'. */
}
bond->next_rebalance = time_msec() + bond->rebalance_interval;
- use_recirc = ofproto_dpif_get_support(bond->ofproto)->odp.recirc &&
- bond_may_recirc(bond, NULL, NULL);
+ use_recirc = bond->ofproto->backer->rt_support.odp.recirc &&
+ bond_may_recirc(bond);
if (use_recirc) {
bond_recirculation_account(bond);
ds_put_format(ds, "bond_mode: %s\n",
bond_mode_to_string(bond->balance));
- may_recirc = bond_may_recirc(bond, &recirc_id, NULL);
+ may_recirc = bond_may_recirc(bond);
+ recirc_id = bond->recirc_id;
ds_put_format(ds, "bond may use recirculation: %s, Recirc-ID : %d\n",
may_recirc ? "yes" : "no", may_recirc ? recirc_id: -1);
break;
}
+ ds_put_format(ds, "lacp_fallback_ab: %s\n",
+ bond->lacp_fallback_ab ? "true" : "false");
+
ds_put_cstr(ds, "active slave mac: ");
ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(bond->active_slave_mac));
slave = bond_find_slave_by_mac(bond, bond->active_slave_mac);
ds_put_format(ds, "\nslave %s: %s\n",
slave->name, slave->enabled ? "enabled" : "disabled");
if (slave == bond->active_slave) {
- ds_put_cstr(ds, "\tactive slave\n");
+ ds_put_cstr(ds, " active slave\n");
}
if (slave->delay_expires != LLONG_MAX) {
- ds_put_format(ds, "\t%s expires in %lld ms\n",
+ ds_put_format(ds, " %s expires in %lld ms\n",
slave->enabled ? "downdelay" : "updelay",
slave->delay_expires - time_msec());
}
- ds_put_format(ds, "\tmay_enable: %s\n",
+ ds_put_format(ds, " may_enable: %s\n",
slave->may_enable ? "true" : "false");
if (!bond_is_balanced(bond)) {
be_tx_k = be->tx_bytes / 1024;
if (be_tx_k) {
- ds_put_format(ds, "\thash %d: %"PRIu64" kB load\n",
+ ds_put_format(ds, " hash %d: %"PRIu64" kB load\n",
hash, be_tx_k);
}
}
if (ovs_scan(mac_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
- hash = bond_hash_src(mac, vlan, basis) & BOND_MASK;
+ hash = hash_mac(mac, vlan, basis) & BOND_MASK;
hash_cstr = xasprintf("%u", hash);
unixctl_command_reply(conn, hash_cstr);
static void
bond_entry_reset(struct bond *bond)
{
- if (bond->balance != BM_AB) {
+ if (bond->balance != BM_AB && !bond_is_falling_back_to_ab(bond)) {
size_t hash_len = BOND_BUCKETS * sizeof *bond->hash;
if (!bond->hash) {
} else {
free(bond->hash);
bond->hash = NULL;
+ /* Remove existing post recirc rules. */
+ update_recirc_rules(bond);
}
}
static void
bond_enable_slave(struct bond_slave *slave, bool enable)
{
+ struct bond *bond = slave->bond;
+
slave->delay_expires = LLONG_MAX;
if (enable != slave->enabled) {
slave->bond->bond_revalidate = true;
if (enable) {
ovs_list_insert(&slave->bond->enabled_slaves, &slave->list_node);
} else {
+ bond->send_learning_packets = true;
ovs_list_remove(&slave->list_node);
}
ovs_mutex_unlock(&slave->bond->mutex);
VLOG_INFO_RL(&rl, "interface %s: will not be %s",
slave->name, up ? "disabled" : "enabled");
} else {
- int delay = (bond->lacp_status != LACP_DISABLED ? 0
- : up ? bond->updelay : bond->downdelay);
+ int delay = up ? bond->updelay : bond->downdelay;
slave->delay_expires = time_msec() + delay;
if (delay) {
VLOG_INFO_RL(&rl, "interface %s: will be %s if it stays %s "
}
}
-static unsigned int
-bond_hash_src(const struct eth_addr mac, uint16_t vlan, uint32_t basis)
-{
- return hash_mac(mac, vlan, basis);
-}
-
-static unsigned int
-bond_hash_tcp(const struct flow *flow, uint16_t vlan, uint32_t basis)
-{
- struct flow hash_flow = *flow;
- hash_flow.vlan_tci = htons(vlan);
-
- /* The symmetric quality of this hash function is not required, but
- * flow_hash_symmetric_l4 already exists, and is sufficient for our
- * purposes, so we use it out of convenience. */
- return flow_hash_symmetric_l4(&hash_flow, basis);
-}
-
static unsigned int
bond_hash(const struct bond *bond, const struct flow *flow, uint16_t vlan)
{
ovs_assert(bond->balance == BM_TCP || bond->balance == BM_SLB);
return (bond->balance == BM_TCP
- ? bond_hash_tcp(flow, vlan, bond->basis)
- : bond_hash_src(flow->dl_src, vlan, bond->basis));
+ ? flow_hash_5tuple(flow, bond->basis)
+ : hash_mac(flow->dl_src, vlan, bond->basis));
}
static struct bond_entry *
return NULL;
}
if (wc) {
- flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_SYMMETRIC_L4);
+ flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP);
}
/* Fall Through. */
case BM_SLB:
- if (wc) {
+ if (wc && balance == BM_SLB) {
flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_ETH_SRC);
}
e = lookup_bond_entry(bond, flow, vlan);
bond_active_slave_changed(bond);
}
} else if (old_active_slave) {
+ bond_active_slave_changed(bond);
VLOG_INFO_RL(&rl, "bond %s: all interfaces disabled", bond->name);
}
}