#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
+#include <linux/cpumask.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
-#include "vlan.h"
+#include "flow_netlink.h"
#define TBL_MIN_BUCKETS 1024
#define MASK_ARRAY_SIZE_MIN 16
}
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
- const struct sw_flow_mask *mask)
+ bool full, const struct sw_flow_mask *mask)
{
- const long *m = (const long *)((const u8 *)&mask->key +
- mask->range.start);
- const long *s = (const long *)((const u8 *)src +
- mask->range.start);
- long *d = (long *)((u8 *)dst + mask->range.start);
+ int start = full ? 0 : mask->range.start;
+ int len = full ? sizeof(*dst) : range_n_bytes(&mask->range);
+ const long *m = (const long *)((const u8 *)&mask->key + start);
+ const long *s = (const long *)((const u8 *)src + start);
+ long *d = (long *)((u8 *)dst + start);
int i;
- /* The memory outside of the 'mask->range' are not set since
- * further operations on 'dst' only uses contents within
- * 'mask->range'.
+ /* If 'full' is true, all of 'dst' is initialized. Otherwise the
+ * memory outside of 'mask->range' is left uninitialized, which is an
+ * optimization for when further operations on 'dst' only use the
+ * contents within 'mask->range'.
+ */
- for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
+ for (i = 0; i < len; i += sizeof(long))
*d++ = *s++ & *m++;
}
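+/* A sketch of the two modes: lookups only need the bytes the mask
+ * covers,
+ *
+ *	ovs_flow_mask_key(&masked_key, unmasked, false, mask);
+ *
+ * while paths that keep a copy of the key pass 'full' as true, so no
+ * uninitialized bytes end up in the stored key.
+ */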
struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
- struct flow_stats *stats;
- int node;
+ struct sw_flow_stats *stats;
- flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+ flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
if (!flow)
return ERR_PTR(-ENOMEM);
- flow->sf_acts = NULL;
- flow->mask = NULL;
- flow->id.ufid_len = 0;
- flow->id.unmasked_key = NULL;
- flow->stats_last_writer = NUMA_NO_NODE;
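+ /* -1 means no CPU has written the stats yet. */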
+ flow->stats_last_writer = -1;
- /* Initialize the default stat node. */
+ /* Initialize the default stat node; use no NUMA preference if
+ * node 0 is offline.
+ */
stats = kmem_cache_alloc_node(flow_stats_cache,
- GFP_KERNEL | __GFP_ZERO, 0);
+ GFP_KERNEL | __GFP_ZERO,
+ node_online(0) ? 0 : NUMA_NO_NODE);
if (!stats)
goto err;
RCU_INIT_POINTER(flow->stats[0], stats);
- for_each_node(node)
- if (node != 0)
- RCU_INIT_POINTER(flow->stats[node], NULL);
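+ /* Record that the default stats node on CPU 0 is in use. */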
+ cpumask_set_cpu(0, &flow->cpu_used_mask);
return flow;
err:
return table->count;
}
-static struct flex_array *alloc_buckets(unsigned int n_buckets)
-{
- struct flex_array *buckets;
- int i, err;
-
- buckets = flex_array_alloc(sizeof(struct hlist_head),
- n_buckets, GFP_KERNEL);
- if (!buckets)
- return NULL;
-
- err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
- if (err) {
- flex_array_free(buckets);
- return NULL;
- }
-
- for (i = 0; i < n_buckets; i++)
- INIT_HLIST_HEAD((struct hlist_head *)
- flex_array_get(buckets, i));
-
- return buckets;
-}
-
static void flow_free(struct sw_flow *flow)
{
- int node;
+ int cpu;
if (ovs_identifier_is_key(&flow->id))
kfree(flow->id.unmasked_key);
- kfree(rcu_dereference_raw(flow->sf_acts));
- for_each_node(node)
- if (flow->stats[node])
+ if (flow->sf_acts)
+ ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
+ /* Open code this loop instead of using for_each_cpu() so that CPU 0,
+ * whose default stats node always exists, is always considered.
+ */
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
+ if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
- rcu_dereference_raw(flow->stats[node]));
+ rcu_dereference_raw(flow->stats[cpu]));
kmem_cache_free(flow_cache, flow);
}
flow_free(flow);
}
-static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu)
-{
- struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu);
-
- kfree(mask);
-}
-
void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
if (!flow)
flow_free(flow);
}
-static void free_buckets(struct flex_array *buckets)
-{
- flex_array_free(buckets);
-}
-
-
static void __table_instance_destroy(struct table_instance *ti)
{
- free_buckets(ti->buckets);
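+ /* kvfree() handles both kmalloc()- and vmalloc()-backed buckets. */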
+ kvfree(ti->buckets);
kfree(ti);
}
static struct table_instance *table_instance_alloc(int new_size)
{
struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+ int i;
if (!ti)
return NULL;
- ti->buckets = alloc_buckets(new_size);
-
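+ /* kvmalloc_array() falls back to vmalloc for large bucket arrays,
+ * removing the need for flex_array.
+ */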
+ ti->buckets = kvmalloc_array(new_size, sizeof(struct hlist_head),
+ GFP_KERNEL);
if (!ti->buckets) {
kfree(ti);
return NULL;
}
+
+ for (i = 0; i < new_size; i++)
+ INIT_HLIST_HEAD(&ti->buckets[i]);
+
ti->n_buckets = new_size;
ti->node_ver = 0;
ti->keep_flows = false;
return 0;
}
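+/* Must be called with OVS mutex held. Readers walk 'ma->masks' under
+ * RCU, so the new mask is published with rcu_assign_pointer() before
+ * 'count' is raised with WRITE_ONCE().
+ */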
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+ struct sw_flow_mask *new)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int err, ma_count = READ_ONCE(ma->count);
+
+ if (ma_count >= ma->max) {
+ err = tbl_mask_array_realloc(tbl, ma->max +
+ MASK_ARRAY_SIZE_MIN);
+ if (err)
+ return err;
+
+ ma = ovsl_dereference(tbl->mask_array);
+ }
+
+ BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+ rcu_assign_pointer(ma->masks[ma_count], new);
+ WRITE_ONCE(ma->count, ma_count + 1);
+
+ return 0;
+}
+
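+/* Must be called with OVS mutex held. The array is kept dense: the
+ * last mask is moved into the vacated slot, so lockless readers can
+ * stop at the first NULL entry.
+ */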
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+ struct sw_flow_mask *mask)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i, ma_count = READ_ONCE(ma->count);
+
+ /* Find 'mask' in the array. */
+ for (i = 0; i < ma_count; i++) {
+ if (mask == ovsl_dereference(ma->masks[i]))
+ goto found;
+ }
+
+ BUG();
+ return;
+
+found:
+ WRITE_ONCE(ma->count, ma_count - 1);
+
+ rcu_assign_pointer(ma->masks[i], ma->masks[ma_count - 1]);
+ RCU_INIT_POINTER(ma->masks[ma_count - 1], NULL);
+
+ kfree_rcu(mask, rcu);
+
+ /* Shrink the mask array if necessary. */
+ if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+ ma_count <= (ma->max / 3))
+ tbl_mask_array_realloc(tbl, ma->max / 2);
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ if (mask) {
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count)
+ tbl_mask_array_del_mask(tbl, mask);
+ }
+}
+
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
__table_instance_destroy(ti);
}
-static void table_instance_destroy(struct table_instance *ti,
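+/* Unlink 'flow' from both the flow table and the UFID table and drop
+ * its mask reference. 'count' is false when an entire table instance
+ * is being destroyed and the totals are reset separately.
+ */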
+static void table_instance_flow_free(struct flow_table *table,
+ struct table_instance *ti,
+ struct table_instance *ufid_ti,
+ struct sw_flow *flow,
+ bool count)
+{
+ hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
+ if (count)
+ table->count--;
+
+ if (ovs_identifier_is_ufid(&flow->id)) {
+ hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+
+ if (count)
+ table->ufid_count--;
+ }
+
+ flow_mask_remove(table, flow->mask);
+}
+
+static void table_instance_destroy(struct flow_table *table,
+ struct table_instance *ti,
struct table_instance *ufid_ti,
bool deferred)
{
for (i = 0; i < ti->n_buckets; i++) {
struct sw_flow *flow;
- struct hlist_head *head = flex_array_get(ti->buckets, i);
+ struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n;
- int ver = ti->node_ver;
- int ufid_ver = ufid_ti->node_ver;
- hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
- hlist_del_rcu(&flow->flow_table.node[ver]);
- if (ovs_identifier_is_ufid(&flow->id))
- hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
+ hlist_for_each_entry_safe(flow, n, head,
+ flow_table.node[ti->node_ver]) {
+
+ table_instance_flow_free(table, ti, ufid_ti,
+ flow, false);
ovs_flow_free(flow, deferred);
}
}
free_percpu(table->mask_cache);
kfree(rcu_dereference_raw(table->mask_array));
- table_instance_destroy(ti, ufid_ti, false);
+ table_instance_destroy(table, ti, ufid_ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
ver = ti->node_ver;
while (*bucket < ti->n_buckets) {
i = 0;
- head = flex_array_get(ti->buckets, *bucket);
+ head = &ti->buckets[*bucket];
hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
if (i < *last) {
i++;
static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
{
hash = jhash_1word(hash, ti->hash_seed);
- return flex_array_get(ti->buckets,
- (hash & (ti->n_buckets - 1)));
+ return &ti->buckets[hash & (ti->n_buckets - 1)];
}
static void table_instance_insert(struct table_instance *ti,
/* Insert in new table. */
for (i = 0; i < old->n_buckets; i++) {
struct sw_flow *flow;
- struct hlist_head *head;
-
- head = flex_array_get(old->buckets, i);
+ struct hlist_head *head = &old->buckets[i];
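+ /* Readers may still walk the old instance under RCU while
+ * flows are moved, so use the _rcu iterators.
+ */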
if (ufid)
- hlist_for_each_entry(flow, head,
- ufid_table.node[old_ver])
+ hlist_for_each_entry_rcu(flow, head,
+ ufid_table.node[old_ver])
ufid_table_instance_insert(new, flow);
else
- hlist_for_each_entry(flow, head,
- flow_table.node[old_ver])
+ hlist_for_each_entry_rcu(flow, head,
+ flow_table.node[old_ver])
table_instance_insert(new, flow);
}
flow_table->count = 0;
flow_table->ufid_count = 0;
- table_instance_destroy(old_ti, old_ufid_ti, true);
+ table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
return 0;
err_free_ti:
static u32 flow_hash(const struct sw_flow_key *key,
const struct sw_flow_key_range *range)
{
- int key_start = range->start;
- int key_end = range->end;
- const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
- int hash_u32s = (key_end - key_start) >> 2;
+ const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
- /* Make sure number of hash bytes are multiple of u32. */
+ /* Make sure the number of hash bytes is a multiple of u32. */
- BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+ int hash_u32s = range_n_bytes(range) >> 2;
return jhash2(hash_key, hash_u32s, 0);
}
static int flow_key_start(const struct sw_flow_key *key)
{
- if (key->tun_key.ipv4_dst)
+ if (key->tun_proto)
return 0;
else
return rounddown(offsetof(struct sw_flow_key, phy),
u32 hash;
struct sw_flow_key masked_key;
- ovs_flow_mask_key(&masked_key, unmasked, mask);
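+ /* Mask only the bytes within 'mask->range'; the hash and the
+ * key comparisons below never look outside it.
+ */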
+ ovs_flow_mask_key(&masked_key, unmasked, false, mask);
hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash);
(*n_mask_hit)++;
u32 *n_mask_hit,
u32 *index)
{
- struct sw_flow_mask *mask;
struct sw_flow *flow;
+ struct sw_flow_mask *mask;
int i;
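+ /* Try the cached mask index first; packets of the same flow
+ * pattern will very likely hit the same mask.
+ */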
- if (*index < ma->max) {
+ if (likely(*index < ma->max)) {
mask = rcu_dereference_ovsl(ma->masks[*index]);
if (mask) {
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
continue;
mask = rcu_dereference_ovsl(ma->masks[i]);
- if (!mask)
- continue;
+ if (unlikely(!mask))
+ break;
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) { /* Found */
struct mask_array *ma;
ma = rcu_dereference_ovsl(table->mask_array);
- return ma->count;
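+ /* Pairs with the WRITE_ONCE() in tbl_mask_array_{add,del}_mask(). */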
+ return READ_ONCE(ma->count);
}
static struct table_instance *table_instance_expand(struct table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
-static void tbl_mask_array_delete_mask(struct mask_array *ma,
- struct sw_flow_mask *mask)
-{
- int i;
-
- /* Remove the deleted mask pointers from the array */
- for (i = 0; i < ma->max; i++) {
- if (mask == ovsl_dereference(ma->masks[i])) {
- RCU_INIT_POINTER(ma->masks[i], NULL);
- ma->count--;
- call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
- return;
- }
- }
- BUG();
-}
-
-/* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
- if (mask) {
- /* ovs-lock is required to protect mask-refcount and
- * mask list.
- */
- ASSERT_OVSL();
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- struct mask_array *ma;
-
- ma = ovsl_dereference(tbl->mask_array);
- tbl_mask_array_delete_mask(ma, mask);
-
- /* Shrink the mask array if necessary. */
- if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
- ma->count <= (ma->max / 3))
- tbl_mask_array_realloc(tbl, ma->max / 2);
-
- }
- }
-}
-
/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
- hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
- table->count--;
- if (ovs_identifier_is_ufid(&flow->id)) {
- hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
- table->ufid_count--;
- }
-
- /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
- * accessible as long as the RCU read lock is held.
- */
- flow_mask_remove(table, flow->mask);
+ table_instance_flow_free(table, ti, ufid_ti, flow, true);
}
static struct sw_flow_mask *mask_alloc(void)
mask = flow_mask_find(tbl, new);
if (!mask) {
- struct mask_array *ma;
- int i;
-
- /* Allocate a new mask if none exsits. */
+ /* Allocate a new mask if none exists. */
mask = mask_alloc();
if (!mask)
mask->range = new->range;
/* Add mask to mask-list. */
- ma = ovsl_dereference(tbl->mask_array);
- if (ma->count >= ma->max) {
- int err;
-
- err = tbl_mask_array_realloc(tbl, ma->max +
- MASK_ARRAY_SIZE_MIN);
- if (err) {
- kfree(mask);
- return err;
- }
- ma = ovsl_dereference(tbl->mask_array);
- }
-
- for (i = 0; i < ma->max; i++) {
- struct sw_flow_mask *t;
-
- t = ovsl_dereference(ma->masks[i]);
- if (!t) {
- rcu_assign_pointer(ma->masks[i], mask);
- ma->count++;
- break;
- }
+ if (tbl_mask_array_add_mask(tbl, mask)) {
+ kfree(mask);
+ return -ENOMEM;
}
} else {
BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
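+ /* Each flow embeds one stats pointer per possible CPU as a
+ * trailing array, hence the nr_cpu_ids term in the object size.
+ */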
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
- + (nr_node_ids
- * sizeof(struct flow_stats *)),
+ + (nr_cpu_ids
+ * sizeof(struct sw_flow_stats *)),
0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
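+ /* SLAB_HWCACHE_ALIGN keeps per-CPU stats on separate cache
+ * lines to avoid false sharing between CPUs.
+ */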
flow_stats_cache
- = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+ = kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
0, SLAB_HWCACHE_ALIGN, NULL);
if (flow_stats_cache == NULL) {
kmem_cache_destroy(flow_cache);