]> git.proxmox.com Git - ovs.git/blame - ofproto/ofproto-dpif-rid.c
nx-match: Only store significant bytes to stack.
[ovs.git] / ofproto / ofproto-dpif-rid.c
CommitLineData
f5374617 1/*
85b9cb2e 2 * Copyright (c) 2014, 2015, 2016 Nicira, Inc.
f5374617
AZ
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18
64c96779 19#include "openvswitch/ofpbuf.h"
e672ff9b 20#include "ofproto-dpif.h"
f5374617 21#include "ofproto-dpif-rid.h"
e672ff9b
JR
22#include "ofproto-provider.h"
23#include "openvswitch/vlog.h"
f5374617 24
e672ff9b 25VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid);
f5374617 26
b70e6976 27static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
f5374617 28
b70e6976
BP
29static struct cmap id_map = CMAP_INITIALIZER;
30static struct cmap metadata_map = CMAP_INITIALIZER;
e672ff9b 31
b70e6976
BP
32static struct ovs_list expiring OVS_GUARDED_BY(mutex)
33 = OVS_LIST_INITIALIZER(&expiring);
34static struct ovs_list expired OVS_GUARDED_BY(mutex)
35 = OVS_LIST_INITIALIZER(&expired);
e672ff9b 36
b70e6976 37static uint32_t next_id OVS_GUARDED_BY(mutex) = 1; /* Possible next free id. */
e672ff9b
JR
38
39#define RECIRC_POOL_STATIC_IDS 1024
40
85b9cb2e
BP
41static void recirc_id_node_free(struct recirc_id_node *);
42
e672ff9b
JR
43/* This should be called by the revalidator once at each round (every 500ms or
44 * more). */
f5374617 45void
e672ff9b
JR
46recirc_run(void)
47{
48 static long long int last = 0;
49 long long int now = time_msec();
50
51 /* Do maintenance at most 4 times / sec. */
52 ovs_mutex_lock(&mutex);
53 if (now - last > 250) {
5f03c983 54 struct recirc_id_node *node;
e672ff9b
JR
55
56 last = now;
57
58 /* Nodes in 'expiring' and 'expired' lists have the refcount of zero,
59 * which means that while they can still be found (by id), no new
60 * references can be taken on them. We have removed the entry from the
61 * 'metadata_map', at the time when refcount reached zero, causing any
62 * new translations to allocate a new ID. This allows the expiring
63 * entry to be safely deleted while any sudden new use of the similar
64 * recirculation will safely start using a new recirculation ID. When
65 * the refcount gets to zero, the node is also added to the 'expiring'
66 * list. At any time after that the nodes in the 'expiring' list can
67 * be moved to the 'expired' list, from which they are deleted at least
68 * 250ms afterwards. */
69
70 /* Delete the expired. These have been lingering for at least 250 ms,
71 * which should be enough for any ongoing recirculations to be
72 * finished. */
5f03c983 73 LIST_FOR_EACH_POP (node, exp_node, &expired) {
e672ff9b 74 cmap_remove(&id_map, &node->id_node, node->id);
85b9cb2e 75 ovsrcu_postpone(recirc_id_node_free, node);
e672ff9b
JR
76 }
77
417e7e66 78 if (!ovs_list_is_empty(&expiring)) {
e672ff9b 79 /* 'expired' is now empty, move nodes in 'expiring' to it. */
417e7e66 80 ovs_list_splice(&expired, ovs_list_front(&expiring), &expiring);
e672ff9b
JR
81 }
82 }
83 ovs_mutex_unlock(&mutex);
84}
85
86/* We use the id as the hash value, which works due to cmap internal rehashing.
87 * We also only insert nodes with unique IDs, so all possible hash collisions
88 * remain internal to the cmap. */
89static struct recirc_id_node *
90recirc_find__(uint32_t id)
91 OVS_REQUIRES(mutex)
92{
93 struct cmap_node *node = cmap_find_protected(&id_map, id);
94
95 return node ? CONTAINER_OF(node, struct recirc_id_node, id_node) : NULL;
96}
97
98/* Lockless RCU protected lookup. If node is needed accross RCU quiescent
99 * state, caller should copy the contents. */
100const struct recirc_id_node *
101recirc_id_node_find(uint32_t id)
102{
103 const struct cmap_node *node = cmap_find(&id_map, id);
104
105 return node
106 ? CONTAINER_OF(node, const struct recirc_id_node, id_node)
107 : NULL;
108}
109
110static uint32_t
1d361a81 111frozen_state_hash(const struct frozen_state *state)
e672ff9b
JR
112{
113 uint32_t hash;
114
290835f9 115 hash = uuid_hash(&state->ofproto_uuid);
2082425c 116 hash = hash_int(state->table_id, hash);
ffe4c74f 117 if (flow_tnl_dst_is_set(state->metadata.tunnel)) {
59781952
JR
118 /* We may leave remainder bytes unhashed, but that is unlikely as
119 * the tunnel is not in the datapath format. */
0a96a21b
BP
120 hash = hash_bytes64((const uint64_t *) state->metadata.tunnel,
121 flow_tnl_size(state->metadata.tunnel), hash);
59781952 122 }
07659514 123 hash = hash_boolean(state->conntracked, hash);
0a96a21b
BP
124 hash = hash_bytes64((const uint64_t *) &state->metadata.metadata,
125 sizeof state->metadata - sizeof state->metadata.tunnel,
e672ff9b 126 hash);
84cf3c1f
JR
127 if (state->stack && state->stack_size) {
128 hash = hash_bytes(state->stack, state->stack_size, hash);
e672ff9b 129 }
29bae541 130 hash = hash_int(state->mirrors, hash);
2082425c 131 hash = hash_int(state->action_set_len, hash);
417509fa
BP
132 if (state->action_set_len) {
133 hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->action_set),
134 state->action_set_len, hash);
135 }
2082425c 136 if (state->ofpacts_len) {
0a96a21b
BP
137 hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->ofpacts),
138 state->ofpacts_len, hash);
e672ff9b
JR
139 }
140 return hash;
141}
142
143static bool
1d361a81 144frozen_state_equal(const struct frozen_state *a, const struct frozen_state *b)
e672ff9b 145{
2082425c 146 return (a->table_id == b->table_id
290835f9 147 && uuid_equals(&a->ofproto_uuid, &b->ofproto_uuid)
59781952
JR
148 && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel)
149 && !memcmp(&a->metadata.metadata, &b->metadata.metadata,
150 sizeof a->metadata - sizeof a->metadata.tunnel)
84cf3c1f
JR
151 && a->stack_size == b->stack_size
152 && !memcmp(a->stack, b->stack, a->stack_size)
29bae541 153 && a->mirrors == b->mirrors
07659514 154 && a->conntracked == b->conntracked
2082425c 155 && ofpacts_equal(a->ofpacts, a->ofpacts_len,
417509fa
BP
156 b->ofpacts, b->ofpacts_len)
157 && ofpacts_equal(a->action_set, a->action_set_len,
158 b->action_set, b->action_set_len));
e672ff9b
JR
159}
160
161/* Lockless RCU protected lookup. If node is needed accross RCU quiescent
162 * state, caller should take a reference. */
163static struct recirc_id_node *
1d361a81 164recirc_find_equal(const struct frozen_state *target, uint32_t hash)
e672ff9b
JR
165{
166 struct recirc_id_node *node;
167
2082425c 168 CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &metadata_map) {
1d361a81 169 if (frozen_state_equal(&node->state, target)) {
e672ff9b
JR
170 return node;
171 }
172 }
173 return NULL;
174}
175
176static struct recirc_id_node *
1d361a81 177recirc_ref_equal(const struct frozen_state *target, uint32_t hash)
e672ff9b
JR
178{
179 struct recirc_id_node *node;
180
181 do {
2082425c 182 node = recirc_find_equal(target, hash);
e672ff9b
JR
183
184 /* Try again if the node was released before we get the reference. */
185 } while (node && !ovs_refcount_try_ref_rcu(&node->refcount));
186
187 return node;
188}
189
2082425c 190static void
1d361a81 191frozen_state_clone(struct frozen_state *new, const struct frozen_state *old,
59781952 192 struct flow_tnl *tunnel)
2082425c
BP
193{
194 *new = *old;
59781952
JR
195 flow_tnl_copy__(tunnel, old->metadata.tunnel);
196 new->metadata.tunnel = tunnel;
197
84cf3c1f
JR
198 new->stack = (new->stack_size
199 ? xmemdup(new->stack, new->stack_size)
5c1b2314
BP
200 : NULL);
201 new->ofpacts = (new->ofpacts_len
202 ? xmemdup(new->ofpacts, new->ofpacts_len)
203 : NULL);
417509fa
BP
204 new->action_set = (new->action_set_len
205 ? xmemdup(new->action_set, new->action_set_len)
206 : NULL);
2082425c
BP
207}
208
85b9cb2e 209static void
1d361a81 210frozen_state_free(struct frozen_state *state)
85b9cb2e 211{
5c1b2314 212 free(state->stack);
85b9cb2e 213 free(state->ofpacts);
417509fa 214 free(state->action_set);
85b9cb2e
BP
215}
216
e672ff9b
JR
217/* Allocate a unique recirculation id for the given set of flow metadata.
218 * The ID space is 2^^32, so there should never be a situation in which all
219 * the IDs are used up. We loop until we find a free one.
220 * hash is recomputed if it is passed in as 0. */
221static struct recirc_id_node *
1d361a81 222recirc_alloc_id__(const struct frozen_state *state, uint32_t hash)
e672ff9b 223{
2082425c
BP
224 ovs_assert(state->action_set_len <= state->ofpacts_len);
225
226 struct recirc_id_node *node = xzalloc(sizeof *node);
59781952 227
e672ff9b
JR
228 node->hash = hash;
229 ovs_refcount_init(&node->refcount);
1d361a81 230 frozen_state_clone(CONST_CAST(struct frozen_state *, &node->state), state,
59781952 231 &node->state_metadata_tunnel);
e672ff9b
JR
232
233 ovs_mutex_lock(&mutex);
234 for (;;) {
235 /* Claim the next ID. The ID space should be sparse enough for the
236 allocation to succeed at the first try. We do skip the first
237 RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of
238 the initial allocations may be for long term uses (like bonds). */
239 node->id = next_id++;
240 if (OVS_UNLIKELY(!node->id)) {
241 next_id = RECIRC_POOL_STATIC_IDS + 1;
242 node->id = next_id++;
243 }
244 /* Find if the id is free. */
245 if (OVS_LIKELY(!recirc_find__(node->id))) {
246 break;
247 }
248 }
249 cmap_insert(&id_map, &node->id_node, node->id);
250 cmap_insert(&metadata_map, &node->metadata_node, node->hash);
251 ovs_mutex_unlock(&mutex);
252 return node;
253}
254
255/* Look up an existing ID for the given flow's metadata and optional actions.
256 */
257uint32_t
1d361a81 258recirc_find_id(const struct frozen_state *target)
e672ff9b 259{
1d361a81 260 uint32_t hash = frozen_state_hash(target);
2082425c 261 struct recirc_id_node *node = recirc_find_equal(target, hash);
e672ff9b
JR
262 return node ? node->id : 0;
263}
264
265/* Allocate a unique recirculation id for the given set of flow metadata and
266 optional actions. */
267uint32_t
1d361a81 268recirc_alloc_id_ctx(const struct frozen_state *state)
f5374617 269{
1d361a81 270 uint32_t hash = frozen_state_hash(state);
2082425c 271 struct recirc_id_node *node = recirc_ref_equal(state, hash);
e672ff9b 272 if (!node) {
2082425c 273 node = recirc_alloc_id__(state, hash);
e672ff9b 274 }
e672ff9b 275 return node->id;
f5374617
AZ
276}
277
e672ff9b 278/* Allocate a unique recirculation id. */
f5374617 279uint32_t
e672ff9b 280recirc_alloc_id(struct ofproto_dpif *ofproto)
f5374617 281{
59781952
JR
282 struct flow_tnl tunnel;
283 tunnel.ip_dst = htonl(0);
ffe4c74f 284 tunnel.ipv6_dst = in6addr_any;
1d361a81 285 struct frozen_state state = {
2082425c 286 .table_id = TBL_INTERNAL,
07a3cd5c 287 .ofproto_uuid = ofproto->uuid,
59781952 288 .metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE },
2082425c 289 };
1d361a81 290 return recirc_alloc_id__(&state, frozen_state_hash(&state))->id;
e672ff9b 291}
f5374617 292
85b9cb2e
BP
293static void
294recirc_id_node_free(struct recirc_id_node *node)
295{
1d361a81 296 frozen_state_free(CONST_CAST(struct frozen_state *, &node->state));
85b9cb2e
BP
297 free(node);
298}
299
e672ff9b
JR
300void
301recirc_id_node_unref(const struct recirc_id_node *node_)
302 OVS_EXCLUDED(mutex)
303{
304 struct recirc_id_node *node = CONST_CAST(struct recirc_id_node *, node_);
305
306 if (node && ovs_refcount_unref(&node->refcount) == 1) {
307 ovs_mutex_lock(&mutex);
308 /* Prevent re-use of this node by removing the node from 'metadata_map'
309 */
310 cmap_remove(&metadata_map, &node->metadata_node, node->hash);
311 /* We keep the node in the 'id_map' so that it can be found as long
312 * as it lingers, and add it to the 'expiring' list. */
417e7e66 313 ovs_list_insert(&expiring, &node->exp_node);
e672ff9b 314 ovs_mutex_unlock(&mutex);
27c24749 315 }
e672ff9b 316}
27c24749 317
e672ff9b
JR
/* Drops one reference to the recirculation node with the given 'id'.  Logs
 * an error if no node with that ID exists. */
void
recirc_free_id(uint32_t id)
{
    const struct recirc_id_node *node = recirc_id_node_find(id);

    if (!node) {
        VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32, id);
        return;
    }
    recirc_id_node_unref(node);
}
330
e672ff9b
JR
331/* Called when 'ofproto' is destructed. Checks for and clears any
332 * recirc_id leak.
333 * No other thread may have access to the 'ofproto' being destructed.
334 * All related datapath flows must be deleted before calling this. */
f5374617 335void
e672ff9b 336recirc_free_ofproto(struct ofproto_dpif *ofproto, const char *ofproto_name)
f5374617 337{
e672ff9b
JR
338 struct recirc_id_node *n;
339
340 CMAP_FOR_EACH (n, metadata_node, &metadata_map) {
07a3cd5c 341 if (uuid_equals(&n->state.ofproto_uuid, &ofproto->uuid)) {
e672ff9b
JR
342 VLOG_ERR("recirc_id %"PRIu32
343 " left allocated when ofproto (%s)"
344 " is destructed", n->id, ofproto_name);
345 }
346 }
f5374617 347}