]> git.proxmox.com Git - mirror_ovs.git/blame - ofproto/ofproto-dpif-rid.c
ofproto-dpif: Fix for recirc issue with mpls traffic with dp_hash
[mirror_ovs.git] / ofproto / ofproto-dpif-rid.c
CommitLineData
f5374617 1/*
6cb5e507 2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
f5374617
AZ
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18
64c96779 19#include "openvswitch/ofpbuf.h"
e672ff9b 20#include "ofproto-dpif.h"
f5374617 21#include "ofproto-dpif-rid.h"
e672ff9b
JR
22#include "ofproto-provider.h"
23#include "openvswitch/vlog.h"
f5374617 24
e672ff9b 25VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid);
f5374617 26
b70e6976 27static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
f5374617 28
b70e6976
BP
29static struct cmap id_map = CMAP_INITIALIZER;
30static struct cmap metadata_map = CMAP_INITIALIZER;
e672ff9b 31
b70e6976
BP
32static struct ovs_list expiring OVS_GUARDED_BY(mutex)
33 = OVS_LIST_INITIALIZER(&expiring);
34static struct ovs_list expired OVS_GUARDED_BY(mutex)
35 = OVS_LIST_INITIALIZER(&expired);
e672ff9b 36
b70e6976 37static uint32_t next_id OVS_GUARDED_BY(mutex) = 1; /* Possible next free id. */
e672ff9b
JR
38
39#define RECIRC_POOL_STATIC_IDS 1024
40
85b9cb2e
BP
41static void recirc_id_node_free(struct recirc_id_node *);
42
e672ff9b
JR
43/* This should be called by the revalidator once at each round (every 500ms or
44 * more). */
f5374617 45void
e672ff9b
JR
46recirc_run(void)
47{
48 static long long int last = 0;
49 long long int now = time_msec();
50
51 /* Do maintenance at most 4 times / sec. */
52 ovs_mutex_lock(&mutex);
53 if (now - last > 250) {
5f03c983 54 struct recirc_id_node *node;
e672ff9b
JR
55
56 last = now;
57
58 /* Nodes in 'expiring' and 'expired' lists have the refcount of zero,
59 * which means that while they can still be found (by id), no new
60 * references can be taken on them. We have removed the entry from the
61 * 'metadata_map', at the time when refcount reached zero, causing any
62 * new translations to allocate a new ID. This allows the expiring
63 * entry to be safely deleted while any sudden new use of the similar
64 * recirculation will safely start using a new recirculation ID. When
65 * the refcount gets to zero, the node is also added to the 'expiring'
66 * list. At any time after that the nodes in the 'expiring' list can
67 * be moved to the 'expired' list, from which they are deleted at least
68 * 250ms afterwards. */
69
70 /* Delete the expired. These have been lingering for at least 250 ms,
71 * which should be enough for any ongoing recirculations to be
72 * finished. */
5f03c983 73 LIST_FOR_EACH_POP (node, exp_node, &expired) {
e672ff9b 74 cmap_remove(&id_map, &node->id_node, node->id);
85b9cb2e 75 ovsrcu_postpone(recirc_id_node_free, node);
e672ff9b
JR
76 }
77
417e7e66 78 if (!ovs_list_is_empty(&expiring)) {
e672ff9b 79 /* 'expired' is now empty, move nodes in 'expiring' to it. */
417e7e66 80 ovs_list_splice(&expired, ovs_list_front(&expiring), &expiring);
e672ff9b
JR
81 }
82 }
83 ovs_mutex_unlock(&mutex);
84}
85
86/* We use the id as the hash value, which works due to cmap internal rehashing.
87 * We also only insert nodes with unique IDs, so all possible hash collisions
88 * remain internal to the cmap. */
89static struct recirc_id_node *
90recirc_find__(uint32_t id)
91 OVS_REQUIRES(mutex)
92{
93 struct cmap_node *node = cmap_find_protected(&id_map, id);
94
95 return node ? CONTAINER_OF(node, struct recirc_id_node, id_node) : NULL;
96}
97
98/* Lockless RCU protected lookup. If node is needed accross RCU quiescent
99 * state, caller should copy the contents. */
100const struct recirc_id_node *
101recirc_id_node_find(uint32_t id)
102{
103 const struct cmap_node *node = cmap_find(&id_map, id);
104
105 return node
106 ? CONTAINER_OF(node, const struct recirc_id_node, id_node)
107 : NULL;
108}
109
e6bc8e74
YHW
110bool
111recirc_id_node_find_and_ref(uint32_t id)
112{
113 struct recirc_id_node *rid_node =
114 CONST_CAST(struct recirc_id_node *, recirc_id_node_find(id));
115
116 if (!rid_node) {
117 return false;
118 }
119
120 return ovs_refcount_try_ref_rcu(&rid_node->refcount);
121}
122
e672ff9b 123static uint32_t
1d361a81 124frozen_state_hash(const struct frozen_state *state)
e672ff9b
JR
125{
126 uint32_t hash;
127
290835f9 128 hash = uuid_hash(&state->ofproto_uuid);
2082425c 129 hash = hash_int(state->table_id, hash);
8014f465
JP
130 hash = hash_bytes64((const uint64_t *) &state->metadata,
131 sizeof state->metadata, hash);
07659514 132 hash = hash_boolean(state->conntracked, hash);
aeb6566d 133 hash = hash_boolean(state->was_mpls, hash);
84cf3c1f
JR
134 if (state->stack && state->stack_size) {
135 hash = hash_bytes(state->stack, state->stack_size, hash);
e672ff9b 136 }
29bae541 137 hash = hash_int(state->mirrors, hash);
417509fa
BP
138 if (state->action_set_len) {
139 hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->action_set),
140 state->action_set_len, hash);
141 }
2082425c 142 if (state->ofpacts_len) {
0a96a21b
BP
143 hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->ofpacts),
144 state->ofpacts_len, hash);
e672ff9b 145 }
d39ec23d
JP
146 if (state->userdata && state->userdata_len) {
147 hash = hash_bytes(state->userdata, state->userdata_len, hash);
148 }
e672ff9b
JR
149 return hash;
150}
151
152static bool
1d361a81 153frozen_state_equal(const struct frozen_state *a, const struct frozen_state *b)
e672ff9b 154{
2082425c 155 return (a->table_id == b->table_id
290835f9 156 && uuid_equals(&a->ofproto_uuid, &b->ofproto_uuid)
8014f465 157 && !memcmp(&a->metadata, &b->metadata, sizeof a->metadata)
84cf3c1f
JR
158 && a->stack_size == b->stack_size
159 && !memcmp(a->stack, b->stack, a->stack_size)
29bae541 160 && a->mirrors == b->mirrors
07659514 161 && a->conntracked == b->conntracked
aeb6566d 162 && a->was_mpls == b->was_mpls
2082425c 163 && ofpacts_equal(a->ofpacts, a->ofpacts_len,
417509fa
BP
164 b->ofpacts, b->ofpacts_len)
165 && ofpacts_equal(a->action_set, a->action_set_len,
d39ec23d 166 b->action_set, b->action_set_len)
00135b86
ZB
167 && !memcmp(a->userdata, b->userdata, a->userdata_len)
168 && uuid_equals(&a->xport_uuid, &b->xport_uuid));
e672ff9b
JR
169}
170
171/* Lockless RCU protected lookup. If node is needed accross RCU quiescent
172 * state, caller should take a reference. */
173static struct recirc_id_node *
1d361a81 174recirc_find_equal(const struct frozen_state *target, uint32_t hash)
e672ff9b
JR
175{
176 struct recirc_id_node *node;
177
2082425c 178 CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &metadata_map) {
1d361a81 179 if (frozen_state_equal(&node->state, target)) {
e672ff9b
JR
180 return node;
181 }
182 }
183 return NULL;
184}
185
186static struct recirc_id_node *
1d361a81 187recirc_ref_equal(const struct frozen_state *target, uint32_t hash)
e672ff9b
JR
188{
189 struct recirc_id_node *node;
190
191 do {
2082425c 192 node = recirc_find_equal(target, hash);
e672ff9b
JR
193
194 /* Try again if the node was released before we get the reference. */
195 } while (node && !ovs_refcount_try_ref_rcu(&node->refcount));
196
197 return node;
198}
199
2082425c 200static void
6cb5e507 201frozen_state_clone(struct frozen_state *new, const struct frozen_state *old)
2082425c
BP
202{
203 *new = *old;
84cf3c1f
JR
204 new->stack = (new->stack_size
205 ? xmemdup(new->stack, new->stack_size)
5c1b2314
BP
206 : NULL);
207 new->ofpacts = (new->ofpacts_len
208 ? xmemdup(new->ofpacts, new->ofpacts_len)
209 : NULL);
417509fa
BP
210 new->action_set = (new->action_set_len
211 ? xmemdup(new->action_set, new->action_set_len)
212 : NULL);
d39ec23d
JP
213 new->userdata = (new->userdata_len
214 ? xmemdup(new->userdata, new->userdata_len)
215 : NULL);
2082425c
BP
216}
217
85b9cb2e 218static void
1d361a81 219frozen_state_free(struct frozen_state *state)
85b9cb2e 220{
5c1b2314 221 free(state->stack);
85b9cb2e 222 free(state->ofpacts);
417509fa 223 free(state->action_set);
d39ec23d 224 free(state->userdata);
85b9cb2e
BP
225}
226
e672ff9b
JR
227/* Allocate a unique recirculation id for the given set of flow metadata.
228 * The ID space is 2^^32, so there should never be a situation in which all
95263037 229 * the IDs are used up. We loop until we find a free one. */
e672ff9b 230static struct recirc_id_node *
1d361a81 231recirc_alloc_id__(const struct frozen_state *state, uint32_t hash)
e672ff9b 232{
2082425c
BP
233 ovs_assert(state->action_set_len <= state->ofpacts_len);
234
235 struct recirc_id_node *node = xzalloc(sizeof *node);
59781952 236
e672ff9b
JR
237 node->hash = hash;
238 ovs_refcount_init(&node->refcount);
6cb5e507 239 frozen_state_clone(CONST_CAST(struct frozen_state *, &node->state), state);
e672ff9b
JR
240
241 ovs_mutex_lock(&mutex);
242 for (;;) {
243 /* Claim the next ID. The ID space should be sparse enough for the
244 allocation to succeed at the first try. We do skip the first
245 RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of
246 the initial allocations may be for long term uses (like bonds). */
247 node->id = next_id++;
248 if (OVS_UNLIKELY(!node->id)) {
249 next_id = RECIRC_POOL_STATIC_IDS + 1;
250 node->id = next_id++;
251 }
252 /* Find if the id is free. */
253 if (OVS_LIKELY(!recirc_find__(node->id))) {
254 break;
255 }
256 }
257 cmap_insert(&id_map, &node->id_node, node->id);
258 cmap_insert(&metadata_map, &node->metadata_node, node->hash);
259 ovs_mutex_unlock(&mutex);
260 return node;
261}
262
263/* Look up an existing ID for the given flow's metadata and optional actions.
264 */
265uint32_t
1d361a81 266recirc_find_id(const struct frozen_state *target)
e672ff9b 267{
1d361a81 268 uint32_t hash = frozen_state_hash(target);
2082425c 269 struct recirc_id_node *node = recirc_find_equal(target, hash);
e672ff9b
JR
270 return node ? node->id : 0;
271}
272
273/* Allocate a unique recirculation id for the given set of flow metadata and
274 optional actions. */
275uint32_t
1d361a81 276recirc_alloc_id_ctx(const struct frozen_state *state)
f5374617 277{
1d361a81 278 uint32_t hash = frozen_state_hash(state);
2082425c 279 struct recirc_id_node *node = recirc_ref_equal(state, hash);
e672ff9b 280 if (!node) {
2082425c 281 node = recirc_alloc_id__(state, hash);
e672ff9b 282 }
e672ff9b 283 return node->id;
f5374617
AZ
284}
285
e672ff9b 286/* Allocate a unique recirculation id. */
f5374617 287uint32_t
e672ff9b 288recirc_alloc_id(struct ofproto_dpif *ofproto)
f5374617 289{
1d361a81 290 struct frozen_state state = {
2082425c 291 .table_id = TBL_INTERNAL,
07a3cd5c 292 .ofproto_uuid = ofproto->uuid,
6cb5e507
JP
293 .metadata = {
294 .tunnel = {
295 .ip_dst = htonl(0),
296 .ipv6_dst = in6addr_any,
297 },
298 .in_port = OFPP_NONE },
2082425c 299 };
00135b86
ZB
300 /* In order to make sparse happy, xport_uuid needs to be set separately. */
301 state.xport_uuid = UUID_ZERO;
1d361a81 302 return recirc_alloc_id__(&state, frozen_state_hash(&state))->id;
e672ff9b 303}
f5374617 304
85b9cb2e
BP
305static void
306recirc_id_node_free(struct recirc_id_node *node)
307{
1d361a81 308 frozen_state_free(CONST_CAST(struct frozen_state *, &node->state));
85b9cb2e
BP
309 free(node);
310}
311
e672ff9b
JR
312void
313recirc_id_node_unref(const struct recirc_id_node *node_)
314 OVS_EXCLUDED(mutex)
315{
316 struct recirc_id_node *node = CONST_CAST(struct recirc_id_node *, node_);
317
318 if (node && ovs_refcount_unref(&node->refcount) == 1) {
319 ovs_mutex_lock(&mutex);
320 /* Prevent re-use of this node by removing the node from 'metadata_map'
321 */
322 cmap_remove(&metadata_map, &node->metadata_node, node->hash);
323 /* We keep the node in the 'id_map' so that it can be found as long
324 * as it lingers, and add it to the 'expiring' list. */
417e7e66 325 ovs_list_insert(&expiring, &node->exp_node);
e672ff9b 326 ovs_mutex_unlock(&mutex);
27c24749 327 }
e672ff9b 328}
27c24749 329
e672ff9b
JR
/* Drops one reference to the node with the given 'id'.  Logs an error if no
 * node with that ID exists. */
void
recirc_free_id(uint32_t id)
{
    const struct recirc_id_node *node = recirc_id_node_find(id);

    if (!node) {
        VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32, id);
        return;
    }

    recirc_id_node_unref(node);
}
342
e672ff9b
JR
343/* Called when 'ofproto' is destructed. Checks for and clears any
344 * recirc_id leak.
345 * No other thread may have access to the 'ofproto' being destructed.
346 * All related datapath flows must be deleted before calling this. */
f5374617 347void
e672ff9b 348recirc_free_ofproto(struct ofproto_dpif *ofproto, const char *ofproto_name)
f5374617 349{
e672ff9b
JR
350 struct recirc_id_node *n;
351
352 CMAP_FOR_EACH (n, metadata_node, &metadata_map) {
07a3cd5c 353 if (uuid_equals(&n->state.ofproto_uuid, &ofproto->uuid)) {
e672ff9b
JR
354 VLOG_ERR("recirc_id %"PRIu32
355 " left allocated when ofproto (%s)"
356 " is destructed", n->id, ofproto_name);
357 }
358 }
f5374617 359}