/*
 * Copyright (c) 2014, 2015, 2016 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 #include "ofproto-dpif.h"
21 #include "ofproto-dpif-rid.h"
22 #include "ofproto-provider.h"
23 #include "openvswitch/vlog.h"
/* File-scope state of this module: the logging module definition, the
 * module mutex, two concurrent maps, two lists and an id counter that are
 * annotated as guarded by that mutex, a pool-size constant, and a forward
 * declaration of the node destructor. */
25 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid
);
27 static struct ovs_mutex mutex
;
/* Nodes are inserted here with their recirculation id as the hash value. */
29 static struct cmap id_map
;
/* Nodes are inserted here with their stored 'hash' member as the hash
 * value; lookups by state walk this map. */
30 static struct cmap metadata_map
;
/* Nodes whose last reference was dropped are put on 'expiring'; the
 * maintenance code later moves them to 'expired' and deletes them from
 * there. */
32 static struct ovs_list expiring
OVS_GUARDED_BY(mutex
);
33 static struct ovs_list expired
OVS_GUARDED_BY(mutex
);
35 static uint32_t next_id
OVS_GUARDED_BY(mutex
); /* Possible next free id. */
/* When the id counter wraps to 0, allocation restarts at this value + 1. */
37 #define RECIRC_POOL_STATIC_IDS 1024
39 static void recirc_id_node_free(struct recirc_id_node
*);
/* One-time initialization, guarded by an ovsthread_once so the body runs
 * only for the caller for which ovsthread_once_start() returns true.  The
 * visible steps: initialize and take 'mutex', set 'next_id' to 1,
 * initialize 'metadata_map', release 'mutex', and mark the once as done.
 *
 * NOTE(review): the function header, the braces and several body lines
 * are missing from this extract (the embedded line numbers skip), so
 * initialization of 'id_map' and of the two lists is not visible here --
 * confirm against the complete file. */
44 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
46 if (ovsthread_once_start(&once
)) {
47 ovs_mutex_init(&mutex
);
48 ovs_mutex_lock(&mutex
);
49 next_id
= 1; /* 0 is not a valid ID. */
51 cmap_init(&metadata_map
);
54 ovs_mutex_unlock(&mutex
);
56 ovsthread_once_done(&once
);
/* Periodic maintenance of the two expiry lists, done while holding 'mutex'
 * and only when more than 250 ms separate 'now' from 'last':
 *
 *   1. Every node popped from 'expired' is removed from 'id_map' and its
 *      destruction with recirc_id_node_free() is deferred through
 *      ovsrcu_postpone().
 *   2. If 'expiring' is not empty, its nodes are spliced onto 'expired',
 *      to be deleted by a later pass.
 *
 * NOTE(review): the function header, the closing braces, the end of two
 * comments and the statement that updates 'last' are missing from this
 * extract -- confirm against the complete file. */
61 /* This should be called by the revalidator once at each round (every 500ms or
66 static long long int last
= 0;
67 long long int now
= time_msec();
69 /* Do maintenance at most 4 times / sec. */
70 ovs_mutex_lock(&mutex
);
71 if (now
- last
> 250) {
72 struct recirc_id_node
*node
;
76 /* Nodes in 'expiring' and 'expired' lists have the refcount of zero,
77 * which means that while they can still be found (by id), no new
78 * references can be taken on them. We have removed the entry from the
79 * 'metadata_map', at the time when refcount reached zero, causing any
80 * new translations to allocate a new ID. This allows the expiring
81 * entry to be safely deleted while any sudden new use of the similar
82 * recirculation will safely start using a new recirculation ID. When
83 * the refcount gets to zero, the node is also added to the 'expiring'
84 * list. At any time after that the nodes in the 'expiring' list can
85 * be moved to the 'expired' list, from which they are deleted at least
86 * 250ms afterwards. */
88 /* Delete the expired. These have been lingering for at least 250 ms,
89 * which should be enough for any ongoing recirculations to be
91 LIST_FOR_EACH_POP (node
, exp_node
, &expired
) {
92 cmap_remove(&id_map
, &node
->id_node
, node
->id
);
93 ovsrcu_postpone(recirc_id_node_free
, node
);
96 if (!list_is_empty(&expiring
)) {
97 /* 'expired' is now empty, move nodes in 'expiring' to it. */
98 list_splice(&expired
, list_front(&expiring
), &expiring
);
101 ovs_mutex_unlock(&mutex
);
/* Returns the node stored in 'id_map' under 'id', or NULL if there is none.
 * The lookup uses cmap_find_protected() and the result is converted from
 * the embedded 'id_node' member back to the enclosing node.
 *
 * NOTE(review): the only caller visible in this extract runs between
 * ovs_mutex_lock(&mutex) and ovs_mutex_unlock(&mutex); presumably holding
 * 'mutex' is a precondition, but no annotation is visible -- confirm. */
104 /* We use the id as the hash value, which works due to cmap internal rehashing.
105 * We also only insert nodes with unique IDs, so all possible hash collisions
106 * remain internal to the cmap. */
107 static struct recirc_id_node
*
108 recirc_find__(uint32_t id
)
111 struct cmap_node
*node
= cmap_find_protected(&id_map
, id
);
113 return node
? CONTAINER_OF(node
, struct recirc_id_node
, id_node
) : NULL
;
/* Looks 'id' up in 'id_map' with cmap_find() and converts a found map node
 * to the enclosing const recirc_id_node through its 'id_node' member.
 *
 * NOTE(review): the 'return' keyword line and the alternative branch of
 * the conditional expression are missing from this extract; presumably
 * NULL is returned when nothing is found -- confirm. */
116 /* Lockless RCU protected lookup. If node is needed across RCU quiescent
117 * state, caller should copy the contents. */
118 const struct recirc_id_node
*
119 recirc_id_node_find(uint32_t id
)
121 const struct cmap_node
*node
= cmap_find(&id_map
, id
);
124 ? CONTAINER_OF(node
, const struct recirc_id_node
, id_node
)
/* Computes a hash of 'state' by chaining, in this order: the 'ofproto'
 * pointer, 'table_id', the tunnel contents (only when
 * flow_tnl_dst_is_set() is true for it), 'conntracked', the metadata words
 * starting at 'metadata.metadata', the stack bytes (only when a stack
 * exists and is non-empty), 'mirrors', 'action_set_len', and the ofpacts
 * (only when 'ofpacts_len' is nonzero).
 *
 * For the tunnel, stack and ofpacts the word count passed to
 * hash_words64() is a byte size divided by sizeof(uint64_t), so trailing
 * bytes that do not fill a whole 64-bit word are not hashed.
 *
 * NOTE(review): the return type, the declaration of 'hash', the tail of
 * two hash_words64() calls and the return statement are missing from this
 * extract -- confirm against the complete file. */
129 recirc_metadata_hash(const struct recirc_state
*state
)
133 hash
= hash_pointer(state
->ofproto
, 0);
134 hash
= hash_int(state
->table_id
, hash
);
135 if (flow_tnl_dst_is_set(state
->metadata
.tunnel
)) {
136 /* We may leave remainder bytes unhashed, but that is unlikely as
137 * the tunnel is not in the datapath format. */
138 hash
= hash_words64((const uint64_t *) state
->metadata
.tunnel
,
139 flow_tnl_size(state
->metadata
.tunnel
)
140 / sizeof(uint64_t), hash
);
142 hash
= hash_boolean(state
->conntracked
, hash
);
143 hash
= hash_words64((const uint64_t *) &state
->metadata
.metadata
,
144 (sizeof state
->metadata
- sizeof state
->metadata
.tunnel
)
147 if (state
->stack
&& state
->stack
->size
!= 0) {
148 hash
= hash_words64((const uint64_t *) state
->stack
->data
,
149 state
->stack
->size
/ sizeof(uint64_t), hash
);
151 hash
= hash_int(state
->mirrors
, hash
);
152 hash
= hash_int(state
->action_set_len
, hash
);
153 if (state
->ofpacts_len
) {
154 hash
= hash_words64(ALIGNED_CAST(const uint64_t *, state
->ofpacts
),
155 state
->ofpacts_len
/ sizeof(uint64_t),
/* Compares two recirculation states.  The result is true only when all of
 * the following match between 'a' and 'b': 'table_id', 'ofproto', the
 * tunnels (flow_tnl_equal()), the bytes starting at 'metadata.metadata'
 * (sizeof metadata minus sizeof metadata.tunnel of them, via memcmp()),
 * the stacks, 'mirrors', 'conntracked', 'action_set_len', and the ofpacts
 * (ofpacts_equal()).
 *
 * Stacks compare equal either when both sides are "empty" (a NULL stack
 * and a zero-size stack are treated alike) or when both are non-NULL and
 * ofpbuf_equal() holds.
 *
 * NOTE(review): the return type line and the braces are missing from this
 * extract. */
162 recirc_metadata_equal(const struct recirc_state
*a
,
163 const struct recirc_state
*b
)
165 return (a
->table_id
== b
->table_id
166 && a
->ofproto
== b
->ofproto
167 && flow_tnl_equal(a
->metadata
.tunnel
, b
->metadata
.tunnel
)
168 && !memcmp(&a
->metadata
.metadata
, &b
->metadata
.metadata
,
169 sizeof a
->metadata
- sizeof a
->metadata
.tunnel
)
170 && (((!a
->stack
|| !a
->stack
->size
) &&
171 (!b
->stack
|| !b
->stack
->size
))
172 || (a
->stack
&& b
->stack
&& ofpbuf_equal(a
->stack
, b
->stack
)))
173 && a
->mirrors
== b
->mirrors
174 && a
->conntracked
== b
->conntracked
175 && a
->action_set_len
== b
->action_set_len
176 && ofpacts_equal(a
->ofpacts
, a
->ofpacts_len
,
177 b
->ofpacts
, b
->ofpacts_len
));
/* Walks the nodes of 'metadata_map' that were inserted with hash value
 * 'hash' and tests each node's stored state against 'target' with
 * recirc_metadata_equal().  No reference is taken on the node here.
 *
 * NOTE(review): the statement executed on a match and the final return
 * are missing from this extract; presumably the matching node is returned
 * and NULL otherwise -- confirm against the complete file. */
180 /* Lockless RCU protected lookup. If node is needed across RCU quiescent
181 * state, caller should take a reference. */
182 static struct recirc_id_node
*
183 recirc_find_equal(const struct recirc_state
*target
, uint32_t hash
)
185 struct recirc_id_node
*node
;
187 CMAP_FOR_EACH_WITH_HASH (node
, metadata_node
, hash
, &metadata_map
) {
188 if (recirc_metadata_equal(&node
->state
, target
)) {
/* Finds a node whose state equals 'target' and takes a reference on it.
 * The lookup with recirc_find_equal() is repeated for as long as a node is
 * found but ovs_refcount_try_ref_rcu() fails on it; the loop therefore
 * ends either with no node or with a node on which a reference was taken.
 *
 * NOTE(review): the opening of the do-while loop and the return statement
 * are missing from this extract; presumably 'node' is returned. */
195 static struct recirc_id_node
*
196 recirc_ref_equal(const struct recirc_state
*target
, uint32_t hash
)
198 struct recirc_id_node
*node
;
201 node
= recirc_find_equal(target
, hash
);
203 /* Try again if the node was released before we get the reference. */
204 } while (node
&& !ovs_refcount_try_ref_rcu(&node
->refcount
));
/* Gives 'new' its own copies of the data that 'old' points to:
 *
 *   - the tunnel is copied into the caller-supplied 'tunnel' storage with
 *     flow_tnl_copy__() and 'new->metadata.tunnel' is pointed at it;
 *   - 'new->stack' is replaced by an ofpbuf_clone() of itself when its size
 *     is nonzero, and by NULL otherwise;
 *   - 'new->ofpacts' is replaced by an xmemdup() copy when
 *     'new->ofpacts_len' is nonzero.
 *
 * NOTE(review): the stack and ofpacts lines read from 'new', so '*new'
 * must already hold old's values at that point, but the statement doing
 * that is missing from this extract, as are the return type, the braces
 * and the alternative branch of the ofpacts conditional.  The visible
 * code also dereferences 'new->stack->size' with no visible NULL check;
 * confirm in the complete file that a guard surrounds it. */
210 recirc_state_clone(struct recirc_state
*new, const struct recirc_state
*old
,
211 struct flow_tnl
*tunnel
)
214 flow_tnl_copy__(tunnel
, old
->metadata
.tunnel
);
215 new->metadata
.tunnel
= tunnel
;
218 new->stack
= new->stack
->size
? ofpbuf_clone(new->stack
) : NULL
;
221 new->ofpacts
= (new->ofpacts_len
222 ? xmemdup(new->ofpacts
, new->ofpacts_len
)
/* Releases the two heap objects referenced by 'state': the stack through
 * ofpbuf_delete() and the ofpacts through free().  'state' itself is not
 * freed by the visible code. */
228 recirc_state_free(struct recirc_state
*state
)
230 ofpbuf_delete(state
->stack
);
231 free(state
->ofpacts
);
/* Creates a new node for 'state' and publishes it in both maps.
 *
 * Visible steps: assert that 'action_set_len' does not exceed
 * 'ofpacts_len'; allocate a zeroed node; initialize its reference count;
 * clone 'state' into the node (the tunnel copy lives in the node's
 * 'state_metadata_tunnel' member); then, holding 'mutex', take an id from
 * 'next_id' (restarting at RECIRC_POOL_STATIC_IDS + 1 when the counter
 * yields 0), check with recirc_find__() that the id is unused, and insert
 * the node into 'id_map' under its id and into 'metadata_map' under
 * 'node->hash'.
 *
 * NOTE(review): the retry loop around the id claim, the assignment of
 * 'node->hash', any recomputation of a zero 'hash' (mentioned in the
 * comment below) and the return statement are missing from this extract
 * -- confirm against the complete file. */
234 /* Allocate a unique recirculation id for the given set of flow metadata.
235 * The ID space is 2^32, so there should never be a situation in which all
236 * the IDs are used up. We loop until we find a free one.
237 * hash is recomputed if it is passed in as 0. */
238 static struct recirc_id_node
*
239 recirc_alloc_id__(const struct recirc_state
*state
, uint32_t hash
)
241 ovs_assert(state
->action_set_len
<= state
->ofpacts_len
);
243 struct recirc_id_node
*node
= xzalloc(sizeof *node
);
246 ovs_refcount_init(&node
->refcount
);
247 recirc_state_clone(CONST_CAST(struct recirc_state
*, &node
->state
), state
,
248 &node
->state_metadata_tunnel
);
250 ovs_mutex_lock(&mutex
);
252 /* Claim the next ID. The ID space should be sparse enough for the
253 allocation to succeed at the first try. We do skip the first
254 RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of
255 the initial allocations may be for long term uses (like bonds). */
256 node
->id
= next_id
++;
257 if (OVS_UNLIKELY(!node
->id
)) {
258 next_id
= RECIRC_POOL_STATIC_IDS
+ 1;
259 node
->id
= next_id
++;
261 /* Find if the id is free. */
262 if (OVS_LIKELY(!recirc_find__(node
->id
))) {
266 cmap_insert(&id_map
, &node
->id_node
, node
->id
);
267 cmap_insert(&metadata_map
, &node
->metadata_node
, node
->hash
);
268 ovs_mutex_unlock(&mutex
);
/* Hashes 'target' with recirc_metadata_hash(), searches for a node with an
 * equal state through recirc_find_equal(), and returns that node's id, or
 * 0 when no such node is found.  No reference is taken on the node.
 *
 * NOTE(review): the end of the comment below, the return type and the
 * braces are missing from this extract. */
272 /* Look up an existing ID for the given flow's metadata and optional actions.
275 recirc_find_id(const struct recirc_state
*target
)
277 uint32_t hash
= recirc_metadata_hash(target
);
278 struct recirc_id_node
*node
= recirc_find_equal(target
, hash
);
279 return node
? node
->id
: 0;
// Obtains a node for 'state': the state is hashed with
// recirc_metadata_hash(), recirc_ref_equal() is tried first to take a
// reference on an existing node with an equal state, and
// recirc_alloc_id__() creates a new node with the same hash.
//
// NOTE(review): the condition that guards the allocation, the return
// statement, the return type and the end of the comment below are missing
// from this extract; presumably a new node is allocated only when no
// existing one was found -- confirm against the complete file.
282 /* Allocate a unique recirculation id for the given set of flow metadata and
285 recirc_alloc_id_ctx(const struct recirc_state
*state
)
287 uint32_t hash
= recirc_metadata_hash(state
);
288 struct recirc_id_node
*node
= recirc_ref_equal(state
, hash
);
290 node
= recirc_alloc_id__(state
, hash
);
295 /* Allocate a unique recirculation id. */
/* Builds a local recirc_state with 'table_id' TBL_INTERNAL, 'in_port'
 * OFPP_NONE and a local tunnel whose 'ip_dst' is 0 and whose 'ipv6_dst' is
 * in6addr_any, then returns the id of the node created for it by
 * recirc_alloc_id__().  The visible code always allocates; it does not
 * look for an existing node with an equal state.
 *
 * NOTE(review): only two members of 'tunnel' are assigned in the visible
 * lines, and the initializer line that would store 'ofproto' in the state
 * is missing from this extract, as are the return type and the braces --
 * confirm against the complete file. */
297 recirc_alloc_id(struct ofproto_dpif
*ofproto
)
299 struct flow_tnl tunnel
;
300 tunnel
.ip_dst
= htonl(0);
301 tunnel
.ipv6_dst
= in6addr_any
;
302 struct recirc_state state
= {
303 .table_id
= TBL_INTERNAL
,
305 .metadata
= { .tunnel
= &tunnel
, .in_port
= OFPP_NONE
},
307 return recirc_alloc_id__(&state
, recirc_metadata_hash(&state
))->id
;
/* Destructor for a node; this is the function handed to ovsrcu_postpone()
 * by the maintenance code.  The visible line releases the node's cloned
 * state with recirc_state_free(), casting away the const on the 'state'
 * member.
 *
 * NOTE(review): the statement that frees 'node' itself is missing from
 * this extract -- confirm against the complete file. */
311 recirc_id_node_free(struct recirc_id_node
*node
)
313 recirc_state_free(CONST_CAST(struct recirc_state
*, &node
->state
));
/* Drops one reference on 'node_'; a NULL argument is accepted and ignored.
 *
 * When ovs_refcount_unref() reports 1 (this was the last reference, which
 * the maintenance comments elsewhere in this file describe as the refcount
 * reaching zero), the node is, under 'mutex', removed from 'metadata_map'
 * so that no new user can find it by state, and put on the 'expiring'
 * list.  It deliberately stays in 'id_map' so that lookups by id still
 * work until the maintenance code deletes it. */
318 recirc_id_node_unref(const struct recirc_id_node
*node_
)
321 struct recirc_id_node
*node
= CONST_CAST(struct recirc_id_node
*, node_
);
323 if (node
&& ovs_refcount_unref(&node
->refcount
) == 1) {
324 ovs_mutex_lock(&mutex
);
325 /* Prevent re-use of this node by removing the node from 'metadata_map'
327 cmap_remove(&metadata_map
, &node
->metadata_node
, node
->hash
);
328 /* We keep the node in the 'id_map' so that it can be found as long
329 * as it lingers, and add it to the 'expiring' list. */
330 list_insert(&expiring
, &node
->exp_node
);
331 ovs_mutex_unlock(&mutex
);
/* Releases the reference associated with recirculation id 'id': the node
 * is looked up with recirc_id_node_find() and passed to
 * recirc_id_node_unref().  An error is logged for an id that does not
 * exist.
 *
 * NOTE(review): the if/else that selects between the unref and the
 * VLOG_ERR() call is missing from this extract; presumably the error is
 * logged only when the lookup returns NULL -- confirm. */
336 recirc_free_id(uint32_t id
)
338 const struct recirc_id_node
*node
;
340 node
= recirc_id_node_find(id
);
342 recirc_id_node_unref(node
);
344 VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32
, id
);
/* Scans every node in 'metadata_map' and logs an error, naming the node's
 * id and 'ofproto_name', for each node whose state still refers to
 * 'ofproto'.
 *
 * NOTE(review): the comment below mentions clearing, but the visible code
 * only logs; one comment line, the return type and the closing braces are
 * missing from this extract -- confirm against the complete file. */
348 /* Called when 'ofproto' is destructed. Checks for and clears any
350 * No other thread may have access to the 'ofproto' being destructed.
351 * All related datapath flows must be deleted before calling this. */
353 recirc_free_ofproto(struct ofproto_dpif
*ofproto
, const char *ofproto_name
)
355 struct recirc_id_node
*n
;
357 CMAP_FOR_EACH (n
, metadata_node
, &metadata_map
) {
358 if (n
->state
.ofproto
== ofproto
) {
359 VLOG_ERR("recirc_id %"PRIu32
360 " left allocated when ofproto (%s)"
361 " is destructed", n
->id
, ofproto_name
);