]>
Commit | Line | Data |
---|---|---|
f5374617 | 1 | /* |
85b9cb2e | 2 | * Copyright (c) 2014, 2015, 2016 Nicira, Inc. |
f5374617 AZ |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | ||
e672ff9b JR |
19 | #include "ofpbuf.h" |
20 | #include "ofproto-dpif.h" | |
f5374617 | 21 | #include "ofproto-dpif-rid.h" |
e672ff9b JR |
22 | #include "ofproto-provider.h" |
23 | #include "openvswitch/vlog.h" | |
f5374617 | 24 | |
e672ff9b | 25 | VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid); |
f5374617 | 26 | |
e672ff9b | 27 | static struct ovs_mutex mutex; |
f5374617 | 28 | |
e672ff9b JR |
29 | static struct cmap id_map; |
30 | static struct cmap metadata_map; | |
31 | ||
32 | static struct ovs_list expiring OVS_GUARDED_BY(mutex); | |
33 | static struct ovs_list expired OVS_GUARDED_BY(mutex); | |
34 | ||
35 | static uint32_t next_id OVS_GUARDED_BY(mutex); /* Possible next free id. */ | |
36 | ||
37 | #define RECIRC_POOL_STATIC_IDS 1024 | |
38 | ||
85b9cb2e BP |
39 | static void recirc_id_node_free(struct recirc_id_node *); |
40 | ||
e672ff9b JR |
41 | void |
42 | recirc_init(void) | |
f5374617 | 43 | { |
e672ff9b | 44 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; |
f5374617 | 45 | |
e672ff9b JR |
46 | if (ovsthread_once_start(&once)) { |
47 | ovs_mutex_init(&mutex); | |
48 | ovs_mutex_lock(&mutex); | |
49 | next_id = 1; /* 0 is not a valid ID. */ | |
50 | cmap_init(&id_map); | |
51 | cmap_init(&metadata_map); | |
52 | list_init(&expiring); | |
53 | list_init(&expired); | |
54 | ovs_mutex_unlock(&mutex); | |
55 | ||
56 | ovsthread_once_done(&once); | |
57 | } | |
f5374617 | 58 | |
f5374617 AZ |
59 | } |
60 | ||
e672ff9b JR |
61 | /* This should be called by the revalidator once at each round (every 500ms or |
62 | * more). */ | |
f5374617 | 63 | void |
e672ff9b JR |
64 | recirc_run(void) |
65 | { | |
66 | static long long int last = 0; | |
67 | long long int now = time_msec(); | |
68 | ||
69 | /* Do maintenance at most 4 times / sec. */ | |
70 | ovs_mutex_lock(&mutex); | |
71 | if (now - last > 250) { | |
5f03c983 | 72 | struct recirc_id_node *node; |
e672ff9b JR |
73 | |
74 | last = now; | |
75 | ||
76 | /* Nodes in 'expiring' and 'expired' lists have the refcount of zero, | |
77 | * which means that while they can still be found (by id), no new | |
78 | * references can be taken on them. We have removed the entry from the | |
79 | * 'metadata_map', at the time when refcount reached zero, causing any | |
80 | * new translations to allocate a new ID. This allows the expiring | |
81 | * entry to be safely deleted while any sudden new use of the similar | |
82 | * recirculation will safely start using a new recirculation ID. When | |
83 | * the refcount gets to zero, the node is also added to the 'expiring' | |
84 | * list. At any time after that the nodes in the 'expiring' list can | |
85 | * be moved to the 'expired' list, from which they are deleted at least | |
86 | * 250ms afterwards. */ | |
87 | ||
88 | /* Delete the expired. These have been lingering for at least 250 ms, | |
89 | * which should be enough for any ongoing recirculations to be | |
90 | * finished. */ | |
5f03c983 | 91 | LIST_FOR_EACH_POP (node, exp_node, &expired) { |
e672ff9b | 92 | cmap_remove(&id_map, &node->id_node, node->id); |
85b9cb2e | 93 | ovsrcu_postpone(recirc_id_node_free, node); |
e672ff9b JR |
94 | } |
95 | ||
96 | if (!list_is_empty(&expiring)) { | |
97 | /* 'expired' is now empty, move nodes in 'expiring' to it. */ | |
98 | list_splice(&expired, list_front(&expiring), &expiring); | |
99 | } | |
100 | } | |
101 | ovs_mutex_unlock(&mutex); | |
102 | } | |
103 | ||
104 | /* We use the id as the hash value, which works due to cmap internal rehashing. | |
105 | * We also only insert nodes with unique IDs, so all possible hash collisions | |
106 | * remain internal to the cmap. */ | |
107 | static struct recirc_id_node * | |
108 | recirc_find__(uint32_t id) | |
109 | OVS_REQUIRES(mutex) | |
110 | { | |
111 | struct cmap_node *node = cmap_find_protected(&id_map, id); | |
112 | ||
113 | return node ? CONTAINER_OF(node, struct recirc_id_node, id_node) : NULL; | |
114 | } | |
115 | ||
116 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
117 | * state, caller should copy the contents. */ | |
118 | const struct recirc_id_node * | |
119 | recirc_id_node_find(uint32_t id) | |
120 | { | |
121 | const struct cmap_node *node = cmap_find(&id_map, id); | |
122 | ||
123 | return node | |
124 | ? CONTAINER_OF(node, const struct recirc_id_node, id_node) | |
125 | : NULL; | |
126 | } | |
127 | ||
128 | static uint32_t | |
2082425c | 129 | recirc_metadata_hash(const struct recirc_state *state) |
e672ff9b JR |
130 | { |
131 | uint32_t hash; | |
132 | ||
2082425c BP |
133 | hash = hash_pointer(state->ofproto, 0); |
134 | hash = hash_int(state->table_id, hash); | |
ffe4c74f | 135 | if (flow_tnl_dst_is_set(state->metadata.tunnel)) { |
59781952 JR |
136 | /* We may leave remainder bytes unhashed, but that is unlikely as |
137 | * the tunnel is not in the datapath format. */ | |
0a96a21b BP |
138 | hash = hash_bytes64((const uint64_t *) state->metadata.tunnel, |
139 | flow_tnl_size(state->metadata.tunnel), hash); | |
59781952 | 140 | } |
07659514 | 141 | hash = hash_boolean(state->conntracked, hash); |
0a96a21b BP |
142 | hash = hash_bytes64((const uint64_t *) &state->metadata.metadata, |
143 | sizeof state->metadata - sizeof state->metadata.tunnel, | |
e672ff9b | 144 | hash); |
5c1b2314 BP |
145 | if (state->stack && state->n_stack) { |
146 | hash = hash_bytes64((const uint64_t *) state->stack, | |
147 | state->n_stack * sizeof *state->stack, hash); | |
e672ff9b | 148 | } |
29bae541 | 149 | hash = hash_int(state->mirrors, hash); |
2082425c BP |
150 | hash = hash_int(state->action_set_len, hash); |
151 | if (state->ofpacts_len) { | |
0a96a21b BP |
152 | hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->ofpacts), |
153 | state->ofpacts_len, hash); | |
e672ff9b JR |
154 | } |
155 | return hash; | |
156 | } | |
157 | ||
158 | static bool | |
2082425c BP |
159 | recirc_metadata_equal(const struct recirc_state *a, |
160 | const struct recirc_state *b) | |
e672ff9b | 161 | { |
2082425c BP |
162 | return (a->table_id == b->table_id |
163 | && a->ofproto == b->ofproto | |
59781952 JR |
164 | && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel) |
165 | && !memcmp(&a->metadata.metadata, &b->metadata.metadata, | |
166 | sizeof a->metadata - sizeof a->metadata.tunnel) | |
5c1b2314 BP |
167 | && a->n_stack == b->n_stack |
168 | && !memcmp(a->stack, b->stack, a->n_stack * sizeof *a->stack) | |
29bae541 | 169 | && a->mirrors == b->mirrors |
07659514 | 170 | && a->conntracked == b->conntracked |
2082425c BP |
171 | && a->action_set_len == b->action_set_len |
172 | && ofpacts_equal(a->ofpacts, a->ofpacts_len, | |
173 | b->ofpacts, b->ofpacts_len)); | |
e672ff9b JR |
174 | } |
175 | ||
176 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
177 | * state, caller should take a reference. */ | |
178 | static struct recirc_id_node * | |
2082425c | 179 | recirc_find_equal(const struct recirc_state *target, uint32_t hash) |
e672ff9b JR |
180 | { |
181 | struct recirc_id_node *node; | |
182 | ||
2082425c BP |
183 | CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &metadata_map) { |
184 | if (recirc_metadata_equal(&node->state, target)) { | |
e672ff9b JR |
185 | return node; |
186 | } | |
187 | } | |
188 | return NULL; | |
189 | } | |
190 | ||
191 | static struct recirc_id_node * | |
2082425c | 192 | recirc_ref_equal(const struct recirc_state *target, uint32_t hash) |
e672ff9b JR |
193 | { |
194 | struct recirc_id_node *node; | |
195 | ||
196 | do { | |
2082425c | 197 | node = recirc_find_equal(target, hash); |
e672ff9b JR |
198 | |
199 | /* Try again if the node was released before we get the reference. */ | |
200 | } while (node && !ovs_refcount_try_ref_rcu(&node->refcount)); | |
201 | ||
202 | return node; | |
203 | } | |
204 | ||
2082425c | 205 | static void |
59781952 JR |
206 | recirc_state_clone(struct recirc_state *new, const struct recirc_state *old, |
207 | struct flow_tnl *tunnel) | |
2082425c BP |
208 | { |
209 | *new = *old; | |
59781952 JR |
210 | flow_tnl_copy__(tunnel, old->metadata.tunnel); |
211 | new->metadata.tunnel = tunnel; | |
212 | ||
5c1b2314 BP |
213 | new->stack = (new->n_stack |
214 | ? xmemdup(new->stack, new->n_stack * sizeof *new->stack) | |
215 | : NULL); | |
216 | new->ofpacts = (new->ofpacts_len | |
217 | ? xmemdup(new->ofpacts, new->ofpacts_len) | |
218 | : NULL); | |
2082425c BP |
219 | } |
220 | ||
85b9cb2e BP |
221 | static void |
222 | recirc_state_free(struct recirc_state *state) | |
223 | { | |
5c1b2314 | 224 | free(state->stack); |
85b9cb2e BP |
225 | free(state->ofpacts); |
226 | } | |
227 | ||
e672ff9b JR |
228 | /* Allocate a unique recirculation id for the given set of flow metadata. |
229 | * The ID space is 2^^32, so there should never be a situation in which all | |
230 | * the IDs are used up. We loop until we find a free one. | |
231 | * hash is recomputed if it is passed in as 0. */ | |
232 | static struct recirc_id_node * | |
2082425c | 233 | recirc_alloc_id__(const struct recirc_state *state, uint32_t hash) |
e672ff9b | 234 | { |
2082425c BP |
235 | ovs_assert(state->action_set_len <= state->ofpacts_len); |
236 | ||
237 | struct recirc_id_node *node = xzalloc(sizeof *node); | |
59781952 | 238 | |
e672ff9b JR |
239 | node->hash = hash; |
240 | ovs_refcount_init(&node->refcount); | |
59781952 JR |
241 | recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state, |
242 | &node->state_metadata_tunnel); | |
e672ff9b JR |
243 | |
244 | ovs_mutex_lock(&mutex); | |
245 | for (;;) { | |
246 | /* Claim the next ID. The ID space should be sparse enough for the | |
247 | allocation to succeed at the first try. We do skip the first | |
248 | RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of | |
249 | the initial allocations may be for long term uses (like bonds). */ | |
250 | node->id = next_id++; | |
251 | if (OVS_UNLIKELY(!node->id)) { | |
252 | next_id = RECIRC_POOL_STATIC_IDS + 1; | |
253 | node->id = next_id++; | |
254 | } | |
255 | /* Find if the id is free. */ | |
256 | if (OVS_LIKELY(!recirc_find__(node->id))) { | |
257 | break; | |
258 | } | |
259 | } | |
260 | cmap_insert(&id_map, &node->id_node, node->id); | |
261 | cmap_insert(&metadata_map, &node->metadata_node, node->hash); | |
262 | ovs_mutex_unlock(&mutex); | |
263 | return node; | |
264 | } | |
265 | ||
266 | /* Look up an existing ID for the given flow's metadata and optional actions. | |
267 | */ | |
268 | uint32_t | |
2082425c | 269 | recirc_find_id(const struct recirc_state *target) |
e672ff9b | 270 | { |
2082425c BP |
271 | uint32_t hash = recirc_metadata_hash(target); |
272 | struct recirc_id_node *node = recirc_find_equal(target, hash); | |
e672ff9b JR |
273 | return node ? node->id : 0; |
274 | } | |
275 | ||
276 | /* Allocate a unique recirculation id for the given set of flow metadata and | |
277 | optional actions. */ | |
278 | uint32_t | |
2082425c | 279 | recirc_alloc_id_ctx(const struct recirc_state *state) |
f5374617 | 280 | { |
2082425c BP |
281 | uint32_t hash = recirc_metadata_hash(state); |
282 | struct recirc_id_node *node = recirc_ref_equal(state, hash); | |
e672ff9b | 283 | if (!node) { |
2082425c | 284 | node = recirc_alloc_id__(state, hash); |
e672ff9b | 285 | } |
e672ff9b | 286 | return node->id; |
f5374617 AZ |
287 | } |
288 | ||
e672ff9b | 289 | /* Allocate a unique recirculation id. */ |
f5374617 | 290 | uint32_t |
e672ff9b | 291 | recirc_alloc_id(struct ofproto_dpif *ofproto) |
f5374617 | 292 | { |
59781952 JR |
293 | struct flow_tnl tunnel; |
294 | tunnel.ip_dst = htonl(0); | |
ffe4c74f | 295 | tunnel.ipv6_dst = in6addr_any; |
2082425c BP |
296 | struct recirc_state state = { |
297 | .table_id = TBL_INTERNAL, | |
298 | .ofproto = ofproto, | |
59781952 | 299 | .metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE }, |
2082425c BP |
300 | }; |
301 | return recirc_alloc_id__(&state, recirc_metadata_hash(&state))->id; | |
e672ff9b | 302 | } |
f5374617 | 303 | |
85b9cb2e BP |
304 | static void |
305 | recirc_id_node_free(struct recirc_id_node *node) | |
306 | { | |
307 | recirc_state_free(CONST_CAST(struct recirc_state *, &node->state)); | |
308 | free(node); | |
309 | } | |
310 | ||
e672ff9b JR |
311 | void |
312 | recirc_id_node_unref(const struct recirc_id_node *node_) | |
313 | OVS_EXCLUDED(mutex) | |
314 | { | |
315 | struct recirc_id_node *node = CONST_CAST(struct recirc_id_node *, node_); | |
316 | ||
317 | if (node && ovs_refcount_unref(&node->refcount) == 1) { | |
318 | ovs_mutex_lock(&mutex); | |
319 | /* Prevent re-use of this node by removing the node from 'metadata_map' | |
320 | */ | |
321 | cmap_remove(&metadata_map, &node->metadata_node, node->hash); | |
322 | /* We keep the node in the 'id_map' so that it can be found as long | |
323 | * as it lingers, and add it to the 'expiring' list. */ | |
324 | list_insert(&expiring, &node->exp_node); | |
325 | ovs_mutex_unlock(&mutex); | |
27c24749 | 326 | } |
e672ff9b | 327 | } |
27c24749 | 328 | |
/* Drops one reference to the node that has recirculation ID 'id'.  Logs an
 * error if no such node exists. */
void
recirc_free_id(uint32_t id)
{
    const struct recirc_id_node *node = recirc_id_node_find(id);

    if (!node) {
        VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32, id);
        return;
    }
    recirc_id_node_unref(node);
}
341 | ||
e672ff9b JR |
342 | /* Called when 'ofproto' is destructed. Checks for and clears any |
343 | * recirc_id leak. | |
344 | * No other thread may have access to the 'ofproto' being destructed. | |
345 | * All related datapath flows must be deleted before calling this. */ | |
f5374617 | 346 | void |
e672ff9b | 347 | recirc_free_ofproto(struct ofproto_dpif *ofproto, const char *ofproto_name) |
f5374617 | 348 | { |
e672ff9b JR |
349 | struct recirc_id_node *n; |
350 | ||
351 | CMAP_FOR_EACH (n, metadata_node, &metadata_map) { | |
2082425c | 352 | if (n->state.ofproto == ofproto) { |
e672ff9b JR |
353 | VLOG_ERR("recirc_id %"PRIu32 |
354 | " left allocated when ofproto (%s)" | |
355 | " is destructed", n->id, ofproto_name); | |
356 | } | |
357 | } | |
f5374617 | 358 | } |