]>
Commit | Line | Data |
---|---|---|
f5374617 | 1 | /* |
85b9cb2e | 2 | * Copyright (c) 2014, 2015, 2016 Nicira, Inc. |
f5374617 AZ |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | ||
e672ff9b JR |
19 | #include "ofpbuf.h" |
20 | #include "ofproto-dpif.h" | |
f5374617 | 21 | #include "ofproto-dpif-rid.h" |
e672ff9b JR |
22 | #include "ofproto-provider.h" |
23 | #include "openvswitch/vlog.h" | |
f5374617 | 24 | |
e672ff9b | 25 | VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid); |
f5374617 | 26 | |
e672ff9b | 27 | static struct ovs_mutex mutex; |
f5374617 | 28 | |
e672ff9b JR |
29 | static struct cmap id_map; |
30 | static struct cmap metadata_map; | |
31 | ||
32 | static struct ovs_list expiring OVS_GUARDED_BY(mutex); | |
33 | static struct ovs_list expired OVS_GUARDED_BY(mutex); | |
34 | ||
35 | static uint32_t next_id OVS_GUARDED_BY(mutex); /* Possible next free id. */ | |
36 | ||
37 | #define RECIRC_POOL_STATIC_IDS 1024 | |
38 | ||
85b9cb2e BP |
39 | static void recirc_id_node_free(struct recirc_id_node *); |
40 | ||
e672ff9b JR |
41 | void |
42 | recirc_init(void) | |
f5374617 | 43 | { |
e672ff9b | 44 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; |
f5374617 | 45 | |
e672ff9b JR |
46 | if (ovsthread_once_start(&once)) { |
47 | ovs_mutex_init(&mutex); | |
48 | ovs_mutex_lock(&mutex); | |
49 | next_id = 1; /* 0 is not a valid ID. */ | |
50 | cmap_init(&id_map); | |
51 | cmap_init(&metadata_map); | |
52 | list_init(&expiring); | |
53 | list_init(&expired); | |
54 | ovs_mutex_unlock(&mutex); | |
55 | ||
56 | ovsthread_once_done(&once); | |
57 | } | |
f5374617 | 58 | |
f5374617 AZ |
59 | } |
60 | ||
e672ff9b JR |
61 | /* This should be called by the revalidator once at each round (every 500ms or |
62 | * more). */ | |
f5374617 | 63 | void |
e672ff9b JR |
64 | recirc_run(void) |
65 | { | |
66 | static long long int last = 0; | |
67 | long long int now = time_msec(); | |
68 | ||
69 | /* Do maintenance at most 4 times / sec. */ | |
70 | ovs_mutex_lock(&mutex); | |
71 | if (now - last > 250) { | |
5f03c983 | 72 | struct recirc_id_node *node; |
e672ff9b JR |
73 | |
74 | last = now; | |
75 | ||
76 | /* Nodes in 'expiring' and 'expired' lists have the refcount of zero, | |
77 | * which means that while they can still be found (by id), no new | |
78 | * references can be taken on them. We have removed the entry from the | |
79 | * 'metadata_map', at the time when refcount reached zero, causing any | |
80 | * new translations to allocate a new ID. This allows the expiring | |
81 | * entry to be safely deleted while any sudden new use of the similar | |
82 | * recirculation will safely start using a new recirculation ID. When | |
83 | * the refcount gets to zero, the node is also added to the 'expiring' | |
84 | * list. At any time after that the nodes in the 'expiring' list can | |
85 | * be moved to the 'expired' list, from which they are deleted at least | |
86 | * 250ms afterwards. */ | |
87 | ||
88 | /* Delete the expired. These have been lingering for at least 250 ms, | |
89 | * which should be enough for any ongoing recirculations to be | |
90 | * finished. */ | |
5f03c983 | 91 | LIST_FOR_EACH_POP (node, exp_node, &expired) { |
e672ff9b | 92 | cmap_remove(&id_map, &node->id_node, node->id); |
85b9cb2e | 93 | ovsrcu_postpone(recirc_id_node_free, node); |
e672ff9b JR |
94 | } |
95 | ||
96 | if (!list_is_empty(&expiring)) { | |
97 | /* 'expired' is now empty, move nodes in 'expiring' to it. */ | |
98 | list_splice(&expired, list_front(&expiring), &expiring); | |
99 | } | |
100 | } | |
101 | ovs_mutex_unlock(&mutex); | |
102 | } | |
103 | ||
104 | /* We use the id as the hash value, which works due to cmap internal rehashing. | |
105 | * We also only insert nodes with unique IDs, so all possible hash collisions | |
106 | * remain internal to the cmap. */ | |
107 | static struct recirc_id_node * | |
108 | recirc_find__(uint32_t id) | |
109 | OVS_REQUIRES(mutex) | |
110 | { | |
111 | struct cmap_node *node = cmap_find_protected(&id_map, id); | |
112 | ||
113 | return node ? CONTAINER_OF(node, struct recirc_id_node, id_node) : NULL; | |
114 | } | |
115 | ||
116 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
117 | * state, caller should copy the contents. */ | |
118 | const struct recirc_id_node * | |
119 | recirc_id_node_find(uint32_t id) | |
120 | { | |
121 | const struct cmap_node *node = cmap_find(&id_map, id); | |
122 | ||
123 | return node | |
124 | ? CONTAINER_OF(node, const struct recirc_id_node, id_node) | |
125 | : NULL; | |
126 | } | |
127 | ||
128 | static uint32_t | |
2082425c | 129 | recirc_metadata_hash(const struct recirc_state *state) |
e672ff9b JR |
130 | { |
131 | uint32_t hash; | |
132 | ||
2082425c BP |
133 | hash = hash_pointer(state->ofproto, 0); |
134 | hash = hash_int(state->table_id, hash); | |
ffe4c74f | 135 | if (flow_tnl_dst_is_set(state->metadata.tunnel)) { |
59781952 JR |
136 | /* We may leave remainder bytes unhashed, but that is unlikely as |
137 | * the tunnel is not in the datapath format. */ | |
138 | hash = hash_words64((const uint64_t *) state->metadata.tunnel, | |
139 | flow_tnl_size(state->metadata.tunnel) | |
140 | / sizeof(uint64_t), hash); | |
141 | } | |
07659514 | 142 | hash = hash_boolean(state->conntracked, hash); |
59781952 JR |
143 | hash = hash_words64((const uint64_t *) &state->metadata.metadata, |
144 | (sizeof state->metadata - sizeof state->metadata.tunnel) | |
145 | / sizeof(uint64_t), | |
e672ff9b | 146 | hash); |
2082425c BP |
147 | if (state->stack && state->stack->size != 0) { |
148 | hash = hash_words64((const uint64_t *) state->stack->data, | |
149 | state->stack->size / sizeof(uint64_t), hash); | |
e672ff9b | 150 | } |
29bae541 | 151 | hash = hash_int(state->mirrors, hash); |
2082425c BP |
152 | hash = hash_int(state->action_set_len, hash); |
153 | if (state->ofpacts_len) { | |
154 | hash = hash_words64(ALIGNED_CAST(const uint64_t *, state->ofpacts), | |
155 | state->ofpacts_len / sizeof(uint64_t), | |
e672ff9b JR |
156 | hash); |
157 | } | |
158 | return hash; | |
159 | } | |
160 | ||
161 | static bool | |
2082425c BP |
162 | recirc_metadata_equal(const struct recirc_state *a, |
163 | const struct recirc_state *b) | |
e672ff9b | 164 | { |
2082425c BP |
165 | return (a->table_id == b->table_id |
166 | && a->ofproto == b->ofproto | |
59781952 JR |
167 | && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel) |
168 | && !memcmp(&a->metadata.metadata, &b->metadata.metadata, | |
169 | sizeof a->metadata - sizeof a->metadata.tunnel) | |
2082425c BP |
170 | && (((!a->stack || !a->stack->size) && |
171 | (!b->stack || !b->stack->size)) | |
172 | || (a->stack && b->stack && ofpbuf_equal(a->stack, b->stack))) | |
29bae541 | 173 | && a->mirrors == b->mirrors |
07659514 | 174 | && a->conntracked == b->conntracked |
2082425c BP |
175 | && a->action_set_len == b->action_set_len |
176 | && ofpacts_equal(a->ofpacts, a->ofpacts_len, | |
177 | b->ofpacts, b->ofpacts_len)); | |
e672ff9b JR |
178 | } |
179 | ||
180 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
181 | * state, caller should take a reference. */ | |
182 | static struct recirc_id_node * | |
2082425c | 183 | recirc_find_equal(const struct recirc_state *target, uint32_t hash) |
e672ff9b JR |
184 | { |
185 | struct recirc_id_node *node; | |
186 | ||
2082425c BP |
187 | CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &metadata_map) { |
188 | if (recirc_metadata_equal(&node->state, target)) { | |
e672ff9b JR |
189 | return node; |
190 | } | |
191 | } | |
192 | return NULL; | |
193 | } | |
194 | ||
195 | static struct recirc_id_node * | |
2082425c | 196 | recirc_ref_equal(const struct recirc_state *target, uint32_t hash) |
e672ff9b JR |
197 | { |
198 | struct recirc_id_node *node; | |
199 | ||
200 | do { | |
2082425c | 201 | node = recirc_find_equal(target, hash); |
e672ff9b JR |
202 | |
203 | /* Try again if the node was released before we get the reference. */ | |
204 | } while (node && !ovs_refcount_try_ref_rcu(&node->refcount)); | |
205 | ||
206 | return node; | |
207 | } | |
208 | ||
2082425c | 209 | static void |
59781952 JR |
210 | recirc_state_clone(struct recirc_state *new, const struct recirc_state *old, |
211 | struct flow_tnl *tunnel) | |
2082425c BP |
212 | { |
213 | *new = *old; | |
59781952 JR |
214 | flow_tnl_copy__(tunnel, old->metadata.tunnel); |
215 | new->metadata.tunnel = tunnel; | |
216 | ||
2082425c BP |
217 | if (new->stack) { |
218 | new->stack = new->stack->size ? ofpbuf_clone(new->stack) : NULL; | |
219 | } | |
220 | if (new->ofpacts) { | |
221 | new->ofpacts = (new->ofpacts_len | |
222 | ? xmemdup(new->ofpacts, new->ofpacts_len) | |
223 | : NULL); | |
224 | } | |
225 | } | |
226 | ||
85b9cb2e BP |
227 | static void |
228 | recirc_state_free(struct recirc_state *state) | |
229 | { | |
230 | ofpbuf_delete(state->stack); | |
231 | free(state->ofpacts); | |
232 | } | |
233 | ||
e672ff9b JR |
234 | /* Allocate a unique recirculation id for the given set of flow metadata. |
235 | * The ID space is 2^^32, so there should never be a situation in which all | |
236 | * the IDs are used up. We loop until we find a free one. | |
237 | * hash is recomputed if it is passed in as 0. */ | |
238 | static struct recirc_id_node * | |
2082425c | 239 | recirc_alloc_id__(const struct recirc_state *state, uint32_t hash) |
e672ff9b | 240 | { |
2082425c BP |
241 | ovs_assert(state->action_set_len <= state->ofpacts_len); |
242 | ||
243 | struct recirc_id_node *node = xzalloc(sizeof *node); | |
59781952 | 244 | |
e672ff9b JR |
245 | node->hash = hash; |
246 | ovs_refcount_init(&node->refcount); | |
59781952 JR |
247 | recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state, |
248 | &node->state_metadata_tunnel); | |
e672ff9b JR |
249 | |
250 | ovs_mutex_lock(&mutex); | |
251 | for (;;) { | |
252 | /* Claim the next ID. The ID space should be sparse enough for the | |
253 | allocation to succeed at the first try. We do skip the first | |
254 | RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of | |
255 | the initial allocations may be for long term uses (like bonds). */ | |
256 | node->id = next_id++; | |
257 | if (OVS_UNLIKELY(!node->id)) { | |
258 | next_id = RECIRC_POOL_STATIC_IDS + 1; | |
259 | node->id = next_id++; | |
260 | } | |
261 | /* Find if the id is free. */ | |
262 | if (OVS_LIKELY(!recirc_find__(node->id))) { | |
263 | break; | |
264 | } | |
265 | } | |
266 | cmap_insert(&id_map, &node->id_node, node->id); | |
267 | cmap_insert(&metadata_map, &node->metadata_node, node->hash); | |
268 | ovs_mutex_unlock(&mutex); | |
269 | return node; | |
270 | } | |
271 | ||
272 | /* Look up an existing ID for the given flow's metadata and optional actions. | |
273 | */ | |
274 | uint32_t | |
2082425c | 275 | recirc_find_id(const struct recirc_state *target) |
e672ff9b | 276 | { |
2082425c BP |
277 | uint32_t hash = recirc_metadata_hash(target); |
278 | struct recirc_id_node *node = recirc_find_equal(target, hash); | |
e672ff9b JR |
279 | return node ? node->id : 0; |
280 | } | |
281 | ||
282 | /* Allocate a unique recirculation id for the given set of flow metadata and | |
283 | optional actions. */ | |
284 | uint32_t | |
2082425c | 285 | recirc_alloc_id_ctx(const struct recirc_state *state) |
f5374617 | 286 | { |
2082425c BP |
287 | uint32_t hash = recirc_metadata_hash(state); |
288 | struct recirc_id_node *node = recirc_ref_equal(state, hash); | |
e672ff9b | 289 | if (!node) { |
2082425c | 290 | node = recirc_alloc_id__(state, hash); |
e672ff9b | 291 | } |
e672ff9b | 292 | return node->id; |
f5374617 AZ |
293 | } |
294 | ||
e672ff9b | 295 | /* Allocate a unique recirculation id. */ |
f5374617 | 296 | uint32_t |
e672ff9b | 297 | recirc_alloc_id(struct ofproto_dpif *ofproto) |
f5374617 | 298 | { |
59781952 JR |
299 | struct flow_tnl tunnel; |
300 | tunnel.ip_dst = htonl(0); | |
ffe4c74f | 301 | tunnel.ipv6_dst = in6addr_any; |
2082425c BP |
302 | struct recirc_state state = { |
303 | .table_id = TBL_INTERNAL, | |
304 | .ofproto = ofproto, | |
59781952 | 305 | .metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE }, |
2082425c BP |
306 | }; |
307 | return recirc_alloc_id__(&state, recirc_metadata_hash(&state))->id; | |
e672ff9b | 308 | } |
f5374617 | 309 | |
85b9cb2e BP |
310 | static void |
311 | recirc_id_node_free(struct recirc_id_node *node) | |
312 | { | |
313 | recirc_state_free(CONST_CAST(struct recirc_state *, &node->state)); | |
314 | free(node); | |
315 | } | |
316 | ||
e672ff9b JR |
317 | void |
318 | recirc_id_node_unref(const struct recirc_id_node *node_) | |
319 | OVS_EXCLUDED(mutex) | |
320 | { | |
321 | struct recirc_id_node *node = CONST_CAST(struct recirc_id_node *, node_); | |
322 | ||
323 | if (node && ovs_refcount_unref(&node->refcount) == 1) { | |
324 | ovs_mutex_lock(&mutex); | |
325 | /* Prevent re-use of this node by removing the node from 'metadata_map' | |
326 | */ | |
327 | cmap_remove(&metadata_map, &node->metadata_node, node->hash); | |
328 | /* We keep the node in the 'id_map' so that it can be found as long | |
329 | * as it lingers, and add it to the 'expiring' list. */ | |
330 | list_insert(&expiring, &node->exp_node); | |
331 | ovs_mutex_unlock(&mutex); | |
27c24749 | 332 | } |
e672ff9b | 333 | } |
27c24749 | 334 | |
/* Drops one reference to the node that has recirculation id 'id'.  Logs an
 * error if no such node can be found. */
void
recirc_free_id(uint32_t id)
{
    const struct recirc_id_node *node = recirc_id_node_find(id);

    if (!node) {
        VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32, id);
        return;
    }

    recirc_id_node_unref(node);
}
347 | ||
e672ff9b JR |
348 | /* Called when 'ofproto' is destructed. Checks for and clears any |
349 | * recirc_id leak. | |
350 | * No other thread may have access to the 'ofproto' being destructed. | |
351 | * All related datapath flows must be deleted before calling this. */ | |
f5374617 | 352 | void |
e672ff9b | 353 | recirc_free_ofproto(struct ofproto_dpif *ofproto, const char *ofproto_name) |
f5374617 | 354 | { |
e672ff9b JR |
355 | struct recirc_id_node *n; |
356 | ||
357 | CMAP_FOR_EACH (n, metadata_node, &metadata_map) { | |
2082425c | 358 | if (n->state.ofproto == ofproto) { |
e672ff9b JR |
359 | VLOG_ERR("recirc_id %"PRIu32 |
360 | " left allocated when ofproto (%s)" | |
361 | " is destructed", n->id, ofproto_name); | |
362 | } | |
363 | } | |
f5374617 | 364 | } |