]>
Commit | Line | Data |
---|---|---|
f5374617 | 1 | /* |
6cb5e507 | 2 | * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc. |
f5374617 AZ |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | ||
64c96779 | 19 | #include "openvswitch/ofpbuf.h" |
e672ff9b | 20 | #include "ofproto-dpif.h" |
f5374617 | 21 | #include "ofproto-dpif-rid.h" |
e672ff9b JR |
22 | #include "ofproto-provider.h" |
23 | #include "openvswitch/vlog.h" | |
f5374617 | 24 | |
e672ff9b | 25 | VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid); |
f5374617 | 26 | |
b70e6976 | 27 | static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; |
f5374617 | 28 | |
b70e6976 BP |
29 | static struct cmap id_map = CMAP_INITIALIZER; |
30 | static struct cmap metadata_map = CMAP_INITIALIZER; | |
e672ff9b | 31 | |
b70e6976 BP |
32 | static struct ovs_list expiring OVS_GUARDED_BY(mutex) |
33 | = OVS_LIST_INITIALIZER(&expiring); | |
34 | static struct ovs_list expired OVS_GUARDED_BY(mutex) | |
35 | = OVS_LIST_INITIALIZER(&expired); | |
e672ff9b | 36 | |
b70e6976 | 37 | static uint32_t next_id OVS_GUARDED_BY(mutex) = 1; /* Possible next free id. */ |
e672ff9b JR |
38 | |
39 | #define RECIRC_POOL_STATIC_IDS 1024 | |
40 | ||
85b9cb2e BP |
41 | static void recirc_id_node_free(struct recirc_id_node *); |
42 | ||
e672ff9b JR |
43 | /* This should be called by the revalidator once at each round (every 500ms or |
44 | * more). */ | |
f5374617 | 45 | void |
e672ff9b JR |
46 | recirc_run(void) |
47 | { | |
48 | static long long int last = 0; | |
49 | long long int now = time_msec(); | |
50 | ||
51 | /* Do maintenance at most 4 times / sec. */ | |
52 | ovs_mutex_lock(&mutex); | |
53 | if (now - last > 250) { | |
5f03c983 | 54 | struct recirc_id_node *node; |
e672ff9b JR |
55 | |
56 | last = now; | |
57 | ||
58 | /* Nodes in 'expiring' and 'expired' lists have the refcount of zero, | |
59 | * which means that while they can still be found (by id), no new | |
60 | * references can be taken on them. We have removed the entry from the | |
61 | * 'metadata_map', at the time when refcount reached zero, causing any | |
62 | * new translations to allocate a new ID. This allows the expiring | |
63 | * entry to be safely deleted while any sudden new use of the similar | |
64 | * recirculation will safely start using a new recirculation ID. When | |
65 | * the refcount gets to zero, the node is also added to the 'expiring' | |
66 | * list. At any time after that the nodes in the 'expiring' list can | |
67 | * be moved to the 'expired' list, from which they are deleted at least | |
68 | * 250ms afterwards. */ | |
69 | ||
70 | /* Delete the expired. These have been lingering for at least 250 ms, | |
71 | * which should be enough for any ongoing recirculations to be | |
72 | * finished. */ | |
5f03c983 | 73 | LIST_FOR_EACH_POP (node, exp_node, &expired) { |
e672ff9b | 74 | cmap_remove(&id_map, &node->id_node, node->id); |
85b9cb2e | 75 | ovsrcu_postpone(recirc_id_node_free, node); |
e672ff9b JR |
76 | } |
77 | ||
417e7e66 | 78 | if (!ovs_list_is_empty(&expiring)) { |
e672ff9b | 79 | /* 'expired' is now empty, move nodes in 'expiring' to it. */ |
417e7e66 | 80 | ovs_list_splice(&expired, ovs_list_front(&expiring), &expiring); |
e672ff9b JR |
81 | } |
82 | } | |
83 | ovs_mutex_unlock(&mutex); | |
84 | } | |
85 | ||
86 | /* We use the id as the hash value, which works due to cmap internal rehashing. | |
87 | * We also only insert nodes with unique IDs, so all possible hash collisions | |
88 | * remain internal to the cmap. */ | |
89 | static struct recirc_id_node * | |
90 | recirc_find__(uint32_t id) | |
91 | OVS_REQUIRES(mutex) | |
92 | { | |
93 | struct cmap_node *node = cmap_find_protected(&id_map, id); | |
94 | ||
95 | return node ? CONTAINER_OF(node, struct recirc_id_node, id_node) : NULL; | |
96 | } | |
97 | ||
98 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
99 | * state, caller should copy the contents. */ | |
100 | const struct recirc_id_node * | |
101 | recirc_id_node_find(uint32_t id) | |
102 | { | |
103 | const struct cmap_node *node = cmap_find(&id_map, id); | |
104 | ||
105 | return node | |
106 | ? CONTAINER_OF(node, const struct recirc_id_node, id_node) | |
107 | : NULL; | |
108 | } | |
109 | ||
e6bc8e74 YHW |
110 | bool |
111 | recirc_id_node_find_and_ref(uint32_t id) | |
112 | { | |
113 | struct recirc_id_node *rid_node = | |
114 | CONST_CAST(struct recirc_id_node *, recirc_id_node_find(id)); | |
115 | ||
116 | if (!rid_node) { | |
117 | return false; | |
118 | } | |
119 | ||
120 | return ovs_refcount_try_ref_rcu(&rid_node->refcount); | |
121 | } | |
122 | ||
e672ff9b | 123 | static uint32_t |
1d361a81 | 124 | frozen_state_hash(const struct frozen_state *state) |
e672ff9b JR |
125 | { |
126 | uint32_t hash; | |
127 | ||
290835f9 | 128 | hash = uuid_hash(&state->ofproto_uuid); |
2082425c | 129 | hash = hash_int(state->table_id, hash); |
8014f465 JP |
130 | hash = hash_bytes64((const uint64_t *) &state->metadata, |
131 | sizeof state->metadata, hash); | |
07659514 | 132 | hash = hash_boolean(state->conntracked, hash); |
84cf3c1f JR |
133 | if (state->stack && state->stack_size) { |
134 | hash = hash_bytes(state->stack, state->stack_size, hash); | |
e672ff9b | 135 | } |
29bae541 | 136 | hash = hash_int(state->mirrors, hash); |
417509fa BP |
137 | if (state->action_set_len) { |
138 | hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->action_set), | |
139 | state->action_set_len, hash); | |
140 | } | |
2082425c | 141 | if (state->ofpacts_len) { |
0a96a21b BP |
142 | hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->ofpacts), |
143 | state->ofpacts_len, hash); | |
e672ff9b | 144 | } |
d39ec23d JP |
145 | if (state->userdata && state->userdata_len) { |
146 | hash = hash_bytes(state->userdata, state->userdata_len, hash); | |
147 | } | |
e672ff9b JR |
148 | return hash; |
149 | } | |
150 | ||
151 | static bool | |
1d361a81 | 152 | frozen_state_equal(const struct frozen_state *a, const struct frozen_state *b) |
e672ff9b | 153 | { |
2082425c | 154 | return (a->table_id == b->table_id |
290835f9 | 155 | && uuid_equals(&a->ofproto_uuid, &b->ofproto_uuid) |
8014f465 | 156 | && !memcmp(&a->metadata, &b->metadata, sizeof a->metadata) |
84cf3c1f JR |
157 | && a->stack_size == b->stack_size |
158 | && !memcmp(a->stack, b->stack, a->stack_size) | |
29bae541 | 159 | && a->mirrors == b->mirrors |
07659514 | 160 | && a->conntracked == b->conntracked |
2082425c | 161 | && ofpacts_equal(a->ofpacts, a->ofpacts_len, |
417509fa BP |
162 | b->ofpacts, b->ofpacts_len) |
163 | && ofpacts_equal(a->action_set, a->action_set_len, | |
d39ec23d | 164 | b->action_set, b->action_set_len) |
00135b86 ZB |
165 | && !memcmp(a->userdata, b->userdata, a->userdata_len) |
166 | && uuid_equals(&a->xport_uuid, &b->xport_uuid)); | |
e672ff9b JR |
167 | } |
168 | ||
169 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
170 | * state, caller should take a reference. */ | |
171 | static struct recirc_id_node * | |
1d361a81 | 172 | recirc_find_equal(const struct frozen_state *target, uint32_t hash) |
e672ff9b JR |
173 | { |
174 | struct recirc_id_node *node; | |
175 | ||
2082425c | 176 | CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &metadata_map) { |
1d361a81 | 177 | if (frozen_state_equal(&node->state, target)) { |
e672ff9b JR |
178 | return node; |
179 | } | |
180 | } | |
181 | return NULL; | |
182 | } | |
183 | ||
184 | static struct recirc_id_node * | |
1d361a81 | 185 | recirc_ref_equal(const struct frozen_state *target, uint32_t hash) |
e672ff9b JR |
186 | { |
187 | struct recirc_id_node *node; | |
188 | ||
189 | do { | |
2082425c | 190 | node = recirc_find_equal(target, hash); |
e672ff9b JR |
191 | |
192 | /* Try again if the node was released before we get the reference. */ | |
193 | } while (node && !ovs_refcount_try_ref_rcu(&node->refcount)); | |
194 | ||
195 | return node; | |
196 | } | |
197 | ||
2082425c | 198 | static void |
6cb5e507 | 199 | frozen_state_clone(struct frozen_state *new, const struct frozen_state *old) |
2082425c BP |
200 | { |
201 | *new = *old; | |
84cf3c1f JR |
202 | new->stack = (new->stack_size |
203 | ? xmemdup(new->stack, new->stack_size) | |
5c1b2314 BP |
204 | : NULL); |
205 | new->ofpacts = (new->ofpacts_len | |
206 | ? xmemdup(new->ofpacts, new->ofpacts_len) | |
207 | : NULL); | |
417509fa BP |
208 | new->action_set = (new->action_set_len |
209 | ? xmemdup(new->action_set, new->action_set_len) | |
210 | : NULL); | |
d39ec23d JP |
211 | new->userdata = (new->userdata_len |
212 | ? xmemdup(new->userdata, new->userdata_len) | |
213 | : NULL); | |
2082425c BP |
214 | } |
215 | ||
85b9cb2e | 216 | static void |
1d361a81 | 217 | frozen_state_free(struct frozen_state *state) |
85b9cb2e | 218 | { |
5c1b2314 | 219 | free(state->stack); |
85b9cb2e | 220 | free(state->ofpacts); |
417509fa | 221 | free(state->action_set); |
d39ec23d | 222 | free(state->userdata); |
85b9cb2e BP |
223 | } |
224 | ||
e672ff9b JR |
225 | /* Allocate a unique recirculation id for the given set of flow metadata. |
226 | * The ID space is 2^^32, so there should never be a situation in which all | |
95263037 | 227 | * the IDs are used up. We loop until we find a free one. */ |
e672ff9b | 228 | static struct recirc_id_node * |
1d361a81 | 229 | recirc_alloc_id__(const struct frozen_state *state, uint32_t hash) |
e672ff9b | 230 | { |
2082425c BP |
231 | ovs_assert(state->action_set_len <= state->ofpacts_len); |
232 | ||
233 | struct recirc_id_node *node = xzalloc(sizeof *node); | |
59781952 | 234 | |
e672ff9b JR |
235 | node->hash = hash; |
236 | ovs_refcount_init(&node->refcount); | |
6cb5e507 | 237 | frozen_state_clone(CONST_CAST(struct frozen_state *, &node->state), state); |
e672ff9b JR |
238 | |
239 | ovs_mutex_lock(&mutex); | |
240 | for (;;) { | |
241 | /* Claim the next ID. The ID space should be sparse enough for the | |
242 | allocation to succeed at the first try. We do skip the first | |
243 | RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of | |
244 | the initial allocations may be for long term uses (like bonds). */ | |
245 | node->id = next_id++; | |
246 | if (OVS_UNLIKELY(!node->id)) { | |
247 | next_id = RECIRC_POOL_STATIC_IDS + 1; | |
248 | node->id = next_id++; | |
249 | } | |
250 | /* Find if the id is free. */ | |
251 | if (OVS_LIKELY(!recirc_find__(node->id))) { | |
252 | break; | |
253 | } | |
254 | } | |
255 | cmap_insert(&id_map, &node->id_node, node->id); | |
256 | cmap_insert(&metadata_map, &node->metadata_node, node->hash); | |
257 | ovs_mutex_unlock(&mutex); | |
258 | return node; | |
259 | } | |
260 | ||
261 | /* Look up an existing ID for the given flow's metadata and optional actions. | |
262 | */ | |
263 | uint32_t | |
1d361a81 | 264 | recirc_find_id(const struct frozen_state *target) |
e672ff9b | 265 | { |
1d361a81 | 266 | uint32_t hash = frozen_state_hash(target); |
2082425c | 267 | struct recirc_id_node *node = recirc_find_equal(target, hash); |
e672ff9b JR |
268 | return node ? node->id : 0; |
269 | } | |
270 | ||
271 | /* Allocate a unique recirculation id for the given set of flow metadata and | |
272 | optional actions. */ | |
273 | uint32_t | |
1d361a81 | 274 | recirc_alloc_id_ctx(const struct frozen_state *state) |
f5374617 | 275 | { |
1d361a81 | 276 | uint32_t hash = frozen_state_hash(state); |
2082425c | 277 | struct recirc_id_node *node = recirc_ref_equal(state, hash); |
e672ff9b | 278 | if (!node) { |
2082425c | 279 | node = recirc_alloc_id__(state, hash); |
e672ff9b | 280 | } |
e672ff9b | 281 | return node->id; |
f5374617 AZ |
282 | } |
283 | ||
e672ff9b | 284 | /* Allocate a unique recirculation id. */ |
f5374617 | 285 | uint32_t |
e672ff9b | 286 | recirc_alloc_id(struct ofproto_dpif *ofproto) |
f5374617 | 287 | { |
1d361a81 | 288 | struct frozen_state state = { |
2082425c | 289 | .table_id = TBL_INTERNAL, |
07a3cd5c | 290 | .ofproto_uuid = ofproto->uuid, |
6cb5e507 JP |
291 | .metadata = { |
292 | .tunnel = { | |
293 | .ip_dst = htonl(0), | |
294 | .ipv6_dst = in6addr_any, | |
295 | }, | |
296 | .in_port = OFPP_NONE }, | |
2082425c | 297 | }; |
00135b86 ZB |
298 | /* In order to make sparse happy, xport_uuid needs to be set separately. */ |
299 | state.xport_uuid = UUID_ZERO; | |
1d361a81 | 300 | return recirc_alloc_id__(&state, frozen_state_hash(&state))->id; |
e672ff9b | 301 | } |
f5374617 | 302 | |
85b9cb2e BP |
303 | static void |
304 | recirc_id_node_free(struct recirc_id_node *node) | |
305 | { | |
1d361a81 | 306 | frozen_state_free(CONST_CAST(struct frozen_state *, &node->state)); |
85b9cb2e BP |
307 | free(node); |
308 | } | |
309 | ||
e672ff9b JR |
310 | void |
311 | recirc_id_node_unref(const struct recirc_id_node *node_) | |
312 | OVS_EXCLUDED(mutex) | |
313 | { | |
314 | struct recirc_id_node *node = CONST_CAST(struct recirc_id_node *, node_); | |
315 | ||
316 | if (node && ovs_refcount_unref(&node->refcount) == 1) { | |
317 | ovs_mutex_lock(&mutex); | |
318 | /* Prevent re-use of this node by removing the node from 'metadata_map' | |
319 | */ | |
320 | cmap_remove(&metadata_map, &node->metadata_node, node->hash); | |
321 | /* We keep the node in the 'id_map' so that it can be found as long | |
322 | * as it lingers, and add it to the 'expiring' list. */ | |
417e7e66 | 323 | ovs_list_insert(&expiring, &node->exp_node); |
e672ff9b | 324 | ovs_mutex_unlock(&mutex); |
27c24749 | 325 | } |
e672ff9b | 326 | } |
27c24749 | 327 | |
/* Releases one reference to the node with recirculation id 'id'.  Logs an
 * error if no node with that id exists. */
void
recirc_free_id(uint32_t id)
{
    const struct recirc_id_node *node = recirc_id_node_find(id);

    if (!node) {
        VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32, id);
        return;
    }
    recirc_id_node_unref(node);
}
340 | ||
e672ff9b JR |
341 | /* Called when 'ofproto' is destructed. Checks for and clears any |
342 | * recirc_id leak. | |
343 | * No other thread may have access to the 'ofproto' being destructed. | |
344 | * All related datapath flows must be deleted before calling this. */ | |
f5374617 | 345 | void |
e672ff9b | 346 | recirc_free_ofproto(struct ofproto_dpif *ofproto, const char *ofproto_name) |
f5374617 | 347 | { |
e672ff9b JR |
348 | struct recirc_id_node *n; |
349 | ||
350 | CMAP_FOR_EACH (n, metadata_node, &metadata_map) { | |
07a3cd5c | 351 | if (uuid_equals(&n->state.ofproto_uuid, &ofproto->uuid)) { |
e672ff9b JR |
352 | VLOG_ERR("recirc_id %"PRIu32 |
353 | " left allocated when ofproto (%s)" | |
354 | " is destructed", n->id, ofproto_name); | |
355 | } | |
356 | } | |
f5374617 | 357 | } |