]>
Commit | Line | Data |
---|---|---|
f5374617 | 1 | /* |
6cb5e507 | 2 | * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc. |
f5374617 AZ |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | ||
64c96779 | 19 | #include "openvswitch/ofpbuf.h" |
e672ff9b | 20 | #include "ofproto-dpif.h" |
f5374617 | 21 | #include "ofproto-dpif-rid.h" |
e672ff9b JR |
22 | #include "ofproto-provider.h" |
23 | #include "openvswitch/vlog.h" | |
f5374617 | 24 | |
e672ff9b | 25 | VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid); |
f5374617 | 26 | |
b70e6976 | 27 | static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; |
f5374617 | 28 | |
b70e6976 BP |
29 | static struct cmap id_map = CMAP_INITIALIZER; |
30 | static struct cmap metadata_map = CMAP_INITIALIZER; | |
e672ff9b | 31 | |
b70e6976 BP |
32 | static struct ovs_list expiring OVS_GUARDED_BY(mutex) |
33 | = OVS_LIST_INITIALIZER(&expiring); | |
34 | static struct ovs_list expired OVS_GUARDED_BY(mutex) | |
35 | = OVS_LIST_INITIALIZER(&expired); | |
e672ff9b | 36 | |
b70e6976 | 37 | static uint32_t next_id OVS_GUARDED_BY(mutex) = 1; /* Possible next free id. */ |
e672ff9b JR |
38 | |
39 | #define RECIRC_POOL_STATIC_IDS 1024 | |
40 | ||
85b9cb2e BP |
41 | static void recirc_id_node_free(struct recirc_id_node *); |
42 | ||
e672ff9b JR |
43 | /* This should be called by the revalidator once at each round (every 500ms or |
44 | * more). */ | |
f5374617 | 45 | void |
e672ff9b JR |
46 | recirc_run(void) |
47 | { | |
48 | static long long int last = 0; | |
49 | long long int now = time_msec(); | |
50 | ||
51 | /* Do maintenance at most 4 times / sec. */ | |
52 | ovs_mutex_lock(&mutex); | |
53 | if (now - last > 250) { | |
5f03c983 | 54 | struct recirc_id_node *node; |
e672ff9b JR |
55 | |
56 | last = now; | |
57 | ||
58 | /* Nodes in 'expiring' and 'expired' lists have the refcount of zero, | |
59 | * which means that while they can still be found (by id), no new | |
60 | * references can be taken on them. We have removed the entry from the | |
61 | * 'metadata_map', at the time when refcount reached zero, causing any | |
62 | * new translations to allocate a new ID. This allows the expiring | |
63 | * entry to be safely deleted while any sudden new use of the similar | |
64 | * recirculation will safely start using a new recirculation ID. When | |
65 | * the refcount gets to zero, the node is also added to the 'expiring' | |
66 | * list. At any time after that the nodes in the 'expiring' list can | |
67 | * be moved to the 'expired' list, from which they are deleted at least | |
68 | * 250ms afterwards. */ | |
69 | ||
70 | /* Delete the expired. These have been lingering for at least 250 ms, | |
71 | * which should be enough for any ongoing recirculations to be | |
72 | * finished. */ | |
5f03c983 | 73 | LIST_FOR_EACH_POP (node, exp_node, &expired) { |
e672ff9b | 74 | cmap_remove(&id_map, &node->id_node, node->id); |
85b9cb2e | 75 | ovsrcu_postpone(recirc_id_node_free, node); |
e672ff9b JR |
76 | } |
77 | ||
417e7e66 | 78 | if (!ovs_list_is_empty(&expiring)) { |
e672ff9b | 79 | /* 'expired' is now empty, move nodes in 'expiring' to it. */ |
417e7e66 | 80 | ovs_list_splice(&expired, ovs_list_front(&expiring), &expiring); |
e672ff9b JR |
81 | } |
82 | } | |
83 | ovs_mutex_unlock(&mutex); | |
84 | } | |
85 | ||
86 | /* We use the id as the hash value, which works due to cmap internal rehashing. | |
87 | * We also only insert nodes with unique IDs, so all possible hash collisions | |
88 | * remain internal to the cmap. */ | |
89 | static struct recirc_id_node * | |
90 | recirc_find__(uint32_t id) | |
91 | OVS_REQUIRES(mutex) | |
92 | { | |
93 | struct cmap_node *node = cmap_find_protected(&id_map, id); | |
94 | ||
95 | return node ? CONTAINER_OF(node, struct recirc_id_node, id_node) : NULL; | |
96 | } | |
97 | ||
98 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
99 | * state, caller should copy the contents. */ | |
100 | const struct recirc_id_node * | |
101 | recirc_id_node_find(uint32_t id) | |
102 | { | |
103 | const struct cmap_node *node = cmap_find(&id_map, id); | |
104 | ||
105 | return node | |
106 | ? CONTAINER_OF(node, const struct recirc_id_node, id_node) | |
107 | : NULL; | |
108 | } | |
109 | ||
e6bc8e74 YHW |
110 | bool |
111 | recirc_id_node_find_and_ref(uint32_t id) | |
112 | { | |
113 | struct recirc_id_node *rid_node = | |
114 | CONST_CAST(struct recirc_id_node *, recirc_id_node_find(id)); | |
115 | ||
116 | if (!rid_node) { | |
117 | return false; | |
118 | } | |
119 | ||
120 | return ovs_refcount_try_ref_rcu(&rid_node->refcount); | |
121 | } | |
122 | ||
e672ff9b | 123 | static uint32_t |
1d361a81 | 124 | frozen_state_hash(const struct frozen_state *state) |
e672ff9b JR |
125 | { |
126 | uint32_t hash; | |
127 | ||
290835f9 | 128 | hash = uuid_hash(&state->ofproto_uuid); |
2082425c | 129 | hash = hash_int(state->table_id, hash); |
8014f465 JP |
130 | hash = hash_bytes64((const uint64_t *) &state->metadata, |
131 | sizeof state->metadata, hash); | |
07659514 | 132 | hash = hash_boolean(state->conntracked, hash); |
aeb6566d | 133 | hash = hash_boolean(state->was_mpls, hash); |
84cf3c1f JR |
134 | if (state->stack && state->stack_size) { |
135 | hash = hash_bytes(state->stack, state->stack_size, hash); | |
e672ff9b | 136 | } |
29bae541 | 137 | hash = hash_int(state->mirrors, hash); |
417509fa BP |
138 | if (state->action_set_len) { |
139 | hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->action_set), | |
140 | state->action_set_len, hash); | |
141 | } | |
2082425c | 142 | if (state->ofpacts_len) { |
0a96a21b BP |
143 | hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->ofpacts), |
144 | state->ofpacts_len, hash); | |
e672ff9b | 145 | } |
d39ec23d JP |
146 | if (state->userdata && state->userdata_len) { |
147 | hash = hash_bytes(state->userdata, state->userdata_len, hash); | |
148 | } | |
e672ff9b JR |
149 | return hash; |
150 | } | |
151 | ||
152 | static bool | |
1d361a81 | 153 | frozen_state_equal(const struct frozen_state *a, const struct frozen_state *b) |
e672ff9b | 154 | { |
2082425c | 155 | return (a->table_id == b->table_id |
290835f9 | 156 | && uuid_equals(&a->ofproto_uuid, &b->ofproto_uuid) |
8014f465 | 157 | && !memcmp(&a->metadata, &b->metadata, sizeof a->metadata) |
84cf3c1f JR |
158 | && a->stack_size == b->stack_size |
159 | && !memcmp(a->stack, b->stack, a->stack_size) | |
29bae541 | 160 | && a->mirrors == b->mirrors |
07659514 | 161 | && a->conntracked == b->conntracked |
aeb6566d | 162 | && a->was_mpls == b->was_mpls |
2082425c | 163 | && ofpacts_equal(a->ofpacts, a->ofpacts_len, |
417509fa BP |
164 | b->ofpacts, b->ofpacts_len) |
165 | && ofpacts_equal(a->action_set, a->action_set_len, | |
d39ec23d | 166 | b->action_set, b->action_set_len) |
00135b86 ZB |
167 | && !memcmp(a->userdata, b->userdata, a->userdata_len) |
168 | && uuid_equals(&a->xport_uuid, &b->xport_uuid)); | |
e672ff9b JR |
169 | } |
170 | ||
171 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
172 | * state, caller should take a reference. */ | |
173 | static struct recirc_id_node * | |
1d361a81 | 174 | recirc_find_equal(const struct frozen_state *target, uint32_t hash) |
e672ff9b JR |
175 | { |
176 | struct recirc_id_node *node; | |
177 | ||
2082425c | 178 | CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &metadata_map) { |
1d361a81 | 179 | if (frozen_state_equal(&node->state, target)) { |
e672ff9b JR |
180 | return node; |
181 | } | |
182 | } | |
183 | return NULL; | |
184 | } | |
185 | ||
186 | static struct recirc_id_node * | |
1d361a81 | 187 | recirc_ref_equal(const struct frozen_state *target, uint32_t hash) |
e672ff9b JR |
188 | { |
189 | struct recirc_id_node *node; | |
190 | ||
191 | do { | |
2082425c | 192 | node = recirc_find_equal(target, hash); |
e672ff9b JR |
193 | |
194 | /* Try again if the node was released before we get the reference. */ | |
195 | } while (node && !ovs_refcount_try_ref_rcu(&node->refcount)); | |
196 | ||
197 | return node; | |
198 | } | |
199 | ||
2082425c | 200 | static void |
6cb5e507 | 201 | frozen_state_clone(struct frozen_state *new, const struct frozen_state *old) |
2082425c BP |
202 | { |
203 | *new = *old; | |
84cf3c1f JR |
204 | new->stack = (new->stack_size |
205 | ? xmemdup(new->stack, new->stack_size) | |
5c1b2314 BP |
206 | : NULL); |
207 | new->ofpacts = (new->ofpacts_len | |
208 | ? xmemdup(new->ofpacts, new->ofpacts_len) | |
209 | : NULL); | |
417509fa BP |
210 | new->action_set = (new->action_set_len |
211 | ? xmemdup(new->action_set, new->action_set_len) | |
212 | : NULL); | |
d39ec23d JP |
213 | new->userdata = (new->userdata_len |
214 | ? xmemdup(new->userdata, new->userdata_len) | |
215 | : NULL); | |
2082425c BP |
216 | } |
217 | ||
85b9cb2e | 218 | static void |
1d361a81 | 219 | frozen_state_free(struct frozen_state *state) |
85b9cb2e | 220 | { |
5c1b2314 | 221 | free(state->stack); |
85b9cb2e | 222 | free(state->ofpacts); |
417509fa | 223 | free(state->action_set); |
d39ec23d | 224 | free(state->userdata); |
85b9cb2e BP |
225 | } |
226 | ||
e672ff9b JR |
227 | /* Allocate a unique recirculation id for the given set of flow metadata. |
228 | * The ID space is 2^^32, so there should never be a situation in which all | |
95263037 | 229 | * the IDs are used up. We loop until we find a free one. */ |
e672ff9b | 230 | static struct recirc_id_node * |
1d361a81 | 231 | recirc_alloc_id__(const struct frozen_state *state, uint32_t hash) |
e672ff9b | 232 | { |
2082425c BP |
233 | ovs_assert(state->action_set_len <= state->ofpacts_len); |
234 | ||
235 | struct recirc_id_node *node = xzalloc(sizeof *node); | |
59781952 | 236 | |
e672ff9b JR |
237 | node->hash = hash; |
238 | ovs_refcount_init(&node->refcount); | |
6cb5e507 | 239 | frozen_state_clone(CONST_CAST(struct frozen_state *, &node->state), state); |
e672ff9b JR |
240 | |
241 | ovs_mutex_lock(&mutex); | |
242 | for (;;) { | |
243 | /* Claim the next ID. The ID space should be sparse enough for the | |
244 | allocation to succeed at the first try. We do skip the first | |
245 | RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of | |
246 | the initial allocations may be for long term uses (like bonds). */ | |
247 | node->id = next_id++; | |
248 | if (OVS_UNLIKELY(!node->id)) { | |
249 | next_id = RECIRC_POOL_STATIC_IDS + 1; | |
250 | node->id = next_id++; | |
251 | } | |
252 | /* Find if the id is free. */ | |
253 | if (OVS_LIKELY(!recirc_find__(node->id))) { | |
254 | break; | |
255 | } | |
256 | } | |
257 | cmap_insert(&id_map, &node->id_node, node->id); | |
258 | cmap_insert(&metadata_map, &node->metadata_node, node->hash); | |
259 | ovs_mutex_unlock(&mutex); | |
260 | return node; | |
261 | } | |
262 | ||
263 | /* Look up an existing ID for the given flow's metadata and optional actions. | |
264 | */ | |
265 | uint32_t | |
1d361a81 | 266 | recirc_find_id(const struct frozen_state *target) |
e672ff9b | 267 | { |
1d361a81 | 268 | uint32_t hash = frozen_state_hash(target); |
2082425c | 269 | struct recirc_id_node *node = recirc_find_equal(target, hash); |
e672ff9b JR |
270 | return node ? node->id : 0; |
271 | } | |
272 | ||
273 | /* Allocate a unique recirculation id for the given set of flow metadata and | |
274 | optional actions. */ | |
275 | uint32_t | |
1d361a81 | 276 | recirc_alloc_id_ctx(const struct frozen_state *state) |
f5374617 | 277 | { |
1d361a81 | 278 | uint32_t hash = frozen_state_hash(state); |
2082425c | 279 | struct recirc_id_node *node = recirc_ref_equal(state, hash); |
e672ff9b | 280 | if (!node) { |
2082425c | 281 | node = recirc_alloc_id__(state, hash); |
e672ff9b | 282 | } |
e672ff9b | 283 | return node->id; |
f5374617 AZ |
284 | } |
285 | ||
e672ff9b | 286 | /* Allocate a unique recirculation id. */ |
f5374617 | 287 | uint32_t |
e672ff9b | 288 | recirc_alloc_id(struct ofproto_dpif *ofproto) |
f5374617 | 289 | { |
1d361a81 | 290 | struct frozen_state state = { |
2082425c | 291 | .table_id = TBL_INTERNAL, |
07a3cd5c | 292 | .ofproto_uuid = ofproto->uuid, |
6cb5e507 JP |
293 | .metadata = { |
294 | .tunnel = { | |
295 | .ip_dst = htonl(0), | |
296 | .ipv6_dst = in6addr_any, | |
297 | }, | |
298 | .in_port = OFPP_NONE }, | |
2082425c | 299 | }; |
00135b86 ZB |
300 | /* In order to make sparse happy, xport_uuid needs to be set separately. */ |
301 | state.xport_uuid = UUID_ZERO; | |
1d361a81 | 302 | return recirc_alloc_id__(&state, frozen_state_hash(&state))->id; |
e672ff9b | 303 | } |
f5374617 | 304 | |
85b9cb2e BP |
305 | static void |
306 | recirc_id_node_free(struct recirc_id_node *node) | |
307 | { | |
1d361a81 | 308 | frozen_state_free(CONST_CAST(struct frozen_state *, &node->state)); |
85b9cb2e BP |
309 | free(node); |
310 | } | |
311 | ||
e672ff9b JR |
312 | void |
313 | recirc_id_node_unref(const struct recirc_id_node *node_) | |
314 | OVS_EXCLUDED(mutex) | |
315 | { | |
316 | struct recirc_id_node *node = CONST_CAST(struct recirc_id_node *, node_); | |
317 | ||
318 | if (node && ovs_refcount_unref(&node->refcount) == 1) { | |
319 | ovs_mutex_lock(&mutex); | |
320 | /* Prevent re-use of this node by removing the node from 'metadata_map' | |
321 | */ | |
322 | cmap_remove(&metadata_map, &node->metadata_node, node->hash); | |
323 | /* We keep the node in the 'id_map' so that it can be found as long | |
324 | * as it lingers, and add it to the 'expiring' list. */ | |
417e7e66 | 325 | ovs_list_insert(&expiring, &node->exp_node); |
e672ff9b | 326 | ovs_mutex_unlock(&mutex); |
27c24749 | 327 | } |
e672ff9b | 328 | } |
27c24749 | 329 | |
e672ff9b JR |
/* Drops one reference on the node with recirculation ID 'id'.  Logs an error
 * if no node with that ID exists. */
void
recirc_free_id(uint32_t id)
{
    const struct recirc_id_node *node = recirc_id_node_find(id);

    if (!node) {
        VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32, id);
        return;
    }
    recirc_id_node_unref(node);
}
342 | ||
e672ff9b JR |
343 | /* Called when 'ofproto' is destructed. Checks for and clears any |
344 | * recirc_id leak. | |
345 | * No other thread may have access to the 'ofproto' being destructed. | |
346 | * All related datapath flows must be deleted before calling this. */ | |
f5374617 | 347 | void |
e672ff9b | 348 | recirc_free_ofproto(struct ofproto_dpif *ofproto, const char *ofproto_name) |
f5374617 | 349 | { |
e672ff9b JR |
350 | struct recirc_id_node *n; |
351 | ||
352 | CMAP_FOR_EACH (n, metadata_node, &metadata_map) { | |
07a3cd5c | 353 | if (uuid_equals(&n->state.ofproto_uuid, &ofproto->uuid)) { |
e672ff9b JR |
354 | VLOG_ERR("recirc_id %"PRIu32 |
355 | " left allocated when ofproto (%s)" | |
356 | " is destructed", n->id, ofproto_name); | |
357 | } | |
358 | } | |
f5374617 | 359 | } |