]>
Commit | Line | Data |
---|---|---|
f5374617 | 1 | /* |
85b9cb2e | 2 | * Copyright (c) 2014, 2015, 2016 Nicira, Inc. |
f5374617 AZ |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | ||
64c96779 | 19 | #include "openvswitch/ofpbuf.h" |
e672ff9b | 20 | #include "ofproto-dpif.h" |
f5374617 | 21 | #include "ofproto-dpif-rid.h" |
e672ff9b JR |
22 | #include "ofproto-provider.h" |
23 | #include "openvswitch/vlog.h" | |
f5374617 | 24 | |
e672ff9b | 25 | VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid); |
f5374617 | 26 | |
b70e6976 | 27 | static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; |
f5374617 | 28 | |
b70e6976 BP |
29 | static struct cmap id_map = CMAP_INITIALIZER; |
30 | static struct cmap metadata_map = CMAP_INITIALIZER; | |
e672ff9b | 31 | |
b70e6976 BP |
32 | static struct ovs_list expiring OVS_GUARDED_BY(mutex) |
33 | = OVS_LIST_INITIALIZER(&expiring); | |
34 | static struct ovs_list expired OVS_GUARDED_BY(mutex) | |
35 | = OVS_LIST_INITIALIZER(&expired); | |
e672ff9b | 36 | |
b70e6976 | 37 | static uint32_t next_id OVS_GUARDED_BY(mutex) = 1; /* Possible next free id. */ |
e672ff9b JR |
38 | |
39 | #define RECIRC_POOL_STATIC_IDS 1024 | |
40 | ||
85b9cb2e BP |
41 | static void recirc_id_node_free(struct recirc_id_node *); |
42 | ||
e672ff9b JR |
43 | /* This should be called by the revalidator once at each round (every 500ms or |
44 | * more). */ | |
f5374617 | 45 | void |
e672ff9b JR |
46 | recirc_run(void) |
47 | { | |
48 | static long long int last = 0; | |
49 | long long int now = time_msec(); | |
50 | ||
51 | /* Do maintenance at most 4 times / sec. */ | |
52 | ovs_mutex_lock(&mutex); | |
53 | if (now - last > 250) { | |
5f03c983 | 54 | struct recirc_id_node *node; |
e672ff9b JR |
55 | |
56 | last = now; | |
57 | ||
58 | /* Nodes in 'expiring' and 'expired' lists have the refcount of zero, | |
59 | * which means that while they can still be found (by id), no new | |
60 | * references can be taken on them. We have removed the entry from the | |
61 | * 'metadata_map', at the time when refcount reached zero, causing any | |
62 | * new translations to allocate a new ID. This allows the expiring | |
63 | * entry to be safely deleted while any sudden new use of the similar | |
64 | * recirculation will safely start using a new recirculation ID. When | |
65 | * the refcount gets to zero, the node is also added to the 'expiring' | |
66 | * list. At any time after that the nodes in the 'expiring' list can | |
67 | * be moved to the 'expired' list, from which they are deleted at least | |
68 | * 250ms afterwards. */ | |
69 | ||
70 | /* Delete the expired. These have been lingering for at least 250 ms, | |
71 | * which should be enough for any ongoing recirculations to be | |
72 | * finished. */ | |
5f03c983 | 73 | LIST_FOR_EACH_POP (node, exp_node, &expired) { |
e672ff9b | 74 | cmap_remove(&id_map, &node->id_node, node->id); |
85b9cb2e | 75 | ovsrcu_postpone(recirc_id_node_free, node); |
e672ff9b JR |
76 | } |
77 | ||
417e7e66 | 78 | if (!ovs_list_is_empty(&expiring)) { |
e672ff9b | 79 | /* 'expired' is now empty, move nodes in 'expiring' to it. */ |
417e7e66 | 80 | ovs_list_splice(&expired, ovs_list_front(&expiring), &expiring); |
e672ff9b JR |
81 | } |
82 | } | |
83 | ovs_mutex_unlock(&mutex); | |
84 | } | |
85 | ||
86 | /* We use the id as the hash value, which works due to cmap internal rehashing. | |
87 | * We also only insert nodes with unique IDs, so all possible hash collisions | |
88 | * remain internal to the cmap. */ | |
89 | static struct recirc_id_node * | |
90 | recirc_find__(uint32_t id) | |
91 | OVS_REQUIRES(mutex) | |
92 | { | |
93 | struct cmap_node *node = cmap_find_protected(&id_map, id); | |
94 | ||
95 | return node ? CONTAINER_OF(node, struct recirc_id_node, id_node) : NULL; | |
96 | } | |
97 | ||
98 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
99 | * state, caller should copy the contents. */ | |
100 | const struct recirc_id_node * | |
101 | recirc_id_node_find(uint32_t id) | |
102 | { | |
103 | const struct cmap_node *node = cmap_find(&id_map, id); | |
104 | ||
105 | return node | |
106 | ? CONTAINER_OF(node, const struct recirc_id_node, id_node) | |
107 | : NULL; | |
108 | } | |
109 | ||
110 | static uint32_t | |
1d361a81 | 111 | frozen_state_hash(const struct frozen_state *state) |
e672ff9b JR |
112 | { |
113 | uint32_t hash; | |
114 | ||
290835f9 | 115 | hash = uuid_hash(&state->ofproto_uuid); |
2082425c | 116 | hash = hash_int(state->table_id, hash); |
ffe4c74f | 117 | if (flow_tnl_dst_is_set(state->metadata.tunnel)) { |
59781952 JR |
118 | /* We may leave remainder bytes unhashed, but that is unlikely as |
119 | * the tunnel is not in the datapath format. */ | |
0a96a21b BP |
120 | hash = hash_bytes64((const uint64_t *) state->metadata.tunnel, |
121 | flow_tnl_size(state->metadata.tunnel), hash); | |
59781952 | 122 | } |
07659514 | 123 | hash = hash_boolean(state->conntracked, hash); |
0a96a21b BP |
124 | hash = hash_bytes64((const uint64_t *) &state->metadata.metadata, |
125 | sizeof state->metadata - sizeof state->metadata.tunnel, | |
e672ff9b | 126 | hash); |
84cf3c1f JR |
127 | if (state->stack && state->stack_size) { |
128 | hash = hash_bytes(state->stack, state->stack_size, hash); | |
e672ff9b | 129 | } |
29bae541 | 130 | hash = hash_int(state->mirrors, hash); |
2082425c | 131 | hash = hash_int(state->action_set_len, hash); |
417509fa BP |
132 | if (state->action_set_len) { |
133 | hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->action_set), | |
134 | state->action_set_len, hash); | |
135 | } | |
2082425c | 136 | if (state->ofpacts_len) { |
0a96a21b BP |
137 | hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->ofpacts), |
138 | state->ofpacts_len, hash); | |
e672ff9b JR |
139 | } |
140 | return hash; | |
141 | } | |
142 | ||
143 | static bool | |
1d361a81 | 144 | frozen_state_equal(const struct frozen_state *a, const struct frozen_state *b) |
e672ff9b | 145 | { |
2082425c | 146 | return (a->table_id == b->table_id |
290835f9 | 147 | && uuid_equals(&a->ofproto_uuid, &b->ofproto_uuid) |
59781952 JR |
148 | && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel) |
149 | && !memcmp(&a->metadata.metadata, &b->metadata.metadata, | |
150 | sizeof a->metadata - sizeof a->metadata.tunnel) | |
84cf3c1f JR |
151 | && a->stack_size == b->stack_size |
152 | && !memcmp(a->stack, b->stack, a->stack_size) | |
29bae541 | 153 | && a->mirrors == b->mirrors |
07659514 | 154 | && a->conntracked == b->conntracked |
2082425c | 155 | && ofpacts_equal(a->ofpacts, a->ofpacts_len, |
417509fa BP |
156 | b->ofpacts, b->ofpacts_len) |
157 | && ofpacts_equal(a->action_set, a->action_set_len, | |
158 | b->action_set, b->action_set_len)); | |
e672ff9b JR |
159 | } |
160 | ||
161 | /* Lockless RCU protected lookup. If node is needed accross RCU quiescent | |
162 | * state, caller should take a reference. */ | |
163 | static struct recirc_id_node * | |
1d361a81 | 164 | recirc_find_equal(const struct frozen_state *target, uint32_t hash) |
e672ff9b JR |
165 | { |
166 | struct recirc_id_node *node; | |
167 | ||
2082425c | 168 | CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &metadata_map) { |
1d361a81 | 169 | if (frozen_state_equal(&node->state, target)) { |
e672ff9b JR |
170 | return node; |
171 | } | |
172 | } | |
173 | return NULL; | |
174 | } | |
175 | ||
176 | static struct recirc_id_node * | |
1d361a81 | 177 | recirc_ref_equal(const struct frozen_state *target, uint32_t hash) |
e672ff9b JR |
178 | { |
179 | struct recirc_id_node *node; | |
180 | ||
181 | do { | |
2082425c | 182 | node = recirc_find_equal(target, hash); |
e672ff9b JR |
183 | |
184 | /* Try again if the node was released before we get the reference. */ | |
185 | } while (node && !ovs_refcount_try_ref_rcu(&node->refcount)); | |
186 | ||
187 | return node; | |
188 | } | |
189 | ||
2082425c | 190 | static void |
1d361a81 | 191 | frozen_state_clone(struct frozen_state *new, const struct frozen_state *old, |
59781952 | 192 | struct flow_tnl *tunnel) |
2082425c BP |
193 | { |
194 | *new = *old; | |
59781952 JR |
195 | flow_tnl_copy__(tunnel, old->metadata.tunnel); |
196 | new->metadata.tunnel = tunnel; | |
197 | ||
84cf3c1f JR |
198 | new->stack = (new->stack_size |
199 | ? xmemdup(new->stack, new->stack_size) | |
5c1b2314 BP |
200 | : NULL); |
201 | new->ofpacts = (new->ofpacts_len | |
202 | ? xmemdup(new->ofpacts, new->ofpacts_len) | |
203 | : NULL); | |
417509fa BP |
204 | new->action_set = (new->action_set_len |
205 | ? xmemdup(new->action_set, new->action_set_len) | |
206 | : NULL); | |
2082425c BP |
207 | } |
208 | ||
85b9cb2e | 209 | static void |
1d361a81 | 210 | frozen_state_free(struct frozen_state *state) |
85b9cb2e | 211 | { |
5c1b2314 | 212 | free(state->stack); |
85b9cb2e | 213 | free(state->ofpacts); |
417509fa | 214 | free(state->action_set); |
85b9cb2e BP |
215 | } |
216 | ||
e672ff9b JR |
217 | /* Allocate a unique recirculation id for the given set of flow metadata. |
218 | * The ID space is 2^^32, so there should never be a situation in which all | |
219 | * the IDs are used up. We loop until we find a free one. | |
220 | * hash is recomputed if it is passed in as 0. */ | |
221 | static struct recirc_id_node * | |
1d361a81 | 222 | recirc_alloc_id__(const struct frozen_state *state, uint32_t hash) |
e672ff9b | 223 | { |
2082425c BP |
224 | ovs_assert(state->action_set_len <= state->ofpacts_len); |
225 | ||
226 | struct recirc_id_node *node = xzalloc(sizeof *node); | |
59781952 | 227 | |
e672ff9b JR |
228 | node->hash = hash; |
229 | ovs_refcount_init(&node->refcount); | |
1d361a81 | 230 | frozen_state_clone(CONST_CAST(struct frozen_state *, &node->state), state, |
59781952 | 231 | &node->state_metadata_tunnel); |
e672ff9b JR |
232 | |
233 | ovs_mutex_lock(&mutex); | |
234 | for (;;) { | |
235 | /* Claim the next ID. The ID space should be sparse enough for the | |
236 | allocation to succeed at the first try. We do skip the first | |
237 | RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of | |
238 | the initial allocations may be for long term uses (like bonds). */ | |
239 | node->id = next_id++; | |
240 | if (OVS_UNLIKELY(!node->id)) { | |
241 | next_id = RECIRC_POOL_STATIC_IDS + 1; | |
242 | node->id = next_id++; | |
243 | } | |
244 | /* Find if the id is free. */ | |
245 | if (OVS_LIKELY(!recirc_find__(node->id))) { | |
246 | break; | |
247 | } | |
248 | } | |
249 | cmap_insert(&id_map, &node->id_node, node->id); | |
250 | cmap_insert(&metadata_map, &node->metadata_node, node->hash); | |
251 | ovs_mutex_unlock(&mutex); | |
252 | return node; | |
253 | } | |
254 | ||
255 | /* Look up an existing ID for the given flow's metadata and optional actions. | |
256 | */ | |
257 | uint32_t | |
1d361a81 | 258 | recirc_find_id(const struct frozen_state *target) |
e672ff9b | 259 | { |
1d361a81 | 260 | uint32_t hash = frozen_state_hash(target); |
2082425c | 261 | struct recirc_id_node *node = recirc_find_equal(target, hash); |
e672ff9b JR |
262 | return node ? node->id : 0; |
263 | } | |
264 | ||
265 | /* Allocate a unique recirculation id for the given set of flow metadata and | |
266 | optional actions. */ | |
267 | uint32_t | |
1d361a81 | 268 | recirc_alloc_id_ctx(const struct frozen_state *state) |
f5374617 | 269 | { |
1d361a81 | 270 | uint32_t hash = frozen_state_hash(state); |
2082425c | 271 | struct recirc_id_node *node = recirc_ref_equal(state, hash); |
e672ff9b | 272 | if (!node) { |
2082425c | 273 | node = recirc_alloc_id__(state, hash); |
e672ff9b | 274 | } |
e672ff9b | 275 | return node->id; |
f5374617 AZ |
276 | } |
277 | ||
e672ff9b | 278 | /* Allocate a unique recirculation id. */ |
f5374617 | 279 | uint32_t |
e672ff9b | 280 | recirc_alloc_id(struct ofproto_dpif *ofproto) |
f5374617 | 281 | { |
59781952 JR |
282 | struct flow_tnl tunnel; |
283 | tunnel.ip_dst = htonl(0); | |
ffe4c74f | 284 | tunnel.ipv6_dst = in6addr_any; |
1d361a81 | 285 | struct frozen_state state = { |
2082425c | 286 | .table_id = TBL_INTERNAL, |
07a3cd5c | 287 | .ofproto_uuid = ofproto->uuid, |
59781952 | 288 | .metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE }, |
2082425c | 289 | }; |
1d361a81 | 290 | return recirc_alloc_id__(&state, frozen_state_hash(&state))->id; |
e672ff9b | 291 | } |
f5374617 | 292 | |
85b9cb2e BP |
293 | static void |
294 | recirc_id_node_free(struct recirc_id_node *node) | |
295 | { | |
1d361a81 | 296 | frozen_state_free(CONST_CAST(struct frozen_state *, &node->state)); |
85b9cb2e BP |
297 | free(node); |
298 | } | |
299 | ||
e672ff9b JR |
300 | void |
301 | recirc_id_node_unref(const struct recirc_id_node *node_) | |
302 | OVS_EXCLUDED(mutex) | |
303 | { | |
304 | struct recirc_id_node *node = CONST_CAST(struct recirc_id_node *, node_); | |
305 | ||
306 | if (node && ovs_refcount_unref(&node->refcount) == 1) { | |
307 | ovs_mutex_lock(&mutex); | |
308 | /* Prevent re-use of this node by removing the node from 'metadata_map' | |
309 | */ | |
310 | cmap_remove(&metadata_map, &node->metadata_node, node->hash); | |
311 | /* We keep the node in the 'id_map' so that it can be found as long | |
312 | * as it lingers, and add it to the 'expiring' list. */ | |
417e7e66 | 313 | ovs_list_insert(&expiring, &node->exp_node); |
e672ff9b | 314 | ovs_mutex_unlock(&mutex); |
27c24749 | 315 | } |
e672ff9b | 316 | } |
27c24749 | 317 | |
e672ff9b JR |
/* Drops one reference to the node with recirculation ID 'id'.  Logs an
 * error if no node with that ID exists. */
void
recirc_free_id(uint32_t id)
{
    const struct recirc_id_node *node = recirc_id_node_find(id);

    if (!node) {
        VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32, id);
        return;
    }
    recirc_id_node_unref(node);
}
330 | ||
e672ff9b JR |
331 | /* Called when 'ofproto' is destructed. Checks for and clears any |
332 | * recirc_id leak. | |
333 | * No other thread may have access to the 'ofproto' being destructed. | |
334 | * All related datapath flows must be deleted before calling this. */ | |
f5374617 | 335 | void |
e672ff9b | 336 | recirc_free_ofproto(struct ofproto_dpif *ofproto, const char *ofproto_name) |
f5374617 | 337 | { |
e672ff9b JR |
338 | struct recirc_id_node *n; |
339 | ||
340 | CMAP_FOR_EACH (n, metadata_node, &metadata_map) { | |
07a3cd5c | 341 | if (uuid_equals(&n->state.ofproto_uuid, &ofproto->uuid)) { |
e672ff9b JR |
342 | VLOG_ERR("recirc_id %"PRIu32 |
343 | " left allocated when ofproto (%s)" | |
344 | " is destructed", n->id, ofproto_name); | |
345 | } | |
346 | } | |
f5374617 | 347 | } |