ofproto/ofproto-dpif-rid.c (mirror_ovs.git, commit "Add support for connection tracking.")

/*
 * Copyright (c) 2014, 2015 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "ofpbuf.h"
#include "ofproto-dpif.h"
#include "ofproto-dpif-rid.h"
#include "ofproto-provider.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid);

static struct ovs_mutex mutex;

static struct cmap id_map;
static struct cmap metadata_map;

static struct ovs_list expiring OVS_GUARDED_BY(mutex);
static struct ovs_list expired OVS_GUARDED_BY(mutex);

static uint32_t next_id OVS_GUARDED_BY(mutex); /* Possible next free id. */

#define RECIRC_POOL_STATIC_IDS 1024

void
recirc_init(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (ovsthread_once_start(&once)) {
        ovs_mutex_init(&mutex);
        ovs_mutex_lock(&mutex);
        next_id = 1; /* 0 is not a valid ID. */
        cmap_init(&id_map);
        cmap_init(&metadata_map);
        list_init(&expiring);
        list_init(&expired);
        ovs_mutex_unlock(&mutex);

        ovsthread_once_done(&once);
    }
}
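
/* Illustrative sketch: recirc_init() is guarded by ovsthread_once, so any
 * module that may touch the recirculation pool can simply call it before
 * first use; only the first caller performs the actual initialization.  The
 * function name below is hypothetical and the block is not compiled. */
#if 0
static void
example_module_start(void)
{
    recirc_init();   /* Safe to call from any thread, any number of times. */

    /* ... allocate recirculation IDs, perform lookups, etc. ... */
}
#endif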

/* This should be called by the revalidator once per round (every 500 ms or
 * more). */
void
recirc_run(void)
{
    static long long int last = 0;
    long long int now = time_msec();

    /* Do maintenance at most 4 times / sec. */
    ovs_mutex_lock(&mutex);
    if (now - last > 250) {
        struct recirc_id_node *node;

        last = now;

        /* Nodes in the 'expiring' and 'expired' lists have a refcount of
         * zero, which means that while they can still be found (by ID), no
         * new references can be taken on them.  The entry was removed from
         * the 'metadata_map' at the time the refcount reached zero, so any
         * new translation will allocate a new ID.  This allows the expiring
         * entry to be deleted safely, while any new use of a similar
         * recirculation starts out with a fresh recirculation ID.  When the
         * refcount reaches zero, the node is also added to the 'expiring'
         * list.  At any time after that, the nodes in the 'expiring' list
         * can be moved to the 'expired' list, from which they are deleted
         * at least 250 ms later. */

        /* Delete the expired.  These have been lingering for at least
         * 250 ms, which should be enough for any ongoing recirculations to
         * be finished. */
        LIST_FOR_EACH_POP (node, exp_node, &expired) {
            cmap_remove(&id_map, &node->id_node, node->id);
            ovsrcu_postpone(free, node);
        }

        if (!list_is_empty(&expiring)) {
            /* 'expired' is now empty, move nodes in 'expiring' to it. */
            list_splice(&expired, list_front(&expiring), &expiring);
        }
    }
    ovs_mutex_unlock(&mutex);
}
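
/* Illustrative sketch: recirc_run() is intended to be driven from the
 * revalidator's periodic round.  A loop shaped like the hypothetical one
 * below is sufficient; the function rate-limits itself to one maintenance
 * pass per 250 ms, so calling it more often is harmless.  Not compiled. */
#if 0
static void
example_revalidator_round(void)
{
    recirc_run();    /* Reclaim recirculation IDs whose refcount dropped to
                      * zero at least 250 ms ago. */

    /* ... revalidate datapath flows for this round ... */
}
#endif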

/* We use the id as the hash value, which works due to cmap internal rehashing.
 * We also only insert nodes with unique IDs, so all possible hash collisions
 * remain internal to the cmap. */
static struct recirc_id_node *
recirc_find__(uint32_t id)
    OVS_REQUIRES(mutex)
{
    struct cmap_node *node = cmap_find_protected(&id_map, id);

    return node ? CONTAINER_OF(node, struct recirc_id_node, id_node) : NULL;
}

/* Lockless RCU protected lookup.  If the node is needed across an RCU
 * quiescent state, the caller should copy the contents. */
const struct recirc_id_node *
recirc_id_node_find(uint32_t id)
{
    const struct cmap_node *node = cmap_find(&id_map, id);

    return node
        ? CONTAINER_OF(node, const struct recirc_id_node, id_node)
        : NULL;
}
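
/* Illustrative sketch: because recirc_id_node_find() is only RCU protected,
 * a caller that needs the data past the current RCU quiescent state should
 * copy it while the pointer is still guaranteed valid.  The helper below is
 * hypothetical and not compiled; note that the shallow copy does not
 * duplicate data that the node's state merely points to (stack, ofpacts,
 * tunnel). */
#if 0
static bool
example_lookup_and_copy(uint32_t id, struct recirc_id_node *copy)
{
    const struct recirc_id_node *node = recirc_id_node_find(id);

    if (!node) {
        return false;
    }
    *copy = *node;   /* Shallow copy, taken before the next quiescent state. */
    return true;
}
#endif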

static uint32_t
recirc_metadata_hash(const struct recirc_state *state)
{
    uint32_t hash;

    hash = hash_pointer(state->ofproto, 0);
    hash = hash_int(state->table_id, hash);
    if (state->metadata.tunnel->ip_dst) {
        /* We may leave remainder bytes unhashed, but that is unlikely as
         * the tunnel is not in the datapath format. */
        hash = hash_words64((const uint64_t *) state->metadata.tunnel,
                            flow_tnl_size(state->metadata.tunnel)
                            / sizeof(uint64_t), hash);
    }
    hash = hash_boolean(state->conntracked, hash);
    hash = hash_words64((const uint64_t *) &state->metadata.metadata,
                        (sizeof state->metadata - sizeof state->metadata.tunnel)
                        / sizeof(uint64_t),
                        hash);
    if (state->stack && state->stack->size != 0) {
        hash = hash_words64((const uint64_t *) state->stack->data,
                            state->stack->size / sizeof(uint64_t), hash);
    }
    hash = hash_int(state->mirrors, hash);
    hash = hash_int(state->action_set_len, hash);
    if (state->ofpacts_len) {
        hash = hash_words64(ALIGNED_CAST(const uint64_t *, state->ofpacts),
                            state->ofpacts_len / sizeof(uint64_t),
                            hash);
    }
    return hash;
}

static bool
recirc_metadata_equal(const struct recirc_state *a,
                      const struct recirc_state *b)
{
    return (a->table_id == b->table_id
            && a->ofproto == b->ofproto
            && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel)
            && !memcmp(&a->metadata.metadata, &b->metadata.metadata,
                       sizeof a->metadata - sizeof a->metadata.tunnel)
            && (((!a->stack || !a->stack->size) &&
                 (!b->stack || !b->stack->size))
                || (a->stack && b->stack && ofpbuf_equal(a->stack, b->stack)))
            && a->mirrors == b->mirrors
            && a->conntracked == b->conntracked
            && a->action_set_len == b->action_set_len
            && ofpacts_equal(a->ofpacts, a->ofpacts_len,
                             b->ofpacts, b->ofpacts_len));
}

/* Lockless RCU protected lookup.  If the node is needed across an RCU
 * quiescent state, the caller should take a reference. */
static struct recirc_id_node *
recirc_find_equal(const struct recirc_state *target, uint32_t hash)
{
    struct recirc_id_node *node;

    CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &metadata_map) {
        if (recirc_metadata_equal(&node->state, target)) {
            return node;
        }
    }
    return NULL;
}

static struct recirc_id_node *
recirc_ref_equal(const struct recirc_state *target, uint32_t hash)
{
    struct recirc_id_node *node;

    do {
        node = recirc_find_equal(target, hash);

        /* Try again if the node was released before we got the reference. */
    } while (node && !ovs_refcount_try_ref_rcu(&node->refcount));

    return node;
}

static void
recirc_state_clone(struct recirc_state *new, const struct recirc_state *old,
                   struct flow_tnl *tunnel)
{
    *new = *old;
    flow_tnl_copy__(tunnel, old->metadata.tunnel);
    new->metadata.tunnel = tunnel;

    if (new->stack) {
        new->stack = new->stack->size ? ofpbuf_clone(new->stack) : NULL;
    }
    if (new->ofpacts) {
        new->ofpacts = (new->ofpacts_len
                        ? xmemdup(new->ofpacts, new->ofpacts_len)
                        : NULL);
    }
}

/* Allocate a unique recirculation id for the given set of flow metadata.
 * The ID space is 2**32, so there should never be a situation in which all
 * the IDs are used up.  We loop until we find a free one.  The caller passes
 * in 'hash', the hash of 'state' as computed by recirc_metadata_hash(). */
static struct recirc_id_node *
recirc_alloc_id__(const struct recirc_state *state, uint32_t hash)
{
    ovs_assert(state->action_set_len <= state->ofpacts_len);

    struct recirc_id_node *node = xzalloc(sizeof *node);

    node->hash = hash;
    ovs_refcount_init(&node->refcount);
    recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state,
                       &node->state_metadata_tunnel);

    ovs_mutex_lock(&mutex);
    for (;;) {
        /* Claim the next ID.  The ID space should be sparse enough for the
         * allocation to succeed at the first try.  We do skip the first
         * RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of
         * the initial allocations may be for long term uses (like bonds). */
        node->id = next_id++;
        if (OVS_UNLIKELY(!node->id)) {
            next_id = RECIRC_POOL_STATIC_IDS + 1;
            node->id = next_id++;
        }
        /* Check whether the ID is free. */
        if (OVS_LIKELY(!recirc_find__(node->id))) {
            break;
        }
    }
    cmap_insert(&id_map, &node->id_node, node->id);
    cmap_insert(&metadata_map, &node->metadata_node, node->hash);
    ovs_mutex_unlock(&mutex);
    return node;
}

/* Look up an existing ID for the given flow's metadata and optional
 * actions. */
uint32_t
recirc_find_id(const struct recirc_state *target)
{
    uint32_t hash = recirc_metadata_hash(target);
    struct recirc_id_node *node = recirc_find_equal(target, hash);
    return node ? node->id : 0;
}

/* Allocate a unique recirculation id for the given set of flow metadata and
 * optional actions. */
uint32_t
recirc_alloc_id_ctx(const struct recirc_state *state)
{
    uint32_t hash = recirc_metadata_hash(state);
    struct recirc_id_node *node = recirc_ref_equal(state, hash);
    if (!node) {
        node = recirc_alloc_id__(state, hash);
    }
    return node->id;
}
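
/* Illustrative sketch: a translation that wants to recirculate describes the
 * point to resume at in a 'struct recirc_state', obtains an ID for it, and
 * releases its reference once the datapath flow that used the ID is gone.
 * The function and 'state' below are hypothetical; not compiled. */
#if 0
static void
example_translate_with_recirc(const struct recirc_state *state)
{
    uint32_t id = recirc_alloc_id_ctx(state);   /* Caller now holds a
                                                 * reference on the node. */

    /* ... emit a datapath recirc(id) action using 'id' ... */

    recirc_free_id(id);   /* Drop the reference once the flow that carried
                           * the recirc(id) action has been deleted. */
}
#endif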

/* Allocate a unique recirculation id. */
uint32_t
recirc_alloc_id(struct ofproto_dpif *ofproto)
{
    struct flow_tnl tunnel;
    tunnel.ip_dst = htonl(0);
    struct recirc_state state = {
        .table_id = TBL_INTERNAL,
        .ofproto = ofproto,
        .metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE },
    };
    return recirc_alloc_id__(&state, recirc_metadata_hash(&state))->id;
}
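
/* Illustrative sketch: recirc_alloc_id() serves IDs that are not tied to any
 * flow metadata, such as the long-lived per-bond recirculation ID.  The
 * caller owns one reference and returns it with recirc_free_id() when the ID
 * is no longer needed.  The variable and functions below are hypothetical
 * and not compiled. */
#if 0
static uint32_t example_bond_recirc_id;

static void
example_bond_setup(struct ofproto_dpif *ofproto)
{
    example_bond_recirc_id = recirc_alloc_id(ofproto);
}

static void
example_bond_teardown(void)
{
    recirc_free_id(example_bond_recirc_id);
    example_bond_recirc_id = 0;
}
#endif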

void
recirc_id_node_unref(const struct recirc_id_node *node_)
    OVS_EXCLUDED(mutex)
{
    struct recirc_id_node *node = CONST_CAST(struct recirc_id_node *, node_);

    if (node && ovs_refcount_unref(&node->refcount) == 1) {
        ovs_mutex_lock(&mutex);
        /* Prevent re-use of this node by removing the node from
         * 'metadata_map'. */
        cmap_remove(&metadata_map, &node->metadata_node, node->hash);
        /* We keep the node in the 'id_map' so that it can be found as long
         * as it lingers, and add it to the 'expiring' list. */
        list_insert(&expiring, &node->exp_node);
        ovs_mutex_unlock(&mutex);
    }
}

void
recirc_free_id(uint32_t id)
{
    const struct recirc_id_node *node;

    node = recirc_id_node_find(id);
    if (node) {
        recirc_id_node_unref(node);
    } else {
        VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32, id);
    }
}

/* Called when 'ofproto' is destructed.  Checks for and logs any recirc_id
 * leak.
 * No other thread may have access to the 'ofproto' being destructed.
 * All related datapath flows must be deleted before calling this. */
void
recirc_free_ofproto(struct ofproto_dpif *ofproto, const char *ofproto_name)
{
    struct recirc_id_node *n;

    CMAP_FOR_EACH (n, metadata_node, &metadata_map) {
        if (n->state.ofproto == ofproto) {
            VLOG_ERR("recirc_id %"PRIu32
                     " left allocated when ofproto (%s)"
                     " is destructed", n->id, ofproto_name);
        }
    }
}
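
/* Illustrative sketch: the intended ordering when a bridge is destructed.
 * All datapath flows referencing the bridge, and any IDs the bridge itself
 * allocated, must be released first; otherwise the leak check above may
 * report IDs that are still legitimately in use.  The helper below is
 * hypothetical and not compiled. */
#if 0
static void
example_destruct_bridge(struct ofproto_dpif *ofproto, const char *name)
{
    /* 1. Delete the bridge's datapath flows (not shown). */
    /* 2. Release the bridge's own IDs, e.g. with recirc_free_id(). */
    /* 3. Check that nothing leaked. */
    recirc_free_ofproto(ofproto, name);
}
#endif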