]>
Commit | Line | Data |
---|---|---|
e6445719 PS |
1 | /* |
2 | * Copyright (c) 2007-2013 Nicira, Inc. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of version 2 of the GNU General Public | |
6 | * License as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public License | |
14 | * along with this program; if not, write to the Free Software | |
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
16 | * 02110-1301, USA | |
17 | */ | |
18 | ||
19 | #include "flow.h" | |
20 | #include "datapath.h" | |
21 | #include <linux/uaccess.h> | |
22 | #include <linux/netdevice.h> | |
23 | #include <linux/etherdevice.h> | |
24 | #include <linux/if_ether.h> | |
25 | #include <linux/if_vlan.h> | |
26 | #include <net/llc_pdu.h> | |
27 | #include <linux/kernel.h> | |
28 | #include <linux/jhash.h> | |
29 | #include <linux/jiffies.h> | |
30 | #include <linux/llc.h> | |
31 | #include <linux/module.h> | |
32 | #include <linux/in.h> | |
33 | #include <linux/rcupdate.h> | |
34 | #include <linux/if_arp.h> | |
35 | #include <linux/ip.h> | |
36 | #include <linux/ipv6.h> | |
37 | #include <linux/sctp.h> | |
38 | #include <linux/tcp.h> | |
39 | #include <linux/udp.h> | |
40 | #include <linux/icmp.h> | |
41 | #include <linux/icmpv6.h> | |
42 | #include <linux/rculist.h> | |
43 | #include <net/ip.h> | |
44 | #include <net/ipv6.h> | |
45 | #include <net/ndisc.h> | |
46 | ||
47 | static struct kmem_cache *flow_cache; | |
48 | ||
49 | static u16 range_n_bytes(const struct sw_flow_key_range *range) | |
50 | { | |
51 | return range->end - range->start; | |
52 | } | |
53 | ||
54 | void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, | |
55 | const struct sw_flow_mask *mask) | |
56 | { | |
57 | const long *m = (long *)((u8 *)&mask->key + mask->range.start); | |
58 | const long *s = (long *)((u8 *)src + mask->range.start); | |
59 | long *d = (long *)((u8 *)dst + mask->range.start); | |
60 | int i; | |
61 | ||
62 | /* The memory outside of the 'mask->range' are not set since | |
63 | * further operations on 'dst' only uses contents within | |
64 | * 'mask->range'. | |
65 | */ | |
66 | for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) | |
67 | *d++ = *s++ & *m++; | |
68 | } | |
69 | ||
70 | struct sw_flow *ovs_flow_alloc(void) | |
71 | { | |
72 | struct sw_flow *flow; | |
73 | ||
74 | flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); | |
75 | if (!flow) | |
76 | return ERR_PTR(-ENOMEM); | |
77 | ||
78 | spin_lock_init(&flow->lock); | |
79 | flow->sf_acts = NULL; | |
80 | flow->mask = NULL; | |
81 | ||
82 | return flow; | |
83 | } | |
84 | ||
85 | static struct flex_array *alloc_buckets(unsigned int n_buckets) | |
86 | { | |
87 | struct flex_array *buckets; | |
88 | int i, err; | |
89 | ||
90 | buckets = flex_array_alloc(sizeof(struct hlist_head), | |
91 | n_buckets, GFP_KERNEL); | |
92 | if (!buckets) | |
93 | return NULL; | |
94 | ||
95 | err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); | |
96 | if (err) { | |
97 | flex_array_free(buckets); | |
98 | return NULL; | |
99 | } | |
100 | ||
101 | for (i = 0; i < n_buckets; i++) | |
102 | INIT_HLIST_HEAD((struct hlist_head *) | |
103 | flex_array_get(buckets, i)); | |
104 | ||
105 | return buckets; | |
106 | } | |
107 | ||
108 | static void flow_free(struct sw_flow *flow) | |
109 | { | |
110 | kfree((struct sf_flow_acts __force *)flow->sf_acts); | |
111 | kmem_cache_free(flow_cache, flow); | |
112 | } | |
113 | ||
114 | static void rcu_free_flow_callback(struct rcu_head *rcu) | |
115 | { | |
116 | struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); | |
117 | ||
118 | flow_free(flow); | |
119 | } | |
120 | ||
121 | void ovs_flow_free(struct sw_flow *flow, bool deferred) | |
122 | { | |
123 | if (!flow) | |
124 | return; | |
125 | ||
126 | ovs_sw_flow_mask_del_ref(flow->mask, deferred); | |
127 | ||
128 | if (deferred) | |
129 | call_rcu(&flow->rcu, rcu_free_flow_callback); | |
130 | else | |
131 | flow_free(flow); | |
132 | } | |
133 | ||
/* Releases a bucket array; thin wrapper around flex_array_free(). */
static void free_buckets(struct flex_array *buckets)
{
	flex_array_free(buckets);
}
138 | ||
139 | static void __flow_tbl_destroy(struct flow_table *table) | |
140 | { | |
141 | int i; | |
142 | ||
143 | if (table->keep_flows) | |
144 | goto skip_flows; | |
145 | ||
146 | for (i = 0; i < table->n_buckets; i++) { | |
147 | struct sw_flow *flow; | |
148 | struct hlist_head *head = flex_array_get(table->buckets, i); | |
149 | struct hlist_node *n; | |
150 | int ver = table->node_ver; | |
151 | ||
152 | hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { | |
153 | hlist_del(&flow->hash_node[ver]); | |
154 | ovs_flow_free(flow, false); | |
155 | } | |
156 | } | |
157 | ||
158 | BUG_ON(!list_empty(table->mask_list)); | |
159 | kfree(table->mask_list); | |
160 | ||
161 | skip_flows: | |
162 | free_buckets(table->buckets); | |
163 | kfree(table); | |
164 | } | |
165 | ||
166 | static struct flow_table *__flow_tbl_alloc(int new_size) | |
167 | { | |
168 | struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); | |
169 | ||
170 | if (!table) | |
171 | return NULL; | |
172 | ||
173 | table->buckets = alloc_buckets(new_size); | |
174 | ||
175 | if (!table->buckets) { | |
176 | kfree(table); | |
177 | return NULL; | |
178 | } | |
179 | table->n_buckets = new_size; | |
180 | table->count = 0; | |
181 | table->node_ver = 0; | |
182 | table->keep_flows = false; | |
183 | get_random_bytes(&table->hash_seed, sizeof(u32)); | |
184 | table->mask_list = NULL; | |
185 | ||
186 | return table; | |
187 | } | |
188 | ||
189 | struct flow_table *ovs_flow_tbl_alloc(int new_size) | |
190 | { | |
191 | struct flow_table *table = __flow_tbl_alloc(new_size); | |
192 | ||
193 | if (!table) | |
194 | return NULL; | |
195 | ||
196 | table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); | |
197 | if (!table->mask_list) { | |
198 | table->keep_flows = true; | |
199 | __flow_tbl_destroy(table); | |
200 | return NULL; | |
201 | } | |
202 | INIT_LIST_HEAD(table->mask_list); | |
203 | ||
204 | return table; | |
205 | } | |
206 | ||
207 | static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) | |
208 | { | |
209 | struct flow_table *table = container_of(rcu, struct flow_table, rcu); | |
210 | ||
211 | __flow_tbl_destroy(table); | |
212 | } | |
213 | ||
214 | void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) | |
215 | { | |
216 | if (!table) | |
217 | return; | |
218 | ||
219 | if (deferred) | |
220 | call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); | |
221 | else | |
222 | __flow_tbl_destroy(table); | |
223 | } | |
224 | ||
225 | struct sw_flow *ovs_flow_tbl_dump_next(struct flow_table *table, | |
226 | u32 *bucket, u32 *last) | |
227 | { | |
228 | struct sw_flow *flow; | |
229 | struct hlist_head *head; | |
230 | int ver; | |
231 | int i; | |
232 | ||
233 | ver = table->node_ver; | |
234 | while (*bucket < table->n_buckets) { | |
235 | i = 0; | |
236 | head = flex_array_get(table->buckets, *bucket); | |
237 | hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { | |
238 | if (i < *last) { | |
239 | i++; | |
240 | continue; | |
241 | } | |
242 | *last = i + 1; | |
243 | return flow; | |
244 | } | |
245 | (*bucket)++; | |
246 | *last = 0; | |
247 | } | |
248 | ||
249 | return NULL; | |
250 | } | |
251 | ||
252 | static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) | |
253 | { | |
254 | hash = jhash_1word(hash, table->hash_seed); | |
255 | return flex_array_get(table->buckets, | |
256 | (hash & (table->n_buckets - 1))); | |
257 | } | |
258 | ||
259 | static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) | |
260 | { | |
261 | struct hlist_head *head; | |
262 | ||
263 | head = find_bucket(table, flow->hash); | |
264 | hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); | |
265 | ||
266 | table->count++; | |
267 | } | |
268 | ||
269 | static void flow_table_copy_flows(struct flow_table *old, | |
270 | struct flow_table *new) | |
271 | { | |
272 | int old_ver; | |
273 | int i; | |
274 | ||
275 | old_ver = old->node_ver; | |
276 | new->node_ver = !old_ver; | |
277 | ||
278 | /* Insert in new table. */ | |
279 | for (i = 0; i < old->n_buckets; i++) { | |
280 | struct sw_flow *flow; | |
281 | struct hlist_head *head; | |
282 | ||
283 | head = flex_array_get(old->buckets, i); | |
284 | ||
285 | hlist_for_each_entry(flow, head, hash_node[old_ver]) | |
286 | __tbl_insert(new, flow); | |
287 | } | |
288 | ||
289 | new->mask_list = old->mask_list; | |
290 | old->keep_flows = true; | |
291 | } | |
292 | ||
293 | static struct flow_table *__flow_tbl_rehash(struct flow_table *table, | |
294 | int n_buckets) | |
295 | { | |
296 | struct flow_table *new_table; | |
297 | ||
298 | new_table = __flow_tbl_alloc(n_buckets); | |
299 | if (!new_table) | |
300 | return ERR_PTR(-ENOMEM); | |
301 | ||
302 | flow_table_copy_flows(table, new_table); | |
303 | ||
304 | return new_table; | |
305 | } | |
306 | ||
307 | struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) | |
308 | { | |
309 | return __flow_tbl_rehash(table, table->n_buckets); | |
310 | } | |
311 | ||
312 | struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) | |
313 | { | |
314 | return __flow_tbl_rehash(table, table->n_buckets * 2); | |
315 | } | |
316 | ||
317 | static u32 flow_hash(const struct sw_flow_key *key, int key_start, | |
318 | int key_end) | |
319 | { | |
320 | u32 *hash_key = (u32 *)((u8 *)key + key_start); | |
321 | int hash_u32s = (key_end - key_start) >> 2; | |
322 | ||
323 | /* Make sure number of hash bytes are multiple of u32. */ | |
324 | BUILD_BUG_ON(sizeof(long) % sizeof(u32)); | |
325 | ||
326 | return jhash2(hash_key, hash_u32s, 0); | |
327 | } | |
328 | ||
329 | static int flow_key_start(const struct sw_flow_key *key) | |
330 | { | |
331 | if (key->tun_key.ipv4_dst) | |
332 | return 0; | |
333 | else | |
334 | return rounddown(offsetof(struct sw_flow_key, phy), | |
335 | sizeof(long)); | |
336 | } | |
337 | ||
338 | static bool cmp_key(const struct sw_flow_key *key1, | |
339 | const struct sw_flow_key *key2, | |
340 | int key_start, int key_end) | |
341 | { | |
342 | const long *cp1 = (long *)((u8 *)key1 + key_start); | |
343 | const long *cp2 = (long *)((u8 *)key2 + key_start); | |
344 | long diffs = 0; | |
345 | int i; | |
346 | ||
347 | for (i = key_start; i < key_end; i += sizeof(long)) | |
348 | diffs |= *cp1++ ^ *cp2++; | |
349 | ||
350 | return diffs == 0; | |
351 | } | |
352 | ||
353 | static bool flow_cmp_masked_key(const struct sw_flow *flow, | |
354 | const struct sw_flow_key *key, | |
355 | int key_start, int key_end) | |
356 | { | |
357 | return cmp_key(&flow->key, key, key_start, key_end); | |
358 | } | |
359 | ||
360 | bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, | |
361 | struct sw_flow_match *match) | |
362 | { | |
363 | struct sw_flow_key *key = match->key; | |
364 | int key_start = flow_key_start(key); | |
365 | int key_end = match->range.end; | |
366 | ||
367 | return cmp_key(&flow->unmasked_key, key, key_start, key_end); | |
368 | } | |
369 | ||
370 | static struct sw_flow *masked_flow_lookup(struct flow_table *table, | |
371 | const struct sw_flow_key *unmasked, | |
372 | struct sw_flow_mask *mask) | |
373 | { | |
374 | struct sw_flow *flow; | |
375 | struct hlist_head *head; | |
376 | int key_start = mask->range.start; | |
377 | int key_end = mask->range.end; | |
378 | u32 hash; | |
379 | struct sw_flow_key masked_key; | |
380 | ||
381 | ovs_flow_mask_key(&masked_key, unmasked, mask); | |
382 | hash = flow_hash(&masked_key, key_start, key_end); | |
383 | head = find_bucket(table, hash); | |
384 | hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { | |
385 | if (flow->mask == mask && | |
386 | flow_cmp_masked_key(flow, &masked_key, | |
387 | key_start, key_end)) | |
388 | return flow; | |
389 | } | |
390 | return NULL; | |
391 | } | |
392 | ||
393 | struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, | |
394 | const struct sw_flow_key *key) | |
395 | { | |
396 | struct sw_flow *flow = NULL; | |
397 | struct sw_flow_mask *mask; | |
398 | ||
399 | list_for_each_entry_rcu(mask, tbl->mask_list, list) { | |
400 | flow = masked_flow_lookup(tbl, key, mask); | |
401 | if (flow) /* Found */ | |
402 | break; | |
403 | } | |
404 | ||
405 | return flow; | |
406 | } | |
407 | ||
408 | void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) | |
409 | { | |
410 | flow->hash = flow_hash(&flow->key, flow->mask->range.start, | |
411 | flow->mask->range.end); | |
412 | __tbl_insert(table, flow); | |
413 | } | |
414 | ||
415 | void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) | |
416 | { | |
417 | BUG_ON(table->count == 0); | |
418 | hlist_del_rcu(&flow->hash_node[table->node_ver]); | |
419 | table->count--; | |
420 | } | |
421 | ||
422 | struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) | |
423 | { | |
424 | struct sw_flow_mask *mask; | |
425 | ||
426 | mask = kmalloc(sizeof(*mask), GFP_KERNEL); | |
427 | if (mask) | |
428 | mask->ref_count = 0; | |
429 | ||
430 | return mask; | |
431 | } | |
432 | ||
433 | void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask) | |
434 | { | |
435 | mask->ref_count++; | |
436 | } | |
437 | ||
438 | static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) | |
439 | { | |
440 | struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); | |
441 | ||
442 | kfree(mask); | |
443 | } | |
444 | ||
445 | void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) | |
446 | { | |
447 | if (!mask) | |
448 | return; | |
449 | ||
450 | BUG_ON(!mask->ref_count); | |
451 | mask->ref_count--; | |
452 | ||
453 | if (!mask->ref_count) { | |
454 | list_del_rcu(&mask->list); | |
455 | if (deferred) | |
456 | call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); | |
457 | else | |
458 | kfree(mask); | |
459 | } | |
460 | } | |
461 | ||
462 | static bool mask_equal(const struct sw_flow_mask *a, | |
463 | const struct sw_flow_mask *b) | |
464 | { | |
465 | u8 *a_ = (u8 *)&a->key + a->range.start; | |
466 | u8 *b_ = (u8 *)&b->key + b->range.start; | |
467 | ||
468 | return (a->range.end == b->range.end) | |
469 | && (a->range.start == b->range.start) | |
470 | && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); | |
471 | } | |
472 | ||
473 | struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, | |
474 | const struct sw_flow_mask *mask) | |
475 | { | |
476 | struct list_head *ml; | |
477 | ||
478 | list_for_each(ml, tbl->mask_list) { | |
479 | struct sw_flow_mask *m; | |
480 | m = container_of(ml, struct sw_flow_mask, list); | |
481 | if (mask_equal(mask, m)) | |
482 | return m; | |
483 | } | |
484 | ||
485 | return NULL; | |
486 | } | |
487 | ||
488 | /** | |
489 | * add a new mask into the mask list. | |
490 | * The caller needs to make sure that 'mask' is not the same | |
491 | * as any masks that are already on the list. | |
492 | */ | |
493 | void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) | |
494 | { | |
495 | list_add_rcu(&mask->list, tbl->mask_list); | |
496 | } | |
497 | ||
498 | /* Initializes the flow module. | |
499 | * Returns zero if successful or a negative error code. */ | |
500 | int ovs_flow_init(void) | |
501 | { | |
502 | BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); | |
503 | BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); | |
504 | ||
505 | flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, | |
506 | 0, NULL); | |
507 | if (flow_cache == NULL) | |
508 | return -ENOMEM; | |
509 | ||
510 | return 0; | |
511 | } | |
512 | ||
513 | /* Uninitializes the flow module. */ | |
514 | void ovs_flow_exit(void) | |
515 | { | |
516 | kmem_cache_destroy(flow_cache); | |
517 | } |