]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
4aacd02d | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "classifier.h" | |
064af421 BP |
19 | #include <errno.h> |
20 | #include <netinet/in.h> | |
844dff32 | 21 | #include "byte-order.h" |
68d1c8c3 | 22 | #include "dynamic-string.h" |
064af421 BP |
23 | #include "flow.h" |
24 | #include "hash.h" | |
07b37e8f | 25 | #include "odp-util.h" |
d8ae4d67 | 26 | #include "ofp-util.h" |
0b4f2078 | 27 | #include "ovs-thread.h" |
13751fd8 JR |
28 | #include "packets.h" |
29 | #include "vlog.h" | |
30 | ||
31 | VLOG_DEFINE_THIS_MODULE(classifier); | |
064af421 | 32 | |
cabd4c43 JR |
struct trie_node;   /* Opaque trie node; defined later in this file. */

/* Prefix trie for a 'field' */
struct cls_trie {
    const struct mf_field *field; /* Trie field, or NULL. */
    struct trie_node *root;       /* NULL if none. */
};
40 | ||
ec988646 JR |
/* One cached reference to a subtable.  'mask_values', 'tag' and
 * 'max_priority' appear to mirror fields of '*subtable' so lookups can
 * avoid chasing the pointer -- NOTE(review): confirm they are kept in
 * sync wherever the subtable is mutated. */
struct cls_subtable_entry {
    struct cls_subtable *subtable;
    uint32_t *mask_values;
    tag_type tag;
    unsigned int max_priority;
};

/* Growable array of cls_subtable_entry, used as the classifier's
 * priority-ordered subtable cache. */
struct cls_subtable_cache {
    struct cls_subtable_entry *subtables;
    size_t alloc_size;     /* Number of allocated elements. */
    size_t size;           /* One past last valid array element. */
};
53 | ||
627fb667 JR |
enum {
    CLS_MAX_INDICES = 3   /* Maximum number of lookup indices per subtable. */
};
57 | ||
cabd4c43 JR |
/* Internal classifier state; the public 'struct classifier' holds a
 * pointer to one of these (see classifier_init()). */
struct cls_classifier {
    int n_rules;                /* Total number of rules. */
    uint8_t n_flow_segments;
    uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use
                                             * for staged lookup. */
    struct hmap subtables;      /* Contains "struct cls_subtable"s. */
    struct cls_subtable_cache subtables_priority; /* Subtable cache; presumably
                                                   * kept in priority order --
                                                   * confirm against
                                                   * update_subtables_*(). */
    struct hmap partitions;     /* Contains "struct cls_partition"s. */
    struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */
    unsigned int n_tries;       /* Number of valid entries in 'tries'. */
};
69 | ||
70 | /* A set of rules that all have the same fields wildcarded. */ | |
/* A set of rules that all have the same fields wildcarded. */
struct cls_subtable {
    struct hmap_node hmap_node; /* Within struct cls_classifier 'subtables'
                                 * hmap. */
    struct hmap rules;          /* Contains "struct cls_rule"s. */
    struct minimask mask;       /* Wildcards for fields. */
    int n_rules;                /* Number of rules, including duplicates. */
    unsigned int max_priority;  /* Max priority of any rule in the subtable. */
    unsigned int max_count;     /* Count of max_priority rules. */
    tag_type tag;               /* Tag generated from mask for partitioning. */
    uint8_t n_indices;          /* How many indices to use. */
    uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
    struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
    unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */
};
85 | ||
/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
 * field) with tags for the "cls_subtable"s that contain rules that match that
 * metadata value. */
struct cls_partition {
    struct hmap_node hmap_node; /* In struct cls_classifier's 'partitions'
                                 * hmap. */
    ovs_be64 metadata;          /* metadata value for this partition. */
    tag_type tags;              /* OR of each flow's cls_subtable tag. */
    struct tag_tracker tracker; /* Tracks the bits in 'tags'. */
};
96 | ||
627fb667 JR |
/* Internal representation of a rule in a "struct cls_subtable".
 * One cls_match is allocated per cls_rule inserted into the classifier;
 * the two point at each other (see cls_match_alloc()). */
struct cls_match {
    struct cls_rule *cls_rule;  /* The public rule this wraps. */
    struct hindex_node index_nodes[CLS_MAX_INDICES]; /* Within subtable's
                                                      * 'indices'. */
    struct hmap_node hmap_node; /* Within struct cls_subtable 'rules'. */
    unsigned int priority;      /* Larger numbers are higher priorities. */
    struct cls_partition *partition; /* NULL if metadata is wildcarded. */
    struct list list;           /* List of identical, lower-priority rules. */
    struct minimatch match;     /* Matching rule. */
};
cabd4c43 | 108 | |
627fb667 JR |
109 | static struct cls_match * |
110 | cls_match_alloc(struct cls_rule *rule) | |
111 | { | |
112 | struct cls_match *cls_match = xmalloc(sizeof *cls_match); | |
113 | ||
114 | cls_match->cls_rule = rule; | |
115 | minimatch_clone(&cls_match->match, &rule->match); | |
116 | cls_match->priority = rule->priority; | |
117 | rule->cls_match = cls_match; | |
118 | ||
119 | return cls_match; | |
120 | } | |
cabd4c43 | 121 | |
13751fd8 | 122 | struct trie_ctx; |
cabd4c43 | 123 | static struct cls_subtable *find_subtable(const struct cls_classifier *, |
03868246 | 124 | const struct minimask *); |
cabd4c43 | 125 | static struct cls_subtable *insert_subtable(struct cls_classifier *, |
03868246 | 126 | const struct minimask *); |
b5d97350 | 127 | |
cabd4c43 | 128 | static void destroy_subtable(struct cls_classifier *, struct cls_subtable *); |
b5d97350 | 129 | |
cabd4c43 | 130 | static void update_subtables_after_insertion(struct cls_classifier *, |
03868246 JR |
131 | struct cls_subtable *, |
132 | unsigned int new_priority); | |
cabd4c43 | 133 | static void update_subtables_after_removal(struct cls_classifier *, |
03868246 JR |
134 | struct cls_subtable *, |
135 | unsigned int del_priority); | |
4aacd02d | 136 | |
627fb667 JR |
137 | static struct cls_match *find_match_wc(const struct cls_subtable *, |
138 | const struct flow *, struct trie_ctx *, | |
139 | unsigned int n_tries, | |
140 | struct flow_wildcards *); | |
141 | static struct cls_match *find_equal(struct cls_subtable *, | |
142 | const struct miniflow *, uint32_t hash); | |
143 | static struct cls_match *insert_rule(struct cls_classifier *, | |
144 | struct cls_subtable *, struct cls_rule *); | |
b5d97350 | 145 | |
b5d97350 BP |
146 | /* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */ |
147 | #define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \ | |
148 | for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE)) | |
149 | #define FOR_EACH_RULE_IN_LIST_SAFE(RULE, NEXT, HEAD) \ | |
150 | for ((RULE) = (HEAD); \ | |
151 | (RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true); \ | |
152 | (RULE) = (NEXT)) | |
153 | ||
627fb667 JR |
154 | static struct cls_match *next_rule_in_list__(struct cls_match *); |
155 | static struct cls_match *next_rule_in_list(struct cls_match *); | |
13751fd8 JR |
156 | |
157 | static unsigned int minimask_get_prefix_len(const struct minimask *, | |
158 | const struct mf_field *); | |
cabd4c43 | 159 | static void trie_init(struct cls_classifier *, int trie_idx, |
13751fd8 JR |
160 | const struct mf_field *); |
161 | static unsigned int trie_lookup(const struct cls_trie *, const struct flow *, | |
162 | unsigned int *checkbits); | |
163 | ||
164 | static void trie_destroy(struct trie_node *); | |
165 | static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen); | |
166 | static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen); | |
167 | static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs, | |
168 | unsigned int nbits); | |
169 | static bool mask_prefix_bits_set(const struct flow_wildcards *, | |
170 | uint8_t be32ofs, unsigned int nbits); | |
ec988646 JR |
171 | |
172 | static void | |
173 | cls_subtable_cache_init(struct cls_subtable_cache *array) | |
174 | { | |
175 | memset(array, 0, sizeof *array); | |
176 | } | |
177 | ||
178 | static void | |
179 | cls_subtable_cache_destroy(struct cls_subtable_cache *array) | |
180 | { | |
181 | free(array->subtables); | |
182 | memset(array, 0, sizeof *array); | |
183 | } | |
184 | ||
185 | /* Array insertion. */ | |
186 | static void | |
187 | cls_subtable_cache_push_back(struct cls_subtable_cache *array, | |
188 | struct cls_subtable_entry a) | |
189 | { | |
190 | if (array->size == array->alloc_size) { | |
191 | array->subtables = x2nrealloc(array->subtables, &array->alloc_size, | |
192 | sizeof a); | |
193 | } | |
194 | ||
195 | array->subtables[array->size++] = a; | |
196 | } | |
197 | ||
/* Only for rearranging entries in the same cache.
 *
 * Moves the entries [start, end) so that they sit immediately before 'to',
 * shifting the intervening entries to make room (a rotation of the range
 * between 'to' and 'end').  Works in either direction; when 'to' is past
 * 'end', the roles are swapped so only the forward case needs code. */
static inline void
cls_subtable_cache_splice(struct cls_subtable_entry *to,
                          struct cls_subtable_entry *start,
                          struct cls_subtable_entry *end)
{
    if (to > end) {
        /* Same as splicing entries to (start) from [end, to). */
        struct cls_subtable_entry *temp = to;
        to = start; start = end; end = temp;
    }
    if (to < start) {
        /* Rotate one entry at a time: shift [to, start) up by one, then
         * drop the saved entry into the hole at 'to'. */
        while (start != end) {
            struct cls_subtable_entry temp = *start;

            memmove(to + 1, to, (start - to) * sizeof *to);
            *to = temp;
            start++;
        }
    } /* Else nothing to be done. */
}
219 | ||
220 | /* Array removal. */ | |
221 | static inline void | |
222 | cls_subtable_cache_remove(struct cls_subtable_cache *array, | |
223 | struct cls_subtable_entry *elem) | |
224 | { | |
225 | ssize_t size = (&array->subtables[array->size] | |
226 | - (elem + 1)) * sizeof *elem; | |
227 | if (size > 0) { | |
228 | memmove(elem, elem + 1, size); | |
229 | } | |
230 | array->size--; | |
231 | } | |
232 | ||
/* Iterates (SUBTABLE, ITER) over ARRAY in array order.  Note that the
 * loop also terminates early if an entry's 'subtable' pointer is NULL. */
#define CLS_SUBTABLE_CACHE_FOR_EACH(SUBTABLE, ITER, ARRAY)      \
    for (ITER = (ARRAY)->subtables;                             \
         ITER < &(ARRAY)->subtables[(ARRAY)->size]              \
             && OVS_LIKELY(SUBTABLE = ITER->subtable);          \
         ++ITER)
/* Continues an iteration started with CLS_SUBTABLE_CACHE_FOR_EACH from
 * the entry after ITER. */
#define CLS_SUBTABLE_CACHE_FOR_EACH_CONTINUE(SUBTABLE, ITER, ARRAY) \
    for (++ITER;                                                \
         ITER < &(ARRAY)->subtables[(ARRAY)->size]              \
             && OVS_LIKELY(SUBTABLE = ITER->subtable);          \
         ++ITER)
/* Iterates (SUBTABLE, ITER) over ARRAY from last entry to first. */
#define CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE(SUBTABLE, ITER, ARRAY) \
    for (ITER = &(ARRAY)->subtables[(ARRAY)->size];             \
         ITER > (ARRAY)->subtables                              \
             && OVS_LIKELY(SUBTABLE = (--ITER)->subtable);)
247 | ||
3d91d909 JR |
248 | \f |
249 | /* flow/miniflow/minimask/minimatch utilities. | |
250 | * These are only used by the classifier, so place them here to allow | |
251 | * for better optimization. */ | |
252 | ||
/* Returns 'miniflow''s map restricted to the u32 offsets [start, end),
 * clearing all bits outside that range.  Sets '*offset' to the number of
 * miniflow values that precede 'start', i.e. the index into
 * miniflow->values of the first value covered by the returned map. */
static inline uint64_t
miniflow_get_map_in_range(const struct miniflow *miniflow,
                          uint8_t start, uint8_t end, unsigned int *offset)
{
    uint64_t map = miniflow->map;
    *offset = 0;

    if (start > 0) {
        uint64_t msk = (UINT64_C(1) << start) - 1; /* 'start' LSBs set */
        *offset = count_1bits(map & msk);
        map &= ~msk;
    }
    if (end < FLOW_U32S) {
        uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */
        map &= msk;
    }
    return map;
}
271 | ||
/* Returns a hash value for the bits of 'flow' where there are 1-bits in
 * 'mask', given 'basis'.
 *
 * The hash values returned by this function are the same as those returned by
 * miniflow_hash_in_minimask(), only the form of the arguments differ.
 * (Keep the two implementations in sync: same mix order, same final
 * length argument.) */
static inline uint32_t
flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
                      uint32_t basis)
{
    /* Treat the flow as an array of u32 words; 'mask->masks.map' selects
     * which words participate. */
    const uint32_t *flow_u32 = (const uint32_t *)flow;
    const uint32_t *p = mask->masks.values;
    uint32_t hash;
    uint64_t map;

    hash = basis;
    for (map = mask->masks.map; map; map = zero_rightmost_1bit(map)) {
        hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p++);
    }

    /* Finish with the number of bytes hashed. */
    return mhash_finish(hash, (p - mask->masks.values) * 4);
}
293 | ||
/* Returns a hash value for the bits of 'flow' where there are 1-bits in
 * 'mask', given 'basis'.
 *
 * The hash values returned by this function are the same as those returned by
 * flow_hash_in_minimask(), only the form of the arguments differ. */
static inline uint32_t
miniflow_hash_in_minimask(const struct miniflow *flow,
                          const struct minimask *mask, uint32_t basis)
{
    const uint32_t *p = mask->masks.values;
    uint32_t hash = basis;
    uint32_t flow_u32;

    /* Iterate the flow's u32 words selected by the mask's map. */
    MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) {
        hash = mhash_add(hash, flow_u32 & *p++);
    }

    return mhash_finish(hash, (p - mask->masks.values) * 4);
}
313 | ||
/* Returns a hash value for the bits of range [start, end) in 'flow',
 * where there are 1-bits in 'mask', given 'hash'.
 *
 * The hash values returned by this function are the same as those returned by
 * minimatch_hash_range(), only the form of the arguments differ. */
static inline uint32_t
flow_hash_in_minimask_range(const struct flow *flow,
                            const struct minimask *mask,
                            uint8_t start, uint8_t end, uint32_t *basis)
{
    const uint32_t *flow_u32 = (const uint32_t *)flow;
    unsigned int offset;
    uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end,
                                             &offset);
    const uint32_t *p = mask->masks.values + offset;
    uint32_t hash = *basis;

    for (; map; map = zero_rightmost_1bit(map)) {
        hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p++);
    }

    *basis = hash; /* Allow continuation from the unfinished value. */
    /* Note: finished with the cumulative byte count from offset 0, so
     * successive ranges chain to the same value as one full-range call. */
    return mhash_finish(hash, (p - mask->masks.values) * 4);
}
338 | ||
/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask
 * (bitwise OR of each masked word). */
static inline void
flow_wildcards_fold_minimask(struct flow_wildcards *wc,
                             const struct minimask *mask)
{
    flow_union_with_miniflow(&wc->masks, &mask->masks);
}
346 | ||
/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask
 * in range [start, end), leaving words outside the range untouched. */
static inline void
flow_wildcards_fold_minimask_range(struct flow_wildcards *wc,
                                   const struct minimask *mask,
                                   uint8_t start, uint8_t end)
{
    uint32_t *dst_u32 = (uint32_t *)&wc->masks;
    unsigned int offset;
    uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end,
                                             &offset);
    const uint32_t *p = mask->masks.values + offset;

    /* OR each masked word in range into the corresponding wc word. */
    for (; map; map = zero_rightmost_1bit(map)) {
        dst_u32[raw_ctz(map)] |= *p++;
    }
}
364 | ||
/* Returns a hash value for 'flow', given 'basis'.
 *
 * Only nonzero values contribute, and the map of which values were
 * nonzero ('hash_map') is mixed in afterwards, so two miniflows with
 * different maps but the same nonzero data hash the same. */
static inline uint32_t
miniflow_hash(const struct miniflow *flow, uint32_t basis)
{
    const uint32_t *p = flow->values;
    uint32_t hash = basis;
    uint64_t hash_map = 0;
    uint64_t map;

    for (map = flow->map; map; map = zero_rightmost_1bit(map)) {
        if (*p) {
            hash = mhash_add(hash, *p);
            hash_map |= rightmost_1bit(map);
        }
        p++;
    }
    /* Mix in both halves of the 64-bit map of nonzero values. */
    hash = mhash_add(hash, hash_map);
    hash = mhash_add(hash, hash_map >> 32);

    return mhash_finish(hash, p - flow->values);
}
386 | ||
/* Returns a hash value for 'mask', given 'basis'.  (A minimask is a
 * miniflow of mask bits, so this delegates to miniflow_hash().) */
static inline uint32_t
minimask_hash(const struct minimask *mask, uint32_t basis)
{
    return miniflow_hash(&mask->masks, basis);
}
393 | ||
/* Returns a hash value for 'match', given 'basis': the mask hash is used
 * as the basis for hashing the flow, so both parts contribute. */
static inline uint32_t
minimatch_hash(const struct minimatch *match, uint32_t basis)
{
    return miniflow_hash(&match->flow, minimask_hash(&match->mask, basis));
}
400 | ||
/* Returns a hash value for the bits of range [start, end) in 'minimatch',
 * given 'basis'.
 *
 * The hash values returned by this function are the same as those returned by
 * flow_hash_in_minimask_range(), only the form of the arguments differ.
 * Hashes flow values ANDed with the corresponding mask values, and finishes
 * with the cumulative value count so range calls can be chained. */
static inline uint32_t
minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end,
                     uint32_t *basis)
{
    unsigned int offset;
    const uint32_t *p, *q;
    uint32_t hash = *basis;
    int n, i;

    /* 'n' is the number of miniflow values inside [start, end); 'offset'
     * indexes the first of them. */
    n = count_1bits(miniflow_get_map_in_range(&match->mask.masks, start, end,
                                              &offset));
    q = match->mask.masks.values + offset;
    p = match->flow.values + offset;

    for (i = 0; i < n; i++) {
        hash = mhash_add(hash, p[i] & q[i]);
    }
    *basis = hash; /* Allow continuation from the unfinished value. */
    return mhash_finish(hash, (offset + n) * 4);
}
426 | ||
81a76618 BP |
427 | \f |
428 | /* cls_rule. */ | |
b5d97350 | 429 | |
81a76618 | 430 | /* Initializes 'rule' to match packets specified by 'match' at the given |
5cb7a798 BP |
431 | * 'priority'. 'match' must satisfy the invariant described in the comment at |
432 | * the definition of struct match. | |
66642cb4 | 433 | * |
48d28ac1 BP |
434 | * The caller must eventually destroy 'rule' with cls_rule_destroy(). |
435 | * | |
81a76618 BP |
436 | * (OpenFlow uses priorities between 0 and UINT16_MAX, inclusive, but |
437 | * internally Open vSwitch supports a wider range.) */ | |
47284b1f | 438 | void |
81a76618 BP |
439 | cls_rule_init(struct cls_rule *rule, |
440 | const struct match *match, unsigned int priority) | |
47284b1f | 441 | { |
5cb7a798 BP |
442 | minimatch_init(&rule->match, match); |
443 | rule->priority = priority; | |
627fb667 | 444 | rule->cls_match = NULL; |
5cb7a798 BP |
445 | } |
446 | ||
/* Same as cls_rule_init() for initialization from a "struct minimatch"
 * (the match is copied, so the caller keeps ownership of 'match'). */
void
cls_rule_init_from_minimatch(struct cls_rule *rule,
                             const struct minimatch *match,
                             unsigned int priority)
{
    minimatch_clone(&rule->match, match);
    rule->priority = priority;
    rule->cls_match = NULL;  /* Not in any classifier yet. */
}
457 | ||
48d28ac1 BP |
/* Initializes 'dst' as a copy of 'src'.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src)
{
    minimatch_clone(&dst->match, &src->match);
    dst->priority = src->priority;
    dst->cls_match = NULL;  /* The copy is not in any classifier. */
}
468 | ||
b2c1f00b BP |
/* Initializes 'dst' with the data in 'src', destroying 'src'.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_move(struct cls_rule *dst, struct cls_rule *src)
{
    minimatch_move(&dst->match, &src->match);  /* 'src->match' is consumed. */
    dst->priority = src->priority;
    dst->cls_match = NULL;  /* The moved-to rule is not in any classifier. */
}
479 | ||
48d28ac1 BP |
/* Frees memory referenced by 'rule'.  Doesn't free 'rule' itself (it's
 * normally embedded into a larger structure).
 *
 * ('rule' must not currently be in a classifier.) */
void
cls_rule_destroy(struct cls_rule *rule)
{
    /* A non-NULL cls_match means the rule is still inserted somewhere. */
    ovs_assert(!rule->cls_match);
    minimatch_destroy(&rule->match);
}
490 | ||
81a76618 BP |
491 | /* Returns true if 'a' and 'b' match the same packets at the same priority, |
492 | * false if they differ in some way. */ | |
193eb874 BP |
493 | bool |
494 | cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b) | |
495 | { | |
5cb7a798 | 496 | return a->priority == b->priority && minimatch_equal(&a->match, &b->match); |
193eb874 BP |
497 | } |
498 | ||
/* Returns a hash value for 'rule', folding in 'basis'.  Both the match
 * and the priority contribute to the hash. */
uint32_t
cls_rule_hash(const struct cls_rule *rule, uint32_t basis)
{
    return minimatch_hash(&rule->match, hash_int(rule->priority, basis));
}
505 | ||
/* Appends a string describing 'rule' (its match and priority) to 's'. */
void
cls_rule_format(const struct cls_rule *rule, struct ds *s)
{
    minimatch_format(&rule->match, s, rule->priority);
}
3ca1de08 BP |
512 | |
/* Returns true if 'rule' matches every packet, false otherwise.
 * (A rule is a catchall exactly when its mask wildcards every field.) */
bool
cls_rule_is_catchall(const struct cls_rule *rule)
{
    return minimask_is_catchall(&rule->match.mask);
}
064af421 BP |
519 | \f |
/* Initializes 'cls' as a classifier that initially contains no classification
 * rules.  Allocates the internal cls_classifier; classifier_destroy() frees
 * it. */
void
classifier_init(struct classifier *cls_, const uint8_t *flow_segments)
{
    struct cls_classifier *cls = xmalloc(sizeof *cls);

    fat_rwlock_init(&cls_->rwlock);

    cls_->cls = cls;

    cls->n_rules = 0;
    hmap_init(&cls->subtables);
    cls_subtable_cache_init(&cls->subtables_priority);
    hmap_init(&cls->partitions);
    cls->n_flow_segments = 0;
    if (flow_segments) {
        /* Copy up to CLS_MAX_INDICES boundaries; the list is terminated by
         * a value >= FLOW_U32S. */
        while (cls->n_flow_segments < CLS_MAX_INDICES
               && *flow_segments < FLOW_U32S) {
            cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
        }
    }
    cls->n_tries = 0;
}
545 | /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the | |
546 | * caller's responsibility. */ | |
547 | void | |
cabd4c43 | 548 | classifier_destroy(struct classifier *cls_) |
064af421 | 549 | { |
cabd4c43 JR |
550 | if (cls_) { |
551 | struct cls_classifier *cls = cls_->cls; | |
03868246 JR |
552 | struct cls_subtable *partition, *next_partition; |
553 | struct cls_subtable *subtable, *next_subtable; | |
13751fd8 JR |
554 | int i; |
555 | ||
cabd4c43 JR |
556 | fat_rwlock_destroy(&cls_->rwlock); |
557 | if (!cls) { | |
558 | return; | |
559 | } | |
560 | ||
13751fd8 JR |
561 | for (i = 0; i < cls->n_tries; i++) { |
562 | trie_destroy(cls->tries[i].root); | |
563 | } | |
064af421 | 564 | |
03868246 JR |
565 | HMAP_FOR_EACH_SAFE (subtable, next_subtable, hmap_node, |
566 | &cls->subtables) { | |
567 | destroy_subtable(cls, subtable); | |
064af421 | 568 | } |
03868246 | 569 | hmap_destroy(&cls->subtables); |
c906cedf BP |
570 | |
571 | HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node, | |
572 | &cls->partitions) { | |
573 | hmap_remove(&cls->partitions, &partition->hmap_node); | |
574 | free(partition); | |
575 | } | |
576 | hmap_destroy(&cls->partitions); | |
cabd4c43 | 577 | |
ec988646 | 578 | cls_subtable_cache_destroy(&cls->subtables_priority); |
cabd4c43 | 579 | free(cls); |
064af421 BP |
580 | } |
581 | } | |
582 | ||
13751fd8 JR |
/* We use uint64_t as a set for the fields below. */
BUILD_ASSERT_DECL(MFF_N_IDS <= 64);

/* Set the fields for which prefix lookup should be performed.
 * At most CLS_MAX_TRIES fields are used; incompatible and duplicate
 * fields in 'trie_fields' are silently skipped.  Tries beyond the new
 * count are destroyed. */
void
classifier_set_prefix_fields(struct classifier *cls_,
                             const enum mf_field_id *trie_fields,
                             unsigned int n_fields)
{
    struct cls_classifier *cls = cls_->cls;
    uint64_t fields = 0;   /* Set of field ids already given a trie. */
    int i, trie;

    for (i = 0, trie = 0; i < n_fields && trie < CLS_MAX_TRIES; i++) {
        const struct mf_field *field = mf_from_id(trie_fields[i]);
        if (field->flow_be32ofs < 0 || field->n_bits % 32) {
            /* Incompatible field.  This is the only place where we
             * enforce these requirements, but the rest of the trie code
             * depends on the flow_be32ofs to be non-negative and the
             * field length to be a multiple of 32 bits. */
            continue;
        }

        if (fields & (UINT64_C(1) << trie_fields[i])) {
            /* Duplicate field, there is no need to build more than
             * one index for any one field. */
            continue;
        }
        fields |= UINT64_C(1) << trie_fields[i];

        /* Only (re)initialize when the field actually changes. */
        if (trie >= cls->n_tries || field != cls->tries[trie].field) {
            trie_init(cls, trie, field);
        }
        trie++;
    }

    /* Destroy the rest. */
    for (i = trie; i < cls->n_tries; i++) {
        trie_init(cls, i, NULL);
    }
    cls->n_tries = trie;
}
625 | ||
/* (Re)initializes trie number 'trie_idx' in 'cls' to index 'field'
 * (or to nothing if 'field' is NULL), destroying any previous trie at
 * that index and inserting all existing rules that mask 'field'.
 * Also records each subtable's prefix length for this trie. */
static void
trie_init(struct cls_classifier *cls, int trie_idx,
          const struct mf_field *field)
{
    struct cls_trie *trie = &cls->tries[trie_idx];
    struct cls_subtable *subtable;
    struct cls_subtable_entry *iter;

    /* Only destroy a root that was actually in use. */
    if (trie_idx < cls->n_tries) {
        trie_destroy(trie->root);
    }
    trie->root = NULL;
    trie->field = field;

    /* Add existing rules to the trie. */
    CLS_SUBTABLE_CACHE_FOR_EACH (subtable, iter, &cls->subtables_priority) {
        unsigned int plen;

        plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
        /* Initialize subtable's prefix length on this field. */
        subtable->trie_plen[trie_idx] = plen;

        if (plen) {
            struct cls_match *head;

            HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
                struct cls_match *match;

                /* Insert every rule, including the lower-priority
                 * duplicates chained off 'head'. */
                FOR_EACH_RULE_IN_LIST (match, head) {
                    trie_insert(trie, match->cls_rule, plen);
                }
            }
        }
    }
}
661 | ||
/* Returns true if 'cls' contains no classification rules, false otherwise. */
bool
classifier_is_empty(const struct classifier *cls)
{
    return cls->cls->n_rules == 0;
}
668 | ||
/* Returns the number of rules in 'cls'. */
int
classifier_count(const struct classifier *cls)
{
    return cls->cls->n_rules;
}
675 | ||
c906cedf BP |
/* Hashes an OpenFlow metadata value for the 'partitions' hmap.
 * The cast strips the big-endian annotation; byte order does not matter
 * for hashing as long as it is consistent. */
static uint32_t
hash_metadata(ovs_be64 metadata_)
{
    uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
    return hash_uint64(metadata);
}
682 | ||
683 | static struct cls_partition * | |
cabd4c43 JR |
684 | find_partition(const struct cls_classifier *cls, ovs_be64 metadata, |
685 | uint32_t hash) | |
c906cedf BP |
686 | { |
687 | struct cls_partition *partition; | |
688 | ||
689 | HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) { | |
690 | if (partition->metadata == metadata) { | |
691 | return partition; | |
692 | } | |
693 | } | |
694 | ||
695 | return NULL; | |
696 | } | |
697 | ||
/* Returns the partition for 'metadata' in 'cls', creating it if it does
 * not exist yet, and adds 'subtable''s tag to the partition's tag set. */
static struct cls_partition *
create_partition(struct cls_classifier *cls, struct cls_subtable *subtable,
                 ovs_be64 metadata)
{
    uint32_t hash = hash_metadata(metadata);
    struct cls_partition *partition = find_partition(cls, metadata, hash);
    if (!partition) {
        partition = xmalloc(sizeof *partition);
        partition->metadata = metadata;
        partition->tags = 0;
        tag_tracker_init(&partition->tracker);
        hmap_insert(&cls->partitions, &partition->hmap_node, hash);
    }
    /* Record the subtable's tag whether the partition is new or not. */
    tag_tracker_add(&partition->tracker, &partition->tags, subtable->tag);
    return partition;
}
714 | ||
b5d97350 BP |
/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * If 'cls' already contains an identical rule (including wildcards, values of
 * fixed fields, and priority), replaces the old rule by 'rule' and returns the
 * rule that was replaced.  The caller takes ownership of the returned rule and
 * is thus responsible for destroying it with cls_rule_destroy(), freeing the
 * memory block in which it resides, etc., as necessary.
 *
 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 * inserting the new rule.  In this case, no rules are displaced by the new
 * rule, even rules that cannot have any effect because the new rule matches a
 * superset of their flows and has higher priority. */
struct cls_rule *
classifier_replace(struct classifier *cls_, struct cls_rule *rule)
{
    struct cls_classifier *cls = cls_->cls;
    struct cls_match *old_rule;
    struct cls_subtable *subtable;

    /* Find (or create) the subtable for this rule's wildcard mask. */
    subtable = find_subtable(cls, &rule->match.mask);
    if (!subtable) {
        subtable = insert_subtable(cls, &rule->match.mask);
    }

    /* insert_rule() sets rule->cls_match and returns the displaced match,
     * if any. */
    old_rule = insert_rule(cls, subtable, rule);
    if (!old_rule) {
        int i;

        rule->cls_match->partition = NULL;
        /* Only exact-match metadata participates in partitioning. */
        if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
            ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
            rule->cls_match->partition = create_partition(cls, subtable,
                                                          metadata);
        }

        subtable->n_rules++;
        cls->n_rules++;

        /* Add the rule to every trie whose field this subtable masks. */
        for (i = 0; i < cls->n_tries; i++) {
            if (subtable->trie_plen[i]) {
                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
            }
        }
        return NULL;
    } else {
        struct cls_rule *old_cls_rule = old_rule->cls_rule;

        /* The new rule inherits the old rule's partition; the old
         * cls_match is freed and its public rule returned to the caller. */
        rule->cls_match->partition = old_rule->partition;
        old_cls_rule->cls_match = NULL;
        free(old_rule);
        return old_cls_rule;
    }
}
769 | ||
08944c1d BP |
/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * 'cls' must not contain an identical rule (including wildcards, values of
 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 * such a rule. */
void
classifier_insert(struct classifier *cls, struct cls_rule *rule)
{
    struct cls_rule *displaced_rule = classifier_replace(cls, rule);
    /* Asserting here enforces the "no identical rule" precondition. */
    ovs_assert(!displaced_rule);
}
782 | ||
/* Removes 'rule' from 'cls'.  It is the caller's responsibility to destroy
 * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
 * resides, etc., as necessary. */
void
classifier_remove(struct classifier *cls_, struct cls_rule *rule)
{
    struct cls_classifier *cls = cls_->cls;
    struct cls_partition *partition;
    struct cls_match *cls_match = rule->cls_match;
    struct cls_match *head;
    struct cls_subtable *subtable;
    int i;

    ovs_assert(cls_match);

    subtable = find_subtable(cls, &rule->match.mask);

    ovs_assert(subtable);

    /* Remove 'rule' from each prefix trie that indexes this subtable's
     * field (trie_plen[i] == 0 means the trie field is irrelevant here). */
    for (i = 0; i < cls->n_tries; i++) {
        if (subtable->trie_plen[i]) {
            trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
        }
    }

    /* Remove rule node from indices. */
    for (i = 0; i < subtable->n_indices; i++) {
        hindex_remove(&subtable->indices[i], &cls_match->index_nodes[i]);
    }

    /* Unlink 'cls_match' from the list of rules with the same match.  Only
     * the list head lives in the subtable's hmap, so when removing the head
     * either delete the hmap node (list now empty) or promote the next list
     * element into the hmap in its place. */
    head = find_equal(subtable, &rule->match.flow, cls_match->hmap_node.hash);
    if (head != cls_match) {
        list_remove(&cls_match->list);
    } else if (list_is_empty(&cls_match->list)) {
        hmap_remove(&subtable->rules, &cls_match->hmap_node);
    } else {
        struct cls_match *next = CONTAINER_OF(cls_match->list.next,
                                              struct cls_match, list);

        list_remove(&cls_match->list);
        hmap_replace(&subtable->rules, &cls_match->hmap_node,
                     &next->hmap_node);
    }

    /* Drop this subtable's tag from the rule's metadata partition; free the
     * partition once no tags reference it any longer. */
    partition = cls_match->partition;
    if (partition) {
        tag_tracker_subtract(&partition->tracker, &partition->tags,
                             subtable->tag);
        if (!partition->tags) {
            hmap_remove(&cls->partitions, &partition->hmap_node);
            free(partition);
        }
    }

    /* Destroy the subtable when its last rule is gone; otherwise its cached
     * max priority may need recomputing. */
    if (--subtable->n_rules == 0) {
        destroy_subtable(cls, subtable);
    } else {
        update_subtables_after_removal(cls, subtable, cls_match->priority);
    }

    cls->n_rules--;

    rule->cls_match = NULL;
    free(cls_match);
}
848 | ||
/* Prefix tree context.  Valid when 'lookup_done' is true.  Can skip all
 * subtables which have more than 'match_plen' bits in their corresponding
 * field at offset 'be32ofs'.  If skipped, 'maskbits' prefix bits should be
 * unwildcarded to guarantee datapath flow matches only packets it should. */
struct trie_ctx {
    const struct cls_trie *trie;
    bool lookup_done;        /* Status of the lookup. */
    uint8_t be32ofs;         /* U32 offset of the field in question. */
    unsigned int match_plen; /* Longest prefix that could possibly match. */
    unsigned int maskbits;   /* Prefix length needed to avoid false matches. */
};
860 | ||
861 | static void | |
862 | trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) | |
863 | { | |
864 | ctx->trie = trie; | |
865 | ctx->be32ofs = trie->field->flow_be32ofs; | |
866 | ctx->lookup_done = false; | |
867 | } | |
868 | ||
/* Issues cache prefetches for the subtable referenced by 'subtables': the
 * subtable structure itself and the start of its mask values, so they are
 * likely resident by the time the lookup loop reaches them. */
static inline void
lookahead_subtable(const struct cls_subtable_entry *subtables)
{
    ovs_prefetch_range(subtables->subtable, sizeof *subtables->subtable);
    ovs_prefetch_range(subtables->mask_values, 1);
}
875 | ||
/* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
 * Returns a null pointer if no rules in 'cls' match 'flow'.  If multiple rules
 * of equal priority match 'flow', returns one arbitrarily.
 *
 * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
 * set of bits that were significant in the lookup.  At some point
 * earlier, 'wc' should have been initialized (e.g., by
 * flow_wildcards_init_catchall()). */
struct cls_rule *
classifier_lookup(const struct classifier *cls_, const struct flow *flow,
                  struct flow_wildcards *wc)
{
    struct cls_classifier *cls = cls_->cls;
    const struct cls_partition *partition;
    tag_type tags;
    struct cls_match *best;
    struct trie_ctx trie_ctx[CLS_MAX_TRIES];
    int i;
    struct cls_subtable_entry *subtables = cls->subtables_priority.subtables;
    int n_subtables = cls->subtables_priority.size;
    int64_t best_priority = -1;  /* Below any real (unsigned) priority. */

    /* Prefetch the subtables array. */
    ovs_prefetch_range(subtables, n_subtables * sizeof *subtables);

    /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
     * then 'flow' cannot possibly match in 'subtable':
     *
     *     - If flow->metadata maps to a given 'partition', then we can use
     *       'tags' for 'partition->tags'.
     *
     *     - If flow->metadata has no partition, then no rule in 'cls' has an
     *       exact-match for flow->metadata.  That means that we don't need to
     *       search any subtable that includes flow->metadata in its mask.
     *
     * In either case, we always need to search any cls_subtables that do not
     * include flow->metadata in its mask.  One way to do that would be to
     * check the "cls_subtable"s explicitly for that, but that would require an
     * extra branch per subtable.  Instead, we mark such a cls_subtable's
     * 'tags' as TAG_ALL and make sure that 'tags' is never empty.  This means
     * that 'tags' always intersects such a cls_subtable's 'tags', so we don't
     * need a special case.
     */
    partition = (hmap_is_empty(&cls->partitions)
                 ? NULL
                 : find_partition(cls, flow->metadata,
                                  hash_metadata(flow->metadata)));
    tags = partition ? partition->tags : TAG_ARBITRARY;

    /* Initialize trie contexts for match_find_wc(). */
    for (i = 0; i < cls->n_tries; i++) {
        trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
    }

    /* Prefetch the first subtables. */
    if (n_subtables > 1) {
        lookahead_subtable(subtables);
        lookahead_subtable(subtables + 1);
    }

    best = NULL;
    for (i = 0; OVS_LIKELY(i < n_subtables); i++) {
        struct cls_match *rule;

        if ((int64_t)subtables[i].max_priority <= best_priority) {
            /* Subtables are in descending priority order,
             * can not find anything better. */
            break;
        }

        /* Prefetch a forthcoming subtable. */
        if (i + 2 < n_subtables) {
            lookahead_subtable(&subtables[i + 2]);
        }

        /* Skip subtables whose tag proves no rule can match this flow's
         * metadata (see the partition comment above). */
        if (!tag_intersects(tags, subtables[i].tag)) {
            continue;
        }

        rule = find_match_wc(subtables[i].subtable, flow, trie_ctx,
                             cls->n_tries, wc);
        if (rule && (int64_t)rule->priority > best_priority) {
            best_priority = (int64_t)rule->priority;
            best = rule;
        }
    }

    return best ? best->cls_rule : NULL;
}
965 | ||
2abf78ff JR |
966 | /* Returns true if 'target' satisifies 'match', that is, if each bit for which |
967 | * 'match' specifies a particular value has the correct value in 'target'. */ | |
968 | static bool | |
969 | minimatch_matches_miniflow(const struct minimatch *match, | |
970 | const struct miniflow *target) | |
971 | { | |
972 | const uint32_t *flowp = (const uint32_t *)match->flow.values; | |
973 | const uint32_t *maskp = (const uint32_t *)match->mask.masks.values; | |
974 | uint32_t target_u32; | |
975 | ||
976 | MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, match->mask.masks.map) { | |
977 | if ((*flowp++ ^ target_u32) & *maskp++) { | |
978 | return false; | |
979 | } | |
980 | } | |
981 | ||
982 | return true; | |
983 | } | |
984 | ||
627fb667 | 985 | static inline struct cls_match * |
2abf78ff JR |
986 | find_match_miniflow(const struct cls_subtable *subtable, |
987 | const struct miniflow *flow, | |
988 | uint32_t hash) | |
989 | { | |
627fb667 | 990 | struct cls_match *rule; |
2abf78ff JR |
991 | |
992 | HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) { | |
993 | if (minimatch_matches_miniflow(&rule->match, flow)) { | |
994 | return rule; | |
995 | } | |
996 | } | |
997 | ||
998 | return NULL; | |
999 | } | |
1000 | ||
/* Finds and returns the highest-priority rule in 'cls' that matches
 * 'miniflow'.  Returns a null pointer if no rules in 'cls' match 'flow'.
 * If multiple rules of equal priority match 'flow', returns one arbitrarily.
 *
 * This function is optimized for the userspace datapath, which only ever has
 * one priority value for its flows!
 */
struct cls_rule *classifier_lookup_miniflow_first(const struct classifier *cls_,
                                                  const struct miniflow *flow)
{
    struct cls_classifier *cls = cls_->cls;
    struct cls_subtable *subtable;
    struct cls_subtable_entry *iter;

    /* Subtable cache is in descending max-priority order, so the first hit
     * can be returned immediately. */
    CLS_SUBTABLE_CACHE_FOR_EACH (subtable, iter, &cls->subtables_priority) {
        struct cls_match *rule;

        rule = find_match_miniflow(subtable, flow,
                                   miniflow_hash_in_minimask(flow,
                                                             &subtable->mask,
                                                             0));
        if (rule) {
            return rule->cls_rule;
        }
    }

    return NULL;
}
1029 | ||
b5d97350 BP |
1030 | /* Finds and returns a rule in 'cls' with exactly the same priority and |
1031 | * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
c084ce1d | 1032 | * contain an exact match. */ |
064af421 | 1033 | struct cls_rule * |
cabd4c43 | 1034 | classifier_find_rule_exactly(const struct classifier *cls_, |
76ecc721 | 1035 | const struct cls_rule *target) |
064af421 | 1036 | { |
cabd4c43 | 1037 | struct cls_classifier *cls = cls_->cls; |
627fb667 | 1038 | struct cls_match *head, *rule; |
03868246 | 1039 | struct cls_subtable *subtable; |
064af421 | 1040 | |
03868246 JR |
1041 | subtable = find_subtable(cls, &target->match.mask); |
1042 | if (!subtable) { | |
b5d97350 | 1043 | return NULL; |
064af421 BP |
1044 | } |
1045 | ||
4d935a6b | 1046 | /* Skip if there is no hope. */ |
03868246 | 1047 | if (target->priority > subtable->max_priority) { |
4d935a6b JR |
1048 | return NULL; |
1049 | } | |
1050 | ||
03868246 | 1051 | head = find_equal(subtable, &target->match.flow, |
5cb7a798 BP |
1052 | miniflow_hash_in_minimask(&target->match.flow, |
1053 | &target->match.mask, 0)); | |
b5d97350 BP |
1054 | FOR_EACH_RULE_IN_LIST (rule, head) { |
1055 | if (target->priority >= rule->priority) { | |
627fb667 | 1056 | return target->priority == rule->priority ? rule->cls_rule : NULL; |
064af421 BP |
1057 | } |
1058 | } | |
1059 | return NULL; | |
1060 | } | |
1061 | ||
81a76618 BP |
1062 | /* Finds and returns a rule in 'cls' with priority 'priority' and exactly the |
1063 | * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
1064 | * contain an exact match. */ | |
1065 | struct cls_rule * | |
1066 | classifier_find_match_exactly(const struct classifier *cls, | |
1067 | const struct match *target, | |
1068 | unsigned int priority) | |
1069 | { | |
1070 | struct cls_rule *retval; | |
1071 | struct cls_rule cr; | |
1072 | ||
1073 | cls_rule_init(&cr, target, priority); | |
1074 | retval = classifier_find_rule_exactly(cls, &cr); | |
48d28ac1 | 1075 | cls_rule_destroy(&cr); |
81a76618 BP |
1076 | |
1077 | return retval; | |
1078 | } | |
1079 | ||
/* Checks if 'target' would overlap any other rule in 'cls'.  Two rules are
 * considered to overlap if both rules have the same priority and a packet
 * could match both. */
bool
classifier_rule_overlaps(const struct classifier *cls_,
                         const struct cls_rule *target)
{
    struct cls_classifier *cls = cls_->cls;
    struct cls_subtable *subtable;
    struct cls_subtable_entry *iter;

    /* Iterate subtables in the descending max priority order. */
    CLS_SUBTABLE_CACHE_FOR_EACH (subtable, iter, &cls->subtables_priority) {
        uint32_t storage[FLOW_U32S];
        struct minimask mask;
        struct cls_match *head;

        if (target->priority > iter->max_priority) {
            break; /* Can skip this and the rest of the subtables. */
        }

        /* Overlap test needs only the bits both masks care about. */
        minimask_combine(&mask, &target->match.mask, &subtable->mask, storage);
        HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
            struct cls_match *rule;

            FOR_EACH_RULE_IN_LIST (rule, head) {
                if (rule->priority < target->priority) {
                    break; /* Rules in descending priority order. */
                }
                if (rule->priority == target->priority
                    && miniflow_equal_in_minimask(&target->match.flow,
                                                  &rule->match.flow, &mask)) {
                    return true;
                }
            }
        }
    }

    return false;
}
6ceeaa92 BP |
1120 | |
1121 | /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more | |
1122 | * specific than 'criteria'. That is, 'rule' matches 'criteria' and this | |
1123 | * function returns true if, for every field: | |
1124 | * | |
1125 | * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the | |
1126 | * field, or | |
1127 | * | |
1128 | * - 'criteria' wildcards the field, | |
1129 | * | |
1130 | * Conversely, 'rule' does not match 'criteria' and this function returns false | |
1131 | * if, for at least one field: | |
1132 | * | |
1133 | * - 'criteria' and 'rule' specify different values for the field, or | |
1134 | * | |
1135 | * - 'criteria' specifies a value for the field but 'rule' wildcards it. | |
1136 | * | |
1137 | * Equivalently, the truth table for whether a field matches is: | |
1138 | * | |
1139 | * rule | |
1140 | * | |
1141 | * c wildcard exact | |
1142 | * r +---------+---------+ | |
1143 | * i wild | yes | yes | | |
1144 | * t card | | | | |
1145 | * e +---------+---------+ | |
1146 | * r exact | no |if values| | |
1147 | * i | |are equal| | |
1148 | * a +---------+---------+ | |
1149 | * | |
1150 | * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD | |
1151 | * commands and by OpenFlow 1.0 aggregate and flow stats. | |
1152 | * | |
81a76618 | 1153 | * Ignores rule->priority. */ |
6ceeaa92 BP |
1154 | bool |
1155 | cls_rule_is_loose_match(const struct cls_rule *rule, | |
5cb7a798 | 1156 | const struct minimatch *criteria) |
6ceeaa92 | 1157 | { |
5cb7a798 BP |
1158 | return (!minimask_has_extra(&rule->match.mask, &criteria->mask) |
1159 | && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow, | |
1160 | &criteria->mask)); | |
6ceeaa92 | 1161 | } |
b5d97350 | 1162 | \f |
5ecc9d81 BP |
1163 | /* Iteration. */ |
1164 | ||
1165 | static bool | |
627fb667 | 1166 | rule_matches(const struct cls_match *rule, const struct cls_rule *target) |
5ecc9d81 BP |
1167 | { |
1168 | return (!target | |
5cb7a798 BP |
1169 | || miniflow_equal_in_minimask(&rule->match.flow, |
1170 | &target->match.flow, | |
1171 | &target->match.mask)); | |
5ecc9d81 BP |
1172 | } |
1173 | ||
627fb667 | 1174 | static struct cls_match * |
03868246 JR |
1175 | search_subtable(const struct cls_subtable *subtable, |
1176 | const struct cls_rule *target) | |
5ecc9d81 | 1177 | { |
03868246 | 1178 | if (!target || !minimask_has_extra(&subtable->mask, &target->match.mask)) { |
627fb667 | 1179 | struct cls_match *rule; |
5ecc9d81 | 1180 | |
03868246 | 1181 | HMAP_FOR_EACH (rule, hmap_node, &subtable->rules) { |
5ecc9d81 BP |
1182 | if (rule_matches(rule, target)) { |
1183 | return rule; | |
1184 | } | |
1185 | } | |
1186 | } | |
1187 | return NULL; | |
1188 | } | |
1189 | ||
6ceeaa92 | 1190 | /* Initializes 'cursor' for iterating through rules in 'cls': |
5ecc9d81 | 1191 | * |
6ceeaa92 | 1192 | * - If 'target' is null, the cursor will visit every rule in 'cls'. |
5ecc9d81 | 1193 | * |
6ceeaa92 BP |
1194 | * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls' |
1195 | * such that cls_rule_is_loose_match(rule, target) returns true. | |
5ecc9d81 | 1196 | * |
6ceeaa92 | 1197 | * Ignores target->priority. */ |
5ecc9d81 BP |
1198 | void |
1199 | cls_cursor_init(struct cls_cursor *cursor, const struct classifier *cls, | |
1200 | const struct cls_rule *target) | |
1201 | { | |
cabd4c43 | 1202 | cursor->cls = cls->cls; |
3ca1de08 | 1203 | cursor->target = target && !cls_rule_is_catchall(target) ? target : NULL; |
5ecc9d81 BP |
1204 | } |
1205 | ||
1206 | /* Returns the first matching cls_rule in 'cursor''s iteration, or a null | |
1207 | * pointer if there are no matches. */ | |
1208 | struct cls_rule * | |
1209 | cls_cursor_first(struct cls_cursor *cursor) | |
1210 | { | |
03868246 | 1211 | struct cls_subtable *subtable; |
5ecc9d81 | 1212 | |
03868246 | 1213 | HMAP_FOR_EACH (subtable, hmap_node, &cursor->cls->subtables) { |
627fb667 | 1214 | struct cls_match *rule = search_subtable(subtable, cursor->target); |
5ecc9d81 | 1215 | if (rule) { |
03868246 | 1216 | cursor->subtable = subtable; |
627fb667 | 1217 | return rule->cls_rule; |
5ecc9d81 BP |
1218 | } |
1219 | } | |
1220 | ||
1221 | return NULL; | |
1222 | } | |
1223 | ||
/* Returns the next matching cls_rule in 'cursor''s iteration, or a null
 * pointer if there are no more matches. */
struct cls_rule *
cls_cursor_next(struct cls_cursor *cursor, const struct cls_rule *rule_)
{
    struct cls_match *rule = CONST_CAST(struct cls_match *, rule_->cls_match);
    const struct cls_subtable *subtable;
    struct cls_match *next;

    /* First try the list of rules with the same match but lower priority. */
    next = next_rule_in_list__(rule);
    if (next->priority < rule->priority) {
        return next->cls_rule;
    }

    /* 'next' is the head of the list, that is, the rule that is included in
     * the subtable's hmap.  (This is important when the classifier contains
     * rules that differ only in priority.) */
    rule = next;
    HMAP_FOR_EACH_CONTINUE (rule, hmap_node, &cursor->subtable->rules) {
        if (rule_matches(rule, cursor->target)) {
            return rule->cls_rule;
        }
    }

    /* Current subtable exhausted: continue with the remaining subtables. */
    subtable = cursor->subtable;
    HMAP_FOR_EACH_CONTINUE (subtable, hmap_node, &cursor->cls->subtables) {
        rule = search_subtable(subtable, cursor->target);
        if (rule) {
            cursor->subtable = subtable;
            return rule->cls_rule;
        }
    }

    return NULL;
}
1259 | \f | |
03868246 | 1260 | static struct cls_subtable * |
cabd4c43 | 1261 | find_subtable(const struct cls_classifier *cls, const struct minimask *mask) |
b5d97350 | 1262 | { |
03868246 | 1263 | struct cls_subtable *subtable; |
064af421 | 1264 | |
03868246 JR |
1265 | HMAP_FOR_EACH_IN_BUCKET (subtable, hmap_node, minimask_hash(mask, 0), |
1266 | &cls->subtables) { | |
1267 | if (minimask_equal(mask, &subtable->mask)) { | |
1268 | return subtable; | |
064af421 BP |
1269 | } |
1270 | } | |
b5d97350 | 1271 | return NULL; |
064af421 | 1272 | } |
064af421 | 1273 | |
/* Creates a new subtable for 'mask' in 'cls', setting up its segmented-lookup
 * indices, metadata tag, trie prefix lengths, and its entry in the
 * priority-ordered subtable cache.  Returns the new subtable. */
static struct cls_subtable *
insert_subtable(struct cls_classifier *cls, const struct minimask *mask)
{
    uint32_t hash = minimask_hash(mask, 0);
    struct cls_subtable *subtable;
    int i, index = 0;
    struct flow_wildcards old, new;
    uint8_t prev;
    struct cls_subtable_entry elem;

    subtable = xzalloc(sizeof *subtable);
    hmap_init(&subtable->rules);
    minimask_clone(&subtable->mask, mask);

    /* Init indices for segmented lookup, if any. */
    flow_wildcards_init_catchall(&new);
    old = new;
    prev = 0;
    for (i = 0; i < cls->n_flow_segments; i++) {
        flow_wildcards_fold_minimask_range(&new, mask, prev,
                                           cls->flow_segments[i]);
        /* Add an index if it adds mask bits. */
        if (!flow_wildcards_equal(&new, &old)) {
            hindex_init(&subtable->indices[index]);
            subtable->index_ofs[index] = cls->flow_segments[i];
            index++;
            old = new;
        }
        prev = cls->flow_segments[i];
    }
    /* Check if the rest of the subtable's mask adds any bits,
     * and remove the last index if it doesn't. */
    if (index > 0) {
        flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S);
        if (flow_wildcards_equal(&new, &old)) {
            --index;
            subtable->index_ofs[index] = 0;
            hindex_destroy(&subtable->indices[index]);
        }
    }
    subtable->n_indices = index;

    /* A deterministic tag only helps when the mask pins metadata exactly;
     * otherwise every partition must search this subtable (TAG_ALL). */
    subtable->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
                     ? tag_create_deterministic(hash)
                     : TAG_ALL);

    for (i = 0; i < cls->n_tries; i++) {
        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
                                                         cls->tries[i].field);
    }

    /* Publish the subtable in both the hmap and the priority cache. */
    hmap_insert(&cls->subtables, &subtable->hmap_node, hash);
    elem.subtable = subtable;
    elem.mask_values = subtable->mask.masks.values;
    elem.tag = subtable->tag;
    elem.max_priority = subtable->max_priority;
    cls_subtable_cache_push_back(&cls->subtables_priority, elem);

    return subtable;
}
1334 | ||
/* Removes 'subtable' from 'cls' (both the priority cache and the hmap) and
 * frees all of its storage.  The caller must ensure it holds no rules. */
static void
destroy_subtable(struct cls_classifier *cls, struct cls_subtable *subtable)
{
    int i;
    struct cls_subtable *table = NULL;
    struct cls_subtable_entry *iter;

    /* Locate and drop the subtable's entry in the priority-ordered cache. */
    CLS_SUBTABLE_CACHE_FOR_EACH (table, iter, &cls->subtables_priority) {
        if (table == subtable) {
            cls_subtable_cache_remove(&cls->subtables_priority, iter);
            break;
        }
    }

    for (i = 0; i < subtable->n_indices; i++) {
        hindex_destroy(&subtable->indices[i]);
    }
    minimask_destroy(&subtable->mask);
    hmap_remove(&cls->subtables, &subtable->hmap_node);
    hmap_destroy(&subtable->rules);
    free(subtable);
}
064af421 | 1357 | |
/* This function performs the following updates for 'subtable' in 'cls'
 * following the addition of a new rule with priority 'new_priority' to
 * 'subtable':
 *
 *    - Update 'subtable->max_priority' and 'subtable->max_count' if necessary.
 *
 *    - Update 'subtable''s position in 'cls->subtables_priority' if necessary.
 *
 * This function should only be called after adding a new rule, not after
 * replacing a rule by an identical one or modifying a rule in-place. */
static void
update_subtables_after_insertion(struct cls_classifier *cls,
                                 struct cls_subtable *subtable,
                                 unsigned int new_priority)
{
    if (new_priority == subtable->max_priority) {
        ++subtable->max_count;
    } else if (new_priority > subtable->max_priority) {
        struct cls_subtable *table;
        struct cls_subtable_entry *iter, *subtable_iter = NULL;

        subtable->max_priority = new_priority;
        subtable->max_count = 1;

        /* Possibly move 'subtable' earlier in the priority list.  If we break
         * out of the loop, then 'subtable_iter' should be moved just before
         * 'iter'.  If the loop terminates normally, then 'iter' will be the
         * first list element and we'll move subtable just before that
         * (e.g. to the front of the list). */
        CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE (table, iter, &cls->subtables_priority) {
            if (table == subtable) {
                subtable_iter = iter; /* Locate the subtable as we go. */
                iter->max_priority = new_priority;
            } else if (table->max_priority >= new_priority) {
                ovs_assert(subtable_iter != NULL);
                iter++;
                break;
            }
        }

        /* Move 'subtable' just before 'iter' (unless it's already there). */
        if (iter != subtable_iter) {
            cls_subtable_cache_splice(iter, subtable_iter, subtable_iter + 1);
        }
    }
}
1404 | ||
03868246 JR |
1405 | /* This function performs the following updates for 'subtable' in 'cls' |
1406 | * following the deletion of a rule with priority 'del_priority' from | |
1407 | * 'subtable': | |
4aacd02d | 1408 | * |
03868246 | 1409 | * - Update 'subtable->max_priority' and 'subtable->max_count' if necessary. |
4aacd02d | 1410 | * |
03868246 | 1411 | * - Update 'subtable''s position in 'cls->subtables_priority' if necessary. |
4aacd02d BP |
1412 | * |
1413 | * This function should only be called after removing a rule, not after | |
1414 | * replacing a rule by an identical one or modifying a rule in-place. */ | |
1415 | static void | |
cabd4c43 | 1416 | update_subtables_after_removal(struct cls_classifier *cls, |
03868246 JR |
1417 | struct cls_subtable *subtable, |
1418 | unsigned int del_priority) | |
4aacd02d | 1419 | { |
03868246 | 1420 | if (del_priority == subtable->max_priority && --subtable->max_count == 0) { |
627fb667 | 1421 | struct cls_match *head; |
ec988646 JR |
1422 | struct cls_subtable *table; |
1423 | struct cls_subtable_entry *iter, *subtable_iter = NULL; | |
4aacd02d | 1424 | |
03868246 JR |
1425 | subtable->max_priority = 0; |
1426 | HMAP_FOR_EACH (head, hmap_node, &subtable->rules) { | |
1427 | if (head->priority > subtable->max_priority) { | |
1428 | subtable->max_priority = head->priority; | |
1429 | subtable->max_count = 1; | |
1430 | } else if (head->priority == subtable->max_priority) { | |
1431 | ++subtable->max_count; | |
4aacd02d BP |
1432 | } |
1433 | } | |
1434 | ||
03868246 JR |
1435 | /* Possibly move 'subtable' later in the priority list. If we break |
1436 | * out of the loop, then 'subtable' should be moved just before that | |
1437 | * 'iter'. If the loop terminates normally, then 'iter' will be the | |
1438 | * list head and we'll move subtable just before that (e.g. to the back | |
1439 | * of the list). */ | |
ec988646 JR |
1440 | CLS_SUBTABLE_CACHE_FOR_EACH (table, iter, &cls->subtables_priority) { |
1441 | if (table == subtable) { | |
1442 | subtable_iter = iter; /* Locate the subtable as we go. */ | |
1443 | iter->max_priority = subtable->max_priority; | |
1444 | } else if (table->max_priority <= subtable->max_priority) { | |
1445 | ovs_assert(subtable_iter != NULL); | |
4aacd02d BP |
1446 | break; |
1447 | } | |
1448 | } | |
1449 | ||
03868246 | 1450 | /* Move 'subtable' just before 'iter' (unless it's already there). */ |
ec988646 JR |
1451 | if (iter != subtable_iter) { |
1452 | cls_subtable_cache_splice(iter, subtable_iter, subtable_iter + 1); | |
4aacd02d BP |
1453 | } |
1454 | } | |
1455 | } | |
1456 | ||
/* Half-open range [start, end) of u32 offsets into a flow, used to bound the
 * fields considered by one stage of a segmented lookup. */
struct range {
    uint8_t start;
    uint8_t end;
};
1461 | ||
/* Return 'true' if can skip rest of the subtable based on the prefix trie
 * lookup results. */
static inline bool
check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
            const unsigned int field_plen[CLS_MAX_TRIES],
            const struct range ofs, const struct flow *flow,
            struct flow_wildcards *wc)
{
    int j;

    /* Check if we could avoid fully unwildcarding the next level of
     * fields using the prefix tries.  The trie checks are done only as
     * needed to avoid folding in additional bits to the wildcards mask. */
    for (j = 0; j < n_tries; j++) {
        /* Is the trie field relevant for this subtable? */
        if (field_plen[j]) {
            struct trie_ctx *ctx = &trie_ctx[j];
            uint8_t be32ofs = ctx->be32ofs;

            /* Is the trie field within the current range of fields? */
            if (be32ofs >= ofs.start && be32ofs < ofs.end) {
                /* On-demand trie lookup. */
                if (!ctx->lookup_done) {
                    ctx->match_plen = trie_lookup(ctx->trie, flow,
                                                  &ctx->maskbits);
                    ctx->lookup_done = true;
                }
                /* Possible to skip the rest of the subtable if subtable's
                 * prefix on the field is longer than what is known to match
                 * based on the trie lookup. */
                if (field_plen[j] > ctx->match_plen) {
                    /* RFC: We want the trie lookup to never result in
                     * unwildcarding any bits that would not be unwildcarded
                     * otherwise.  Since the trie is shared by the whole
                     * classifier, it is possible that the 'maskbits' contain
                     * bits that are irrelevant for the partition of the
                     * classifier relevant for the current flow. */

                    /* Can skip if the field is already unwildcarded. */
                    if (mask_prefix_bits_set(wc, be32ofs, ctx->maskbits)) {
                        return true;
                    }
                    /* Check that the trie result will not unwildcard more bits
                     * than this stage will. */
                    if (ctx->maskbits <= field_plen[j]) {
                        /* Unwildcard the bits and skip the rest. */
                        mask_set_prefix_bits(wc, be32ofs, ctx->maskbits);
                        /* Note: Prerequisite already unwildcarded, as the only
                         * prerequisite of the supported trie lookup fields is
                         * the ethertype, which is currently always
                         * unwildcarded.
                         */
                        return true;
                    }
                }
            }
        }
    }
    return false;
}
1522 | ||
627fb667 | 1523 | static inline struct cls_match * |
476f36e8 JR |
1524 | find_match(const struct cls_subtable *subtable, const struct flow *flow, |
1525 | uint32_t hash) | |
b5d97350 | 1526 | { |
627fb667 | 1527 | struct cls_match *rule; |
b5d97350 | 1528 | |
03868246 | 1529 | HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) { |
df40c152 | 1530 | if (minimatch_matches_flow(&rule->match, flow)) { |
b5d97350 | 1531 | return rule; |
064af421 BP |
1532 | } |
1533 | } | |
c23740be | 1534 | |
064af421 BP |
1535 | return NULL; |
1536 | } | |
1537 | ||
/* Returns a rule in 'subtable' matching 'flow', also folding into 'wc' the
 * mask bits that had to be examined to arrive at the answer.  The staged
 * hash indices and the prefix tries (via 'trie_ctx'/'n_tries') let the lookup
 * stop early, in which case only the mask of the fields looked at so far is
 * folded in.  With a NULL 'wc' this degenerates to a plain full-mask lookup.
 * Returns NULL when nothing matches. */
static struct cls_match *
find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
              struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
              struct flow_wildcards *wc)
{
    uint32_t basis = 0, hash;
    struct cls_match *rule = NULL;
    int i;
    struct range ofs;           /* u32 range of fields hashed so far. */

    if (OVS_UNLIKELY(!wc)) {
        return find_match(subtable, flow,
                          flow_hash_in_minimask(flow, &subtable->mask, 0));
    }

    ofs.start = 0;
    /* Try to finish early by checking fields in segments. */
    for (i = 0; i < subtable->n_indices; i++) {
        struct hindex_node *inode;
        ofs.end = subtable->index_ofs[i];

        if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow,
                        wc)) {
            goto range_out;
        }
        /* 'basis' carries over, so each segment hash extends the previous
         * one rather than starting from scratch. */
        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                           ofs.end, &basis);
        ofs.start = ofs.end;
        inode = hindex_node_with_hash(&subtable->indices[i], hash);
        if (!inode) {
            /* No match, can stop immediately, but must fold in the mask
             * covered so far. */
            goto range_out;
        }

        /* If we have narrowed down to a single rule already, check whether
         * that rule matches.  If it does match, then we're done.  If it does
         * not match, then we know that we will never get a match, but we do
         * not yet know how many wildcards we need to fold into 'wc' so we
         * continue iterating through indices to find that out.  (We won't
         * waste time calling minimatch_matches_flow() again because we've set
         * 'rule' nonnull.)
         *
         * This check shows a measurable benefit with non-trivial flow tables.
         *
         * (Rare) hash collisions may cause us to miss the opportunity for this
         * optimization. */
        if (!inode->s && !rule) {
            /* 'inode' is the i:th index node of its cls_match, so step back
             * 'i' nodes to recover the containing cls_match. */
            ASSIGN_CONTAINER(rule, inode - i, index_nodes);
            if (minimatch_matches_flow(&rule->match, flow)) {
                goto out;
            }
        }
    }
    ofs.end = FLOW_U32S;
    /* Trie check for the final range. */
    if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
        goto range_out;
    }
    if (!rule) {
        /* Multiple potential matches exist, look for one. */
        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                           ofs.end, &basis);
        rule = find_match(subtable, flow, hash);
    } else {
        /* We already narrowed the matching candidates down to just 'rule',
         * but it didn't match. */
        rule = NULL;
    }
 out:
    /* Must unwildcard all the fields, as they were looked at. */
    flow_wildcards_fold_minimask(wc, &subtable->mask);
    return rule;

 range_out:
    /* Must unwildcard the fields looked up so far, if any. */
    if (ofs.start) {
        flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, ofs.start);
    }
    return NULL;
}
1619 | ||
627fb667 | 1620 | static struct cls_match * |
03868246 JR |
1621 | find_equal(struct cls_subtable *subtable, const struct miniflow *flow, |
1622 | uint32_t hash) | |
064af421 | 1623 | { |
627fb667 | 1624 | struct cls_match *head; |
064af421 | 1625 | |
03868246 | 1626 | HMAP_FOR_EACH_WITH_HASH (head, hmap_node, hash, &subtable->rules) { |
5cb7a798 | 1627 | if (miniflow_equal(&head->match.flow, flow)) { |
b5d97350 | 1628 | return head; |
064af421 BP |
1629 | } |
1630 | } | |
1631 | return NULL; | |
1632 | } | |
1633 | ||
/* Inserts 'new' into 'subtable' of 'cls'.  The new rule is linked into the
 * staged hash indices and into the per-flow list, which is kept in order of
 * decreasing priority.  If a rule with the same flow *and* the same priority
 * already exists, it is displaced (unlinked from the list and the indices)
 * and returned; otherwise returns NULL. */
static struct cls_match *
insert_rule(struct cls_classifier *cls, struct cls_subtable *subtable,
            struct cls_rule *new)
{
    struct cls_match *cls_match = cls_match_alloc(new);
    struct cls_match *head;
    struct cls_match *old = NULL;
    int i;
    uint32_t basis = 0, hash;
    uint8_t prev_be32ofs = 0;

    /* Add new node to segment indices.  Each index hashes the u32 range
     * [prev_be32ofs, index_ofs[i]), chaining 'basis' across stages. */
    for (i = 0; i < subtable->n_indices; i++) {
        hash = minimatch_hash_range(&new->match, prev_be32ofs,
                                    subtable->index_ofs[i], &basis);
        hindex_insert(&subtable->indices[i], &cls_match->index_nodes[i], hash);
        prev_be32ofs = subtable->index_ofs[i];
    }
    /* Final hash covers the remaining fields and identifies the flow. */
    hash = minimatch_hash_range(&new->match, prev_be32ofs, FLOW_U32S, &basis);
    head = find_equal(subtable, &new->match.flow, hash);
    if (!head) {
        /* First rule with this flow: becomes the list head. */
        hmap_insert(&subtable->rules, &cls_match->hmap_node, hash);
        list_init(&cls_match->list);
        goto out;
    } else {
        /* Scan the list for the insertion point that will keep the list in
         * order of decreasing priority. */
        struct cls_match *rule;

        cls_match->hmap_node.hash = hash; /* Otherwise done by hmap_insert. */

        FOR_EACH_RULE_IN_LIST (rule, head) {
            if (cls_match->priority >= rule->priority) {
                if (rule == head) {
                    /* 'new' is the new highest-priority flow in the list. */
                    hmap_replace(&subtable->rules,
                                 &rule->hmap_node, &cls_match->hmap_node);
                }

                if (cls_match->priority == rule->priority) {
                    /* Same priority: 'new' displaces 'rule'. */
                    list_replace(&cls_match->list, &rule->list);
                    old = rule;
                    goto out;
                } else {
                    /* Strictly higher priority: insert before 'rule'. */
                    list_insert(&rule->list, &cls_match->list);
                    goto out;
                }
            }
        }

        /* Insert 'new' at the end of the list. */
        list_push_back(&head->list, &cls_match->list);
    }

 out:
    if (!old) {
        update_subtables_after_insertion(cls, subtable, cls_match->priority);
    } else {
        /* Remove old node from indices; the new rule's index nodes were
         * already inserted above. */
        for (i = 0; i < subtable->n_indices; i++) {
            hindex_remove(&subtable->indices[i], &old->index_nodes[i]);
        }
    }
    return old;
}
1699 | ||
627fb667 JR |
1700 | static struct cls_match * |
1701 | next_rule_in_list__(struct cls_match *rule) | |
064af421 | 1702 | { |
627fb667 | 1703 | struct cls_match *next = OBJECT_CONTAINING(rule->list.next, next, list); |
955f579d BP |
1704 | return next; |
1705 | } | |
1706 | ||
627fb667 JR |
1707 | static struct cls_match * |
1708 | next_rule_in_list(struct cls_match *rule) | |
955f579d | 1709 | { |
627fb667 | 1710 | struct cls_match *next = next_rule_in_list__(rule); |
b5d97350 | 1711 | return next->priority < rule->priority ? next : NULL; |
064af421 | 1712 | } |
13751fd8 JR |
1713 | \f |
/* A longest-prefix match tree. */
struct trie_node {
    uint32_t prefix;           /* Prefix bits for this node, MSB first,
                                * left-aligned within the word. */
    uint8_t  nbits;            /* Never zero, except for the root node. */
    unsigned int n_rules;      /* Number of rules that have this prefix. */
    struct trie_node *edges[2]; /* Both NULL if leaf. */
};

/* Max bits per node.  Must fit in struct trie_node's 'prefix'.
 * Also tested with 16, 8, and 5 to stress the implementation. */
#define TRIE_PREFIX_BITS 32
1725 | ||
1726 | /* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'. | |
1727 | * Prefixes are in the network byte order, and the offset 0 corresponds to | |
1728 | * the most significant bit of the first byte. The offset can be read as | |
1729 | * "how many bits to skip from the start of the prefix starting at 'pr'". */ | |
1730 | static uint32_t | |
1731 | raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) | |
1732 | { | |
1733 | uint32_t prefix; | |
1734 | ||
1735 | pr += ofs / 32; /* Where to start. */ | |
1736 | ofs %= 32; /* How many bits to skip at 'pr'. */ | |
1737 | ||
1738 | prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */ | |
1739 | if (plen > 32 - ofs) { /* Need more than we have already? */ | |
1740 | prefix |= ntohl(*++pr) >> (32 - ofs); | |
1741 | } | |
1742 | /* Return with possible unwanted bits at the end. */ | |
1743 | return prefix; | |
1744 | } | |
1745 | ||
1746 | /* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit | |
1747 | * offset 'ofs'. Prefixes are in the network byte order, and the offset 0 | |
1748 | * corresponds to the most significant bit of the first byte. The offset can | |
1749 | * be read as "how many bits to skip from the start of the prefix starting at | |
1750 | * 'pr'". */ | |
1751 | static uint32_t | |
1752 | trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) | |
1753 | { | |
1754 | if (!plen) { | |
1755 | return 0; | |
1756 | } | |
1757 | if (plen > TRIE_PREFIX_BITS) { | |
1758 | plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */ | |
1759 | } | |
1760 | /* Return with unwanted bits cleared. */ | |
1761 | return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen); | |
1762 | } | |
1763 | ||
/* Return the number of equal bits in 'nbits' of 'prefix's MSBs and a 'value'
 * starting at "MSB 0"-based offset 'ofs'.  The result is at most 'nbits'. */
static unsigned int
prefix_equal_bits(uint32_t prefix, unsigned int nbits, const ovs_be32 value[],
                  unsigned int ofs)
{
    /* XOR leaves a 1 at the first differing bit; count leading zeros in a
     * 64-bit value so the shift below cannot overflow. */
    uint64_t diff = prefix ^ raw_get_prefix(value, ofs, nbits);
    /* Set the bit after the relevant bits to limit the result: if all 'nbits'
     * compared bits are equal, the sentinel bit stops the count at 'nbits'. */
    return raw_clz64(diff << 32 | UINT64_C(1) << (63 - nbits));
}
1774 | ||
1775 | /* Return the number of equal bits in 'node' prefix and a 'prefix' of length | |
1776 | * 'plen', starting at "MSB 0"-based offset 'ofs'. */ | |
1777 | static unsigned int | |
1778 | trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[], | |
1779 | unsigned int ofs, unsigned int plen) | |
1780 | { | |
1781 | return prefix_equal_bits(node->prefix, MIN(node->nbits, plen - ofs), | |
1782 | prefix, ofs); | |
1783 | } | |
1784 | ||
1785 | /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can | |
1786 | * be greater than 31. */ | |
1787 | static unsigned int | |
1788 | be_get_bit_at(const ovs_be32 value[], unsigned int ofs) | |
1789 | { | |
1790 | return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u; | |
1791 | } | |
1792 | ||
/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int.  'ofs' must
 * be between 0 and 31, inclusive. */
static unsigned int
get_bit_at(const uint32_t prefix, unsigned int ofs)
{
    unsigned int shift = 31 - ofs;  /* MSB-0 offset -> LSB-0 shift count. */

    return (prefix >> shift) & 1u;
}
1800 | ||
1801 | /* Create new branch. */ | |
1802 | static struct trie_node * | |
1803 | trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen, | |
1804 | unsigned int n_rules) | |
1805 | { | |
1806 | struct trie_node *node = xmalloc(sizeof *node); | |
1807 | ||
1808 | node->prefix = trie_get_prefix(prefix, ofs, plen); | |
1809 | ||
1810 | if (plen <= TRIE_PREFIX_BITS) { | |
1811 | node->nbits = plen; | |
1812 | node->edges[0] = NULL; | |
1813 | node->edges[1] = NULL; | |
1814 | node->n_rules = n_rules; | |
1815 | } else { /* Need intermediate nodes. */ | |
1816 | struct trie_node *subnode = trie_branch_create(prefix, | |
1817 | ofs + TRIE_PREFIX_BITS, | |
1818 | plen - TRIE_PREFIX_BITS, | |
1819 | n_rules); | |
1820 | int bit = get_bit_at(subnode->prefix, 0); | |
1821 | node->nbits = TRIE_PREFIX_BITS; | |
1822 | node->edges[bit] = subnode; | |
1823 | node->edges[!bit] = NULL; | |
1824 | node->n_rules = 0; | |
1825 | } | |
1826 | return node; | |
1827 | } | |
1828 | ||
/* Frees a single trie node; does not touch its children. */
static void
trie_node_destroy(struct trie_node *node)
{
    free(node);
}
1834 | ||
1835 | static void | |
1836 | trie_destroy(struct trie_node *node) | |
1837 | { | |
1838 | if (node) { | |
1839 | trie_destroy(node->edges[0]); | |
1840 | trie_destroy(node->edges[1]); | |
1841 | free(node); | |
1842 | } | |
1843 | } | |
1844 | ||
1845 | static bool | |
1846 | trie_is_leaf(const struct trie_node *trie) | |
1847 | { | |
1848 | return !trie->edges[0] && !trie->edges[1]; /* No children. */ | |
1849 | } | |
1850 | ||
1851 | static void | |
1852 | mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs, | |
1853 | unsigned int nbits) | |
1854 | { | |
1855 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; | |
1856 | unsigned int i; | |
1857 | ||
1858 | for (i = 0; i < nbits / 32; i++) { | |
1859 | mask[i] = OVS_BE32_MAX; | |
1860 | } | |
1861 | if (nbits % 32) { | |
1862 | mask[i] |= htonl(~0u << (32 - nbits % 32)); | |
1863 | } | |
1864 | } | |
1865 | ||
1866 | static bool | |
1867 | mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs, | |
1868 | unsigned int nbits) | |
1869 | { | |
1870 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; | |
1871 | unsigned int i; | |
1872 | ovs_be32 zeroes = 0; | |
1873 | ||
1874 | for (i = 0; i < nbits / 32; i++) { | |
1875 | zeroes |= ~mask[i]; | |
1876 | } | |
1877 | if (nbits % 32) { | |
1878 | zeroes |= ~mask[i] & htonl(~0u << (32 - nbits % 32)); | |
1879 | } | |
1880 | ||
1881 | return !zeroes; /* All 'nbits' bits set. */ | |
1882 | } | |
1883 | ||
1884 | static struct trie_node ** | |
1885 | trie_next_edge(struct trie_node *node, const ovs_be32 value[], | |
1886 | unsigned int ofs) | |
1887 | { | |
1888 | return node->edges + be_get_bit_at(value, ofs); | |
1889 | } | |
1890 | ||
1891 | static const struct trie_node * | |
1892 | trie_next_node(const struct trie_node *node, const ovs_be32 value[], | |
1893 | unsigned int ofs) | |
1894 | { | |
1895 | return node->edges[be_get_bit_at(value, ofs)]; | |
1896 | } | |
1897 | ||
/* Return the prefix mask length necessary to find the longest-prefix match for
 * the '*value' in the prefix tree 'node'.
 * '*checkbits' is set to the number of bits in the prefix mask necessary to
 * determine a mismatch, in case there are longer prefixes in the tree below
 * the one that matched.
 */
static unsigned int
trie_lookup_value(const struct trie_node *node, const ovs_be32 value[],
                  unsigned int *checkbits)
{
    unsigned int plen = 0, match_len = 0; /* Bits matched so far / at a rule. */
    const struct trie_node *prev = NULL;  /* Last node fully traversed. */

    for (; node; prev = node, node = trie_next_node(node, value, plen)) {
        unsigned int eqbits;
        /* Check if this edge can be followed. */
        eqbits = prefix_equal_bits(node->prefix, node->nbits, value, plen);
        plen += eqbits;
        if (eqbits < node->nbits) { /* Mismatch, nothing more to be found. */
            /* Bit at offset 'plen' differed. */
            *checkbits = plen + 1; /* Includes the first mismatching bit. */
            return match_len;
        }
        /* Full match, check if rules exist at this prefix length. */
        if (node->n_rules > 0) {
            match_len = plen;
        }
    }
    /* Dead end, exclude the other branch if it exists. */
    *checkbits = !prev || trie_is_leaf(prev) ? plen : plen + 1;
    return match_len;
}
1930 | ||
1931 | static unsigned int | |
1932 | trie_lookup(const struct cls_trie *trie, const struct flow *flow, | |
1933 | unsigned int *checkbits) | |
1934 | { | |
1935 | const struct mf_field *mf = trie->field; | |
1936 | ||
1937 | /* Check that current flow matches the prerequisites for the trie | |
1938 | * field. Some match fields are used for multiple purposes, so we | |
1939 | * must check that the trie is relevant for this flow. */ | |
1940 | if (mf_are_prereqs_ok(mf, flow)) { | |
1941 | return trie_lookup_value(trie->root, | |
1942 | &((ovs_be32 *)flow)[mf->flow_be32ofs], | |
1943 | checkbits); | |
1944 | } | |
1945 | *checkbits = 0; /* Value not used in this case. */ | |
1946 | return UINT_MAX; | |
1947 | } | |
1948 | ||
/* Returns the length of a prefix match mask for the field 'mf' in 'minimask'.
 * Returns 0 if the mask is not a contiguous (CIDR-style) prefix: either a
 * non-contiguous word was seen, or set bits followed the end of the mask.
 * (A previous version of this comment mentioned a 'miniflow_index' output
 * parameter that no longer exists.) */
static unsigned int
minimask_get_prefix_len(const struct minimask *minimask,
                        const struct mf_field *mf)
{
    unsigned int nbits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
    uint8_t u32_ofs = mf->flow_be32ofs;
    uint8_t u32_end = u32_ofs + mf->n_bytes / 4;

    for (; u32_ofs < u32_end; ++u32_ofs) {
        uint32_t mask;
        mask = ntohl((OVS_FORCE ovs_be32)minimask_get(minimask, u32_ofs));

        /* Validate mask, count the mask length. */
        if (mask_tz) {
            if (mask) {
                return 0; /* No bits allowed after mask ended. */
            }
        } else {
            if (~mask & (~mask + 1)) {
                return 0; /* Mask not contiguous. */
            }
            mask_tz = ctz32(mask);
            nbits += 32 - mask_tz;
        }
    }

    return nbits;
}
1980 | ||
/*
 * Returns a pointer to the compressed storage of 'mf''s value within
 * 'match''s miniflow.
 *
 * This is called only when mask prefix is known to be CIDR and non-zero.
 * Relies on the fact that the flow and mask have the same map, and since
 * the mask is CIDR, the storage for the flow field exists even if it
 * happened to be zeros.
 */
static const ovs_be32 *
minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
{
    /* The field's index into the compressed 'values' array is the number of
     * map bits set below its u32 offset. */
    return match->flow.values +
        count_1bits(match->flow.map & ((UINT64_C(1) << mf->flow_be32ofs) - 1));
}
1993 | ||
/* Insert rule in to the prefix tree.
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
    struct trie_node *node;
    struct trie_node **edge;
    int ofs = 0;                /* Bits of 'prefix' consumed so far. */

    /* Walk the tree. */
    for (edge = &trie->root;
         (node = *edge) != NULL;
         edge = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
        ofs += eqbits;
        if (eqbits < node->nbits) {
            /* Mismatch, new node needs to be inserted above. */
            int old_branch = get_bit_at(node->prefix, eqbits);

            /* New parent node holding the common prefix; it carries the rule
             * only if the whole prefix was consumed. */
            *edge = trie_branch_create(prefix, ofs - eqbits, eqbits,
                                       ofs == mlen ? 1 : 0);

            /* Adjust old node for its new position in the tree: drop the
             * bits now held by the parent. */
            node->prefix <<= eqbits;
            node->nbits -= eqbits;
            (*edge)->edges[old_branch] = node;

            /* Check if need a new branch for the new rule. */
            if (ofs < mlen) {
                (*edge)->edges[!old_branch]
                    = trie_branch_create(prefix, ofs, mlen - ofs, 1);
            }
            return;
        }
        /* Full match so far. */

        if (ofs == mlen) {
            /* Full match at the current node, rule needs to be added here. */
            node->n_rules++;
            return;
        }
    }
    /* Must insert a new tree branch for the new rule. */
    *edge = trie_branch_create(prefix, ofs, mlen - ofs, 1);
}
2042 | ||
/* Removes rule from the prefix tree.
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'.  After decrementing the rule count, empty nodes are pruned and
 * single-child chains are re-combined where the combined prefix still fits
 * in one node. */
static void
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
    struct trie_node *node;
    /* Stack of traversed edges, so pruning can walk back toward the root.
     * sizeof(union mf_value) * 8 bounds the maximum prefix length in bits. */
    struct trie_node **edges[sizeof(union mf_value) * 8];
    int depth = 0, ofs = 0;

    /* Walk the tree. */
    for (edges[depth] = &trie->root;
         (node = *edges[depth]) != NULL;
         edges[++depth] = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
        if (eqbits < node->nbits) {
            /* Mismatch, nothing to be removed.  This should never happen, as
             * only rules in the classifier are ever removed. */
            break; /* Log a warning. */
        }
        /* Full match so far. */
        ofs += eqbits;

        if (ofs == mlen) {
            /* Full prefix match at the current node, remove rule here. */
            if (!node->n_rules) {
                break; /* Log a warning. */
            }
            node->n_rules--;

            /* Check if can prune the tree. */
            while (!node->n_rules && !(node->edges[0] && node->edges[1])) {
                /* No rules and at most one child node, remove this node. */
                struct trie_node *next;
                next = node->edges[0] ? node->edges[0] : node->edges[1];

                if (next) {
                    if (node->nbits + next->nbits > TRIE_PREFIX_BITS) {
                        break;   /* Cannot combine. */
                    }
                    /* Combine node with next: prepend this node's bits. */
                    next->prefix = node->prefix | next->prefix >> node->nbits;
                    next->nbits += node->nbits;
                }
                trie_node_destroy(node);
                /* Update the parent's edge. */
                *edges[depth] = next;
                if (next || !depth) {
                    /* Branch not pruned or at root, nothing more to do. */
                    break;
                }
                /* Edge emptied: continue pruning at the parent. */
                node = *edges[--depth];
            }
            return;
        }
    }
    /* Cannot go deeper.  This should never happen, since only rules
     * that actually exist in the classifier are ever removed. */
    VLOG_WARN("Trying to remove non-existing rule from a prefix trie.");
}