]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
4aacd02d | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "classifier.h" | |
064af421 BP |
19 | #include <errno.h> |
20 | #include <netinet/in.h> | |
844dff32 | 21 | #include "byte-order.h" |
68d1c8c3 | 22 | #include "dynamic-string.h" |
064af421 BP |
23 | #include "flow.h" |
24 | #include "hash.h" | |
07b37e8f | 25 | #include "odp-util.h" |
d8ae4d67 | 26 | #include "ofp-util.h" |
0b4f2078 | 27 | #include "ovs-thread.h" |
13751fd8 JR |
28 | #include "packets.h" |
29 | #include "vlog.h" | |
30 | ||
31 | VLOG_DEFINE_THIS_MODULE(classifier); | |
064af421 | 32 | |
13751fd8 | 33 | struct trie_ctx; |
03868246 JR |
34 | static struct cls_subtable *find_subtable(const struct classifier *, |
35 | const struct minimask *); | |
36 | static struct cls_subtable *insert_subtable(struct classifier *, | |
37 | const struct minimask *); | |
b5d97350 | 38 | |
03868246 | 39 | static void destroy_subtable(struct classifier *, struct cls_subtable *); |
b5d97350 | 40 | |
03868246 JR |
41 | static void update_subtables_after_insertion(struct classifier *, |
42 | struct cls_subtable *, | |
43 | unsigned int new_priority); | |
44 | static void update_subtables_after_removal(struct classifier *, | |
45 | struct cls_subtable *, | |
46 | unsigned int del_priority); | |
4aacd02d | 47 | |
476f36e8 | 48 | static struct cls_rule *find_match_wc(const struct cls_subtable *, |
13751fd8 JR |
49 | const struct flow *, struct trie_ctx *, |
50 | unsigned int n_tries, | |
476f36e8 | 51 | struct flow_wildcards *); |
03868246 | 52 | static struct cls_rule *find_equal(struct cls_subtable *, |
5cb7a798 | 53 | const struct miniflow *, uint32_t hash); |
4aacd02d | 54 | static struct cls_rule *insert_rule(struct classifier *, |
03868246 | 55 | struct cls_subtable *, struct cls_rule *); |
b5d97350 | 56 | |
/* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */
#define FOR_EACH_RULE_IN_LIST(RULE, HEAD)                                   \
    for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE))
/* Safe variant: caches the successor in NEXT before the loop body runs, so
 * the body may unlink or free RULE without breaking the iteration. */
#define FOR_EACH_RULE_IN_LIST_SAFE(RULE, NEXT, HEAD)                        \
    for ((RULE) = (HEAD);                                                   \
         (RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true);        \
         (RULE) = (NEXT))
64 | ||
955f579d | 65 | static struct cls_rule *next_rule_in_list__(struct cls_rule *); |
b5d97350 | 66 | static struct cls_rule *next_rule_in_list(struct cls_rule *); |
13751fd8 JR |
67 | |
68 | static unsigned int minimask_get_prefix_len(const struct minimask *, | |
69 | const struct mf_field *); | |
70 | static void trie_init(struct classifier *, int trie_idx, | |
71 | const struct mf_field *); | |
72 | static unsigned int trie_lookup(const struct cls_trie *, const struct flow *, | |
73 | unsigned int *checkbits); | |
74 | ||
75 | static void trie_destroy(struct trie_node *); | |
76 | static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen); | |
77 | static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen); | |
78 | static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs, | |
79 | unsigned int nbits); | |
80 | static bool mask_prefix_bits_set(const struct flow_wildcards *, | |
81 | uint8_t be32ofs, unsigned int nbits); | |
3d91d909 JR |
82 | \f |
83 | /* flow/miniflow/minimask/minimatch utilities. | |
84 | * These are only used by the classifier, so place them here to allow | |
85 | * for better optimization. */ | |
86 | ||
87 | static inline uint64_t | |
88 | miniflow_get_map_in_range(const struct miniflow *miniflow, | |
89 | uint8_t start, uint8_t end, unsigned int *offset) | |
90 | { | |
91 | uint64_t map = miniflow->map; | |
92 | *offset = 0; | |
93 | ||
94 | if (start > 0) { | |
95 | uint64_t msk = (UINT64_C(1) << start) - 1; /* 'start' LSBs set */ | |
96 | *offset = count_1bits(map & msk); | |
97 | map &= ~msk; | |
98 | } | |
99 | if (end < FLOW_U32S) { | |
100 | uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */ | |
101 | map &= msk; | |
102 | } | |
103 | return map; | |
104 | } | |
105 | ||
106 | /* Returns a hash value for the bits of 'flow' where there are 1-bits in | |
107 | * 'mask', given 'basis'. | |
108 | * | |
109 | * The hash values returned by this function are the same as those returned by | |
110 | * miniflow_hash_in_minimask(), only the form of the arguments differ. */ | |
111 | static inline uint32_t | |
112 | flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask, | |
113 | uint32_t basis) | |
114 | { | |
115 | const uint32_t *flow_u32 = (const uint32_t *)flow; | |
116 | const uint32_t *p = mask->masks.values; | |
117 | uint32_t hash; | |
118 | uint64_t map; | |
119 | ||
120 | hash = basis; | |
121 | for (map = mask->masks.map; map; map = zero_rightmost_1bit(map)) { | |
122 | hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p++); | |
123 | } | |
124 | ||
125 | return mhash_finish(hash, (p - mask->masks.values) * 4); | |
126 | } | |
127 | ||
128 | /* Returns a hash value for the bits of 'flow' where there are 1-bits in | |
129 | * 'mask', given 'basis'. | |
130 | * | |
131 | * The hash values returned by this function are the same as those returned by | |
132 | * flow_hash_in_minimask(), only the form of the arguments differ. */ | |
133 | static inline uint32_t | |
134 | miniflow_hash_in_minimask(const struct miniflow *flow, | |
135 | const struct minimask *mask, uint32_t basis) | |
136 | { | |
137 | const uint32_t *p = mask->masks.values; | |
138 | uint32_t hash = basis; | |
139 | uint32_t flow_u32; | |
140 | ||
141 | MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) { | |
142 | hash = mhash_add(hash, flow_u32 & *p++); | |
143 | } | |
144 | ||
145 | return mhash_finish(hash, (p - mask->masks.values) * 4); | |
146 | } | |
147 | ||
148 | /* Returns a hash value for the bits of range [start, end) in 'flow', | |
149 | * where there are 1-bits in 'mask', given 'hash'. | |
150 | * | |
151 | * The hash values returned by this function are the same as those returned by | |
152 | * minimatch_hash_range(), only the form of the arguments differ. */ | |
153 | static inline uint32_t | |
154 | flow_hash_in_minimask_range(const struct flow *flow, | |
155 | const struct minimask *mask, | |
156 | uint8_t start, uint8_t end, uint32_t *basis) | |
157 | { | |
158 | const uint32_t *flow_u32 = (const uint32_t *)flow; | |
159 | unsigned int offset; | |
160 | uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end, | |
161 | &offset); | |
162 | const uint32_t *p = mask->masks.values + offset; | |
163 | uint32_t hash = *basis; | |
164 | ||
165 | for (; map; map = zero_rightmost_1bit(map)) { | |
166 | hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p++); | |
167 | } | |
168 | ||
169 | *basis = hash; /* Allow continuation from the unfinished value. */ | |
170 | return mhash_finish(hash, (p - mask->masks.values) * 4); | |
171 | } | |
172 | ||
/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask.
 * (Delegates to flow_union_with_miniflow() -- presumably it ORs the
 * miniflow's present values into 'wc->masks'; TODO confirm in flow.h.) */
static inline void
flow_wildcards_fold_minimask(struct flow_wildcards *wc,
                             const struct minimask *mask)
{
    flow_union_with_miniflow(&wc->masks, &mask->masks);
}
180 | ||
181 | /* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask | |
182 | * in range [start, end). */ | |
183 | static inline void | |
184 | flow_wildcards_fold_minimask_range(struct flow_wildcards *wc, | |
185 | const struct minimask *mask, | |
186 | uint8_t start, uint8_t end) | |
187 | { | |
188 | uint32_t *dst_u32 = (uint32_t *)&wc->masks; | |
189 | unsigned int offset; | |
190 | uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end, | |
191 | &offset); | |
192 | const uint32_t *p = mask->masks.values + offset; | |
193 | ||
194 | for (; map; map = zero_rightmost_1bit(map)) { | |
195 | dst_u32[raw_ctz(map)] |= *p++; | |
196 | } | |
197 | } | |
198 | ||
199 | /* Returns a hash value for 'flow', given 'basis'. */ | |
200 | static inline uint32_t | |
201 | miniflow_hash(const struct miniflow *flow, uint32_t basis) | |
202 | { | |
203 | const uint32_t *p = flow->values; | |
204 | uint32_t hash = basis; | |
205 | uint64_t hash_map = 0; | |
206 | uint64_t map; | |
207 | ||
208 | for (map = flow->map; map; map = zero_rightmost_1bit(map)) { | |
209 | if (*p) { | |
210 | hash = mhash_add(hash, *p); | |
211 | hash_map |= rightmost_1bit(map); | |
212 | } | |
213 | p++; | |
214 | } | |
215 | hash = mhash_add(hash, hash_map); | |
216 | hash = mhash_add(hash, hash_map >> 32); | |
217 | ||
218 | return mhash_finish(hash, p - flow->values); | |
219 | } | |
220 | ||
/* Returns a hash value for 'mask', given 'basis'.
 * (A minimask is hashed exactly as the miniflow it wraps.) */
static inline uint32_t
minimask_hash(const struct minimask *mask, uint32_t basis)
{
    return miniflow_hash(&mask->masks, basis);
}
227 | ||
/* Returns a hash value for 'match', given 'basis'.
 * The mask's hash is computed first and used as the basis for hashing the
 * flow, so both halves of the minimatch contribute to the result. */
static inline uint32_t
minimatch_hash(const struct minimatch *match, uint32_t basis)
{
    return miniflow_hash(&match->flow, minimask_hash(&match->mask, basis));
}
234 | ||
235 | /* Returns a hash value for the bits of range [start, end) in 'minimatch', | |
236 | * given 'basis'. | |
237 | * | |
238 | * The hash values returned by this function are the same as those returned by | |
239 | * flow_hash_in_minimask_range(), only the form of the arguments differ. */ | |
240 | static inline uint32_t | |
241 | minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end, | |
242 | uint32_t *basis) | |
243 | { | |
244 | unsigned int offset; | |
245 | const uint32_t *p, *q; | |
246 | uint32_t hash = *basis; | |
247 | int n, i; | |
248 | ||
249 | n = count_1bits(miniflow_get_map_in_range(&match->mask.masks, start, end, | |
250 | &offset)); | |
251 | q = match->mask.masks.values + offset; | |
252 | p = match->flow.values + offset; | |
253 | ||
254 | for (i = 0; i < n; i++) { | |
255 | hash = mhash_add(hash, p[i] & q[i]); | |
256 | } | |
257 | *basis = hash; /* Allow continuation from the unfinished value. */ | |
258 | return mhash_finish(hash, (offset + n) * 4); | |
259 | } | |
260 | ||
81a76618 BP |
261 | \f |
262 | /* cls_rule. */ | |
b5d97350 | 263 | |
81a76618 | 264 | /* Initializes 'rule' to match packets specified by 'match' at the given |
5cb7a798 BP |
265 | * 'priority'. 'match' must satisfy the invariant described in the comment at |
266 | * the definition of struct match. | |
66642cb4 | 267 | * |
48d28ac1 BP |
268 | * The caller must eventually destroy 'rule' with cls_rule_destroy(). |
269 | * | |
81a76618 BP |
270 | * (OpenFlow uses priorities between 0 and UINT16_MAX, inclusive, but |
271 | * internally Open vSwitch supports a wider range.) */ | |
47284b1f | 272 | void |
81a76618 BP |
273 | cls_rule_init(struct cls_rule *rule, |
274 | const struct match *match, unsigned int priority) | |
47284b1f | 275 | { |
5cb7a798 BP |
276 | minimatch_init(&rule->match, match); |
277 | rule->priority = priority; | |
278 | } | |
279 | ||
280 | /* Same as cls_rule_init() for initialization from a "struct minimatch". */ | |
281 | void | |
282 | cls_rule_init_from_minimatch(struct cls_rule *rule, | |
283 | const struct minimatch *match, | |
284 | unsigned int priority) | |
285 | { | |
286 | minimatch_clone(&rule->match, match); | |
81a76618 | 287 | rule->priority = priority; |
685a51a5 JP |
288 | } |
289 | ||
48d28ac1 BP |
290 | /* Initializes 'dst' as a copy of 'src'. |
291 | * | |
b2c1f00b | 292 | * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ |
48d28ac1 BP |
293 | void |
294 | cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src) | |
295 | { | |
5cb7a798 BP |
296 | minimatch_clone(&dst->match, &src->match); |
297 | dst->priority = src->priority; | |
48d28ac1 BP |
298 | } |
299 | ||
b2c1f00b BP |
300 | /* Initializes 'dst' with the data in 'src', destroying 'src'. |
301 | * | |
302 | * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ | |
303 | void | |
304 | cls_rule_move(struct cls_rule *dst, struct cls_rule *src) | |
305 | { | |
306 | minimatch_move(&dst->match, &src->match); | |
307 | dst->priority = src->priority; | |
308 | } | |
309 | ||
48d28ac1 BP |
/* Frees memory referenced by 'rule'.  Doesn't free 'rule' itself (it's
 * normally embedded into a larger structure).
 *
 * ('rule' must not currently be in a classifier.) */
void
cls_rule_destroy(struct cls_rule *rule)
{
    /* NOTE(review): assumes minimatch_destroy() releases all of the storage
     * owned by 'rule->match' -- confirm in minimatch's definition. */
    minimatch_destroy(&rule->match);
}
319 | ||
81a76618 BP |
320 | /* Returns true if 'a' and 'b' match the same packets at the same priority, |
321 | * false if they differ in some way. */ | |
193eb874 BP |
322 | bool |
323 | cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b) | |
324 | { | |
5cb7a798 | 325 | return a->priority == b->priority && minimatch_equal(&a->match, &b->match); |
193eb874 BP |
326 | } |
327 | ||
81a76618 | 328 | /* Returns a hash value for 'rule', folding in 'basis'. */ |
57452fdc BP |
329 | uint32_t |
330 | cls_rule_hash(const struct cls_rule *rule, uint32_t basis) | |
331 | { | |
5cb7a798 | 332 | return minimatch_hash(&rule->match, hash_int(rule->priority, basis)); |
73f33563 BP |
333 | } |
334 | ||
/* Appends a string describing 'rule' to 's'. */
void
cls_rule_format(const struct cls_rule *rule, struct ds *s)
{
    /* The priority is passed along so minimatch_format() can include it. */
    minimatch_format(&rule->match, s, rule->priority);
}
3ca1de08 BP |
341 | |
342 | /* Returns true if 'rule' matches every packet, false otherwise. */ | |
343 | bool | |
344 | cls_rule_is_catchall(const struct cls_rule *rule) | |
345 | { | |
5cb7a798 | 346 | return minimask_is_catchall(&rule->match.mask); |
3ca1de08 | 347 | } |
064af421 BP |
348 | \f |
349 | /* Initializes 'cls' as a classifier that initially contains no classification | |
350 | * rules. */ | |
351 | void | |
476f36e8 | 352 | classifier_init(struct classifier *cls, const uint8_t *flow_segments) |
064af421 | 353 | { |
064af421 | 354 | cls->n_rules = 0; |
03868246 JR |
355 | hmap_init(&cls->subtables); |
356 | list_init(&cls->subtables_priority); | |
c906cedf | 357 | hmap_init(&cls->partitions); |
06f81620 | 358 | fat_rwlock_init(&cls->rwlock); |
476f36e8 JR |
359 | cls->n_flow_segments = 0; |
360 | if (flow_segments) { | |
361 | while (cls->n_flow_segments < CLS_MAX_INDICES | |
362 | && *flow_segments < FLOW_U32S) { | |
363 | cls->flow_segments[cls->n_flow_segments++] = *flow_segments++; | |
364 | } | |
365 | } | |
13751fd8 | 366 | cls->n_tries = 0; |
064af421 BP |
367 | } |
368 | ||
369 | /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the | |
370 | * caller's responsibility. */ | |
371 | void | |
372 | classifier_destroy(struct classifier *cls) | |
373 | { | |
374 | if (cls) { | |
03868246 JR |
375 | struct cls_subtable *partition, *next_partition; |
376 | struct cls_subtable *subtable, *next_subtable; | |
13751fd8 JR |
377 | int i; |
378 | ||
379 | for (i = 0; i < cls->n_tries; i++) { | |
380 | trie_destroy(cls->tries[i].root); | |
381 | } | |
064af421 | 382 | |
03868246 JR |
383 | HMAP_FOR_EACH_SAFE (subtable, next_subtable, hmap_node, |
384 | &cls->subtables) { | |
385 | destroy_subtable(cls, subtable); | |
064af421 | 386 | } |
03868246 | 387 | hmap_destroy(&cls->subtables); |
c906cedf BP |
388 | |
389 | HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node, | |
390 | &cls->partitions) { | |
391 | hmap_remove(&cls->partitions, &partition->hmap_node); | |
392 | free(partition); | |
393 | } | |
394 | hmap_destroy(&cls->partitions); | |
06f81620 | 395 | fat_rwlock_destroy(&cls->rwlock); |
064af421 BP |
396 | } |
397 | } | |
398 | ||
13751fd8 JR |
/* We use uint64_t as a set for the fields below. */
BUILD_ASSERT_DECL(MFF_N_IDS <= 64);

/* Set the fields for which prefix lookup should be performed.
 * Up to CLS_MAX_TRIES fields are accepted; incompatible or duplicate fields
 * are silently skipped.  Tries beyond the new count are torn down. */
void
classifier_set_prefix_fields(struct classifier *cls,
                             const enum mf_field_id *trie_fields,
                             unsigned int n_fields)
{
    uint64_t fields = 0;  /* Set of field ids already given a trie. */
    int i, trie;

    for (i = 0, trie = 0; i < n_fields && trie < CLS_MAX_TRIES; i++) {
        const struct mf_field *field = mf_from_id(trie_fields[i]);
        if (field->flow_be32ofs < 0 || field->n_bits % 32) {
            /* Incompatible field.  This is the only place where we
             * enforce these requirements, but the rest of the trie code
             * depends on the flow_be32ofs to be non-negative and the
             * field length to be a multiple of 32 bits. */
            continue;
        }

        if (fields & (UINT64_C(1) << trie_fields[i])) {
            /* Duplicate field, there is no need to build more than
             * one index for any one field. */
            continue;
        }
        fields |= UINT64_C(1) << trie_fields[i];

        /* Only (re)build the trie if it does not already index 'field'. */
        if (trie >= cls->n_tries || field != cls->tries[trie].field) {
            trie_init(cls, trie, field);
        }
        trie++;
    }

    /* Destroy the rest. */
    for (i = trie; i < cls->n_tries; i++) {
        trie_init(cls, i, NULL);
    }
    cls->n_tries = trie;
}
440 | ||
/* (Re)initializes trie index 'trie_idx' of 'cls' to index 'field', inserting
 * all existing rules whose subtable masks a prefix of that field.  A NULL
 * 'field' leaves the trie empty (used to destroy a trie slot). */
static void
trie_init(struct classifier *cls, int trie_idx,
          const struct mf_field *field)
{
    struct cls_trie *trie = &cls->tries[trie_idx];
    struct cls_subtable *subtable;

    /* Free the previous tree only if this slot was in use. */
    if (trie_idx < cls->n_tries) {
        trie_destroy(trie->root);
    }
    trie->root = NULL;
    trie->field = field;

    /* Add existing rules to the trie. */
    LIST_FOR_EACH (subtable, list_node, &cls->subtables_priority) {
        unsigned int plen;

        plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
        /* Initialize subtable's prefix length on this field. */
        subtable->trie_plen[trie_idx] = plen;

        if (plen) {
            struct cls_rule *head;

            /* Every rule, including duplicates hanging off each head,
             * must be present in the trie. */
            HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
                struct cls_rule *rule;

                FOR_EACH_RULE_IN_LIST (rule, head) {
                    trie_insert(trie, rule, plen);
                }
            }
        }
    }
}
475 | ||
b5d97350 | 476 | /* Returns true if 'cls' contains no classification rules, false otherwise. */ |
064af421 BP |
477 | bool |
478 | classifier_is_empty(const struct classifier *cls) | |
479 | { | |
480 | return cls->n_rules == 0; | |
481 | } | |
482 | ||
/* Returns the number of rules in 'cls'.
 * ('n_rules' is maintained incrementally by insert/remove, so this is O(1).) */
int
classifier_count(const struct classifier *cls)
{
    return cls->n_rules;
}
489 | ||
c906cedf BP |
490 | static uint32_t |
491 | hash_metadata(ovs_be64 metadata_) | |
492 | { | |
493 | uint64_t metadata = (OVS_FORCE uint64_t) metadata_; | |
965607c8 | 494 | return hash_uint64(metadata); |
c906cedf BP |
495 | } |
496 | ||
497 | static struct cls_partition * | |
498 | find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash) | |
499 | { | |
500 | struct cls_partition *partition; | |
501 | ||
502 | HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) { | |
503 | if (partition->metadata == metadata) { | |
504 | return partition; | |
505 | } | |
506 | } | |
507 | ||
508 | return NULL; | |
509 | } | |
510 | ||
511 | static struct cls_partition * | |
03868246 | 512 | create_partition(struct classifier *cls, struct cls_subtable *subtable, |
c906cedf BP |
513 | ovs_be64 metadata) |
514 | { | |
515 | uint32_t hash = hash_metadata(metadata); | |
516 | struct cls_partition *partition = find_partition(cls, metadata, hash); | |
517 | if (!partition) { | |
518 | partition = xmalloc(sizeof *partition); | |
519 | partition->metadata = metadata; | |
520 | partition->tags = 0; | |
183126a1 | 521 | tag_tracker_init(&partition->tracker); |
c906cedf BP |
522 | hmap_insert(&cls->partitions, &partition->hmap_node, hash); |
523 | } | |
03868246 | 524 | tag_tracker_add(&partition->tracker, &partition->tags, subtable->tag); |
c906cedf BP |
525 | return partition; |
526 | } | |
527 | ||
/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * If 'cls' already contains an identical rule (including wildcards, values of
 * fixed fields, and priority), replaces the old rule by 'rule' and returns the
 * rule that was replaced.  The caller takes ownership of the returned rule and
 * is thus responsible for destroying it with cls_rule_destroy(), freeing the
 * memory block in which it resides, etc., as necessary.
 *
 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 * inserting the new rule.  In this case, no rules are displaced by the new
 * rule, even rules that cannot have any effect because the new rule matches a
 * superset of their flows and has higher priority. */
struct cls_rule *
classifier_replace(struct classifier *cls, struct cls_rule *rule)
{
    struct cls_rule *old_rule;
    struct cls_subtable *subtable;

    /* Each distinct mask has its own subtable; create one on first use. */
    subtable = find_subtable(cls, &rule->match.mask);
    if (!subtable) {
        subtable = insert_subtable(cls, &rule->match.mask);
    }

    old_rule = insert_rule(cls, subtable, rule);
    if (!old_rule) {
        int i;

        /* Brand-new rule: attach it to the partition for its metadata value,
         * but only if the rule matches the entire metadata field exactly. */
        if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
            ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
            rule->partition = create_partition(cls, subtable, metadata);
        } else {
            rule->partition = NULL;
        }

        subtable->n_rules++;
        cls->n_rules++;

        /* Add the new rule to every trie this subtable participates in. */
        for (i = 0; i < cls->n_tries; i++) {
            if (subtable->trie_plen[i]) {
                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
            }
        }
    } else {
        /* Replacement: same match, so the old rule's partition carries over;
         * counts and tries need no update. */
        rule->partition = old_rule->partition;
    }
    return old_rule;
}
576 | ||
/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * 'cls' must not contain an identical rule (including wildcards, values of
 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 * such a rule. */
void
classifier_insert(struct classifier *cls, struct cls_rule *rule)
{
    struct cls_rule *displaced = classifier_replace(cls, rule);

    ovs_assert(!displaced);
}
589 | ||
/* Removes 'rule' from 'cls'.  It is the caller's responsibility to destroy
 * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
 * resides, etc., as necessary. */
void
classifier_remove(struct classifier *cls, struct cls_rule *rule)
{
    struct cls_partition *partition;
    struct cls_rule *head;
    struct cls_subtable *subtable;
    int i;

    /* 'rule' must be in 'cls', so its subtable must exist. */
    subtable = find_subtable(cls, &rule->match.mask);

    /* Remove 'rule' from every trie it was inserted into. */
    for (i = 0; i < cls->n_tries; i++) {
        if (subtable->trie_plen[i]) {
            trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
        }
    }

    /* Remove rule node from indices. */
    for (i = 0; i < subtable->n_indices; i++) {
        hindex_remove(&subtable->indices[i], &rule->index_nodes[i]);
    }

    /* Unlink 'rule'.  Only the list head lives in the subtable's hmap; if
     * 'rule' is the head, either remove the hmap node (list now empty) or
     * promote the next list element to be the head. */
    head = find_equal(subtable, &rule->match.flow, rule->hmap_node.hash);
    if (head != rule) {
        list_remove(&rule->list);
    } else if (list_is_empty(&rule->list)) {
        hmap_remove(&subtable->rules, &rule->hmap_node);
    } else {
        struct cls_rule *next = CONTAINER_OF(rule->list.next,
                                             struct cls_rule, list);

        list_remove(&rule->list);
        hmap_replace(&subtable->rules, &rule->hmap_node, &next->hmap_node);
    }

    /* Drop the rule's reference on its metadata partition, freeing the
     * partition when no tags remain. */
    partition = rule->partition;
    if (partition) {
        tag_tracker_subtract(&partition->tracker, &partition->tags,
                             subtable->tag);
        if (!partition->tags) {
            hmap_remove(&cls->partitions, &partition->hmap_node);
            free(partition);
        }
    }

    if (--subtable->n_rules == 0) {
        destroy_subtable(cls, subtable);
    } else {
        update_subtables_after_removal(cls, subtable, rule->priority);
    }

    cls->n_rules--;
}
645 | ||
/* Prefix tree context.  Valid when 'lookup_done' is true.  Can skip all
 * subtables which have more than 'match_plen' bits in their corresponding
 * field at offset 'be32ofs'.  If skipped, 'maskbits' prefix bits should be
 * unwildcarded to guarantee datapath flow matches only packets it should. */
struct trie_ctx {
    const struct cls_trie *trie;
    bool lookup_done;        /* Status of the lookup. */
    uint8_t be32ofs;         /* U32 offset of the field in question. */
    unsigned int match_plen; /* Longest prefix that could possibly match. */
    unsigned int maskbits;   /* Prefix length needed to avoid false matches. */
};
657 | ||
658 | static void | |
659 | trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) | |
660 | { | |
661 | ctx->trie = trie; | |
662 | ctx->be32ofs = trie->field->flow_be32ofs; | |
663 | ctx->lookup_done = false; | |
664 | } | |
665 | ||
/* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
 * Returns a null pointer if no rules in 'cls' match 'flow'.  If multiple rules
 * of equal priority match 'flow', returns one arbitrarily.
 *
 * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
 * set of bits that were significant in the lookup.  At some point
 * earlier, 'wc' should have been initialized (e.g., by
 * flow_wildcards_init_catchall()). */
struct cls_rule *
classifier_lookup(const struct classifier *cls, const struct flow *flow,
                  struct flow_wildcards *wc)
{
    const struct cls_partition *partition;
    struct cls_subtable *subtable;
    struct cls_rule *best;
    tag_type tags;
    struct trie_ctx trie_ctx[CLS_MAX_TRIES];
    int i;

    /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
     * then 'flow' cannot possibly match in 'subtable':
     *
     *     - If flow->metadata maps to a given 'partition', then we can use
     *       'tags' for 'partition->tags'.
     *
     *     - If flow->metadata has no partition, then no rule in 'cls' has an
     *       exact-match for flow->metadata.  That means that we don't need to
     *       search any subtable that includes flow->metadata in its mask.
     *
     * In either case, we always need to search any cls_subtables that do not
     * include flow->metadata in its mask.  One way to do that would be to
     * check the "cls_subtable"s explicitly for that, but that would require an
     * extra branch per subtable.  Instead, we mark such a cls_subtable's
     * 'tags' as TAG_ALL and make sure that 'tags' is never empty.  This means
     * that 'tags' always intersects such a cls_subtable's 'tags', so we don't
     * need a special case.
     */
    partition = (hmap_is_empty(&cls->partitions)
                 ? NULL
                 : find_partition(cls, flow->metadata,
                                  hash_metadata(flow->metadata)));
    tags = partition ? partition->tags : TAG_ARBITRARY;

    /* Initialize trie contexts for match_find_wc(). */
    for (i = 0; i < cls->n_tries; i++) {
        trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
    }

    /* Phase 1: scan subtables in descending max-priority order until the
     * first match is found. */
    best = NULL;
    LIST_FOR_EACH (subtable, list_node, &cls->subtables_priority) {
        struct cls_rule *rule;

        if (!tag_intersects(tags, subtable->tag)) {
            continue;
        }

        rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries, wc);
        if (rule) {
            best = rule;
            /* Phase 2: keep scanning only those remaining subtables whose
             * max_priority could still beat 'best'. */
            LIST_FOR_EACH_CONTINUE (subtable, list_node,
                                    &cls->subtables_priority) {
                if (subtable->max_priority <= best->priority) {
                    /* Subtables are in descending priority order,
                     * can not find anything better. */
                    return best;
                }
                if (!tag_intersects(tags, subtable->tag)) {
                    continue;
                }

                rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries,
                                     wc);
                if (rule && rule->priority > best->priority) {
                    best = rule;
                }
            }
            break;
        }
    }

    return best;
}
747 | ||
2abf78ff JR |
748 | /* Returns true if 'target' satisifies 'match', that is, if each bit for which |
749 | * 'match' specifies a particular value has the correct value in 'target'. */ | |
750 | static bool | |
751 | minimatch_matches_miniflow(const struct minimatch *match, | |
752 | const struct miniflow *target) | |
753 | { | |
754 | const uint32_t *flowp = (const uint32_t *)match->flow.values; | |
755 | const uint32_t *maskp = (const uint32_t *)match->mask.masks.values; | |
756 | uint32_t target_u32; | |
757 | ||
758 | MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, match->mask.masks.map) { | |
759 | if ((*flowp++ ^ target_u32) & *maskp++) { | |
760 | return false; | |
761 | } | |
762 | } | |
763 | ||
764 | return true; | |
765 | } | |
766 | ||
767 | static inline struct cls_rule * | |
768 | find_match_miniflow(const struct cls_subtable *subtable, | |
769 | const struct miniflow *flow, | |
770 | uint32_t hash) | |
771 | { | |
772 | struct cls_rule *rule; | |
773 | ||
774 | HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) { | |
775 | if (minimatch_matches_miniflow(&rule->match, flow)) { | |
776 | return rule; | |
777 | } | |
778 | } | |
779 | ||
780 | return NULL; | |
781 | } | |
782 | ||
/* Finds and returns the highest-priority rule in 'cls' that matches
 * 'miniflow'.  Returns a null pointer if no rules in 'cls' match 'flow'.
 * If multiple rules of equal priority match 'flow', returns one arbitrarily.
 *
 * This function is optimized for the userspace datapath, which only ever has
 * one priority value for its flows!
 */
struct cls_rule *classifier_lookup_miniflow_first(const struct classifier *cls,
                                                  const struct miniflow *flow)
{
    struct cls_subtable *subtable;

    /* Because the caller guarantees a single priority value, the first match
     * found in any subtable is as good as any other, so the search stops at
     * the first hit instead of scanning the remaining subtables. */
    LIST_FOR_EACH (subtable, list_node, &cls->subtables_priority) {
        struct cls_rule *rule;

        rule = find_match_miniflow(subtable, flow,
                                   miniflow_hash_in_minimask(flow,
                                                             &subtable->mask,
                                                             0));
        if (rule) {
            return rule;
        }
    }

    return NULL;
}
809 | ||
b5d97350 BP |
810 | /* Finds and returns a rule in 'cls' with exactly the same priority and |
811 | * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
c084ce1d | 812 | * contain an exact match. */ |
064af421 BP |
813 | struct cls_rule * |
814 | classifier_find_rule_exactly(const struct classifier *cls, | |
76ecc721 | 815 | const struct cls_rule *target) |
064af421 | 816 | { |
b5d97350 | 817 | struct cls_rule *head, *rule; |
03868246 | 818 | struct cls_subtable *subtable; |
064af421 | 819 | |
03868246 JR |
820 | subtable = find_subtable(cls, &target->match.mask); |
821 | if (!subtable) { | |
b5d97350 | 822 | return NULL; |
064af421 BP |
823 | } |
824 | ||
4d935a6b | 825 | /* Skip if there is no hope. */ |
03868246 | 826 | if (target->priority > subtable->max_priority) { |
4d935a6b JR |
827 | return NULL; |
828 | } | |
829 | ||
03868246 | 830 | head = find_equal(subtable, &target->match.flow, |
5cb7a798 BP |
831 | miniflow_hash_in_minimask(&target->match.flow, |
832 | &target->match.mask, 0)); | |
b5d97350 BP |
833 | FOR_EACH_RULE_IN_LIST (rule, head) { |
834 | if (target->priority >= rule->priority) { | |
835 | return target->priority == rule->priority ? rule : NULL; | |
064af421 BP |
836 | } |
837 | } | |
838 | return NULL; | |
839 | } | |
840 | ||
81a76618 BP |
841 | /* Finds and returns a rule in 'cls' with priority 'priority' and exactly the |
842 | * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
843 | * contain an exact match. */ | |
844 | struct cls_rule * | |
845 | classifier_find_match_exactly(const struct classifier *cls, | |
846 | const struct match *target, | |
847 | unsigned int priority) | |
848 | { | |
849 | struct cls_rule *retval; | |
850 | struct cls_rule cr; | |
851 | ||
852 | cls_rule_init(&cr, target, priority); | |
853 | retval = classifier_find_rule_exactly(cls, &cr); | |
48d28ac1 | 854 | cls_rule_destroy(&cr); |
81a76618 BP |
855 | |
856 | return retval; | |
857 | } | |
858 | ||
faa50f40 BP |
/* Checks if 'target' would overlap any other rule in 'cls'.  Two rules are
 * considered to overlap if both rules have the same priority and a packet
 * could match both. */
bool
classifier_rule_overlaps(const struct classifier *cls,
                         const struct cls_rule *target)
{
    struct cls_subtable *subtable;

    /* Iterate subtables in the descending max priority order. */
    LIST_FOR_EACH (subtable, list_node, &cls->subtables_priority) {
        uint32_t storage[FLOW_U32S];
        struct minimask mask;
        struct cls_rule *head;

        if (target->priority > subtable->max_priority) {
            break; /* Can skip this and the rest of the subtables. */
        }

        /* A packet can match both rules only if they agree on every field
         * both of them match on, so compare under the combined mask. */
        minimask_combine(&mask, &target->match.mask, &subtable->mask, storage);
        HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
            struct cls_rule *rule;

            FOR_EACH_RULE_IN_LIST (rule, head) {
                if (rule->priority < target->priority) {
                    break; /* Rules in descending priority order. */
                }
                if (rule->priority == target->priority
                    && miniflow_equal_in_minimask(&target->match.flow,
                                                  &rule->match.flow, &mask)) {
                    return true;
                }
            }
        }
    }

    return false;
}
6ceeaa92 BP |
897 | |
898 | /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more | |
899 | * specific than 'criteria'. That is, 'rule' matches 'criteria' and this | |
900 | * function returns true if, for every field: | |
901 | * | |
902 | * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the | |
903 | * field, or | |
904 | * | |
905 | * - 'criteria' wildcards the field, | |
906 | * | |
907 | * Conversely, 'rule' does not match 'criteria' and this function returns false | |
908 | * if, for at least one field: | |
909 | * | |
910 | * - 'criteria' and 'rule' specify different values for the field, or | |
911 | * | |
912 | * - 'criteria' specifies a value for the field but 'rule' wildcards it. | |
913 | * | |
914 | * Equivalently, the truth table for whether a field matches is: | |
915 | * | |
916 | * rule | |
917 | * | |
918 | * c wildcard exact | |
919 | * r +---------+---------+ | |
920 | * i wild | yes | yes | | |
921 | * t card | | | | |
922 | * e +---------+---------+ | |
923 | * r exact | no |if values| | |
924 | * i | |are equal| | |
925 | * a +---------+---------+ | |
926 | * | |
927 | * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD | |
928 | * commands and by OpenFlow 1.0 aggregate and flow stats. | |
929 | * | |
81a76618 | 930 | * Ignores rule->priority. */ |
6ceeaa92 BP |
931 | bool |
932 | cls_rule_is_loose_match(const struct cls_rule *rule, | |
5cb7a798 | 933 | const struct minimatch *criteria) |
6ceeaa92 | 934 | { |
5cb7a798 BP |
935 | return (!minimask_has_extra(&rule->match.mask, &criteria->mask) |
936 | && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow, | |
937 | &criteria->mask)); | |
6ceeaa92 | 938 | } |
b5d97350 | 939 | \f |
5ecc9d81 BP |
940 | /* Iteration. */ |
941 | ||
942 | static bool | |
943 | rule_matches(const struct cls_rule *rule, const struct cls_rule *target) | |
944 | { | |
945 | return (!target | |
5cb7a798 BP |
946 | || miniflow_equal_in_minimask(&rule->match.flow, |
947 | &target->match.flow, | |
948 | &target->match.mask)); | |
5ecc9d81 BP |
949 | } |
950 | ||
951 | static struct cls_rule * | |
03868246 JR |
952 | search_subtable(const struct cls_subtable *subtable, |
953 | const struct cls_rule *target) | |
5ecc9d81 | 954 | { |
03868246 | 955 | if (!target || !minimask_has_extra(&subtable->mask, &target->match.mask)) { |
5ecc9d81 BP |
956 | struct cls_rule *rule; |
957 | ||
03868246 | 958 | HMAP_FOR_EACH (rule, hmap_node, &subtable->rules) { |
5ecc9d81 BP |
959 | if (rule_matches(rule, target)) { |
960 | return rule; | |
961 | } | |
962 | } | |
963 | } | |
964 | return NULL; | |
965 | } | |
966 | ||
6ceeaa92 | 967 | /* Initializes 'cursor' for iterating through rules in 'cls': |
5ecc9d81 | 968 | * |
6ceeaa92 | 969 | * - If 'target' is null, the cursor will visit every rule in 'cls'. |
5ecc9d81 | 970 | * |
6ceeaa92 BP |
971 | * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls' |
972 | * such that cls_rule_is_loose_match(rule, target) returns true. | |
5ecc9d81 | 973 | * |
6ceeaa92 | 974 | * Ignores target->priority. */ |
5ecc9d81 BP |
975 | void |
976 | cls_cursor_init(struct cls_cursor *cursor, const struct classifier *cls, | |
977 | const struct cls_rule *target) | |
978 | { | |
979 | cursor->cls = cls; | |
3ca1de08 | 980 | cursor->target = target && !cls_rule_is_catchall(target) ? target : NULL; |
5ecc9d81 BP |
981 | } |
982 | ||
/* Returns the first matching cls_rule in 'cursor''s iteration, or a null
 * pointer if there are no matches. */
struct cls_rule *
cls_cursor_first(struct cls_cursor *cursor)
{
    struct cls_subtable *subtable;

    /* Visit subtables in hmap order; remember the subtable that produced the
     * first match so that cls_cursor_next() can resume from it. */
    HMAP_FOR_EACH (subtable, hmap_node, &cursor->cls->subtables) {
        struct cls_rule *rule = search_subtable(subtable, cursor->target);
        if (rule) {
            cursor->subtable = subtable;
            return rule;
        }
    }

    return NULL;
}
1000 | ||
/* Returns the next matching cls_rule in 'cursor''s iteration, or a null
 * pointer if there are no more matches. */
struct cls_rule *
cls_cursor_next(struct cls_cursor *cursor, const struct cls_rule *rule_)
{
    struct cls_rule *rule = CONST_CAST(struct cls_rule *, rule_);
    const struct cls_subtable *subtable;
    struct cls_rule *next;

    /* First try the next rule within the same priority list.  The list is
     * sorted in decreasing priority, so a strictly lower priority means a
     * genuine successor rather than the list wrapping back to its head. */
    next = next_rule_in_list__(rule);
    if (next->priority < rule->priority) {
        return next;
    }

    /* 'next' is the head of the list, that is, the rule that is included in
     * the subtable's hmap.  (This is important when the classifier contains
     * rules that differ only in priority.) */
    rule = next;
    HMAP_FOR_EACH_CONTINUE (rule, hmap_node, &cursor->subtable->rules) {
        if (rule_matches(rule, cursor->target)) {
            return rule;
        }
    }

    /* The current subtable is exhausted; continue with the rest. */
    subtable = cursor->subtable;
    HMAP_FOR_EACH_CONTINUE (subtable, hmap_node, &cursor->cls->subtables) {
        rule = search_subtable(subtable, cursor->target);
        if (rule) {
            cursor->subtable = subtable;
            return rule;
        }
    }

    return NULL;
}
1036 | \f | |
03868246 JR |
1037 | static struct cls_subtable * |
1038 | find_subtable(const struct classifier *cls, const struct minimask *mask) | |
b5d97350 | 1039 | { |
03868246 | 1040 | struct cls_subtable *subtable; |
064af421 | 1041 | |
03868246 JR |
1042 | HMAP_FOR_EACH_IN_BUCKET (subtable, hmap_node, minimask_hash(mask, 0), |
1043 | &cls->subtables) { | |
1044 | if (minimask_equal(mask, &subtable->mask)) { | |
1045 | return subtable; | |
064af421 BP |
1046 | } |
1047 | } | |
b5d97350 | 1048 | return NULL; |
064af421 | 1049 | } |
064af421 | 1050 | |
03868246 JR |
/* Creates a new subtable for 'mask' in 'cls', initializes its segment lookup
 * indices and trie prefix lengths, and returns it. */
static struct cls_subtable *
insert_subtable(struct classifier *cls, const struct minimask *mask)
{
    uint32_t hash = minimask_hash(mask, 0);
    struct cls_subtable *subtable;
    int i, index = 0;
    struct flow_wildcards old, new;
    uint8_t prev;

    subtable = xzalloc(sizeof *subtable);
    hmap_init(&subtable->rules);
    minimask_clone(&subtable->mask, mask);

    /* Init indices for segmented lookup, if any. */
    flow_wildcards_init_catchall(&new);
    old = new;
    prev = 0;
    for (i = 0; i < cls->n_flow_segments; i++) {
        /* Fold the mask bits of one more segment into 'new'. */
        flow_wildcards_fold_minimask_range(&new, mask, prev,
                                           cls->flow_segments[i]);
        /* Add an index if it adds mask bits. */
        if (!flow_wildcards_equal(&new, &old)) {
            hindex_init(&subtable->indices[index]);
            subtable->index_ofs[index] = cls->flow_segments[i];
            index++;
            old = new;
        }
        prev = cls->flow_segments[i];
    }
    /* Check if the rest of the subtable's mask adds any bits,
     * and remove the last index if it doesn't. */
    if (index > 0) {
        flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S);
        if (flow_wildcards_equal(&new, &old)) {
            /* The last index would cover the whole mask; it is redundant. */
            --index;
            subtable->index_ofs[index] = 0;
            hindex_destroy(&subtable->indices[index]);
        }
    }
    subtable->n_indices = index;

    hmap_insert(&cls->subtables, &subtable->hmap_node, hash);
    list_push_back(&cls->subtables_priority, &subtable->list_node);
    /* Use a deterministic tag only when the subtable matches on the whole
     * metadata field; otherwise it must intersect with every lookup tag. */
    subtable->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
                     ? tag_create_deterministic(hash)
                     : TAG_ALL);

    /* Record each trie field's prefix length under this subtable's mask. */
    for (i = 0; i < cls->n_tries; i++) {
        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
                                                         cls->tries[i].field);
    }

    return subtable;
}
1105 | ||
b5d97350 | 1106 | static void |
03868246 | 1107 | destroy_subtable(struct classifier *cls, struct cls_subtable *subtable) |
b5d97350 | 1108 | { |
476f36e8 JR |
1109 | int i; |
1110 | ||
1111 | for (i = 0; i < subtable->n_indices; i++) { | |
1112 | hindex_destroy(&subtable->indices[i]); | |
1113 | } | |
03868246 JR |
1114 | minimask_destroy(&subtable->mask); |
1115 | hmap_remove(&cls->subtables, &subtable->hmap_node); | |
1116 | hmap_destroy(&subtable->rules); | |
1117 | list_remove(&subtable->list_node); | |
1118 | free(subtable); | |
b5d97350 | 1119 | } |
064af421 | 1120 | |
03868246 JR |
/* This function performs the following updates for 'subtable' in 'cls'
 * following the addition of a new rule with priority 'new_priority' to
 * 'subtable':
 *
 *    - Update 'subtable->max_priority' and 'subtable->max_count' if necessary.
 *
 *    - Update 'subtable''s position in 'cls->subtables_priority' if necessary.
 *
 * This function should only be called after adding a new rule, not after
 * replacing a rule by an identical one or modifying a rule in-place. */
static void
update_subtables_after_insertion(struct classifier *cls,
                                 struct cls_subtable *subtable,
                                 unsigned int new_priority)
{
    if (new_priority == subtable->max_priority) {
        /* One more rule at the existing maximum priority. */
        ++subtable->max_count;
    } else if (new_priority > subtable->max_priority) {
        struct cls_subtable *iter;

        subtable->max_priority = new_priority;
        subtable->max_count = 1;

        /* Possibly move 'subtable' earlier in the priority list.  If we break
         * out of the loop, then 'subtable' should be moved just after that
         * 'iter'.  If the loop terminates normally, then 'iter' will be the
         * list head and we'll move subtable just after that (e.g. to the front
         * of the list). */
        iter = subtable;
        LIST_FOR_EACH_REVERSE_CONTINUE (iter, list_node,
                                        &cls->subtables_priority) {
            if (iter->max_priority >= subtable->max_priority) {
                break;
            }
        }

        /* Move 'subtable' just after 'iter' (unless it's already there). */
        if (iter->list_node.next != &subtable->list_node) {
            list_splice(iter->list_node.next,
                        &subtable->list_node, subtable->list_node.next);
        }
    }
}
1164 | ||
03868246 JR |
/* This function performs the following updates for 'subtable' in 'cls'
 * following the deletion of a rule with priority 'del_priority' from
 * 'subtable':
 *
 *    - Update 'subtable->max_priority' and 'subtable->max_count' if necessary.
 *
 *    - Update 'subtable''s position in 'cls->subtables_priority' if necessary.
 *
 * This function should only be called after removing a rule, not after
 * replacing a rule by an identical one or modifying a rule in-place. */
static void
update_subtables_after_removal(struct classifier *cls,
                               struct cls_subtable *subtable,
                               unsigned int del_priority)
{
    struct cls_subtable *iter;

    /* Nothing changes unless the last rule at the old maximum priority is
     * the one that went away. */
    if (del_priority == subtable->max_priority && --subtable->max_count == 0) {
        struct cls_rule *head;

        /* Recompute the maximum by scanning the head of each rule list;
         * each head carries its list's highest priority. */
        subtable->max_priority = 0;
        HMAP_FOR_EACH (head, hmap_node, &subtable->rules) {
            if (head->priority > subtable->max_priority) {
                subtable->max_priority = head->priority;
                subtable->max_count = 1;
            } else if (head->priority == subtable->max_priority) {
                ++subtable->max_count;
            }
        }

        /* Possibly move 'subtable' later in the priority list.  If we break
         * out of the loop, then 'subtable' should be moved just before that
         * 'iter'.  If the loop terminates normally, then 'iter' will be the
         * list head and we'll move subtable just before that (e.g. to the back
         * of the list). */
        iter = subtable;
        LIST_FOR_EACH_CONTINUE (iter, list_node, &cls->subtables_priority) {
            if (iter->max_priority <= subtable->max_priority) {
                break;
            }
        }

        /* Move 'subtable' just before 'iter' (unless it's already there). */
        if (iter->list_node.prev != &subtable->list_node) {
            list_splice(&iter->list_node,
                        &subtable->list_node, subtable->list_node.next);
        }
    }
}
1214 | ||
13751fd8 JR |
/* A half-open range [start, end) of u32 offsets within a struct flow, used
 * to delimit the fields covered by one stage of a segmented lookup. */
struct range {
    uint8_t start;
    uint8_t end;
};
1219 | ||
/* Return 'true' if can skip rest of the subtable based on the prefix trie
 * lookup results.
 *
 * 'trie_ctx' holds per-trie on-demand lookup state for the current flow,
 * 'field_plen' gives each trie field's prefix length in the current
 * subtable's mask, and 'ofs' is the u32 range of flow fields covered by the
 * current lookup stage.  Bits required to justify skipping are folded into
 * 'wc'. */
static inline bool
check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
            const unsigned int field_plen[CLS_MAX_TRIES],
            const struct range ofs, const struct flow *flow,
            struct flow_wildcards *wc)
{
    int j;

    /* Check if we could avoid fully unwildcarding the next level of
     * fields using the prefix tries.  The trie checks are done only as
     * needed to avoid folding in additional bits to the wildcards mask. */
    for (j = 0; j < n_tries; j++) {
        /* Is the trie field relevant for this subtable? */
        if (field_plen[j]) {
            struct trie_ctx *ctx = &trie_ctx[j];
            uint8_t be32ofs = ctx->be32ofs;

            /* Is the trie field within the current range of fields? */
            if (be32ofs >= ofs.start && be32ofs < ofs.end) {
                /* On-demand trie lookup. */
                if (!ctx->lookup_done) {
                    ctx->match_plen = trie_lookup(ctx->trie, flow,
                                                  &ctx->maskbits);
                    ctx->lookup_done = true;
                }
                /* Possible to skip the rest of the subtable if subtable's
                 * prefix on the field is longer than what is known to match
                 * based on the trie lookup. */
                if (field_plen[j] > ctx->match_plen) {
                    /* RFC: We want the trie lookup to never result in
                     * unwildcarding any bits that would not be unwildcarded
                     * otherwise.  Since the trie is shared by the whole
                     * classifier, it is possible that the 'maskbits' contain
                     * bits that are irrelevant for the partition of the
                     * classifier relevant for the current flow. */

                    /* Can skip if the field is already unwildcarded. */
                    if (mask_prefix_bits_set(wc, be32ofs, ctx->maskbits)) {
                        return true;
                    }
                    /* Check that the trie result will not unwildcard more bits
                     * than this stage will. */
                    if (ctx->maskbits <= field_plen[j]) {
                        /* Unwildcard the bits and skip the rest. */
                        mask_set_prefix_bits(wc, be32ofs, ctx->maskbits);
                        /* Note: Prerequisite already unwildcarded, as the only
                         * prerequisite of the supported trie lookup fields is
                         * the ethertype, which is currently always
                         * unwildcarded.
                         */
                        return true;
                    }
                }
            }
        }
    }
    return false;
}
1280 | ||
476f36e8 JR |
1281 | static inline struct cls_rule * |
1282 | find_match(const struct cls_subtable *subtable, const struct flow *flow, | |
1283 | uint32_t hash) | |
b5d97350 BP |
1284 | { |
1285 | struct cls_rule *rule; | |
b5d97350 | 1286 | |
03868246 | 1287 | HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) { |
df40c152 | 1288 | if (minimatch_matches_flow(&rule->match, flow)) { |
b5d97350 | 1289 | return rule; |
064af421 BP |
1290 | } |
1291 | } | |
c23740be | 1292 | |
064af421 BP |
1293 | return NULL; |
1294 | } | |
1295 | ||
476f36e8 JR |
/* Like find_match(), but when 'wc' is nonnull also accumulates into 'wc' the
 * bits of 'subtable''s mask that had to be examined, and consults the prefix
 * trie lookup state in 'trie_ctx' to bail out of the subtable early when
 * possible.  Returns the matching rule, or NULL if there is none. */
static struct cls_rule *
find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
              struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
              struct flow_wildcards *wc)
{
    uint32_t basis = 0, hash;
    struct cls_rule *rule = NULL;
    int i;
    struct range ofs;

    if (!wc) {
        /* No wildcards to fold: a plain hash lookup suffices. */
        return find_match(subtable, flow,
                          flow_hash_in_minimask(flow, &subtable->mask, 0));
    }

    ofs.start = 0;
    /* Try to finish early by checking fields in segments. */
    for (i = 0; i < subtable->n_indices; i++) {
        struct hindex_node *inode;
        ofs.end = subtable->index_ofs[i];

        if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow,
                        wc)) {
            goto range_out;
        }
        /* Incremental hash over just this segment's fields; 'basis' carries
         * the hash state from segment to segment. */
        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                           ofs.end, &basis);
        ofs.start = ofs.end;
        inode = hindex_node_with_hash(&subtable->indices[i], hash);
        if (!inode) {
            /* No match, can stop immediately, but must fold in the mask
             * covered so far. */
            goto range_out;
        }

        /* If we have narrowed down to a single rule already, check whether
         * that rule matches.  If it does match, then we're done.  If it does
         * not match, then we know that we will never get a match, but we do
         * not yet know how many wildcards we need to fold into 'wc' so we
         * continue iterating through indices to find that out.  (We won't
         * waste time calling minimatch_matches_flow() again because we've set
         * 'rule' nonnull.)
         *
         * This check shows a measurable benefit with non-trivial flow tables.
         *
         * (Rare) hash collisions may cause us to miss the opportunity for this
         * optimization. */
        if (!inode->s && !rule) {
            ASSIGN_CONTAINER(rule, inode - i, index_nodes);
            if (minimatch_matches_flow(&rule->match, flow)) {
                goto out;
            }
        }
    }
    ofs.end = FLOW_U32S;
    /* Trie check for the final range. */
    if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
        goto range_out;
    }
    if (!rule) {
        /* Multiple potential matches exist, look for one. */
        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                           ofs.end, &basis);
        rule = find_match(subtable, flow, hash);
    } else {
        /* We already narrowed the matching candidates down to just 'rule',
         * but it didn't match. */
        rule = NULL;
    }
out:
    /* Must unwildcard all the fields, as they were looked at. */
    flow_wildcards_fold_minimask(wc, &subtable->mask);
    return rule;

range_out:
    /* Must unwildcard the fields looked up so far, if any. */
    if (ofs.start) {
        flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, ofs.start);
    }
    return NULL;
}
1377 | ||
064af421 | 1378 | static struct cls_rule * |
03868246 JR |
1379 | find_equal(struct cls_subtable *subtable, const struct miniflow *flow, |
1380 | uint32_t hash) | |
064af421 | 1381 | { |
b5d97350 | 1382 | struct cls_rule *head; |
064af421 | 1383 | |
03868246 | 1384 | HMAP_FOR_EACH_WITH_HASH (head, hmap_node, hash, &subtable->rules) { |
5cb7a798 | 1385 | if (miniflow_equal(&head->match.flow, flow)) { |
b5d97350 | 1386 | return head; |
064af421 BP |
1387 | } |
1388 | } | |
1389 | return NULL; | |
1390 | } | |
1391 | ||
/* Inserts 'new' into 'subtable', adding it to the segment indices and to the
 * priority-ordered list of rules with an identical flow.  Returns the rule
 * that 'new' replaced (one with the same flow and priority), or NULL if no
 * rule was replaced. */
static struct cls_rule *
insert_rule(struct classifier *cls, struct cls_subtable *subtable,
            struct cls_rule *new)
{
    struct cls_rule *head;
    struct cls_rule *old = NULL;
    int i;
    uint32_t basis = 0, hash;
    uint8_t prev_be32ofs = 0;

    /* Add new node to segment indices.  The per-segment hashes are computed
     * incrementally; 'basis' carries the state between segments. */
    for (i = 0; i < subtable->n_indices; i++) {
        hash = minimatch_hash_range(&new->match, prev_be32ofs,
                                    subtable->index_ofs[i], &basis);
        hindex_insert(&subtable->indices[i], &new->index_nodes[i], hash);
        prev_be32ofs = subtable->index_ofs[i];
    }
    hash = minimatch_hash_range(&new->match, prev_be32ofs, FLOW_U32S, &basis);
    head = find_equal(subtable, &new->match.flow, hash);
    if (!head) {
        /* First rule with this flow: it becomes the hmap-resident head of a
         * new singleton list. */
        hmap_insert(&subtable->rules, &new->hmap_node, hash);
        list_init(&new->list);
        goto out;
    } else {
        /* Scan the list for the insertion point that will keep the list in
         * order of decreasing priority. */
        struct cls_rule *rule;

        new->hmap_node.hash = hash; /* Otherwise done by hmap_insert. */

        FOR_EACH_RULE_IN_LIST (rule, head) {
            if (new->priority >= rule->priority) {
                if (rule == head) {
                    /* 'new' is the new highest-priority flow in the list. */
                    hmap_replace(&subtable->rules,
                                 &rule->hmap_node, &new->hmap_node);
                }

                if (new->priority == rule->priority) {
                    /* Same flow and priority: 'new' replaces 'rule'. */
                    list_replace(&new->list, &rule->list);
                    old = rule;
                    goto out;
                } else {
                    list_insert(&rule->list, &new->list);
                    goto out;
                }
            }
        }

        /* Insert 'new' at the end of the list. */
        list_push_back(&head->list, &new->list);
    }

out:
    if (!old) {
        /* A genuinely new rule may change the subtable's max priority. */
        update_subtables_after_insertion(cls, subtable, new->priority);
    } else {
        /* Remove old node from indices. */
        for (i = 0; i < subtable->n_indices; i++) {
            hindex_remove(&subtable->indices[i], &old->index_nodes[i]);
        }
    }
    return old;
}
1456 | ||
/* Returns the rule following 'rule' in its priority-ordered list.  The list
 * is circular, so after the lowest-priority element this returns the list
 * head (the hmap-resident, highest-priority rule). */
static struct cls_rule *
next_rule_in_list__(struct cls_rule *rule)
{
    struct cls_rule *next = OBJECT_CONTAINING(rule->list.next, next, list);
    return next;
}
1463 | ||
1464 | static struct cls_rule * | |
1465 | next_rule_in_list(struct cls_rule *rule) | |
1466 | { | |
1467 | struct cls_rule *next = next_rule_in_list__(rule); | |
b5d97350 | 1468 | return next->priority < rule->priority ? next : NULL; |
064af421 | 1469 | } |
13751fd8 JR |
1470 | \f |
/* A longest-prefix match tree. */
struct trie_node {
    uint32_t prefix;           /* Prefix bits for this node, MSB first. */
    uint8_t  nbits;            /* Never zero, except for the root node. */
    unsigned int n_rules;      /* Number of rules that have this prefix. */
    struct trie_node *edges[2]; /* Both NULL if leaf; edges[b] follows the
                                 * branch whose next prefix bit is 'b'. */
};

/* Max bits per node.  Must fit in struct trie_node's 'prefix'.
 * Also tested with 16, 8, and 5 to stress the implementation. */
#define TRIE_PREFIX_BITS 32
1482 | ||
/* Returns at least 'plen' bits of the prefix 'pr', starting at bit offset
 * 'ofs'.  Prefixes are in network byte order; offset 0 corresponds to the
 * most significant bit of the first byte.  Bits beyond 'plen' at the end of
 * the result may be arbitrary — callers mask them off as needed. */
static uint32_t
raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
{
    uint32_t bits;

    pr += ofs / 32;             /* Advance to the word containing bit 'ofs'. */
    ofs %= 32;                  /* Bit offset within that word. */

    bits = ntohl(*pr) << ofs;   /* First 32 - ofs bits. */
    if (plen > 32 - ofs) {      /* Request straddles a word boundary? */
        bits |= ntohl(*++pr) >> (32 - ofs);
    }
    /* Return with possible unwanted bits at the end. */
    return bits;
}
1502 | ||
/* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit
 * offset 'ofs'.  Prefixes are in the network byte order, and the offset 0
 * corresponds to the most significant bit of the first byte.  The offset can
 * be read as "how many bits to skip from the start of the prefix starting at
 * 'pr'". */
static uint32_t
trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
{
    if (!plen) {
        return 0;
    }
    if (plen > TRIE_PREFIX_BITS) {
        plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */
    }
    /* Return with unwanted bits cleared.  ('plen' >= 1 here, so the shift
     * count 32 - plen is at most 31 and well defined.) */
    return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen);
}
1520 | ||
/* Return the number of equal bits in 'nbits' of 'prefix's MSBs and a 'value'
 * starting at "MSB 0"-based offset 'ofs'. */
static unsigned int
prefix_equal_bits(uint32_t prefix, unsigned int nbits, const ovs_be32 value[],
                  unsigned int ofs)
{
    uint64_t diff = prefix ^ raw_get_prefix(value, ofs, nbits);
    /* Set the bit after the relevant bits to limit the result.  With the
     * 32-bit XOR result in the high half, the count of leading zeros is
     * exactly the number of equal leading bits, and the sentinel bit at
     * position 63 - nbits caps the count at 'nbits' when all bits agree. */
    return raw_clz64(diff << 32 | UINT64_C(1) << (63 - nbits));
}
1531 | ||
1532 | /* Return the number of equal bits in 'node' prefix and a 'prefix' of length | |
1533 | * 'plen', starting at "MSB 0"-based offset 'ofs'. */ | |
1534 | static unsigned int | |
1535 | trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[], | |
1536 | unsigned int ofs, unsigned int plen) | |
1537 | { | |
1538 | return prefix_equal_bits(node->prefix, MIN(node->nbits, plen - ofs), | |
1539 | prefix, ofs); | |
1540 | } | |
1541 | ||
1542 | /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can | |
1543 | * be greater than 31. */ | |
1544 | static unsigned int | |
1545 | be_get_bit_at(const ovs_be32 value[], unsigned int ofs) | |
1546 | { | |
1547 | return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u; | |
1548 | } | |
1549 | ||
/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int.  'ofs' must
 * be between 0 and 31, inclusive. */
static unsigned int
get_bit_at(const uint32_t prefix, unsigned int ofs)
{
    /* Bit 0 is the most significant bit of 'prefix'. */
    return 1u & (prefix >> (31 - ofs));
}
1557 | ||
/* Create new branch covering the bits of 'prefix' in the range
 * [ofs, ofs + plen).  When the range does not fit in a single node
 * (TRIE_PREFIX_BITS bits), a chain of nodes is built recursively; only the
 * last node in the chain carries 'n_rules'. */
static struct trie_node *
trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen,
                   unsigned int n_rules)
{
    struct trie_node *node = xmalloc(sizeof *node);

    node->prefix = trie_get_prefix(prefix, ofs, plen);

    if (plen <= TRIE_PREFIX_BITS) {
        /* The whole remaining prefix fits into this node: make a leaf. */
        node->nbits = plen;
        node->edges[0] = NULL;
        node->edges[1] = NULL;
        node->n_rules = n_rules;
    } else { /* Need intermediate nodes. */
        struct trie_node *subnode = trie_branch_create(prefix,
                                                       ofs + TRIE_PREFIX_BITS,
                                                       plen - TRIE_PREFIX_BITS,
                                                       n_rules);
        /* The subnode's first prefix bit selects which edge it hangs from. */
        int bit = get_bit_at(subnode->prefix, 0);
        node->nbits = TRIE_PREFIX_BITS;
        node->edges[bit] = subnode;
        node->edges[!bit] = NULL;
        node->n_rules = 0;
    }
    return node;
}
1585 | ||
/* Frees a single trie node.  Child nodes, if any, are not touched; use
 * trie_destroy() to free a whole subtree. */
static void
trie_node_destroy(struct trie_node *node)
{
    free(node);
}
1591 | ||
1592 | static void | |
1593 | trie_destroy(struct trie_node *node) | |
1594 | { | |
1595 | if (node) { | |
1596 | trie_destroy(node->edges[0]); | |
1597 | trie_destroy(node->edges[1]); | |
1598 | free(node); | |
1599 | } | |
1600 | } | |
1601 | ||
1602 | static bool | |
1603 | trie_is_leaf(const struct trie_node *trie) | |
1604 | { | |
1605 | return !trie->edges[0] && !trie->edges[1]; /* No children. */ | |
1606 | } | |
1607 | ||
1608 | static void | |
1609 | mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs, | |
1610 | unsigned int nbits) | |
1611 | { | |
1612 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; | |
1613 | unsigned int i; | |
1614 | ||
1615 | for (i = 0; i < nbits / 32; i++) { | |
1616 | mask[i] = OVS_BE32_MAX; | |
1617 | } | |
1618 | if (nbits % 32) { | |
1619 | mask[i] |= htonl(~0u << (32 - nbits % 32)); | |
1620 | } | |
1621 | } | |
1622 | ||
1623 | static bool | |
1624 | mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs, | |
1625 | unsigned int nbits) | |
1626 | { | |
1627 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; | |
1628 | unsigned int i; | |
1629 | ovs_be32 zeroes = 0; | |
1630 | ||
1631 | for (i = 0; i < nbits / 32; i++) { | |
1632 | zeroes |= ~mask[i]; | |
1633 | } | |
1634 | if (nbits % 32) { | |
1635 | zeroes |= ~mask[i] & htonl(~0u << (32 - nbits % 32)); | |
1636 | } | |
1637 | ||
1638 | return !zeroes; /* All 'nbits' bits set. */ | |
1639 | } | |
1640 | ||
1641 | static struct trie_node ** | |
1642 | trie_next_edge(struct trie_node *node, const ovs_be32 value[], | |
1643 | unsigned int ofs) | |
1644 | { | |
1645 | return node->edges + be_get_bit_at(value, ofs); | |
1646 | } | |
1647 | ||
1648 | static const struct trie_node * | |
1649 | trie_next_node(const struct trie_node *node, const ovs_be32 value[], | |
1650 | unsigned int ofs) | |
1651 | { | |
1652 | return node->edges[be_get_bit_at(value, ofs)]; | |
1653 | } | |
1654 | ||
/* Return the prefix mask length necessary to find the longest-prefix match for
 * the '*value' in the prefix tree 'node'.
 * '*checkbits' is set to the number of bits in the prefix mask necessary to
 * determine a mismatch, in case there are longer prefixes in the tree below
 * the one that matched.
 */
static unsigned int
trie_lookup_value(const struct trie_node *node, const ovs_be32 value[],
                  unsigned int *checkbits)
{
    unsigned int plen = 0, match_len = 0; /* Bits matched so far / at rules. */
    const struct trie_node *prev = NULL;

    for (; node; prev = node, node = trie_next_node(node, value, plen)) {
        unsigned int eqbits;
        /* Check if this edge can be followed. */
        eqbits = prefix_equal_bits(node->prefix, node->nbits, value, plen);
        plen += eqbits;
        if (eqbits < node->nbits) { /* Mismatch, nothing more to be found. */
            /* Bit at offset 'plen' differed. */
            *checkbits = plen + 1; /* Includes the first mismatching bit. */
            return match_len;
        }
        /* Full match, check if rules exist at this prefix length. */
        if (node->n_rules > 0) {
            match_len = plen;
        }
    }
    /* Dead end, exclude the other branch if it exists.  A leaf has no other
     * branch, so no extra bit is needed in that case. */
    *checkbits = !prev || trie_is_leaf(prev) ? plen : plen + 1;
    return match_len;
}
1687 | ||
1688 | static unsigned int | |
1689 | trie_lookup(const struct cls_trie *trie, const struct flow *flow, | |
1690 | unsigned int *checkbits) | |
1691 | { | |
1692 | const struct mf_field *mf = trie->field; | |
1693 | ||
1694 | /* Check that current flow matches the prerequisites for the trie | |
1695 | * field. Some match fields are used for multiple purposes, so we | |
1696 | * must check that the trie is relevant for this flow. */ | |
1697 | if (mf_are_prereqs_ok(mf, flow)) { | |
1698 | return trie_lookup_value(trie->root, | |
1699 | &((ovs_be32 *)flow)[mf->flow_be32ofs], | |
1700 | checkbits); | |
1701 | } | |
1702 | *checkbits = 0; /* Value not used in this case. */ | |
1703 | return UINT_MAX; | |
1704 | } | |
1705 | ||
/* Returns the length of a prefix match mask for the field 'mf' in 'minimask',
 * or 0 if the mask for 'mf' is not a valid, contiguous CIDR prefix mask. */
static unsigned int
minimask_get_prefix_len(const struct minimask *minimask,
                        const struct mf_field *mf)
{
    unsigned int nbits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
    uint8_t u32_ofs = mf->flow_be32ofs;
    uint8_t u32_end = u32_ofs + mf->n_bytes / 4;

    for (; u32_ofs < u32_end; ++u32_ofs) {
        uint32_t mask;
        mask = ntohl((OVS_FORCE ovs_be32)minimask_get(minimask, u32_ofs));

        /* Validate mask, count the mask length. */
        if (mask_tz) {
            /* A previous word ended the mask; no further bits may be set. */
            if (mask) {
                return 0; /* No bits allowed after mask ended. */
            }
        } else {
            /* '~mask & (~mask + 1)' is non-zero iff the zero bits of 'mask'
             * are not one contiguous run at the low end. */
            if (~mask & (~mask + 1)) {
                return 0; /* Mask not contiguous. */
            }
            /* NOTE(review): an all-zero word appears to rely on ctz32(0)
             * returning 32 so that it contributes no bits — confirm ctz32's
             * behavior for a zero argument. */
            mask_tz = ctz32(mask);
            nbits += 32 - mask_tz;
        }
    }

    return nbits;
}
1737 | ||
/*
 * Returns a pointer to the trie field's value within 'match''s compressed
 * flow data.
 *
 * This is called only when mask prefix is known to be CIDR and non-zero.
 * Relies on the fact that the flow and mask have the same map, and since
 * the mask is CIDR, the storage for the flow field exists even if it
 * happened to be zeros.
 */
static const ovs_be32 *
minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
{
    /* Count the map bits set below the field's offset to locate the field
     * within the compressed value array. */
    return match->flow.values +
        count_1bits(match->flow.map & ((UINT64_C(1) << mf->flow_be32ofs) - 1));
}
1750 | ||
/* Insert rule in to the prefix tree.
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
    struct trie_node *node;
    struct trie_node **edge;
    int ofs = 0; /* Number of prefix bits matched so far. */

    /* Walk the tree. */
    for (edge = &trie->root;
         (node = *edge) != NULL;
         edge = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
        ofs += eqbits;
        if (eqbits < node->nbits) {
            /* Mismatch, new node needs to be inserted above. */
            int old_branch = get_bit_at(node->prefix, eqbits);

            /* New parent node, holding the shared prefix bits.  It carries
             * the rule only if the whole prefix ends here. */
            *edge = trie_branch_create(prefix, ofs - eqbits, eqbits,
                                       ofs == mlen ? 1 : 0);

            /* Adjust old node for its new position in the tree: drop the
             * bits now held by the new parent. */
            node->prefix <<= eqbits;
            node->nbits -= eqbits;
            (*edge)->edges[old_branch] = node;

            /* Check if need a new branch for the new rule. */
            if (ofs < mlen) {
                (*edge)->edges[!old_branch]
                    = trie_branch_create(prefix, ofs, mlen - ofs, 1);
            }
            return;
        }
        /* Full match so far. */

        if (ofs == mlen) {
            /* Full match at the current node, rule needs to be added here. */
            node->n_rules++;
            return;
        }
    }
    /* Must insert a new tree branch for the new rule. */
    *edge = trie_branch_create(prefix, ofs, mlen - ofs, 1);
}
1799 | ||
/* Removes rule from the prefix tree, pruning now-unneeded nodes.
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, trie->field);
    struct trie_node *node;
    /* Stack of edge pointers along the path, so parents can be re-linked
     * while pruning.  One entry per node; a node covers at least one bit,
     * so the prefix bit count bounds the depth. */
    struct trie_node **edges[sizeof(union mf_value) * 8];
    int depth = 0, ofs = 0;

    /* Walk the tree. */
    for (edges[depth] = &trie->root;
         (node = *edges[depth]) != NULL;
         edges[++depth] = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
        if (eqbits < node->nbits) {
            /* Mismatch, nothing to be removed.  This should never happen, as
             * only rules in the classifier are ever removed. */
            break; /* Log a warning. */
        }
        /* Full match so far. */
        ofs += eqbits;

        if (ofs == mlen) {
            /* Full prefix match at the current node, remove rule here. */
            if (!node->n_rules) {
                break; /* Log a warning. */
            }
            node->n_rules--;

            /* Check if can prune the tree. */
            while (!node->n_rules && !(node->edges[0] && node->edges[1])) {
                /* No rules and at most one child node, remove this node. */
                struct trie_node *next;
                next = node->edges[0] ? node->edges[0] : node->edges[1];

                if (next) {
                    if (node->nbits + next->nbits > TRIE_PREFIX_BITS) {
                        break; /* Cannot combine. */
                    }
                    /* Combine node with next: fold this node's prefix bits
                     * in front of the child's. */
                    next->prefix = node->prefix | next->prefix >> node->nbits;
                    next->nbits += node->nbits;
                }
                trie_node_destroy(node);
                /* Update the parent's edge. */
                *edges[depth] = next;
                if (next || !depth) {
                    /* Branch not pruned or at root, nothing more to do. */
                    break;
                }
                node = *edges[--depth];
            }
            return;
        }
    }
    /* Cannot go deeper.  This should never happen, since only rules
     * that actually exist in the classifier are ever removed. */
    VLOG_WARN("Trying to remove non-existing rule from a prefix trie.");
}