]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
4aacd02d | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "classifier.h" | |
064af421 BP |
19 | #include <errno.h> |
20 | #include <netinet/in.h> | |
844dff32 | 21 | #include "byte-order.h" |
68d1c8c3 | 22 | #include "dynamic-string.h" |
064af421 BP |
23 | #include "flow.h" |
24 | #include "hash.h" | |
f2c21402 | 25 | #include "cmap.h" |
52054c15 | 26 | #include "list.h" |
07b37e8f | 27 | #include "odp-util.h" |
d8ae4d67 | 28 | #include "ofp-util.h" |
0b4f2078 | 29 | #include "ovs-thread.h" |
13751fd8 | 30 | #include "packets.h" |
fe7cfa5c | 31 | #include "pvector.h" |
52054c15 JR |
32 | #include "tag.h" |
33 | #include "util.h" | |
13751fd8 JR |
34 | #include "vlog.h" |
35 | ||
36 | VLOG_DEFINE_THIS_MODULE(classifier); | |
064af421 | 37 | |
cabd4c43 | 38 | struct trie_node; |
69d6040e JR |
39 | struct trie_ctx; |
40 | ||
41 | /* Ports trie depends on both ports sharing the same ovs_be32. */ | |
42 | #define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4) | |
43 | BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4); | |
cabd4c43 JR |
44 | |
45 | /* Prefix trie for a 'field' */ | |
46 | struct cls_trie { | |
47 | const struct mf_field *field; /* Trie field, or NULL. */ | |
48 | struct trie_node *root; /* NULL if none. */ | |
49 | }; | |
50 | ||
627fb667 JR |
51 | enum { |
52 | CLS_MAX_INDICES = 3 /* Maximum number of lookup indices per subtable. */ | |
53 | }; | |
54 | ||
cabd4c43 | 55 | struct cls_classifier { |
5a87054c | 56 | int n_rules; /* Total number of rules. */ |
cabd4c43 JR |
57 | uint8_t n_flow_segments; |
58 | uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use | |
59 | * for staged lookup. */ | |
f2c21402 | 60 | struct cmap subtables_map; /* Contains "struct cls_subtable"s. */ |
fe7cfa5c | 61 | struct pvector subtables; |
f2c21402 | 62 | struct cmap partitions; /* Contains "struct cls_partition"s. */ |
cabd4c43 JR |
63 | struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */ |
64 | unsigned int n_tries; | |
65 | }; | |
66 | ||
67 | /* A set of rules that all have the same fields wildcarded. */ | |
68 | struct cls_subtable { | |
f2c21402 JR |
69 | struct cmap_node cmap_node; /* Within struct cls_classifier |
70 | * 'subtables_map'. */ | |
71 | struct cmap rules; /* Contains "struct cls_rule"s. */ | |
cabd4c43 JR |
72 | int n_rules; /* Number of rules, including duplicates. */ |
73 | unsigned int max_priority; /* Max priority of any rule in the subtable. */ | |
74 | unsigned int max_count; /* Count of max_priority rules. */ | |
75 | tag_type tag; /* Tag generated from mask for partitioning. */ | |
76 | uint8_t n_indices; /* How many indices to use. */ | |
77 | uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */ | |
f2c21402 | 78 | struct cmap indices[CLS_MAX_INDICES]; /* Staged lookup indices. */ |
cabd4c43 | 79 | unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */ |
69d6040e JR |
80 | int ports_mask_len; |
81 | struct trie_node *ports_trie; /* NULL if none. */ | |
3016f3e4 JR |
82 | struct minimask mask; /* Wildcards for fields. */ |
83 | /* 'mask' must be the last field. */ | |
cabd4c43 JR |
84 | }; |
85 | ||
86 | /* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata | |
87 | * field) with tags for the "cls_subtable"s that contain rules that match that | |
88 | * metadata value. */ | |
89 | struct cls_partition { | |
f2c21402 JR |
90 | struct cmap_node cmap_node; /* In struct cls_classifier's 'partitions' |
91 | * map. */ | |
cabd4c43 JR |
92 | ovs_be64 metadata; /* metadata value for this partition. */ |
93 | tag_type tags; /* OR of each flow's cls_subtable tag. */ | |
94 | struct tag_tracker tracker; /* Tracks the bits in 'tags'. */ | |
95 | }; | |
96 | ||
627fb667 JR |
97 | /* Internal representation of a rule in a "struct cls_subtable". */ |
98 | struct cls_match { | |
99 | struct cls_rule *cls_rule; | |
f2c21402 JR |
100 | struct cmap_node index_nodes[CLS_MAX_INDICES]; /* Within subtable's |
101 | * 'indices'. */ | |
102 | struct cmap_node cmap_node; /* Within struct cls_subtable 'rules'. */ | |
627fb667 JR |
103 | unsigned int priority; /* Larger numbers are higher priorities. */ |
104 | struct cls_partition *partition; | |
105 | struct list list; /* List of identical, lower-priority rules. */ | |
3016f3e4 JR |
106 | struct miniflow flow; /* Matching rule. Mask is in the subtable. */ |
107 | /* 'flow' must be the last field. */ | |
627fb667 | 108 | }; |
cabd4c43 | 109 | |
627fb667 JR |
110 | static struct cls_match * |
111 | cls_match_alloc(struct cls_rule *rule) | |
112 | { | |
3016f3e4 JR |
113 | int count = count_1bits(rule->match.flow.map); |
114 | ||
115 | struct cls_match *cls_match | |
116 | = xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values | |
117 | + MINIFLOW_VALUES_SIZE(count)); | |
627fb667 JR |
118 | |
119 | cls_match->cls_rule = rule; | |
3016f3e4 | 120 | miniflow_clone_inline(&cls_match->flow, &rule->match.flow, count); |
627fb667 JR |
121 | cls_match->priority = rule->priority; |
122 | rule->cls_match = cls_match; | |
123 | ||
124 | return cls_match; | |
125 | } | |
cabd4c43 | 126 | |
cabd4c43 | 127 | static struct cls_subtable *find_subtable(const struct cls_classifier *, |
03868246 | 128 | const struct minimask *); |
cabd4c43 | 129 | static struct cls_subtable *insert_subtable(struct cls_classifier *, |
03868246 | 130 | const struct minimask *); |
b5d97350 | 131 | |
cabd4c43 | 132 | static void destroy_subtable(struct cls_classifier *, struct cls_subtable *); |
b5d97350 | 133 | |
627fb667 JR |
134 | static struct cls_match *find_match_wc(const struct cls_subtable *, |
135 | const struct flow *, struct trie_ctx *, | |
136 | unsigned int n_tries, | |
137 | struct flow_wildcards *); | |
138 | static struct cls_match *find_equal(struct cls_subtable *, | |
139 | const struct miniflow *, uint32_t hash); | |
140 | static struct cls_match *insert_rule(struct cls_classifier *, | |
141 | struct cls_subtable *, struct cls_rule *); | |
b5d97350 | 142 | |
b5d97350 BP |
143 | /* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */ |
144 | #define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \ | |
145 | for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE)) | |
146 | #define FOR_EACH_RULE_IN_LIST_SAFE(RULE, NEXT, HEAD) \ | |
147 | for ((RULE) = (HEAD); \ | |
148 | (RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true); \ | |
149 | (RULE) = (NEXT)) | |
150 | ||
627fb667 JR |
151 | static struct cls_match *next_rule_in_list__(struct cls_match *); |
152 | static struct cls_match *next_rule_in_list(struct cls_match *); | |
13751fd8 JR |
153 | |
154 | static unsigned int minimask_get_prefix_len(const struct minimask *, | |
155 | const struct mf_field *); | |
cabd4c43 | 156 | static void trie_init(struct cls_classifier *, int trie_idx, |
13751fd8 JR |
157 | const struct mf_field *); |
158 | static unsigned int trie_lookup(const struct cls_trie *, const struct flow *, | |
159 | unsigned int *checkbits); | |
69d6040e JR |
160 | static unsigned int trie_lookup_value(const struct trie_node *, |
161 | const ovs_be32 value[], | |
f0e5aa11 | 162 | unsigned int value_bits, |
69d6040e | 163 | unsigned int *checkbits); |
13751fd8 JR |
164 | static void trie_destroy(struct trie_node *); |
165 | static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen); | |
69d6040e JR |
166 | static void trie_insert_prefix(struct trie_node **, const ovs_be32 *prefix, |
167 | int mlen); | |
13751fd8 | 168 | static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen); |
69d6040e JR |
169 | static void trie_remove_prefix(struct trie_node **, const ovs_be32 *prefix, |
170 | int mlen); | |
13751fd8 | 171 | static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs, |
c30cfa6b | 172 | unsigned int n_bits); |
13751fd8 | 173 | static bool mask_prefix_bits_set(const struct flow_wildcards *, |
c30cfa6b | 174 | uint8_t be32ofs, unsigned int n_bits); |
3d91d909 JR |
175 | \f |
176 | /* flow/miniflow/minimask/minimatch utilities. | |
177 | * These are only used by the classifier, so place them here to allow | |
178 | * for better optimization. */ | |
179 | ||
180 | static inline uint64_t | |
181 | miniflow_get_map_in_range(const struct miniflow *miniflow, | |
182 | uint8_t start, uint8_t end, unsigned int *offset) | |
183 | { | |
184 | uint64_t map = miniflow->map; | |
185 | *offset = 0; | |
186 | ||
187 | if (start > 0) { | |
188 | uint64_t msk = (UINT64_C(1) << start) - 1; /* 'start' LSBs set */ | |
189 | *offset = count_1bits(map & msk); | |
190 | map &= ~msk; | |
191 | } | |
192 | if (end < FLOW_U32S) { | |
193 | uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */ | |
194 | map &= msk; | |
195 | } | |
196 | return map; | |
197 | } | |
198 | ||
199 | /* Returns a hash value for the bits of 'flow' where there are 1-bits in | |
200 | * 'mask', given 'basis'. | |
201 | * | |
202 | * The hash values returned by this function are the same as those returned by | |
203 | * miniflow_hash_in_minimask(), only the form of the arguments differ. */ | |
204 | static inline uint32_t | |
205 | flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask, | |
206 | uint32_t basis) | |
207 | { | |
27bbe15d | 208 | const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks); |
3d91d909 | 209 | const uint32_t *flow_u32 = (const uint32_t *)flow; |
27bbe15d | 210 | const uint32_t *p = mask_values; |
3d91d909 JR |
211 | uint32_t hash; |
212 | uint64_t map; | |
213 | ||
214 | hash = basis; | |
215 | for (map = mask->masks.map; map; map = zero_rightmost_1bit(map)) { | |
33c6a1b9 | 216 | hash = hash_add(hash, flow_u32[raw_ctz(map)] & *p++); |
3d91d909 JR |
217 | } |
218 | ||
33c6a1b9 | 219 | return hash_finish(hash, (p - mask_values) * 4); |
3d91d909 JR |
220 | } |
221 | ||
222 | /* Returns a hash value for the bits of 'flow' where there are 1-bits in | |
223 | * 'mask', given 'basis'. | |
224 | * | |
225 | * The hash values returned by this function are the same as those returned by | |
226 | * flow_hash_in_minimask(), only the form of the arguments differ. */ | |
227 | static inline uint32_t | |
228 | miniflow_hash_in_minimask(const struct miniflow *flow, | |
229 | const struct minimask *mask, uint32_t basis) | |
230 | { | |
27bbe15d JR |
231 | const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks); |
232 | const uint32_t *p = mask_values; | |
3d91d909 JR |
233 | uint32_t hash = basis; |
234 | uint32_t flow_u32; | |
235 | ||
236 | MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) { | |
33c6a1b9 | 237 | hash = hash_add(hash, flow_u32 & *p++); |
3d91d909 JR |
238 | } |
239 | ||
33c6a1b9 | 240 | return hash_finish(hash, (p - mask_values) * 4); |
3d91d909 JR |
241 | } |
242 | ||
243 | /* Returns a hash value for the bits of range [start, end) in 'flow', | |
244 | * where there are 1-bits in 'mask', given 'hash'. | |
245 | * | |
246 | * The hash values returned by this function are the same as those returned by | |
247 | * minimatch_hash_range(), only the form of the arguments differ. */ | |
248 | static inline uint32_t | |
249 | flow_hash_in_minimask_range(const struct flow *flow, | |
250 | const struct minimask *mask, | |
251 | uint8_t start, uint8_t end, uint32_t *basis) | |
252 | { | |
27bbe15d | 253 | const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks); |
3d91d909 JR |
254 | const uint32_t *flow_u32 = (const uint32_t *)flow; |
255 | unsigned int offset; | |
256 | uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end, | |
257 | &offset); | |
27bbe15d | 258 | const uint32_t *p = mask_values + offset; |
3d91d909 JR |
259 | uint32_t hash = *basis; |
260 | ||
261 | for (; map; map = zero_rightmost_1bit(map)) { | |
33c6a1b9 | 262 | hash = hash_add(hash, flow_u32[raw_ctz(map)] & *p++); |
3d91d909 JR |
263 | } |
264 | ||
265 | *basis = hash; /* Allow continuation from the unfinished value. */ | |
33c6a1b9 | 266 | return hash_finish(hash, (p - mask_values) * 4); |
3d91d909 JR |
267 | } |
268 | ||
269 | /* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask. */ | |
270 | static inline void | |
271 | flow_wildcards_fold_minimask(struct flow_wildcards *wc, | |
272 | const struct minimask *mask) | |
273 | { | |
274 | flow_union_with_miniflow(&wc->masks, &mask->masks); | |
275 | } | |
276 | ||
277 | /* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask | |
278 | * in range [start, end). */ | |
279 | static inline void | |
280 | flow_wildcards_fold_minimask_range(struct flow_wildcards *wc, | |
281 | const struct minimask *mask, | |
282 | uint8_t start, uint8_t end) | |
283 | { | |
284 | uint32_t *dst_u32 = (uint32_t *)&wc->masks; | |
285 | unsigned int offset; | |
286 | uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end, | |
287 | &offset); | |
27bbe15d | 288 | const uint32_t *p = miniflow_get_u32_values(&mask->masks) + offset; |
3d91d909 JR |
289 | |
290 | for (; map; map = zero_rightmost_1bit(map)) { | |
291 | dst_u32[raw_ctz(map)] |= *p++; | |
292 | } | |
293 | } | |
294 | ||
295 | /* Returns a hash value for 'flow', given 'basis'. */ | |
296 | static inline uint32_t | |
297 | miniflow_hash(const struct miniflow *flow, uint32_t basis) | |
298 | { | |
27bbe15d JR |
299 | const uint32_t *values = miniflow_get_u32_values(flow); |
300 | const uint32_t *p = values; | |
3d91d909 JR |
301 | uint32_t hash = basis; |
302 | uint64_t hash_map = 0; | |
303 | uint64_t map; | |
304 | ||
305 | for (map = flow->map; map; map = zero_rightmost_1bit(map)) { | |
306 | if (*p) { | |
33c6a1b9 | 307 | hash = hash_add(hash, *p); |
3d91d909 JR |
308 | hash_map |= rightmost_1bit(map); |
309 | } | |
310 | p++; | |
311 | } | |
33c6a1b9 JR |
312 | hash = hash_add(hash, hash_map); |
313 | hash = hash_add(hash, hash_map >> 32); | |
3d91d909 | 314 | |
33c6a1b9 | 315 | return hash_finish(hash, p - values); |
3d91d909 JR |
316 | } |
317 | ||
318 | /* Returns a hash value for 'mask', given 'basis'. */ | |
319 | static inline uint32_t | |
320 | minimask_hash(const struct minimask *mask, uint32_t basis) | |
321 | { | |
322 | return miniflow_hash(&mask->masks, basis); | |
323 | } | |
324 | ||
325 | /* Returns a hash value for 'match', given 'basis'. */ | |
326 | static inline uint32_t | |
327 | minimatch_hash(const struct minimatch *match, uint32_t basis) | |
328 | { | |
329 | return miniflow_hash(&match->flow, minimask_hash(&match->mask, basis)); | |
330 | } | |
331 | ||
332 | /* Returns a hash value for the bits of range [start, end) in 'minimatch', | |
333 | * given 'basis'. | |
334 | * | |
335 | * The hash values returned by this function are the same as those returned by | |
336 | * flow_hash_in_minimask_range(), only the form of the arguments differ. */ | |
337 | static inline uint32_t | |
338 | minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end, | |
339 | uint32_t *basis) | |
340 | { | |
341 | unsigned int offset; | |
342 | const uint32_t *p, *q; | |
343 | uint32_t hash = *basis; | |
344 | int n, i; | |
345 | ||
346 | n = count_1bits(miniflow_get_map_in_range(&match->mask.masks, start, end, | |
347 | &offset)); | |
27bbe15d JR |
348 | q = miniflow_get_u32_values(&match->mask.masks) + offset; |
349 | p = miniflow_get_u32_values(&match->flow) + offset; | |
3d91d909 JR |
350 | |
351 | for (i = 0; i < n; i++) { | |
33c6a1b9 | 352 | hash = hash_add(hash, p[i] & q[i]); |
3d91d909 JR |
353 | } |
354 | *basis = hash; /* Allow continuation from the unfinished value. */ | |
33c6a1b9 | 355 | return hash_finish(hash, (offset + n) * 4); |
3d91d909 JR |
356 | } |
357 | ||
81a76618 BP |
358 | \f |
359 | /* cls_rule. */ | |
b5d97350 | 360 | |
81a76618 | 361 | /* Initializes 'rule' to match packets specified by 'match' at the given |
5cb7a798 BP |
362 | * 'priority'. 'match' must satisfy the invariant described in the comment at |
363 | * the definition of struct match. | |
66642cb4 | 364 | * |
48d28ac1 BP |
365 | * The caller must eventually destroy 'rule' with cls_rule_destroy(). |
366 | * | |
81a76618 BP |
367 | * (OpenFlow uses priorities between 0 and UINT16_MAX, inclusive, but |
368 | * internally Open vSwitch supports a wider range.) */ | |
47284b1f | 369 | void |
81a76618 BP |
370 | cls_rule_init(struct cls_rule *rule, |
371 | const struct match *match, unsigned int priority) | |
47284b1f | 372 | { |
5cb7a798 BP |
373 | minimatch_init(&rule->match, match); |
374 | rule->priority = priority; | |
627fb667 | 375 | rule->cls_match = NULL; |
5cb7a798 BP |
376 | } |
377 | ||
378 | /* Same as cls_rule_init() for initialization from a "struct minimatch". */ | |
379 | void | |
380 | cls_rule_init_from_minimatch(struct cls_rule *rule, | |
381 | const struct minimatch *match, | |
382 | unsigned int priority) | |
383 | { | |
384 | minimatch_clone(&rule->match, match); | |
81a76618 | 385 | rule->priority = priority; |
627fb667 | 386 | rule->cls_match = NULL; |
685a51a5 JP |
387 | } |
388 | ||
48d28ac1 BP |
389 | /* Initializes 'dst' as a copy of 'src'. |
390 | * | |
b2c1f00b | 391 | * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ |
48d28ac1 BP |
392 | void |
393 | cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src) | |
394 | { | |
5cb7a798 BP |
395 | minimatch_clone(&dst->match, &src->match); |
396 | dst->priority = src->priority; | |
627fb667 | 397 | dst->cls_match = NULL; |
48d28ac1 BP |
398 | } |
399 | ||
b2c1f00b BP |
400 | /* Initializes 'dst' with the data in 'src', destroying 'src'. |
401 | * | |
402 | * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ | |
403 | void | |
404 | cls_rule_move(struct cls_rule *dst, struct cls_rule *src) | |
405 | { | |
406 | minimatch_move(&dst->match, &src->match); | |
407 | dst->priority = src->priority; | |
627fb667 | 408 | dst->cls_match = NULL; |
b2c1f00b BP |
409 | } |
410 | ||
48d28ac1 BP |
411 | /* Frees memory referenced by 'rule'. Doesn't free 'rule' itself (it's |
412 | * normally embedded into a larger structure). | |
413 | * | |
414 | * ('rule' must not currently be in a classifier.) */ | |
415 | void | |
5cb7a798 | 416 | cls_rule_destroy(struct cls_rule *rule) |
48d28ac1 | 417 | { |
627fb667 | 418 | ovs_assert(!rule->cls_match); |
5cb7a798 | 419 | minimatch_destroy(&rule->match); |
48d28ac1 BP |
420 | } |
421 | ||
81a76618 BP |
422 | /* Returns true if 'a' and 'b' match the same packets at the same priority, |
423 | * false if they differ in some way. */ | |
193eb874 BP |
424 | bool |
425 | cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b) | |
426 | { | |
5cb7a798 | 427 | return a->priority == b->priority && minimatch_equal(&a->match, &b->match); |
193eb874 BP |
428 | } |
429 | ||
81a76618 | 430 | /* Returns a hash value for 'rule', folding in 'basis'. */ |
57452fdc BP |
431 | uint32_t |
432 | cls_rule_hash(const struct cls_rule *rule, uint32_t basis) | |
433 | { | |
5cb7a798 | 434 | return minimatch_hash(&rule->match, hash_int(rule->priority, basis)); |
73f33563 BP |
435 | } |
436 | ||
81a76618 | 437 | /* Appends a string describing 'rule' to 's'. */ |
07b37e8f BP |
438 | void |
439 | cls_rule_format(const struct cls_rule *rule, struct ds *s) | |
440 | { | |
5cb7a798 | 441 | minimatch_format(&rule->match, s, rule->priority); |
064af421 | 442 | } |
3ca1de08 BP |
443 | |
444 | /* Returns true if 'rule' matches every packet, false otherwise. */ | |
445 | bool | |
446 | cls_rule_is_catchall(const struct cls_rule *rule) | |
447 | { | |
5cb7a798 | 448 | return minimask_is_catchall(&rule->match.mask); |
3ca1de08 | 449 | } |
064af421 BP |
450 | \f |
451 | /* Initializes 'cls' as a classifier that initially contains no classification | |
452 | * rules. */ | |
453 | void | |
cabd4c43 | 454 | classifier_init(struct classifier *cls_, const uint8_t *flow_segments) |
064af421 | 455 | { |
cabd4c43 JR |
456 | struct cls_classifier *cls = xmalloc(sizeof *cls); |
457 | ||
458 | fat_rwlock_init(&cls_->rwlock); | |
459 | ||
460 | cls_->cls = cls; | |
461 | ||
064af421 | 462 | cls->n_rules = 0; |
f2c21402 | 463 | cmap_init(&cls->subtables_map); |
fe7cfa5c | 464 | pvector_init(&cls->subtables); |
f2c21402 | 465 | cmap_init(&cls->partitions); |
476f36e8 JR |
466 | cls->n_flow_segments = 0; |
467 | if (flow_segments) { | |
468 | while (cls->n_flow_segments < CLS_MAX_INDICES | |
469 | && *flow_segments < FLOW_U32S) { | |
470 | cls->flow_segments[cls->n_flow_segments++] = *flow_segments++; | |
471 | } | |
472 | } | |
13751fd8 | 473 | cls->n_tries = 0; |
064af421 BP |
474 | } |
475 | ||
476 | /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the | |
477 | * caller's responsibility. */ | |
478 | void | |
cabd4c43 | 479 | classifier_destroy(struct classifier *cls_) |
064af421 | 480 | { |
cabd4c43 JR |
481 | if (cls_) { |
482 | struct cls_classifier *cls = cls_->cls; | |
42181540 | 483 | struct cls_partition *partition, *next_partition; |
03868246 | 484 | struct cls_subtable *subtable, *next_subtable; |
13751fd8 JR |
485 | int i; |
486 | ||
cabd4c43 JR |
487 | fat_rwlock_destroy(&cls_->rwlock); |
488 | if (!cls) { | |
489 | return; | |
490 | } | |
491 | ||
13751fd8 JR |
492 | for (i = 0; i < cls->n_tries; i++) { |
493 | trie_destroy(cls->tries[i].root); | |
494 | } | |
064af421 | 495 | |
f2c21402 | 496 | CMAP_FOR_EACH_SAFE (subtable, next_subtable, cmap_node, |
5a87054c | 497 | &cls->subtables_map) { |
03868246 | 498 | destroy_subtable(cls, subtable); |
064af421 | 499 | } |
f2c21402 | 500 | cmap_destroy(&cls->subtables_map); |
c906cedf | 501 | |
f2c21402 | 502 | CMAP_FOR_EACH_SAFE (partition, next_partition, cmap_node, |
c906cedf | 503 | &cls->partitions) { |
f2c21402 | 504 | ovsrcu_postpone(free, partition); |
c906cedf | 505 | } |
f2c21402 | 506 | cmap_destroy(&cls->partitions); |
cabd4c43 | 507 | |
fe7cfa5c | 508 | pvector_destroy(&cls->subtables); |
cabd4c43 | 509 | free(cls); |
064af421 BP |
510 | } |
511 | } | |
512 | ||
13751fd8 JR |
513 | /* We use uint64_t as a set for the fields below. */ |
514 | BUILD_ASSERT_DECL(MFF_N_IDS <= 64); | |
515 | ||
516 | /* Set the fields for which prefix lookup should be performed. */ | |
517 | void | |
cabd4c43 | 518 | classifier_set_prefix_fields(struct classifier *cls_, |
13751fd8 JR |
519 | const enum mf_field_id *trie_fields, |
520 | unsigned int n_fields) | |
521 | { | |
cabd4c43 | 522 | struct cls_classifier *cls = cls_->cls; |
13751fd8 JR |
523 | uint64_t fields = 0; |
524 | int i, trie; | |
525 | ||
526 | for (i = 0, trie = 0; i < n_fields && trie < CLS_MAX_TRIES; i++) { | |
527 | const struct mf_field *field = mf_from_id(trie_fields[i]); | |
528 | if (field->flow_be32ofs < 0 || field->n_bits % 32) { | |
529 | /* Incompatible field. This is the only place where we | |
530 | * enforce these requirements, but the rest of the trie code | |
531 | * depends on the flow_be32ofs to be non-negative and the | |
532 | * field length to be a multiple of 32 bits. */ | |
533 | continue; | |
534 | } | |
535 | ||
536 | if (fields & (UINT64_C(1) << trie_fields[i])) { | |
537 | /* Duplicate field, there is no need to build more than | |
538 | * one index for any one field. */ | |
539 | continue; | |
540 | } | |
541 | fields |= UINT64_C(1) << trie_fields[i]; | |
542 | ||
543 | if (trie >= cls->n_tries || field != cls->tries[trie].field) { | |
544 | trie_init(cls, trie, field); | |
545 | } | |
546 | trie++; | |
547 | } | |
548 | ||
549 | /* Destroy the rest. */ | |
550 | for (i = trie; i < cls->n_tries; i++) { | |
551 | trie_init(cls, i, NULL); | |
552 | } | |
553 | cls->n_tries = trie; | |
554 | } | |
555 | ||
556 | static void | |
cabd4c43 | 557 | trie_init(struct cls_classifier *cls, int trie_idx, |
13751fd8 JR |
558 | const struct mf_field *field) |
559 | { | |
560 | struct cls_trie *trie = &cls->tries[trie_idx]; | |
561 | struct cls_subtable *subtable; | |
562 | ||
563 | if (trie_idx < cls->n_tries) { | |
564 | trie_destroy(trie->root); | |
565 | } | |
566 | trie->root = NULL; | |
567 | trie->field = field; | |
568 | ||
569 | /* Add existing rules to the trie. */ | |
f2c21402 | 570 | CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) { |
13751fd8 JR |
571 | unsigned int plen; |
572 | ||
573 | plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0; | |
574 | /* Initialize subtable's prefix length on this field. */ | |
575 | subtable->trie_plen[trie_idx] = plen; | |
576 | ||
577 | if (plen) { | |
627fb667 | 578 | struct cls_match *head; |
13751fd8 | 579 | |
f2c21402 | 580 | CMAP_FOR_EACH (head, cmap_node, &subtable->rules) { |
627fb667 | 581 | struct cls_match *match; |
13751fd8 | 582 | |
627fb667 JR |
583 | FOR_EACH_RULE_IN_LIST (match, head) { |
584 | trie_insert(trie, match->cls_rule, plen); | |
13751fd8 JR |
585 | } |
586 | } | |
587 | } | |
588 | } | |
589 | } | |
590 | ||
5f0476ce JR |
591 | /* Returns true if 'cls' contains no classification rules, false otherwise. |
592 | * Checking the cmap requires no locking. */ | |
064af421 BP |
593 | bool |
594 | classifier_is_empty(const struct classifier *cls) | |
595 | { | |
5f0476ce | 596 | return cmap_is_empty(&cls->cls->subtables_map); |
064af421 BP |
597 | } |
598 | ||
dbda2960 | 599 | /* Returns the number of rules in 'cls'. */ |
064af421 BP |
600 | int |
601 | classifier_count(const struct classifier *cls) | |
602 | { | |
cabd4c43 | 603 | return cls->cls->n_rules; |
064af421 BP |
604 | } |
605 | ||
c906cedf BP |
606 | static uint32_t |
607 | hash_metadata(ovs_be64 metadata_) | |
608 | { | |
609 | uint64_t metadata = (OVS_FORCE uint64_t) metadata_; | |
965607c8 | 610 | return hash_uint64(metadata); |
c906cedf BP |
611 | } |
612 | ||
613 | static struct cls_partition * | |
cabd4c43 JR |
614 | find_partition(const struct cls_classifier *cls, ovs_be64 metadata, |
615 | uint32_t hash) | |
c906cedf BP |
616 | { |
617 | struct cls_partition *partition; | |
618 | ||
f2c21402 | 619 | CMAP_FOR_EACH_WITH_HASH (partition, cmap_node, hash, &cls->partitions) { |
c906cedf BP |
620 | if (partition->metadata == metadata) { |
621 | return partition; | |
622 | } | |
623 | } | |
624 | ||
625 | return NULL; | |
626 | } | |
627 | ||
628 | static struct cls_partition * | |
cabd4c43 | 629 | create_partition(struct cls_classifier *cls, struct cls_subtable *subtable, |
c906cedf BP |
630 | ovs_be64 metadata) |
631 | { | |
632 | uint32_t hash = hash_metadata(metadata); | |
633 | struct cls_partition *partition = find_partition(cls, metadata, hash); | |
634 | if (!partition) { | |
635 | partition = xmalloc(sizeof *partition); | |
636 | partition->metadata = metadata; | |
637 | partition->tags = 0; | |
183126a1 | 638 | tag_tracker_init(&partition->tracker); |
f2c21402 | 639 | cmap_insert(&cls->partitions, &partition->cmap_node, hash); |
c906cedf | 640 | } |
03868246 | 641 | tag_tracker_add(&partition->tracker, &partition->tags, subtable->tag); |
c906cedf BP |
642 | return partition; |
643 | } | |
644 | ||
69d6040e JR |
645 | static inline ovs_be32 minimatch_get_ports(const struct minimatch *match) |
646 | { | |
647 | /* Could optimize to use the same map if needed for fast path. */ | |
648 | return MINIFLOW_GET_BE32(&match->flow, tp_src) | |
649 | & MINIFLOW_GET_BE32(&match->mask.masks, tp_src); | |
650 | } | |
651 | ||
b5d97350 BP |
652 | /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller |
653 | * must not modify or free it. | |
064af421 BP |
654 | * |
655 | * If 'cls' already contains an identical rule (including wildcards, values of | |
656 | * fixed fields, and priority), replaces the old rule by 'rule' and returns the | |
657 | * rule that was replaced. The caller takes ownership of the returned rule and | |
48d28ac1 BP |
658 | * is thus responsible for destroying it with cls_rule_destroy(), freeing the |
659 | * memory block in which it resides, etc., as necessary. | |
064af421 BP |
660 | * |
661 | * Returns NULL if 'cls' does not contain a rule with an identical key, after | |
662 | * inserting the new rule. In this case, no rules are displaced by the new | |
663 | * rule, even rules that cannot have any effect because the new rule matches a | |
664 | * superset of their flows and has higher priority. */ | |
665 | struct cls_rule * | |
cabd4c43 | 666 | classifier_replace(struct classifier *cls_, struct cls_rule *rule) |
064af421 | 667 | { |
cabd4c43 | 668 | struct cls_classifier *cls = cls_->cls; |
627fb667 | 669 | struct cls_match *old_rule; |
03868246 | 670 | struct cls_subtable *subtable; |
b5d97350 | 671 | |
03868246 JR |
672 | subtable = find_subtable(cls, &rule->match.mask); |
673 | if (!subtable) { | |
674 | subtable = insert_subtable(cls, &rule->match.mask); | |
b5d97350 BP |
675 | } |
676 | ||
03868246 | 677 | old_rule = insert_rule(cls, subtable, rule); |
b5d97350 | 678 | if (!old_rule) { |
13751fd8 JR |
679 | int i; |
680 | ||
627fb667 | 681 | rule->cls_match->partition = NULL; |
c906cedf BP |
682 | if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) { |
683 | ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow); | |
627fb667 JR |
684 | rule->cls_match->partition = create_partition(cls, subtable, |
685 | metadata); | |
c906cedf BP |
686 | } |
687 | ||
064af421 | 688 | cls->n_rules++; |
13751fd8 JR |
689 | |
690 | for (i = 0; i < cls->n_tries; i++) { | |
691 | if (subtable->trie_plen[i]) { | |
692 | trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]); | |
693 | } | |
694 | } | |
69d6040e JR |
695 | |
696 | /* Ports trie. */ | |
697 | if (subtable->ports_mask_len) { | |
698 | /* We mask the value to be inserted to always have the wildcarded | |
699 | * bits in known (zero) state, so we can include them in comparison | |
700 | * and they will always match (== their original value does not | |
701 | * matter). */ | |
702 | ovs_be32 masked_ports = minimatch_get_ports(&rule->match); | |
703 | ||
704 | trie_insert_prefix(&subtable->ports_trie, &masked_ports, | |
705 | subtable->ports_mask_len); | |
706 | } | |
707 | ||
627fb667 | 708 | return NULL; |
c906cedf | 709 | } else { |
627fb667 JR |
710 | struct cls_rule *old_cls_rule = old_rule->cls_rule; |
711 | ||
712 | rule->cls_match->partition = old_rule->partition; | |
713 | old_cls_rule->cls_match = NULL; | |
f2c21402 JR |
714 | |
715 | /* 'old_rule' contains a cmap_node, which may not be freed | |
716 | * immediately. */ | |
717 | ovsrcu_postpone(free, old_rule); | |
718 | ||
627fb667 | 719 | return old_cls_rule; |
064af421 | 720 | } |
064af421 BP |
721 | } |
722 | ||
08944c1d BP |
723 | /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller |
724 | * must not modify or free it. | |
725 | * | |
726 | * 'cls' must not contain an identical rule (including wildcards, values of | |
727 | * fixed fields, and priority). Use classifier_find_rule_exactly() to find | |
728 | * such a rule. */ | |
729 | void | |
730 | classifier_insert(struct classifier *cls, struct cls_rule *rule) | |
731 | { | |
732 | struct cls_rule *displaced_rule = classifier_replace(cls, rule); | |
cb22974d | 733 | ovs_assert(!displaced_rule); |
08944c1d BP |
734 | } |
735 | ||
48d28ac1 BP |
736 | /* Removes 'rule' from 'cls'. It is the caller's responsibility to destroy |
737 | * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule' | |
738 | * resides, etc., as necessary. */ | |
064af421 | 739 | void |
cabd4c43 | 740 | classifier_remove(struct classifier *cls_, struct cls_rule *rule) |
064af421 | 741 | { |
cabd4c43 | 742 | struct cls_classifier *cls = cls_->cls; |
c906cedf | 743 | struct cls_partition *partition; |
627fb667 JR |
744 | struct cls_match *cls_match = rule->cls_match; |
745 | struct cls_match *head; | |
03868246 | 746 | struct cls_subtable *subtable; |
476f36e8 | 747 | int i; |
f2c21402 JR |
748 | uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES]; |
749 | uint8_t prev_be32ofs = 0; | |
064af421 | 750 | |
627fb667 JR |
751 | ovs_assert(cls_match); |
752 | ||
03868246 | 753 | subtable = find_subtable(cls, &rule->match.mask); |
627fb667 JR |
754 | ovs_assert(subtable); |
755 | ||
69d6040e JR |
756 | if (subtable->ports_mask_len) { |
757 | ovs_be32 masked_ports = minimatch_get_ports(&rule->match); | |
758 | ||
759 | trie_remove_prefix(&subtable->ports_trie, | |
760 | &masked_ports, subtable->ports_mask_len); | |
761 | } | |
13751fd8 JR |
762 | for (i = 0; i < cls->n_tries; i++) { |
763 | if (subtable->trie_plen[i]) { | |
764 | trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]); | |
765 | } | |
766 | } | |
767 | ||
476f36e8 JR |
768 | /* Remove rule node from indices. */ |
769 | for (i = 0; i < subtable->n_indices; i++) { | |
f2c21402 JR |
770 | ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs, |
771 | subtable->index_ofs[i], &basis); | |
772 | cmap_remove(&subtable->indices[i], &cls_match->index_nodes[i], | |
773 | ihash[i]); | |
774 | prev_be32ofs = subtable->index_ofs[i]; | |
476f36e8 | 775 | } |
f2c21402 | 776 | hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis); |
476f36e8 | 777 | |
f2c21402 | 778 | head = find_equal(subtable, &rule->match.flow, hash); |
627fb667 JR |
779 | if (head != cls_match) { |
780 | list_remove(&cls_match->list); | |
781 | } else if (list_is_empty(&cls_match->list)) { | |
f2c21402 | 782 | cmap_remove(&subtable->rules, &cls_match->cmap_node, hash); |
b5d97350 | 783 | } else { |
627fb667 JR |
784 | struct cls_match *next = CONTAINER_OF(cls_match->list.next, |
785 | struct cls_match, list); | |
064af421 | 786 | |
627fb667 | 787 | list_remove(&cls_match->list); |
f2c21402 JR |
788 | cmap_replace(&subtable->rules, &cls_match->cmap_node, |
789 | &next->cmap_node, hash); | |
b5d97350 | 790 | } |
064af421 | 791 | |
627fb667 | 792 | partition = cls_match->partition; |
183126a1 BP |
793 | if (partition) { |
794 | tag_tracker_subtract(&partition->tracker, &partition->tags, | |
03868246 | 795 | subtable->tag); |
183126a1 | 796 | if (!partition->tags) { |
f2c21402 JR |
797 | cmap_remove(&cls->partitions, &partition->cmap_node, |
798 | hash_metadata(partition->metadata)); | |
799 | ovsrcu_postpone(free, partition); | |
183126a1 | 800 | } |
c906cedf BP |
801 | } |
802 | ||
03868246 JR |
803 | if (--subtable->n_rules == 0) { |
804 | destroy_subtable(cls, subtable); | |
fe7cfa5c JR |
805 | } else if (subtable->max_priority == cls_match->priority |
806 | && --subtable->max_count == 0) { | |
807 | /* Find the new 'max_priority' and 'max_count'. */ | |
808 | struct cls_match *head; | |
809 | unsigned int max_priority = 0; | |
810 | ||
f2c21402 | 811 | CMAP_FOR_EACH (head, cmap_node, &subtable->rules) { |
fe7cfa5c JR |
812 | if (head->priority > max_priority) { |
813 | max_priority = head->priority; | |
814 | subtable->max_count = 1; | |
815 | } else if (head->priority == max_priority) { | |
816 | ++subtable->max_count; | |
817 | } | |
818 | } | |
819 | subtable->max_priority = max_priority; | |
820 | pvector_change_priority(&cls->subtables, subtable, max_priority); | |
4d935a6b | 821 | } |
13751fd8 | 822 | |
b5d97350 | 823 | cls->n_rules--; |
627fb667 JR |
824 | |
825 | rule->cls_match = NULL; | |
f2c21402 | 826 | ovsrcu_postpone(free, cls_match); |
064af421 BP |
827 | } |
828 | ||
13751fd8 JR |
829 | /* Prefix tree context. Valid when 'lookup_done' is true. Can skip all |
830 | * subtables which have more than 'match_plen' bits in their corresponding | |
831 | * field at offset 'be32ofs'. If skipped, 'maskbits' prefix bits should be | |
832 | * unwildcarded to quarantee datapath flow matches only packets it should. */ | |
833 | struct trie_ctx { | |
834 | const struct cls_trie *trie; | |
835 | bool lookup_done; /* Status of the lookup. */ | |
836 | uint8_t be32ofs; /* U32 offset of the field in question. */ | |
837 | unsigned int match_plen; /* Longest prefix than could possibly match. */ | |
838 | unsigned int maskbits; /* Prefix length needed to avoid false matches. */ | |
839 | }; | |
840 | ||
841 | static void | |
842 | trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) | |
843 | { | |
844 | ctx->trie = trie; | |
845 | ctx->be32ofs = trie->field->flow_be32ofs; | |
846 | ctx->lookup_done = false; | |
847 | } | |
848 | ||
48c3de13 BP |
849 | /* Finds and returns the highest-priority rule in 'cls' that matches 'flow'. |
850 | * Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules | |
74f74083 EJ |
851 | * of equal priority match 'flow', returns one arbitrarily. |
852 | * | |
853 | * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the | |
854 | * set of bits that were significant in the lookup. At some point | |
855 | * earlier, 'wc' should have been initialized (e.g., by | |
856 | * flow_wildcards_init_catchall()). */ | |
48c3de13 | 857 | struct cls_rule * |
cabd4c43 | 858 | classifier_lookup(const struct classifier *cls_, const struct flow *flow, |
74f74083 | 859 | struct flow_wildcards *wc) |
48c3de13 | 860 | { |
cabd4c43 | 861 | struct cls_classifier *cls = cls_->cls; |
c906cedf | 862 | const struct cls_partition *partition; |
c906cedf | 863 | tag_type tags; |
ec988646 | 864 | int64_t best_priority = -1; |
fe7cfa5c JR |
865 | const struct cls_match *best; |
866 | struct trie_ctx trie_ctx[CLS_MAX_TRIES]; | |
867 | struct cls_subtable *subtable; | |
c906cedf | 868 | |
03868246 JR |
869 | /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them, |
870 | * then 'flow' cannot possibly match in 'subtable': | |
c906cedf BP |
871 | * |
872 | * - If flow->metadata maps to a given 'partition', then we can use | |
873 | * 'tags' for 'partition->tags'. | |
874 | * | |
875 | * - If flow->metadata has no partition, then no rule in 'cls' has an | |
876 | * exact-match for flow->metadata. That means that we don't need to | |
03868246 | 877 | * search any subtable that includes flow->metadata in its mask. |
c906cedf | 878 | * |
03868246 | 879 | * In either case, we always need to search any cls_subtables that do not |
c906cedf | 880 | * include flow->metadata in its mask. One way to do that would be to |
03868246 JR |
881 | * check the "cls_subtable"s explicitly for that, but that would require an |
882 | * extra branch per subtable. Instead, we mark such a cls_subtable's | |
883 | * 'tags' as TAG_ALL and make sure that 'tags' is never empty. This means | |
884 | * that 'tags' always intersects such a cls_subtable's 'tags', so we don't | |
885 | * need a special case. | |
c906cedf | 886 | */ |
f2c21402 | 887 | partition = (cmap_is_empty(&cls->partitions) |
c906cedf BP |
888 | ? NULL |
889 | : find_partition(cls, flow->metadata, | |
890 | hash_metadata(flow->metadata))); | |
891 | tags = partition ? partition->tags : TAG_ARBITRARY; | |
48c3de13 | 892 | |
13751fd8 | 893 | /* Initialize trie contexts for match_find_wc(). */ |
fe7cfa5c | 894 | for (int i = 0; i < cls->n_tries; i++) { |
13751fd8 JR |
895 | trie_ctx_init(&trie_ctx[i], &cls->tries[i]); |
896 | } | |
ec988646 | 897 | |
b5d97350 | 898 | best = NULL; |
fe7cfa5c JR |
899 | PVECTOR_FOR_EACH_PRIORITY(subtable, best_priority, 2, |
900 | sizeof(struct cls_subtable), &cls->subtables) { | |
627fb667 | 901 | struct cls_match *rule; |
c906cedf | 902 | |
fe7cfa5c | 903 | if (!tag_intersects(tags, subtable->tag)) { |
c906cedf BP |
904 | continue; |
905 | } | |
74f74083 | 906 | |
fe7cfa5c | 907 | rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries, wc); |
ec988646 JR |
908 | if (rule && (int64_t)rule->priority > best_priority) { |
909 | best_priority = (int64_t)rule->priority; | |
1f3c5efc | 910 | best = rule; |
b5d97350 | 911 | } |
48c3de13 | 912 | } |
13751fd8 | 913 | |
627fb667 | 914 | return best ? best->cls_rule : NULL; |
48c3de13 BP |
915 | } |
916 | ||
2abf78ff | 917 | /* Returns true if 'target' satisifies 'match', that is, if each bit for which |
3016f3e4 JR |
918 | * 'match' specifies a particular value has the correct value in 'target'. |
919 | * | |
920 | * 'flow' and 'mask' have the same mask! */ | |
2abf78ff | 921 | static bool |
3016f3e4 JR |
922 | miniflow_and_mask_matches_miniflow(const struct miniflow *flow, |
923 | const struct minimask *mask, | |
924 | const struct miniflow *target) | |
2abf78ff | 925 | { |
3016f3e4 JR |
926 | const uint32_t *flowp = miniflow_get_u32_values(flow); |
927 | const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); | |
2abf78ff JR |
928 | uint32_t target_u32; |
929 | ||
3016f3e4 | 930 | MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) { |
2abf78ff JR |
931 | if ((*flowp++ ^ target_u32) & *maskp++) { |
932 | return false; | |
933 | } | |
934 | } | |
935 | ||
936 | return true; | |
937 | } | |
938 | ||
627fb667 | 939 | static inline struct cls_match * |
2abf78ff JR |
940 | find_match_miniflow(const struct cls_subtable *subtable, |
941 | const struct miniflow *flow, | |
942 | uint32_t hash) | |
943 | { | |
627fb667 | 944 | struct cls_match *rule; |
2abf78ff | 945 | |
f2c21402 | 946 | CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) { |
3016f3e4 JR |
947 | if (miniflow_and_mask_matches_miniflow(&rule->flow, &subtable->mask, |
948 | flow)) { | |
2abf78ff JR |
949 | return rule; |
950 | } | |
951 | } | |
952 | ||
953 | return NULL; | |
954 | } | |
955 | ||
b7648634 EJ |
956 | /* For each miniflow in 'flows' performs a classifier lookup writing the result |
957 | * into the corresponding slot in 'rules'. If a particular entry in 'flows' is | |
958 | * NULL it is skipped. | |
2abf78ff | 959 | * |
b7648634 EJ |
960 | * This function is optimized for use in the userspace datapath and therefore |
961 | * does not implement a lot of features available in the standard | |
962 | * classifier_lookup() function. Specifically, it does not implement | |
963 | * priorities, instead returning any rule which matches the flow. */ | |
964 | void | |
965 | classifier_lookup_miniflow_batch(const struct classifier *cls_, | |
966 | const struct miniflow **flows, | |
967 | struct cls_rule **rules, size_t len) | |
2abf78ff | 968 | { |
cabd4c43 | 969 | struct cls_classifier *cls = cls_->cls; |
2abf78ff | 970 | struct cls_subtable *subtable; |
b7648634 | 971 | size_t i, begin = 0; |
2abf78ff | 972 | |
b7648634 | 973 | memset(rules, 0, len * sizeof *rules); |
fe7cfa5c | 974 | PVECTOR_FOR_EACH (subtable, &cls->subtables) { |
b7648634 EJ |
975 | for (i = begin; i < len; i++) { |
976 | struct cls_match *match; | |
977 | uint32_t hash; | |
2abf78ff | 978 | |
b7648634 EJ |
979 | if (OVS_UNLIKELY(rules[i] || !flows[i])) { |
980 | continue; | |
981 | } | |
982 | ||
983 | hash = miniflow_hash_in_minimask(flows[i], &subtable->mask, 0); | |
984 | match = find_match_miniflow(subtable, flows[i], hash); | |
985 | if (OVS_UNLIKELY(match)) { | |
986 | rules[i] = match->cls_rule; | |
987 | } | |
2abf78ff | 988 | } |
2abf78ff | 989 | |
b7648634 EJ |
990 | while (begin < len && (rules[begin] || !flows[begin])) { |
991 | begin++; | |
992 | } | |
993 | if (begin >= len) { | |
994 | break; | |
995 | } | |
996 | } | |
2abf78ff JR |
997 | } |
998 | ||
b5d97350 BP |
999 | /* Finds and returns a rule in 'cls' with exactly the same priority and |
1000 | * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
c084ce1d | 1001 | * contain an exact match. */ |
064af421 | 1002 | struct cls_rule * |
cabd4c43 | 1003 | classifier_find_rule_exactly(const struct classifier *cls_, |
76ecc721 | 1004 | const struct cls_rule *target) |
064af421 | 1005 | { |
cabd4c43 | 1006 | struct cls_classifier *cls = cls_->cls; |
627fb667 | 1007 | struct cls_match *head, *rule; |
03868246 | 1008 | struct cls_subtable *subtable; |
064af421 | 1009 | |
03868246 JR |
1010 | subtable = find_subtable(cls, &target->match.mask); |
1011 | if (!subtable) { | |
b5d97350 | 1012 | return NULL; |
064af421 BP |
1013 | } |
1014 | ||
4d935a6b | 1015 | /* Skip if there is no hope. */ |
03868246 | 1016 | if (target->priority > subtable->max_priority) { |
4d935a6b JR |
1017 | return NULL; |
1018 | } | |
1019 | ||
03868246 | 1020 | head = find_equal(subtable, &target->match.flow, |
5cb7a798 BP |
1021 | miniflow_hash_in_minimask(&target->match.flow, |
1022 | &target->match.mask, 0)); | |
b5d97350 BP |
1023 | FOR_EACH_RULE_IN_LIST (rule, head) { |
1024 | if (target->priority >= rule->priority) { | |
627fb667 | 1025 | return target->priority == rule->priority ? rule->cls_rule : NULL; |
064af421 BP |
1026 | } |
1027 | } | |
1028 | return NULL; | |
1029 | } | |
1030 | ||
81a76618 BP |
1031 | /* Finds and returns a rule in 'cls' with priority 'priority' and exactly the |
1032 | * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
1033 | * contain an exact match. */ | |
1034 | struct cls_rule * | |
1035 | classifier_find_match_exactly(const struct classifier *cls, | |
1036 | const struct match *target, | |
1037 | unsigned int priority) | |
1038 | { | |
1039 | struct cls_rule *retval; | |
1040 | struct cls_rule cr; | |
1041 | ||
1042 | cls_rule_init(&cr, target, priority); | |
1043 | retval = classifier_find_rule_exactly(cls, &cr); | |
48d28ac1 | 1044 | cls_rule_destroy(&cr); |
81a76618 BP |
1045 | |
1046 | return retval; | |
1047 | } | |
1048 | ||
faa50f40 BP |
1049 | /* Checks if 'target' would overlap any other rule in 'cls'. Two rules are |
1050 | * considered to overlap if both rules have the same priority and a packet | |
1051 | * could match both. */ | |
49bdc010 | 1052 | bool |
cabd4c43 | 1053 | classifier_rule_overlaps(const struct classifier *cls_, |
faa50f40 | 1054 | const struct cls_rule *target) |
49bdc010 | 1055 | { |
cabd4c43 | 1056 | struct cls_classifier *cls = cls_->cls; |
03868246 | 1057 | struct cls_subtable *subtable; |
fe7cfa5c | 1058 | int64_t stop_at_priority = (int64_t)target->priority - 1; |
49bdc010 | 1059 | |
03868246 | 1060 | /* Iterate subtables in the descending max priority order. */ |
fe7cfa5c JR |
1061 | PVECTOR_FOR_EACH_PRIORITY (subtable, stop_at_priority, 2, |
1062 | sizeof(struct cls_subtable), &cls->subtables) { | |
5cb7a798 BP |
1063 | uint32_t storage[FLOW_U32S]; |
1064 | struct minimask mask; | |
627fb667 | 1065 | struct cls_match *head; |
49bdc010 | 1066 | |
03868246 | 1067 | minimask_combine(&mask, &target->match.mask, &subtable->mask, storage); |
f2c21402 | 1068 | CMAP_FOR_EACH (head, cmap_node, &subtable->rules) { |
627fb667 | 1069 | struct cls_match *rule; |
49bdc010 | 1070 | |
b5d97350 | 1071 | FOR_EACH_RULE_IN_LIST (rule, head) { |
4d935a6b JR |
1072 | if (rule->priority < target->priority) { |
1073 | break; /* Rules in descending priority order. */ | |
1074 | } | |
faa50f40 | 1075 | if (rule->priority == target->priority |
5cb7a798 | 1076 | && miniflow_equal_in_minimask(&target->match.flow, |
3016f3e4 | 1077 | &rule->flow, &mask)) { |
49bdc010 JP |
1078 | return true; |
1079 | } | |
1080 | } | |
1081 | } | |
1082 | } | |
1083 | ||
1084 | return false; | |
1085 | } | |
6ceeaa92 BP |
1086 | |
1087 | /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more | |
1088 | * specific than 'criteria'. That is, 'rule' matches 'criteria' and this | |
1089 | * function returns true if, for every field: | |
1090 | * | |
1091 | * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the | |
1092 | * field, or | |
1093 | * | |
1094 | * - 'criteria' wildcards the field, | |
1095 | * | |
1096 | * Conversely, 'rule' does not match 'criteria' and this function returns false | |
1097 | * if, for at least one field: | |
1098 | * | |
1099 | * - 'criteria' and 'rule' specify different values for the field, or | |
1100 | * | |
1101 | * - 'criteria' specifies a value for the field but 'rule' wildcards it. | |
1102 | * | |
1103 | * Equivalently, the truth table for whether a field matches is: | |
1104 | * | |
1105 | * rule | |
1106 | * | |
1107 | * c wildcard exact | |
1108 | * r +---------+---------+ | |
1109 | * i wild | yes | yes | | |
1110 | * t card | | | | |
1111 | * e +---------+---------+ | |
1112 | * r exact | no |if values| | |
1113 | * i | |are equal| | |
1114 | * a +---------+---------+ | |
1115 | * | |
1116 | * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD | |
1117 | * commands and by OpenFlow 1.0 aggregate and flow stats. | |
1118 | * | |
81a76618 | 1119 | * Ignores rule->priority. */ |
6ceeaa92 BP |
1120 | bool |
1121 | cls_rule_is_loose_match(const struct cls_rule *rule, | |
5cb7a798 | 1122 | const struct minimatch *criteria) |
6ceeaa92 | 1123 | { |
5cb7a798 BP |
1124 | return (!minimask_has_extra(&rule->match.mask, &criteria->mask) |
1125 | && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow, | |
1126 | &criteria->mask)); | |
6ceeaa92 | 1127 | } |
b5d97350 | 1128 | \f |
5ecc9d81 BP |
1129 | /* Iteration. */ |
1130 | ||
1131 | static bool | |
627fb667 | 1132 | rule_matches(const struct cls_match *rule, const struct cls_rule *target) |
5ecc9d81 BP |
1133 | { |
1134 | return (!target | |
3016f3e4 | 1135 | || miniflow_equal_in_minimask(&rule->flow, |
5cb7a798 BP |
1136 | &target->match.flow, |
1137 | &target->match.mask)); | |
5ecc9d81 BP |
1138 | } |
1139 | ||
627fb667 | 1140 | static struct cls_match * |
03868246 | 1141 | search_subtable(const struct cls_subtable *subtable, |
f2c21402 | 1142 | struct cls_cursor *cursor) |
5ecc9d81 | 1143 | { |
f2c21402 JR |
1144 | if (!cursor->target |
1145 | || !minimask_has_extra(&subtable->mask, &cursor->target->match.mask)) { | |
627fb667 | 1146 | struct cls_match *rule; |
5ecc9d81 | 1147 | |
f2c21402 JR |
1148 | CMAP_CURSOR_FOR_EACH (rule, cmap_node, &cursor->rules, |
1149 | &subtable->rules) { | |
1150 | if (rule_matches(rule, cursor->target)) { | |
5ecc9d81 BP |
1151 | return rule; |
1152 | } | |
1153 | } | |
1154 | } | |
1155 | return NULL; | |
1156 | } | |
1157 | ||
5f0476ce JR |
1158 | /* Initializes 'cursor' for iterating through rules in 'cls', and returns the |
1159 | * first matching cls_rule via '*pnode', or NULL if there are no matches. | |
5ecc9d81 | 1160 | * |
6ceeaa92 | 1161 | * - If 'target' is null, the cursor will visit every rule in 'cls'. |
5ecc9d81 | 1162 | * |
6ceeaa92 BP |
1163 | * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls' |
1164 | * such that cls_rule_is_loose_match(rule, target) returns true. | |
5ecc9d81 | 1165 | * |
6ceeaa92 | 1166 | * Ignores target->priority. */ |
5f0476ce JR |
1167 | struct cls_cursor cls_cursor_init(const struct classifier *cls, |
1168 | const struct cls_rule *target, | |
1169 | void **pnode, const void *offset, bool safe) | |
1170 | OVS_NO_THREAD_SAFETY_ANALYSIS | |
5ecc9d81 | 1171 | { |
5f0476ce | 1172 | struct cls_cursor cursor; |
03868246 | 1173 | struct cls_subtable *subtable; |
5f0476ce | 1174 | struct cls_rule *cls_rule = NULL; |
5ecc9d81 | 1175 | |
5f0476ce JR |
1176 | cursor.safe = safe; |
1177 | cursor.cls = cls; | |
1178 | cursor.target = target && !cls_rule_is_catchall(target) ? target : NULL; | |
1179 | ||
1180 | /* Find first rule. */ | |
1181 | fat_rwlock_rdlock(&cursor.cls->rwlock); | |
1182 | CMAP_CURSOR_FOR_EACH (subtable, cmap_node, &cursor.subtables, | |
1183 | &cursor.cls->cls->subtables_map) { | |
1184 | struct cls_match *rule = search_subtable(subtable, &cursor); | |
f2c21402 | 1185 | |
5ecc9d81 | 1186 | if (rule) { |
5f0476ce JR |
1187 | cursor.subtable = subtable; |
1188 | cls_rule = rule->cls_rule; | |
1189 | break; | |
5ecc9d81 BP |
1190 | } |
1191 | } | |
5f0476ce | 1192 | *pnode = (char *)cls_rule + (ptrdiff_t)offset; |
5ecc9d81 | 1193 | |
5f0476ce JR |
1194 | /* Leave locked if requested and have a rule. */ |
1195 | if (safe || !cls_rule) { | |
1196 | fat_rwlock_unlock(&cls->rwlock); | |
1197 | } | |
1198 | return cursor; | |
1199 | } | |
1200 | ||
1201 | static void | |
1202 | cls_cursor_next_unlock(struct cls_cursor *cursor, struct cls_rule *rule) | |
1203 | OVS_NO_THREAD_SAFETY_ANALYSIS | |
1204 | { | |
1205 | /* Release the lock if no rule, or 'safe' mode. */ | |
1206 | if (!rule || cursor->safe) { | |
1207 | fat_rwlock_unlock(&cursor->cls->rwlock); | |
1208 | } | |
5ecc9d81 BP |
1209 | } |
1210 | ||
1211 | /* Returns the next matching cls_rule in 'cursor''s iteration, or a null | |
1212 | * pointer if there are no more matches. */ | |
1213 | struct cls_rule * | |
9850cd0f | 1214 | cls_cursor_next(struct cls_cursor *cursor, const struct cls_rule *rule_) |
5f0476ce | 1215 | OVS_NO_THREAD_SAFETY_ANALYSIS |
5ecc9d81 | 1216 | { |
627fb667 | 1217 | struct cls_match *rule = CONST_CAST(struct cls_match *, rule_->cls_match); |
03868246 | 1218 | const struct cls_subtable *subtable; |
627fb667 | 1219 | struct cls_match *next; |
5ecc9d81 | 1220 | |
5f0476ce JR |
1221 | /* Lock if not locked already. */ |
1222 | if (cursor->safe) { | |
1223 | fat_rwlock_rdlock(&cursor->cls->rwlock); | |
1224 | } | |
1225 | ||
955f579d BP |
1226 | next = next_rule_in_list__(rule); |
1227 | if (next->priority < rule->priority) { | |
5f0476ce | 1228 | cls_cursor_next_unlock(cursor, next->cls_rule); |
627fb667 | 1229 | return next->cls_rule; |
5ecc9d81 BP |
1230 | } |
1231 | ||
955f579d | 1232 | /* 'next' is the head of the list, that is, the rule that is included in |
f2c21402 | 1233 | * the subtable's map. (This is important when the classifier contains |
03868246 | 1234 | * rules that differ only in priority.) */ |
955f579d | 1235 | rule = next; |
f2c21402 | 1236 | CMAP_CURSOR_FOR_EACH_CONTINUE (rule, cmap_node, &cursor->rules) { |
5ecc9d81 | 1237 | if (rule_matches(rule, cursor->target)) { |
5f0476ce | 1238 | cls_cursor_next_unlock(cursor, rule->cls_rule); |
627fb667 | 1239 | return rule->cls_rule; |
5ecc9d81 BP |
1240 | } |
1241 | } | |
1242 | ||
03868246 | 1243 | subtable = cursor->subtable; |
f2c21402 JR |
1244 | CMAP_CURSOR_FOR_EACH_CONTINUE (subtable, cmap_node, &cursor->subtables) { |
1245 | rule = search_subtable(subtable, cursor); | |
5ecc9d81 | 1246 | if (rule) { |
03868246 | 1247 | cursor->subtable = subtable; |
5f0476ce | 1248 | cls_cursor_next_unlock(cursor, rule->cls_rule); |
627fb667 | 1249 | return rule->cls_rule; |
5ecc9d81 BP |
1250 | } |
1251 | } | |
1252 | ||
5f0476ce | 1253 | fat_rwlock_unlock(&cursor->cls->rwlock); |
5ecc9d81 BP |
1254 | return NULL; |
1255 | } | |
1256 | \f | |
03868246 | 1257 | static struct cls_subtable * |
cabd4c43 | 1258 | find_subtable(const struct cls_classifier *cls, const struct minimask *mask) |
b5d97350 | 1259 | { |
03868246 | 1260 | struct cls_subtable *subtable; |
064af421 | 1261 | |
f2c21402 | 1262 | CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0), |
5a87054c | 1263 | &cls->subtables_map) { |
03868246 JR |
1264 | if (minimask_equal(mask, &subtable->mask)) { |
1265 | return subtable; | |
064af421 BP |
1266 | } |
1267 | } | |
b5d97350 | 1268 | return NULL; |
064af421 | 1269 | } |
064af421 | 1270 | |
03868246 | 1271 | static struct cls_subtable * |
cabd4c43 | 1272 | insert_subtable(struct cls_classifier *cls, const struct minimask *mask) |
b5d97350 | 1273 | { |
c906cedf | 1274 | uint32_t hash = minimask_hash(mask, 0); |
03868246 | 1275 | struct cls_subtable *subtable; |
476f36e8 JR |
1276 | int i, index = 0; |
1277 | struct flow_wildcards old, new; | |
1278 | uint8_t prev; | |
3016f3e4 | 1279 | int count = count_1bits(mask->masks.map); |
064af421 | 1280 | |
3016f3e4 JR |
1281 | subtable = xzalloc(sizeof *subtable - sizeof mask->masks.inline_values |
1282 | + MINIFLOW_VALUES_SIZE(count)); | |
f2c21402 | 1283 | cmap_init(&subtable->rules); |
3016f3e4 | 1284 | miniflow_clone_inline(&subtable->mask.masks, &mask->masks, count); |
476f36e8 JR |
1285 | |
1286 | /* Init indices for segmented lookup, if any. */ | |
1287 | flow_wildcards_init_catchall(&new); | |
1288 | old = new; | |
1289 | prev = 0; | |
1290 | for (i = 0; i < cls->n_flow_segments; i++) { | |
1291 | flow_wildcards_fold_minimask_range(&new, mask, prev, | |
1292 | cls->flow_segments[i]); | |
1293 | /* Add an index if it adds mask bits. */ | |
1294 | if (!flow_wildcards_equal(&new, &old)) { | |
f2c21402 | 1295 | cmap_init(&subtable->indices[index]); |
476f36e8 JR |
1296 | subtable->index_ofs[index] = cls->flow_segments[i]; |
1297 | index++; | |
1298 | old = new; | |
1299 | } | |
1300 | prev = cls->flow_segments[i]; | |
1301 | } | |
1302 | /* Check if the rest of the subtable's mask adds any bits, | |
1303 | * and remove the last index if it doesn't. */ | |
1304 | if (index > 0) { | |
1305 | flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S); | |
1306 | if (flow_wildcards_equal(&new, &old)) { | |
1307 | --index; | |
1308 | subtable->index_ofs[index] = 0; | |
f2c21402 | 1309 | cmap_destroy(&subtable->indices[index]); |
476f36e8 JR |
1310 | } |
1311 | } | |
1312 | subtable->n_indices = index; | |
1313 | ||
03868246 JR |
1314 | subtable->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX |
1315 | ? tag_create_deterministic(hash) | |
1316 | : TAG_ALL); | |
064af421 | 1317 | |
13751fd8 JR |
1318 | for (i = 0; i < cls->n_tries; i++) { |
1319 | subtable->trie_plen[i] = minimask_get_prefix_len(mask, | |
1320 | cls->tries[i].field); | |
1321 | } | |
1322 | ||
69d6040e JR |
1323 | /* Ports trie. */ |
1324 | subtable->ports_trie = NULL; | |
1325 | subtable->ports_mask_len | |
1326 | = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src))); | |
1327 | ||
f2c21402 | 1328 | cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash); |
ec988646 | 1329 | |
03868246 | 1330 | return subtable; |
064af421 BP |
1331 | } |
1332 | ||
b5d97350 | 1333 | static void |
cabd4c43 | 1334 | destroy_subtable(struct cls_classifier *cls, struct cls_subtable *subtable) |
b5d97350 | 1335 | { |
476f36e8 JR |
1336 | int i; |
1337 | ||
fe7cfa5c | 1338 | pvector_remove(&cls->subtables, subtable); |
69d6040e JR |
1339 | trie_destroy(subtable->ports_trie); |
1340 | ||
476f36e8 | 1341 | for (i = 0; i < subtable->n_indices; i++) { |
f2c21402 | 1342 | cmap_destroy(&subtable->indices[i]); |
476f36e8 | 1343 | } |
f2c21402 JR |
1344 | cmap_remove(&cls->subtables_map, &subtable->cmap_node, |
1345 | minimask_hash(&subtable->mask, 0)); | |
fe7cfa5c | 1346 | minimask_destroy(&subtable->mask); |
f2c21402 | 1347 | cmap_destroy(&subtable->rules); |
fe7cfa5c | 1348 | ovsrcu_postpone(free, subtable); |
4aacd02d BP |
1349 | } |
1350 | ||
13751fd8 JR |
1351 | struct range { |
1352 | uint8_t start; | |
1353 | uint8_t end; | |
1354 | }; | |
1355 | ||
1356 | /* Return 'true' if can skip rest of the subtable based on the prefix trie | |
1357 | * lookup results. */ | |
1358 | static inline bool | |
1359 | check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, | |
1360 | const unsigned int field_plen[CLS_MAX_TRIES], | |
1361 | const struct range ofs, const struct flow *flow, | |
1362 | struct flow_wildcards *wc) | |
1363 | { | |
1364 | int j; | |
1365 | ||
1366 | /* Check if we could avoid fully unwildcarding the next level of | |
1367 | * fields using the prefix tries. The trie checks are done only as | |
1368 | * needed to avoid folding in additional bits to the wildcards mask. */ | |
1369 | for (j = 0; j < n_tries; j++) { | |
1370 | /* Is the trie field relevant for this subtable? */ | |
1371 | if (field_plen[j]) { | |
1372 | struct trie_ctx *ctx = &trie_ctx[j]; | |
1373 | uint8_t be32ofs = ctx->be32ofs; | |
1374 | ||
1375 | /* Is the trie field within the current range of fields? */ | |
1376 | if (be32ofs >= ofs.start && be32ofs < ofs.end) { | |
1377 | /* On-demand trie lookup. */ | |
1378 | if (!ctx->lookup_done) { | |
1379 | ctx->match_plen = trie_lookup(ctx->trie, flow, | |
1380 | &ctx->maskbits); | |
1381 | ctx->lookup_done = true; | |
1382 | } | |
1383 | /* Possible to skip the rest of the subtable if subtable's | |
1384 | * prefix on the field is longer than what is known to match | |
1385 | * based on the trie lookup. */ | |
1386 | if (field_plen[j] > ctx->match_plen) { | |
1387 | /* RFC: We want the trie lookup to never result in | |
1388 | * unwildcarding any bits that would not be unwildcarded | |
1389 | * otherwise. Since the trie is shared by the whole | |
1390 | * classifier, it is possible that the 'maskbits' contain | |
1391 | * bits that are irrelevant for the partition of the | |
1392 | * classifier relevant for the current flow. */ | |
1393 | ||
1394 | /* Can skip if the field is already unwildcarded. */ | |
1395 | if (mask_prefix_bits_set(wc, be32ofs, ctx->maskbits)) { | |
1396 | return true; | |
1397 | } | |
1398 | /* Check that the trie result will not unwildcard more bits | |
1399 | * than this stage will. */ | |
1400 | if (ctx->maskbits <= field_plen[j]) { | |
1401 | /* Unwildcard the bits and skip the rest. */ | |
1402 | mask_set_prefix_bits(wc, be32ofs, ctx->maskbits); | |
1403 | /* Note: Prerequisite already unwildcarded, as the only | |
1404 | * prerequisite of the supported trie lookup fields is | |
1405 | * the ethertype, which is currently always | |
1406 | * unwildcarded. | |
1407 | */ | |
1408 | return true; | |
1409 | } | |
1410 | } | |
1411 | } | |
1412 | } | |
1413 | } | |
1414 | return false; | |
1415 | } | |
1416 | ||
3016f3e4 JR |
1417 | /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit |
1418 | * for which 'flow', for which 'mask' has a bit set, specifies a particular | |
1419 | * value has the correct value in 'target'. | |
1420 | * | |
1421 | * This function is equivalent to miniflow_equal_flow_in_minimask(flow, | |
a64759f0 JR |
1422 | * target, mask) but this is faster because of the invariant that |
1423 | * flow->map and mask->masks.map are the same, and that this version | |
1424 | * takes the 'wc'. */ | |
3016f3e4 JR |
1425 | static inline bool |
1426 | miniflow_and_mask_matches_flow(const struct miniflow *flow, | |
1427 | const struct minimask *mask, | |
e9319757 | 1428 | const struct flow *target) |
3016f3e4 JR |
1429 | { |
1430 | const uint32_t *flowp = miniflow_get_u32_values(flow); | |
1431 | const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); | |
a64759f0 | 1432 | uint32_t idx; |
3016f3e4 | 1433 | |
a64759f0 JR |
1434 | MAP_FOR_EACH_INDEX(idx, mask->masks.map) { |
1435 | uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & *maskp++; | |
1436 | ||
1437 | if (diff) { | |
3016f3e4 JR |
1438 | return false; |
1439 | } | |
1440 | } | |
1441 | ||
1442 | return true; | |
1443 | } | |
1444 | ||
627fb667 | 1445 | static inline struct cls_match * |
476f36e8 JR |
1446 | find_match(const struct cls_subtable *subtable, const struct flow *flow, |
1447 | uint32_t hash) | |
b5d97350 | 1448 | { |
627fb667 | 1449 | struct cls_match *rule; |
b5d97350 | 1450 | |
f2c21402 | 1451 | CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) { |
3016f3e4 | 1452 | if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask, |
e9319757 | 1453 | flow)) { |
b5d97350 | 1454 | return rule; |
064af421 BP |
1455 | } |
1456 | } | |
c23740be | 1457 | |
064af421 BP |
1458 | return NULL; |
1459 | } | |
1460 | ||
e9319757 JR |
1461 | /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit |
1462 | * for which 'flow', for which 'mask' has a bit set, specifies a particular | |
1463 | * value has the correct value in 'target'. | |
1464 | * | |
1465 | * This function is equivalent to miniflow_and_mask_matches_flow() but this | |
1466 | * version fills in the mask bits in 'wc'. */ | |
1467 | static inline bool | |
1468 | miniflow_and_mask_matches_flow_wc(const struct miniflow *flow, | |
1469 | const struct minimask *mask, | |
1470 | const struct flow *target, | |
1471 | struct flow_wildcards *wc) | |
1472 | { | |
1473 | const uint32_t *flowp = miniflow_get_u32_values(flow); | |
1474 | const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); | |
1475 | uint32_t idx; | |
1476 | ||
1477 | MAP_FOR_EACH_INDEX(idx, mask->masks.map) { | |
1478 | uint32_t mask = *maskp++; | |
1479 | uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & mask; | |
1480 | ||
1481 | if (diff) { | |
1482 | /* Only unwildcard if none of the differing bits is already | |
1483 | * exact-matched. */ | |
1484 | if (!(flow_u32_value(&wc->masks, idx) & diff)) { | |
1485 | /* Keep one bit of the difference. */ | |
1486 | *flow_u32_lvalue(&wc->masks, idx) |= rightmost_1bit(diff); | |
1487 | } | |
1488 | return false; | |
1489 | } | |
1490 | /* Fill in the bits that were looked at. */ | |
1491 | *flow_u32_lvalue(&wc->masks, idx) |= mask; | |
1492 | } | |
1493 | ||
1494 | return true; | |
1495 | } | |
1496 | ||
386cb9f7 JR |
1497 | /* Unwildcard the fields looked up so far, if any. */ |
1498 | static void | |
1499 | fill_range_wc(const struct cls_subtable *subtable, struct flow_wildcards *wc, | |
1500 | uint8_t to) | |
1501 | { | |
1502 | if (to) { | |
1503 | flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, to); | |
1504 | } | |
1505 | } | |
1506 | ||
627fb667 | 1507 | static struct cls_match * |
476f36e8 | 1508 | find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, |
13751fd8 JR |
1509 | struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, |
1510 | struct flow_wildcards *wc) | |
476f36e8 JR |
1511 | { |
1512 | uint32_t basis = 0, hash; | |
a64759f0 | 1513 | struct cls_match *rule; |
476f36e8 | 1514 | int i; |
13751fd8 | 1515 | struct range ofs; |
476f36e8 | 1516 | |
ec988646 | 1517 | if (OVS_UNLIKELY(!wc)) { |
476f36e8 JR |
1518 | return find_match(subtable, flow, |
1519 | flow_hash_in_minimask(flow, &subtable->mask, 0)); | |
1520 | } | |
1521 | ||
13751fd8 | 1522 | ofs.start = 0; |
476f36e8 JR |
1523 | /* Try to finish early by checking fields in segments. */ |
1524 | for (i = 0; i < subtable->n_indices; i++) { | |
f2c21402 JR |
1525 | struct cmap_node *inode; |
1526 | ||
13751fd8 | 1527 | ofs.end = subtable->index_ofs[i]; |
476f36e8 | 1528 | |
13751fd8 JR |
1529 | if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, |
1530 | wc)) { | |
386cb9f7 JR |
1531 | /* 'wc' bits for the trie field set, now unwildcard the preceding |
1532 | * bits used so far. */ | |
1533 | fill_range_wc(subtable, wc, ofs.start); | |
1534 | return NULL; | |
13751fd8 JR |
1535 | } |
1536 | hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start, | |
1537 | ofs.end, &basis); | |
f2c21402 | 1538 | inode = cmap_find(&subtable->indices[i], hash); |
476f36e8 | 1539 | if (!inode) { |
386cb9f7 JR |
1540 | /* No match, can stop immediately, but must fold in the bits |
1541 | * used in lookup so far. */ | |
1542 | fill_range_wc(subtable, wc, ofs.end); | |
1543 | return NULL; | |
476f36e8 JR |
1544 | } |
1545 | ||
1546 | /* If we have narrowed down to a single rule already, check whether | |
a64759f0 | 1547 | * that rule matches. Either way, we're done. |
476f36e8 JR |
1548 | * |
1549 | * (Rare) hash collisions may cause us to miss the opportunity for this | |
1550 | * optimization. */ | |
f2c21402 | 1551 | if (!cmap_node_next(inode)) { |
476f36e8 | 1552 | ASSIGN_CONTAINER(rule, inode - i, index_nodes); |
e9319757 JR |
1553 | if (miniflow_and_mask_matches_flow_wc(&rule->flow, &subtable->mask, |
1554 | flow, wc)) { | |
1555 | return rule; | |
476f36e8 | 1556 | } |
e9319757 | 1557 | return NULL; |
476f36e8 | 1558 | } |
386cb9f7 | 1559 | ofs.start = ofs.end; |
476f36e8 | 1560 | } |
13751fd8 JR |
1561 | ofs.end = FLOW_U32S; |
1562 | /* Trie check for the final range. */ | |
1563 | if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) { | |
386cb9f7 JR |
1564 | fill_range_wc(subtable, wc, ofs.start); |
1565 | return NULL; | |
13751fd8 | 1566 | } |
a64759f0 JR |
1567 | hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start, |
1568 | ofs.end, &basis); | |
1569 | rule = find_match(subtable, flow, hash); | |
69d6040e JR |
1570 | if (!rule && subtable->ports_mask_len) { |
1571 | /* Ports are always part of the final range, if any. | |
1572 | * No match was found for the ports. Use the ports trie to figure out | |
1573 | * which ports bits to unwildcard. */ | |
1574 | unsigned int mbits; | |
1575 | ovs_be32 value, mask; | |
1576 | ||
1577 | mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src); | |
1578 | value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask; | |
f0e5aa11 | 1579 | trie_lookup_value(subtable->ports_trie, &value, 32, &mbits); |
69d6040e JR |
1580 | |
1581 | ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |= | |
1582 | mask & htonl(~0 << (32 - mbits)); | |
1583 | ||
386cb9f7 JR |
1584 | /* Unwildcard all bits in the mask upto the ports, as they were used |
1585 | * to determine there is no match. */ | |
1586 | fill_range_wc(subtable, wc, TP_PORTS_OFS32); | |
1587 | return NULL; | |
69d6040e | 1588 | } |
e9319757 | 1589 | |
13751fd8 | 1590 | /* Must unwildcard all the fields, as they were looked at. */ |
476f36e8 JR |
1591 | flow_wildcards_fold_minimask(wc, &subtable->mask); |
1592 | return rule; | |
1593 | } | |
1594 | ||
627fb667 | 1595 | static struct cls_match * |
03868246 JR |
1596 | find_equal(struct cls_subtable *subtable, const struct miniflow *flow, |
1597 | uint32_t hash) | |
064af421 | 1598 | { |
627fb667 | 1599 | struct cls_match *head; |
064af421 | 1600 | |
f2c21402 | 1601 | CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) { |
3016f3e4 | 1602 | if (miniflow_equal(&head->flow, flow)) { |
b5d97350 | 1603 | return head; |
064af421 BP |
1604 | } |
1605 | } | |
1606 | return NULL; | |
1607 | } | |
1608 | ||
627fb667 | 1609 | static struct cls_match * |
cabd4c43 | 1610 | insert_rule(struct cls_classifier *cls, struct cls_subtable *subtable, |
f2c21402 | 1611 | struct cls_rule *new_rule) |
064af421 | 1612 | { |
627fb667 | 1613 | struct cls_match *old = NULL; |
f2c21402 JR |
1614 | struct cls_match *new = cls_match_alloc(new_rule); |
1615 | struct cls_match *head; | |
476f36e8 | 1616 | int i; |
f2c21402 | 1617 | uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES]; |
13751fd8 | 1618 | uint8_t prev_be32ofs = 0; |
476f36e8 JR |
1619 | |
1620 | /* Add new node to segment indices. */ | |
1621 | for (i = 0; i < subtable->n_indices; i++) { | |
f2c21402 JR |
1622 | ihash[i] = minimatch_hash_range(&new_rule->match, prev_be32ofs, |
1623 | subtable->index_ofs[i], &basis); | |
1624 | cmap_insert(&subtable->indices[i], &new->index_nodes[i], ihash[i]); | |
13751fd8 | 1625 | prev_be32ofs = subtable->index_ofs[i]; |
476f36e8 | 1626 | } |
f2c21402 JR |
1627 | hash = minimatch_hash_range(&new_rule->match, prev_be32ofs, FLOW_U32S, |
1628 | &basis); | |
1629 | head = find_equal(subtable, &new_rule->match.flow, hash); | |
b5d97350 | 1630 | if (!head) { |
f2c21402 JR |
1631 | cmap_insert(&subtable->rules, &new->cmap_node, hash); |
1632 | list_init(&new->list); | |
4aacd02d | 1633 | goto out; |
b5d97350 BP |
1634 | } else { |
1635 | /* Scan the list for the insertion point that will keep the list in | |
1636 | * order of decreasing priority. */ | |
627fb667 | 1637 | struct cls_match *rule; |
476f36e8 | 1638 | |
b5d97350 | 1639 | FOR_EACH_RULE_IN_LIST (rule, head) { |
f2c21402 | 1640 | if (new->priority >= rule->priority) { |
b5d97350 | 1641 | if (rule == head) { |
f2c21402 JR |
1642 | /* 'new' is the new highest-priority flow in the list. */ |
1643 | cmap_replace(&subtable->rules, &rule->cmap_node, | |
1644 | &new->cmap_node, hash); | |
b5d97350 | 1645 | } |
064af421 | 1646 | |
f2c21402 JR |
1647 | if (new->priority == rule->priority) { |
1648 | list_replace(&new->list, &rule->list); | |
4aacd02d | 1649 | old = rule; |
b5d97350 | 1650 | } else { |
f2c21402 | 1651 | list_insert(&rule->list, &new->list); |
b5d97350 | 1652 | } |
fe7cfa5c | 1653 | goto out; |
b5d97350 BP |
1654 | } |
1655 | } | |
064af421 | 1656 | |
b5d97350 | 1657 | /* Insert 'new' at the end of the list. */ |
f2c21402 | 1658 | list_push_back(&head->list, &new->list); |
064af421 | 1659 | } |
4aacd02d BP |
1660 | |
1661 | out: | |
1662 | if (!old) { | |
fe7cfa5c JR |
1663 | subtable->n_rules++; |
1664 | ||
1665 | /* Rule was added, not replaced. Update 'subtable's 'max_priority' | |
1666 | * and 'max_count', if necessary. */ | |
1667 | if (subtable->n_rules == 1) { | |
f2c21402 | 1668 | subtable->max_priority = new->priority; |
fe7cfa5c | 1669 | subtable->max_count = 1; |
f2c21402 JR |
1670 | pvector_insert(&cls->subtables, subtable, new->priority); |
1671 | } else if (subtable->max_priority == new->priority) { | |
fe7cfa5c | 1672 | ++subtable->max_count; |
f2c21402 JR |
1673 | } else if (new->priority > subtable->max_priority) { |
1674 | subtable->max_priority = new->priority; | |
fe7cfa5c | 1675 | subtable->max_count = 1; |
f2c21402 | 1676 | pvector_change_priority(&cls->subtables, subtable, new->priority); |
fe7cfa5c | 1677 | } |
476f36e8 JR |
1678 | } else { |
1679 | /* Remove old node from indices. */ | |
1680 | for (i = 0; i < subtable->n_indices; i++) { | |
f2c21402 | 1681 | cmap_remove(&subtable->indices[i], &old->index_nodes[i], ihash[i]); |
476f36e8 | 1682 | } |
4aacd02d BP |
1683 | } |
1684 | return old; | |
064af421 BP |
1685 | } |
1686 | ||
627fb667 JR |
1687 | static struct cls_match * |
1688 | next_rule_in_list__(struct cls_match *rule) | |
064af421 | 1689 | { |
627fb667 | 1690 | struct cls_match *next = OBJECT_CONTAINING(rule->list.next, next, list); |
955f579d BP |
1691 | return next; |
1692 | } | |
1693 | ||
627fb667 JR |
1694 | static struct cls_match * |
1695 | next_rule_in_list(struct cls_match *rule) | |
955f579d | 1696 | { |
627fb667 | 1697 | struct cls_match *next = next_rule_in_list__(rule); |
b5d97350 | 1698 | return next->priority < rule->priority ? next : NULL; |
064af421 | 1699 | } |
13751fd8 JR |
1700 | \f |
1701 | /* A longest-prefix match tree. */ | |
1702 | struct trie_node { | |
1703 | uint32_t prefix; /* Prefix bits for this node, MSB first. */ | |
c30cfa6b | 1704 | uint8_t n_bits; /* Never zero, except for the root node. */ |
13751fd8 JR |
1705 | unsigned int n_rules; /* Number of rules that have this prefix. */ |
1706 | struct trie_node *edges[2]; /* Both NULL if leaf. */ | |
1707 | }; | |
1708 | ||
1709 | /* Max bits per node. Must fit in struct trie_node's 'prefix'. | |
1710 | * Also tested with 16, 8, and 5 to stress the implementation. */ | |
1711 | #define TRIE_PREFIX_BITS 32 | |
1712 | ||
1713 | /* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'. | |
1714 | * Prefixes are in the network byte order, and the offset 0 corresponds to | |
1715 | * the most significant bit of the first byte. The offset can be read as | |
1716 | * "how many bits to skip from the start of the prefix starting at 'pr'". */ | |
1717 | static uint32_t | |
1718 | raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) | |
1719 | { | |
1720 | uint32_t prefix; | |
1721 | ||
1722 | pr += ofs / 32; /* Where to start. */ | |
1723 | ofs %= 32; /* How many bits to skip at 'pr'. */ | |
1724 | ||
1725 | prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */ | |
1726 | if (plen > 32 - ofs) { /* Need more than we have already? */ | |
1727 | prefix |= ntohl(*++pr) >> (32 - ofs); | |
1728 | } | |
1729 | /* Return with possible unwanted bits at the end. */ | |
1730 | return prefix; | |
1731 | } | |
1732 | ||
1733 | /* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit | |
1734 | * offset 'ofs'. Prefixes are in the network byte order, and the offset 0 | |
1735 | * corresponds to the most significant bit of the first byte. The offset can | |
1736 | * be read as "how many bits to skip from the start of the prefix starting at | |
1737 | * 'pr'". */ | |
1738 | static uint32_t | |
1739 | trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) | |
1740 | { | |
1741 | if (!plen) { | |
1742 | return 0; | |
1743 | } | |
1744 | if (plen > TRIE_PREFIX_BITS) { | |
1745 | plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */ | |
1746 | } | |
1747 | /* Return with unwanted bits cleared. */ | |
1748 | return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen); | |
1749 | } | |
1750 | ||
c30cfa6b | 1751 | /* Return the number of equal bits in 'n_bits' of 'prefix's MSBs and a 'value' |
13751fd8 JR |
1752 | * starting at "MSB 0"-based offset 'ofs'. */ |
1753 | static unsigned int | |
c30cfa6b | 1754 | prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[], |
13751fd8 JR |
1755 | unsigned int ofs) |
1756 | { | |
c30cfa6b | 1757 | uint64_t diff = prefix ^ raw_get_prefix(value, ofs, n_bits); |
13751fd8 | 1758 | /* Set the bit after the relevant bits to limit the result. */ |
c30cfa6b | 1759 | return raw_clz64(diff << 32 | UINT64_C(1) << (63 - n_bits)); |
13751fd8 JR |
1760 | } |
1761 | ||
1762 | /* Return the number of equal bits in 'node' prefix and a 'prefix' of length | |
1763 | * 'plen', starting at "MSB 0"-based offset 'ofs'. */ | |
1764 | static unsigned int | |
1765 | trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[], | |
1766 | unsigned int ofs, unsigned int plen) | |
1767 | { | |
c30cfa6b | 1768 | return prefix_equal_bits(node->prefix, MIN(node->n_bits, plen - ofs), |
13751fd8 JR |
1769 | prefix, ofs); |
1770 | } | |
1771 | ||
1772 | /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can | |
1773 | * be greater than 31. */ | |
1774 | static unsigned int | |
1775 | be_get_bit_at(const ovs_be32 value[], unsigned int ofs) | |
1776 | { | |
1777 | return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u; | |
1778 | } | |
1779 | ||
1780 | /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' must | |
1781 | * be between 0 and 31, inclusive. */ | |
1782 | static unsigned int | |
1783 | get_bit_at(const uint32_t prefix, unsigned int ofs) | |
1784 | { | |
1785 | return (prefix >> (31 - ofs)) & 1u; | |
1786 | } | |
1787 | ||
1788 | /* Create new branch. */ | |
1789 | static struct trie_node * | |
1790 | trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen, | |
1791 | unsigned int n_rules) | |
1792 | { | |
1793 | struct trie_node *node = xmalloc(sizeof *node); | |
1794 | ||
1795 | node->prefix = trie_get_prefix(prefix, ofs, plen); | |
1796 | ||
1797 | if (plen <= TRIE_PREFIX_BITS) { | |
c30cfa6b | 1798 | node->n_bits = plen; |
13751fd8 JR |
1799 | node->edges[0] = NULL; |
1800 | node->edges[1] = NULL; | |
1801 | node->n_rules = n_rules; | |
1802 | } else { /* Need intermediate nodes. */ | |
1803 | struct trie_node *subnode = trie_branch_create(prefix, | |
1804 | ofs + TRIE_PREFIX_BITS, | |
1805 | plen - TRIE_PREFIX_BITS, | |
1806 | n_rules); | |
1807 | int bit = get_bit_at(subnode->prefix, 0); | |
c30cfa6b | 1808 | node->n_bits = TRIE_PREFIX_BITS; |
13751fd8 JR |
1809 | node->edges[bit] = subnode; |
1810 | node->edges[!bit] = NULL; | |
1811 | node->n_rules = 0; | |
1812 | } | |
1813 | return node; | |
1814 | } | |
1815 | ||
1816 | static void | |
1817 | trie_node_destroy(struct trie_node *node) | |
1818 | { | |
1819 | free(node); | |
1820 | } | |
1821 | ||
1822 | static void | |
1823 | trie_destroy(struct trie_node *node) | |
1824 | { | |
1825 | if (node) { | |
1826 | trie_destroy(node->edges[0]); | |
1827 | trie_destroy(node->edges[1]); | |
1828 | free(node); | |
1829 | } | |
1830 | } | |
1831 | ||
1832 | static bool | |
1833 | trie_is_leaf(const struct trie_node *trie) | |
1834 | { | |
1835 | return !trie->edges[0] && !trie->edges[1]; /* No children. */ | |
1836 | } | |
1837 | ||
1838 | static void | |
1839 | mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs, | |
c30cfa6b | 1840 | unsigned int n_bits) |
13751fd8 JR |
1841 | { |
1842 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; | |
1843 | unsigned int i; | |
1844 | ||
c30cfa6b | 1845 | for (i = 0; i < n_bits / 32; i++) { |
13751fd8 JR |
1846 | mask[i] = OVS_BE32_MAX; |
1847 | } | |
c30cfa6b JR |
1848 | if (n_bits % 32) { |
1849 | mask[i] |= htonl(~0u << (32 - n_bits % 32)); | |
13751fd8 JR |
1850 | } |
1851 | } | |
1852 | ||
1853 | static bool | |
1854 | mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs, | |
c30cfa6b | 1855 | unsigned int n_bits) |
13751fd8 JR |
1856 | { |
1857 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; | |
1858 | unsigned int i; | |
1859 | ovs_be32 zeroes = 0; | |
1860 | ||
c30cfa6b | 1861 | for (i = 0; i < n_bits / 32; i++) { |
13751fd8 JR |
1862 | zeroes |= ~mask[i]; |
1863 | } | |
c30cfa6b JR |
1864 | if (n_bits % 32) { |
1865 | zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32)); | |
13751fd8 JR |
1866 | } |
1867 | ||
c30cfa6b | 1868 | return !zeroes; /* All 'n_bits' bits set. */ |
13751fd8 JR |
1869 | } |
1870 | ||
1871 | static struct trie_node ** | |
1872 | trie_next_edge(struct trie_node *node, const ovs_be32 value[], | |
1873 | unsigned int ofs) | |
1874 | { | |
1875 | return node->edges + be_get_bit_at(value, ofs); | |
1876 | } | |
1877 | ||
1878 | static const struct trie_node * | |
1879 | trie_next_node(const struct trie_node *node, const ovs_be32 value[], | |
1880 | unsigned int ofs) | |
1881 | { | |
1882 | return node->edges[be_get_bit_at(value, ofs)]; | |
1883 | } | |
1884 | ||
1885 | /* Return the prefix mask length necessary to find the longest-prefix match for | |
1886 | * the '*value' in the prefix tree 'node'. | |
1887 | * '*checkbits' is set to the number of bits in the prefix mask necessary to | |
1888 | * determine a mismatch, in case there are longer prefixes in the tree below | |
1889 | * the one that matched. | |
1890 | */ | |
1891 | static unsigned int | |
1892 | trie_lookup_value(const struct trie_node *node, const ovs_be32 value[], | |
f0e5aa11 | 1893 | unsigned int n_bits, unsigned int *checkbits) |
13751fd8 | 1894 | { |
197573ac | 1895 | unsigned int ofs = 0, match_len = 0; |
13751fd8 JR |
1896 | const struct trie_node *prev = NULL; |
1897 | ||
197573ac | 1898 | for (; node; prev = node, node = trie_next_node(node, value, ofs)) { |
13751fd8 JR |
1899 | unsigned int eqbits; |
1900 | /* Check if this edge can be followed. */ | |
c30cfa6b | 1901 | eqbits = prefix_equal_bits(node->prefix, node->n_bits, value, ofs); |
197573ac | 1902 | ofs += eqbits; |
c30cfa6b | 1903 | if (eqbits < node->n_bits) { /* Mismatch, nothing more to be found. */ |
197573ac JR |
1904 | /* Bit at offset 'ofs' differed. */ |
1905 | *checkbits = ofs + 1; /* Includes the first mismatching bit. */ | |
13751fd8 JR |
1906 | return match_len; |
1907 | } | |
1908 | /* Full match, check if rules exist at this prefix length. */ | |
1909 | if (node->n_rules > 0) { | |
197573ac | 1910 | match_len = ofs; |
13751fd8 | 1911 | } |
f0e5aa11 JR |
1912 | if (ofs >= n_bits) { |
1913 | *checkbits = n_bits; /* Full prefix. */ | |
1914 | return match_len; | |
1915 | } | |
13751fd8 | 1916 | } |
f0e5aa11 JR |
1917 | /* node == NULL. Full match so far, but we came to a dead end. |
1918 | * need to exclude the other branch if it exists. */ | |
197573ac | 1919 | *checkbits = !prev || trie_is_leaf(prev) ? ofs : ofs + 1; |
13751fd8 JR |
1920 | return match_len; |
1921 | } | |
1922 | ||
1923 | static unsigned int | |
1924 | trie_lookup(const struct cls_trie *trie, const struct flow *flow, | |
1925 | unsigned int *checkbits) | |
1926 | { | |
1927 | const struct mf_field *mf = trie->field; | |
1928 | ||
1929 | /* Check that current flow matches the prerequisites for the trie | |
1930 | * field. Some match fields are used for multiple purposes, so we | |
1931 | * must check that the trie is relevant for this flow. */ | |
1932 | if (mf_are_prereqs_ok(mf, flow)) { | |
1933 | return trie_lookup_value(trie->root, | |
1934 | &((ovs_be32 *)flow)[mf->flow_be32ofs], | |
f0e5aa11 | 1935 | mf->n_bits, checkbits); |
13751fd8 JR |
1936 | } |
1937 | *checkbits = 0; /* Value not used in this case. */ | |
1938 | return UINT_MAX; | |
1939 | } | |
1940 | ||
1941 | /* Returns the length of a prefix match mask for the field 'mf' in 'minimask'. | |
1942 | * Returns the u32 offset to the miniflow data in '*miniflow_index', if | |
1943 | * 'miniflow_index' is not NULL. */ | |
1944 | static unsigned int | |
1945 | minimask_get_prefix_len(const struct minimask *minimask, | |
1946 | const struct mf_field *mf) | |
1947 | { | |
c30cfa6b | 1948 | unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */ |
13751fd8 JR |
1949 | uint8_t u32_ofs = mf->flow_be32ofs; |
1950 | uint8_t u32_end = u32_ofs + mf->n_bytes / 4; | |
1951 | ||
1952 | for (; u32_ofs < u32_end; ++u32_ofs) { | |
1953 | uint32_t mask; | |
1954 | mask = ntohl((OVS_FORCE ovs_be32)minimask_get(minimask, u32_ofs)); | |
1955 | ||
1956 | /* Validate mask, count the mask length. */ | |
1957 | if (mask_tz) { | |
1958 | if (mask) { | |
1959 | return 0; /* No bits allowed after mask ended. */ | |
1960 | } | |
1961 | } else { | |
1962 | if (~mask & (~mask + 1)) { | |
1963 | return 0; /* Mask not contiguous. */ | |
1964 | } | |
1965 | mask_tz = ctz32(mask); | |
c30cfa6b | 1966 | n_bits += 32 - mask_tz; |
13751fd8 JR |
1967 | } |
1968 | } | |
1969 | ||
c30cfa6b | 1970 | return n_bits; |
13751fd8 JR |
1971 | } |
1972 | ||
1973 | /* | |
1974 | * This is called only when mask prefix is known to be CIDR and non-zero. | |
1975 | * Relies on the fact that the flow and mask have the same map, and since | |
1976 | * the mask is CIDR, the storage for the flow field exists even if it | |
1977 | * happened to be zeros. | |
1978 | */ | |
1979 | static const ovs_be32 * | |
1980 | minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf) | |
1981 | { | |
27bbe15d | 1982 | return miniflow_get_be32_values(&match->flow) + |
13751fd8 JR |
1983 | count_1bits(match->flow.map & ((UINT64_C(1) << mf->flow_be32ofs) - 1)); |
1984 | } | |
1985 | ||
1986 | /* Insert rule in to the prefix tree. | |
1987 | * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask | |
1988 | * in 'rule'. */ | |
1989 | static void | |
1990 | trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) | |
1991 | { | |
69d6040e JR |
1992 | trie_insert_prefix(&trie->root, |
1993 | minimatch_get_prefix(&rule->match, trie->field), mlen); | |
1994 | } | |
1995 | ||
1996 | static void | |
1997 | trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int mlen) | |
1998 | { | |
13751fd8 | 1999 | struct trie_node *node; |
13751fd8 JR |
2000 | int ofs = 0; |
2001 | ||
2002 | /* Walk the tree. */ | |
69d6040e | 2003 | for (; (node = *edge) != NULL; |
13751fd8 JR |
2004 | edge = trie_next_edge(node, prefix, ofs)) { |
2005 | unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen); | |
2006 | ofs += eqbits; | |
c30cfa6b | 2007 | if (eqbits < node->n_bits) { |
13751fd8 JR |
2008 | /* Mismatch, new node needs to be inserted above. */ |
2009 | int old_branch = get_bit_at(node->prefix, eqbits); | |
2010 | ||
2011 | /* New parent node. */ | |
2012 | *edge = trie_branch_create(prefix, ofs - eqbits, eqbits, | |
2013 | ofs == mlen ? 1 : 0); | |
2014 | ||
2015 | /* Adjust old node for its new position in the tree. */ | |
2016 | node->prefix <<= eqbits; | |
c30cfa6b | 2017 | node->n_bits -= eqbits; |
13751fd8 JR |
2018 | (*edge)->edges[old_branch] = node; |
2019 | ||
2020 | /* Check if need a new branch for the new rule. */ | |
2021 | if (ofs < mlen) { | |
2022 | (*edge)->edges[!old_branch] | |
2023 | = trie_branch_create(prefix, ofs, mlen - ofs, 1); | |
2024 | } | |
2025 | return; | |
2026 | } | |
2027 | /* Full match so far. */ | |
2028 | ||
2029 | if (ofs == mlen) { | |
2030 | /* Full match at the current node, rule needs to be added here. */ | |
2031 | node->n_rules++; | |
2032 | return; | |
2033 | } | |
2034 | } | |
2035 | /* Must insert a new tree branch for the new rule. */ | |
2036 | *edge = trie_branch_create(prefix, ofs, mlen - ofs, 1); | |
2037 | } | |
2038 | ||
2039 | /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask | |
2040 | * in 'rule'. */ | |
2041 | static void | |
2042 | trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) | |
2043 | { | |
69d6040e JR |
2044 | trie_remove_prefix(&trie->root, |
2045 | minimatch_get_prefix(&rule->match, trie->field), mlen); | |
2046 | } | |
2047 | ||
2048 | /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask | |
2049 | * in 'rule'. */ | |
2050 | static void | |
2051 | trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int mlen) | |
2052 | { | |
13751fd8 JR |
2053 | struct trie_node *node; |
2054 | struct trie_node **edges[sizeof(union mf_value) * 8]; | |
2055 | int depth = 0, ofs = 0; | |
2056 | ||
2057 | /* Walk the tree. */ | |
69d6040e | 2058 | for (edges[0] = root; |
13751fd8 JR |
2059 | (node = *edges[depth]) != NULL; |
2060 | edges[++depth] = trie_next_edge(node, prefix, ofs)) { | |
2061 | unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen); | |
69d6040e | 2062 | |
c30cfa6b | 2063 | if (eqbits < node->n_bits) { |
13751fd8 JR |
2064 | /* Mismatch, nothing to be removed. This should never happen, as |
2065 | * only rules in the classifier are ever removed. */ | |
2066 | break; /* Log a warning. */ | |
2067 | } | |
2068 | /* Full match so far. */ | |
2069 | ofs += eqbits; | |
2070 | ||
2071 | if (ofs == mlen) { | |
2072 | /* Full prefix match at the current node, remove rule here. */ | |
2073 | if (!node->n_rules) { | |
2074 | break; /* Log a warning. */ | |
2075 | } | |
2076 | node->n_rules--; | |
2077 | ||
2078 | /* Check if can prune the tree. */ | |
2079 | while (!node->n_rules && !(node->edges[0] && node->edges[1])) { | |
2080 | /* No rules and at most one child node, remove this node. */ | |
2081 | struct trie_node *next; | |
2082 | next = node->edges[0] ? node->edges[0] : node->edges[1]; | |
2083 | ||
2084 | if (next) { | |
c30cfa6b | 2085 | if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) { |
13751fd8 JR |
2086 | break; /* Cannot combine. */ |
2087 | } | |
2088 | /* Combine node with next. */ | |
c30cfa6b JR |
2089 | next->prefix = node->prefix | next->prefix >> node->n_bits; |
2090 | next->n_bits += node->n_bits; | |
13751fd8 JR |
2091 | } |
2092 | trie_node_destroy(node); | |
2093 | /* Update the parent's edge. */ | |
2094 | *edges[depth] = next; | |
2095 | if (next || !depth) { | |
2096 | /* Branch not pruned or at root, nothing more to do. */ | |
2097 | break; | |
2098 | } | |
2099 | node = *edges[--depth]; | |
2100 | } | |
2101 | return; | |
2102 | } | |
2103 | } | |
2104 | /* Cannot go deeper. This should never happen, since only rules | |
2105 | * that actually exist in the classifier are ever removed. */ | |
2106 | VLOG_WARN("Trying to remove non-existing rule from a prefix trie."); | |
2107 | } |