]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
4aacd02d | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "classifier.h" | |
064af421 BP |
19 | #include <errno.h> |
20 | #include <netinet/in.h> | |
844dff32 | 21 | #include "byte-order.h" |
68d1c8c3 | 22 | #include "dynamic-string.h" |
064af421 BP |
23 | #include "flow.h" |
24 | #include "hash.h" | |
f2c21402 | 25 | #include "cmap.h" |
52054c15 | 26 | #include "list.h" |
07b37e8f | 27 | #include "odp-util.h" |
d8ae4d67 | 28 | #include "ofp-util.h" |
0b4f2078 | 29 | #include "ovs-thread.h" |
13751fd8 | 30 | #include "packets.h" |
fe7cfa5c | 31 | #include "pvector.h" |
52054c15 JR |
32 | #include "tag.h" |
33 | #include "util.h" | |
13751fd8 JR |
34 | #include "vlog.h" |
35 | ||
36 | VLOG_DEFINE_THIS_MODULE(classifier); | |
064af421 | 37 | |
cabd4c43 | 38 | struct trie_node; |
69d6040e JR |
39 | struct trie_ctx; |
40 | ||
41 | /* Ports trie depends on both ports sharing the same ovs_be32. */ | |
42 | #define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4) | |
43 | BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4); | |
cabd4c43 JR |
44 | |
45 | /* Prefix trie for a 'field' */ | |
46 | struct cls_trie { | |
47 | const struct mf_field *field; /* Trie field, or NULL. */ | |
48 | struct trie_node *root; /* NULL if none. */ | |
49 | }; | |
50 | ||
627fb667 JR |
51 | enum { |
52 | CLS_MAX_INDICES = 3 /* Maximum number of lookup indices per subtable. */ | |
53 | }; | |
54 | ||
cabd4c43 | 55 | struct cls_classifier { |
e65413ab JR |
56 | struct ovs_mutex mutex; |
57 | int n_rules OVS_GUARDED; /* Total number of rules. */ | |
cabd4c43 JR |
58 | uint8_t n_flow_segments; |
59 | uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use | |
60 | * for staged lookup. */ | |
f2c21402 | 61 | struct cmap subtables_map; /* Contains "struct cls_subtable"s. */ |
fe7cfa5c | 62 | struct pvector subtables; |
f2c21402 | 63 | struct cmap partitions; /* Contains "struct cls_partition"s. */ |
cabd4c43 JR |
64 | struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */ |
65 | unsigned int n_tries; | |
66 | }; | |
67 | ||
68 | /* A set of rules that all have the same fields wildcarded. */ | |
69 | struct cls_subtable { | |
e65413ab | 70 | /* The fields are only used by writers and iterators. */ |
f2c21402 JR |
71 | struct cmap_node cmap_node; /* Within struct cls_classifier |
72 | * 'subtables_map'. */ | |
e65413ab JR |
73 | |
74 | /* The fields are only used by writers. */ | |
75 | int n_rules OVS_GUARDED; /* Number of rules, including | |
76 | * duplicates. */ | |
77 | unsigned int max_priority OVS_GUARDED; /* Max priority of any rule in | |
78 | * the subtable. */ | |
79 | unsigned int max_count OVS_GUARDED; /* Count of max_priority rules. */ | |
80 | ||
81 | /* These fields are accessed by readers who care about wildcarding. */ | |
82 | tag_type tag; /* Tag generated from mask for partitioning (const). */ | |
83 | uint8_t n_indices; /* How many indices to use (const). */ | |
84 | uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 segment boundaries (const). */ | |
85 | unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask' | |
86 | * (runtime configurable). */ | |
87 | int ports_mask_len; /* (const) */ | |
88 | struct cmap indices[CLS_MAX_INDICES]; /* Staged lookup indices. */ | |
89 | struct trie_node *ports_trie; /* NULL if none. */ | |
90 | ||
91 | /* These fields are accessed by all readers. */ | |
92 | struct cmap rules; /* Contains "struct cls_rule"s. */ | |
93 | struct minimask mask; /* Wildcards for fields (const). */ | |
3016f3e4 | 94 | /* 'mask' must be the last field. */ |
cabd4c43 JR |
95 | }; |
96 | ||
97 | /* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata | |
98 | * field) with tags for the "cls_subtable"s that contain rules that match that | |
99 | * metadata value. */ | |
100 | struct cls_partition { | |
f2c21402 JR |
101 | struct cmap_node cmap_node; /* In struct cls_classifier's 'partitions' |
102 | * map. */ | |
cabd4c43 JR |
103 | ovs_be64 metadata; /* metadata value for this partition. */ |
104 | tag_type tags; /* OR of each flow's cls_subtable tag. */ | |
e65413ab | 105 | struct tag_tracker tracker OVS_GUARDED; /* Tracks the bits in 'tags'. */ |
cabd4c43 JR |
106 | }; |
107 | ||
627fb667 JR |
108 | /* Internal representation of a rule in a "struct cls_subtable". */ |
109 | struct cls_match { | |
e65413ab JR |
110 | /* Accessed only by writers and iterators. */ |
111 | struct list list OVS_GUARDED; /* List of identical, lower-priority rules. */ | |
112 | ||
113 | /* Accessed only by writers. */ | |
114 | struct cls_partition *partition OVS_GUARDED; | |
115 | ||
116 | /* Accessed by readers interested in wildcarding. */ | |
117 | unsigned int priority; /* Larger numbers are higher priorities. */ | |
f2c21402 JR |
118 | struct cmap_node index_nodes[CLS_MAX_INDICES]; /* Within subtable's |
119 | * 'indices'. */ | |
e65413ab | 120 | /* Accessed by all readers. */ |
f2c21402 | 121 | struct cmap_node cmap_node; /* Within struct cls_subtable 'rules'. */ |
e65413ab | 122 | struct cls_rule *cls_rule; |
3016f3e4 JR |
123 | struct miniflow flow; /* Matching rule. Mask is in the subtable. */ |
124 | /* 'flow' must be the last field. */ | |
627fb667 | 125 | }; |
cabd4c43 | 126 | |
/* Allocates and initializes a new "struct cls_match" for 'rule'.
 *
 * The 'flow' member is a miniflow with inline values and must be the last
 * field of struct cls_match, so the allocation size is the struct size minus
 * the fixed inline-values array plus exactly enough room for 'count' values.
 * Also links the match back to 'rule' via 'rule->cls_match'. */
static struct cls_match *
cls_match_alloc(struct cls_rule *rule)
{
    /* Number of u32 values actually present in the rule's miniflow. */
    int count = count_1bits(rule->match.flow.map);

    struct cls_match *cls_match
        = xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values
                  + MINIFLOW_VALUES_SIZE(count));

    cls_match->cls_rule = rule;
    /* Copy the miniflow data into the trailing inline storage. */
    miniflow_clone_inline(&cls_match->flow, &rule->match.flow, count);
    cls_match->priority = rule->priority;
    rule->cls_match = cls_match;

    return cls_match;
}
cabd4c43 | 143 | |
e65413ab JR |
144 | static struct cls_subtable *find_subtable(const struct cls_classifier *cls, |
145 | const struct minimask *) | |
146 | OVS_REQUIRES(cls->mutex); | |
147 | static struct cls_subtable *insert_subtable(struct cls_classifier *cls, | |
148 | const struct minimask *) | |
149 | OVS_REQUIRES(cls->mutex); | |
150 | static void destroy_subtable(struct cls_classifier *cls, struct cls_subtable *) | |
151 | OVS_REQUIRES(cls->mutex); | |
152 | static struct cls_match *insert_rule(struct cls_classifier *cls, | |
153 | struct cls_subtable *, struct cls_rule *) | |
154 | OVS_REQUIRES(cls->mutex); | |
b5d97350 | 155 | |
627fb667 JR |
156 | static struct cls_match *find_match_wc(const struct cls_subtable *, |
157 | const struct flow *, struct trie_ctx *, | |
158 | unsigned int n_tries, | |
159 | struct flow_wildcards *); | |
160 | static struct cls_match *find_equal(struct cls_subtable *, | |
161 | const struct miniflow *, uint32_t hash); | |
b5d97350 | 162 | |
e65413ab JR |
163 | /* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. |
164 | * Classifier's mutex must be held while iterating, as the list is | |
165 | * protected by it. */
b5d97350 BP |
166 | #define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \ |
167 | for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE)) | |
168 | #define FOR_EACH_RULE_IN_LIST_SAFE(RULE, NEXT, HEAD) \ | |
169 | for ((RULE) = (HEAD); \ | |
170 | (RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true); \ | |
171 | (RULE) = (NEXT)) | |
172 | ||
627fb667 JR |
173 | static struct cls_match *next_rule_in_list__(struct cls_match *); |
174 | static struct cls_match *next_rule_in_list(struct cls_match *); | |
13751fd8 JR |
175 | |
176 | static unsigned int minimask_get_prefix_len(const struct minimask *, | |
177 | const struct mf_field *); | |
e65413ab JR |
178 | static void trie_init(struct cls_classifier *cls, int trie_idx, |
179 | const struct mf_field *) | |
180 | OVS_REQUIRES(cls->mutex); | |
13751fd8 JR |
181 | static unsigned int trie_lookup(const struct cls_trie *, const struct flow *, |
182 | unsigned int *checkbits); | |
69d6040e JR |
183 | static unsigned int trie_lookup_value(const struct trie_node *, |
184 | const ovs_be32 value[], | |
f0e5aa11 | 185 | unsigned int value_bits, |
69d6040e | 186 | unsigned int *checkbits); |
13751fd8 JR |
187 | static void trie_destroy(struct trie_node *); |
188 | static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen); | |
69d6040e JR |
189 | static void trie_insert_prefix(struct trie_node **, const ovs_be32 *prefix, |
190 | int mlen); | |
13751fd8 | 191 | static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen); |
69d6040e JR |
192 | static void trie_remove_prefix(struct trie_node **, const ovs_be32 *prefix, |
193 | int mlen); | |
13751fd8 | 194 | static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs, |
c30cfa6b | 195 | unsigned int n_bits); |
13751fd8 | 196 | static bool mask_prefix_bits_set(const struct flow_wildcards *, |
c30cfa6b | 197 | uint8_t be32ofs, unsigned int n_bits); |
3d91d909 JR |
198 | \f |
199 | /* flow/miniflow/minimask/minimatch utilities. | |
200 | * These are only used by the classifier, so place them here to allow | |
201 | * for better optimization. */ | |
202 | ||
/* Returns 'miniflow''s map restricted to the u32 index range [start, end).
 * On return, '*offset' is the number of miniflow data values that precede
 * index 'start', i.e. the offset into the values array where the first value
 * of the returned map lives. */
static inline uint64_t
miniflow_get_map_in_range(const struct miniflow *miniflow,
                          uint8_t start, uint8_t end, unsigned int *offset)
{
    uint64_t map = miniflow->map;
    *offset = 0;

    if (start > 0) {
        uint64_t msk = (UINT64_C(1) << start) - 1; /* 'start' LSBs set */
        /* Values are stored densely, so the offset is the number of
         * map bits set below 'start'. */
        *offset = count_1bits(map & msk);
        map &= ~msk;
    }
    if (end < FLOW_U32S) {
        uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */
        map &= msk;
    }
    return map;
}
221 | ||
222 | /* Returns a hash value for the bits of 'flow' where there are 1-bits in | |
223 | * 'mask', given 'basis'. | |
224 | * | |
225 | * The hash values returned by this function are the same as those returned by | |
226 | * miniflow_hash_in_minimask(), only the form of the arguments differ. */ | |
/* Returns a hash value for the bits of 'flow' where there are 1-bits in
 * 'mask', given 'basis'.
 *
 * The hash values returned by this function are the same as those returned by
 * miniflow_hash_in_minimask(), only the form of the arguments differ.
 * NOTE: the iteration order (rightmost map bit first) is part of the hash
 * definition and must stay in sync with the sibling hash functions. */
static inline uint32_t
flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
                      uint32_t basis)
{
    const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
    const uint32_t *flow_u32 = (const uint32_t *)flow;
    const uint32_t *p = mask_values;
    uint32_t hash;
    uint64_t map;

    hash = basis;
    for (map = mask->masks.map; map; map = zero_rightmost_1bit(map)) {
        /* raw_ctz(map) is the u32 index in 'flow'; '*p++' walks the
         * densely-stored mask values in the same order. */
        hash = hash_add(hash, flow_u32[raw_ctz(map)] & *p++);
    }

    /* Finish with the number of bytes hashed, as the siblings do. */
    return hash_finish(hash, (p - mask_values) * 4);
}
244 | ||
245 | /* Returns a hash value for the bits of 'flow' where there are 1-bits in | |
246 | * 'mask', given 'basis'. | |
247 | * | |
248 | * The hash values returned by this function are the same as those returned by | |
249 | * flow_hash_in_minimask(), only the form of the arguments differ. */ | |
/* Returns a hash value for the bits of 'flow' where there are 1-bits in
 * 'mask', given 'basis'.
 *
 * The hash values returned by this function are the same as those returned by
 * flow_hash_in_minimask(), only the form of the arguments differ. */
static inline uint32_t
miniflow_hash_in_minimask(const struct miniflow *flow,
                          const struct minimask *mask, uint32_t basis)
{
    const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
    const uint32_t *p = mask_values;
    uint32_t hash = basis;
    uint32_t flow_u32;

    /* Iterate flow values only where the mask has data. */
    MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) {
        hash = hash_add(hash, flow_u32 & *p++);
    }

    return hash_finish(hash, (p - mask_values) * 4);
}
265 | ||
266 | /* Returns a hash value for the bits of range [start, end) in 'flow', | |
267 | * where there are 1-bits in 'mask', given 'hash'. | |
268 | * | |
269 | * The hash values returned by this function are the same as those returned by | |
270 | * minimatch_hash_range(), only the form of the arguments differ. */ | |
/* Returns a hash value for the bits of range [start, end) in 'flow',
 * where there are 1-bits in 'mask', given 'hash'.
 *
 * The hash values returned by this function are the same as those returned by
 * minimatch_hash_range(), only the form of the arguments differ. */
static inline uint32_t
flow_hash_in_minimask_range(const struct flow *flow,
                            const struct minimask *mask,
                            uint8_t start, uint8_t end, uint32_t *basis)
{
    const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
    const uint32_t *flow_u32 = (const uint32_t *)flow;
    unsigned int offset;
    uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end,
                                             &offset);
    /* 'offset' skips mask values below 'start'. */
    const uint32_t *p = mask_values + offset;
    uint32_t hash = *basis;

    for (; map; map = zero_rightmost_1bit(map)) {
        hash = hash_add(hash, flow_u32[raw_ctz(map)] & *p++);
    }

    *basis = hash; /* Allow continuation from the unfinished value. */
    return hash_finish(hash, (p - mask_values) * 4);
}
291 | ||
/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask, i.e. OR
 * every bit of 'mask' into the corresponding bit of 'wc->masks'. */
static inline void
flow_wildcards_fold_minimask(struct flow_wildcards *wc,
                             const struct minimask *mask)
{
    flow_union_with_miniflow(&wc->masks, &mask->masks);
}
299 | ||
/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask
 * in the u32 index range [start, end). */
static inline void
flow_wildcards_fold_minimask_range(struct flow_wildcards *wc,
                                   const struct minimask *mask,
                                   uint8_t start, uint8_t end)
{
    uint32_t *dst_u32 = (uint32_t *)&wc->masks;
    unsigned int offset;
    uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end,
                                             &offset);
    /* 'offset' positions 'p' at the first mask value in the range. */
    const uint32_t *p = miniflow_get_u32_values(&mask->masks) + offset;

    for (; map; map = zero_rightmost_1bit(map)) {
        dst_u32[raw_ctz(map)] |= *p++;
    }
}
317 | ||
/* Returns a hash value for 'flow', given 'basis'.
 *
 * Only nonzero data values (and the map bits for those values) contribute to
 * the hash, so miniflows that differ only in which zero values are explicitly
 * stored hash identically. */
static inline uint32_t
miniflow_hash(const struct miniflow *flow, uint32_t basis)
{
    const uint32_t *values = miniflow_get_u32_values(flow);
    const uint32_t *p = values;
    uint32_t hash = basis;
    uint64_t hash_map = 0;
    uint64_t map;

    for (map = flow->map; map; map = zero_rightmost_1bit(map)) {
        if (*p) {
            hash = hash_add(hash, *p);
            /* Remember which map positions held nonzero values. */
            hash_map |= rightmost_1bit(map);
        }
        p++;
    }
    /* Mix in the 64-bit map as two 32-bit halves (hash_add takes 32 bits;
     * the low half here, the high half below). */
    hash = hash_add(hash, hash_map);
    hash = hash_add(hash, hash_map >> 32);

    return hash_finish(hash, p - values);
}
340 | ||
/* Returns a hash value for 'mask', given 'basis'.  A minimask is just a
 * wrapped miniflow, so delegate to miniflow_hash(). */
static inline uint32_t
minimask_hash(const struct minimask *mask, uint32_t basis)
{
    return miniflow_hash(&mask->masks, basis);
}
347 | ||
/* Returns a hash value for 'match', given 'basis'.  Chains the mask hash
 * into the flow hash so both parts contribute. */
static inline uint32_t
minimatch_hash(const struct minimatch *match, uint32_t basis)
{
    return miniflow_hash(&match->flow, minimask_hash(&match->mask, basis));
}
354 | ||
/* Returns a hash value for the bits of range [start, end) in 'match',
 * given 'basis'.
 *
 * The hash values returned by this function are the same as those returned by
 * flow_hash_in_minimask_range(), only the form of the arguments differ. */
static inline uint32_t
minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end,
                     uint32_t *basis)
{
    unsigned int offset;
    const uint32_t *p, *q;
    uint32_t hash = *basis;
    int n, i;

    /* Number of data values in the range; 'offset' is where they start. */
    n = count_1bits(miniflow_get_map_in_range(&match->mask.masks, start, end,
                                              &offset));
    q = miniflow_get_u32_values(&match->mask.masks) + offset;
    p = miniflow_get_u32_values(&match->flow) + offset;

    for (i = 0; i < n; i++) {
        /* Hash the flow value masked by the corresponding mask value. */
        hash = hash_add(hash, p[i] & q[i]);
    }
    *basis = hash; /* Allow continuation from the unfinished value. */
    return hash_finish(hash, (offset + n) * 4);
}
380 | ||
81a76618 BP |
381 | \f |
382 | /* cls_rule. */ | |
b5d97350 | 383 | |
81a76618 | 384 | /* Initializes 'rule' to match packets specified by 'match' at the given |
5cb7a798 BP |
385 | * 'priority'. 'match' must satisfy the invariant described in the comment at |
386 | * the definition of struct match. | |
66642cb4 | 387 | * |
48d28ac1 BP |
388 | * The caller must eventually destroy 'rule' with cls_rule_destroy(). |
389 | * | |
81a76618 BP |
390 | * (OpenFlow uses priorities between 0 and UINT16_MAX, inclusive, but |
391 | * internally Open vSwitch supports a wider range.) */ | |
void
cls_rule_init(struct cls_rule *rule,
              const struct match *match, unsigned int priority)
{
    /* Compress 'match' into the rule's minimatch. */
    minimatch_init(&rule->match, match);
    rule->priority = priority;
    /* Not in a classifier until inserted; see cls_rule_destroy(). */
    rule->cls_match = NULL;
}
400 | ||
401 | /* Same as cls_rule_init() for initialization from a "struct minimatch". */ | |
402 | void | |
403 | cls_rule_init_from_minimatch(struct cls_rule *rule, | |
404 | const struct minimatch *match, | |
405 | unsigned int priority) | |
406 | { | |
407 | minimatch_clone(&rule->match, match); | |
81a76618 | 408 | rule->priority = priority; |
627fb667 | 409 | rule->cls_match = NULL; |
685a51a5 JP |
410 | } |
411 | ||
48d28ac1 BP |
412 | /* Initializes 'dst' as a copy of 'src'. |
413 | * | |
b2c1f00b | 414 | * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ |
48d28ac1 BP |
415 | void |
416 | cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src) | |
417 | { | |
5cb7a798 BP |
418 | minimatch_clone(&dst->match, &src->match); |
419 | dst->priority = src->priority; | |
627fb667 | 420 | dst->cls_match = NULL; |
48d28ac1 BP |
421 | } |
422 | ||
b2c1f00b BP |
/* Initializes 'dst' with the data in 'src', destroying 'src'.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_move(struct cls_rule *dst, struct cls_rule *src)
{
    /* Transfers ownership of 'src''s minimatch storage to 'dst'. */
    minimatch_move(&dst->match, &src->match);
    dst->priority = src->priority;
    dst->cls_match = NULL;
}
433 | ||
48d28ac1 BP |
/* Frees memory referenced by 'rule'.  Doesn't free 'rule' itself (it's
 * normally embedded into a larger structure).
 *
 * ('rule' must not currently be in a classifier.) */
void
cls_rule_destroy(struct cls_rule *rule)
{
    /* A non-NULL cls_match means the rule is still inserted somewhere. */
    ovs_assert(!rule->cls_match);
    minimatch_destroy(&rule->match);
}
444 | ||
81a76618 BP |
445 | /* Returns true if 'a' and 'b' match the same packets at the same priority, |
446 | * false if they differ in some way. */ | |
193eb874 BP |
447 | bool |
448 | cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b) | |
449 | { | |
5cb7a798 | 450 | return a->priority == b->priority && minimatch_equal(&a->match, &b->match); |
193eb874 BP |
451 | } |
452 | ||
81a76618 | 453 | /* Returns a hash value for 'rule', folding in 'basis'. */ |
57452fdc BP |
454 | uint32_t |
455 | cls_rule_hash(const struct cls_rule *rule, uint32_t basis) | |
456 | { | |
5cb7a798 | 457 | return minimatch_hash(&rule->match, hash_int(rule->priority, basis)); |
73f33563 BP |
458 | } |
459 | ||
81a76618 | 460 | /* Appends a string describing 'rule' to 's'. */ |
07b37e8f BP |
461 | void |
462 | cls_rule_format(const struct cls_rule *rule, struct ds *s) | |
463 | { | |
5cb7a798 | 464 | minimatch_format(&rule->match, s, rule->priority); |
064af421 | 465 | } |
3ca1de08 BP |
466 | |
/* Returns true if 'rule' matches every packet, false otherwise.  A rule is a
 * catchall exactly when its mask wildcards every field. */
bool
cls_rule_is_catchall(const struct cls_rule *rule)
{
    return minimask_is_catchall(&rule->match.mask);
}
064af421 BP |
473 | \f |
/* Initializes 'cls' as a classifier that initially contains no classification
 * rules. */
void
classifier_init(struct classifier *cls_, const uint8_t *flow_segments)
    OVS_EXCLUDED(cls_->cls->mutex)
{
    /* The public struct classifier wraps an internal cls_classifier. */
    struct cls_classifier *cls = xmalloc(sizeof *cls);

    fat_rwlock_init(&cls_->rwlock);
    ovs_mutex_init(&cls->mutex);

    /* Hold the mutex while initializing OVS_GUARDED members. */
    ovs_mutex_lock(&cls->mutex);
    cls_->cls = cls;

    cls->n_rules = 0;
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);
    cmap_init(&cls->partitions);
    cls->n_flow_segments = 0;
    if (flow_segments) {
        /* Copy at most CLS_MAX_INDICES segment boundaries; the list is
         * terminated by a value >= FLOW_U32S. */
        while (cls->n_flow_segments < CLS_MAX_INDICES
               && *flow_segments < FLOW_U32S) {
            cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
        }
    }
    cls->n_tries = 0;
    for (int i = 0; i < CLS_MAX_TRIES; i++) {
        trie_init(cls, i, NULL);
    }
    ovs_mutex_unlock(&cls->mutex);
}
505 | ||
/* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
 * caller's responsibility. */
void
classifier_destroy(struct classifier *cls_)
    OVS_EXCLUDED(cls_->cls->mutex)
{
    if (cls_) {
        struct cls_classifier *cls = cls_->cls;
        struct cls_partition *partition, *next_partition;
        struct cls_subtable *subtable, *next_subtable;
        int i;

        fat_rwlock_destroy(&cls_->rwlock);
        if (!cls) {
            /* Wrapper was never fully initialized; nothing more to free. */
            return;
        }

        ovs_mutex_lock(&cls->mutex);
        for (i = 0; i < cls->n_tries; i++) {
            trie_destroy(cls->tries[i].root);
        }

        CMAP_FOR_EACH_SAFE (subtable, next_subtable, cmap_node,
                            &cls->subtables_map) {
            destroy_subtable(cls, subtable);
        }
        cmap_destroy(&cls->subtables_map);

        CMAP_FOR_EACH_SAFE (partition, next_partition, cmap_node,
                            &cls->partitions) {
            /* Partitions may still be referenced by RCU readers, so defer
             * freeing past the grace period. */
            ovsrcu_postpone(free, partition);
        }
        cmap_destroy(&cls->partitions);

        pvector_destroy(&cls->subtables);
        ovs_mutex_unlock(&cls->mutex);
        ovs_mutex_destroy(&cls->mutex);
        free(cls);
    }
}
13751fd8 JR |
547 | /* We use uint64_t as a set for the fields below. */ |
548 | BUILD_ASSERT_DECL(MFF_N_IDS <= 64); | |
549 | ||
550 | /* Set the fields for which prefix lookup should be performed. */ | |
551 | void | |
cabd4c43 | 552 | classifier_set_prefix_fields(struct classifier *cls_, |
13751fd8 JR |
553 | const enum mf_field_id *trie_fields, |
554 | unsigned int n_fields) | |
e65413ab | 555 | OVS_EXCLUDED(cls_->cls->mutex) |
13751fd8 | 556 | { |
cabd4c43 | 557 | struct cls_classifier *cls = cls_->cls; |
13751fd8 JR |
558 | uint64_t fields = 0; |
559 | int i, trie; | |
560 | ||
e65413ab | 561 | ovs_mutex_lock(&cls->mutex); |
13751fd8 JR |
562 | for (i = 0, trie = 0; i < n_fields && trie < CLS_MAX_TRIES; i++) { |
563 | const struct mf_field *field = mf_from_id(trie_fields[i]); | |
564 | if (field->flow_be32ofs < 0 || field->n_bits % 32) { | |
565 | /* Incompatible field. This is the only place where we | |
566 | * enforce these requirements, but the rest of the trie code | |
567 | * depends on the flow_be32ofs to be non-negative and the | |
568 | * field length to be a multiple of 32 bits. */ | |
569 | continue; | |
570 | } | |
571 | ||
572 | if (fields & (UINT64_C(1) << trie_fields[i])) { | |
573 | /* Duplicate field, there is no need to build more than | |
574 | * one index for any one field. */ | |
575 | continue; | |
576 | } | |
577 | fields |= UINT64_C(1) << trie_fields[i]; | |
578 | ||
579 | if (trie >= cls->n_tries || field != cls->tries[trie].field) { | |
580 | trie_init(cls, trie, field); | |
581 | } | |
582 | trie++; | |
583 | } | |
584 | ||
585 | /* Destroy the rest. */ | |
586 | for (i = trie; i < cls->n_tries; i++) { | |
587 | trie_init(cls, i, NULL); | |
588 | } | |
589 | cls->n_tries = trie; | |
e65413ab | 590 | ovs_mutex_unlock(&cls->mutex); |
13751fd8 JR |
591 | } |
592 | ||
/* (Re)initializes trie number 'trie_idx' in 'cls' for 'field' (NULL to
 * disable the trie), destroying any previous trie at that index, and rebuilds
 * the trie contents from all rules already in the classifier. */
static void
trie_init(struct cls_classifier *cls, int trie_idx,
          const struct mf_field *field)
    OVS_REQUIRES(cls->mutex)
{
    struct cls_trie *trie = &cls->tries[trie_idx];
    struct cls_subtable *subtable;

    if (trie_idx < cls->n_tries) {
        /* Only an already-active trie has a root to destroy. */
        trie_destroy(trie->root);
    }
    trie->root = NULL;
    trie->field = field;

    /* Add existing rules to the trie. */
    CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
        unsigned int plen;

        plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
        /* Initialize subtable's prefix length on this field. */
        subtable->trie_plen[trie_idx] = plen;

        if (plen) {
            struct cls_match *head;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                struct cls_match *match;

                /* Include duplicate (lower-priority) rules too. */
                FOR_EACH_RULE_IN_LIST (match, head) {
                    trie_insert(trie, match->cls_rule, plen);
                }
            }
        }
    }
}
628 | ||
5f0476ce JR |
/* Returns true if 'cls' contains no classification rules, false otherwise.
 * Checking the cmap requires no locking. */
bool
classifier_is_empty(const struct classifier *cls)
{
    /* No subtables implies no rules, since empty subtables are destroyed. */
    return cmap_is_empty(&cls->cls->subtables_map);
}
636 | ||
/* Returns the number of rules in 'cls'. */
int
classifier_count(const struct classifier *cls)
{
    /* n_rules is OVS_GUARDED, but an unlocked read is tolerated here;
     * callers get a possibly-stale snapshot. */
    return cls->cls->n_rules;
}
643 | ||
c906cedf BP |
644 | static uint32_t |
645 | hash_metadata(ovs_be64 metadata_) | |
646 | { | |
647 | uint64_t metadata = (OVS_FORCE uint64_t) metadata_; | |
965607c8 | 648 | return hash_uint64(metadata); |
c906cedf BP |
649 | } |
650 | ||
/* Returns the partition for 'metadata' in 'cls', or NULL if none exists.
 * 'hash' must be hash_metadata(metadata). */
static struct cls_partition *
find_partition(const struct cls_classifier *cls, ovs_be64 metadata,
               uint32_t hash)
{
    struct cls_partition *partition;

    CMAP_FOR_EACH_WITH_HASH (partition, cmap_node, hash, &cls->partitions) {
        if (partition->metadata == metadata) {
            return partition;
        }
    }

    return NULL;
}
665 | ||
/* Returns the partition for 'metadata' in 'cls', creating it if it does not
 * exist yet, and records that 'subtable' has a rule in that partition by
 * adding the subtable's tag to the partition's tag tracker. */
static struct cls_partition *
create_partition(struct cls_classifier *cls, struct cls_subtable *subtable,
                 ovs_be64 metadata)
    OVS_REQUIRES(cls->mutex)
{
    uint32_t hash = hash_metadata(metadata);
    struct cls_partition *partition = find_partition(cls, metadata, hash);
    if (!partition) {
        partition = xmalloc(sizeof *partition);
        partition->metadata = metadata;
        partition->tags = 0;
        tag_tracker_init(&partition->tracker);
        cmap_insert(&cls->partitions, &partition->cmap_node, hash);
    }
    /* Count this subtable's tag in the partition's tag set. */
    tag_tracker_add(&partition->tracker, &partition->tags, subtable->tag);
    return partition;
}
683 | ||
69d6040e JR |
684 | static inline ovs_be32 minimatch_get_ports(const struct minimatch *match) |
685 | { | |
686 | /* Could optimize to use the same map if needed for fast path. */ | |
687 | return MINIFLOW_GET_BE32(&match->flow, tp_src) | |
688 | & MINIFLOW_GET_BE32(&match->mask.masks, tp_src); | |
689 | } | |
690 | ||
b5d97350 BP |
/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * If 'cls' already contains an identical rule (including wildcards, values of
 * fixed fields, and priority), replaces the old rule by 'rule' and returns the
 * rule that was replaced.  The caller takes ownership of the returned rule and
 * is thus responsible for destroying it with cls_rule_destroy(), freeing the
 * memory block in which it resides, etc., as necessary.
 *
 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 * inserting the new rule.  In this case, no rules are displaced by the new
 * rule, even rules that cannot have any effect because the new rule matches a
 * superset of their flows and has higher priority. */
struct cls_rule *
classifier_replace(struct classifier *cls_, struct cls_rule *rule)
    OVS_EXCLUDED(cls_->cls->mutex)
{
    struct cls_classifier *cls = cls_->cls;
    struct cls_match *old_rule;
    struct cls_subtable *subtable;
    struct cls_rule *old_cls_rule = NULL;

    ovs_mutex_lock(&cls->mutex);
    /* Each distinct mask has its own subtable; create on first use. */
    subtable = find_subtable(cls, &rule->match.mask);
    if (!subtable) {
        subtable = insert_subtable(cls, &rule->match.mask);
    }

    /* insert_rule() sets rule->cls_match and returns any displaced match. */
    old_rule = insert_rule(cls, subtable, rule);
    if (!old_rule) {
        old_cls_rule = NULL;

        rule->cls_match->partition = NULL;
        if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
            /* Rule matches metadata exactly, so it belongs to a partition. */
            ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
            rule->cls_match->partition = create_partition(cls, subtable,
                                                          metadata);
        }

        cls->n_rules++;

        for (int i = 0; i < cls->n_tries; i++) {
            if (subtable->trie_plen[i]) {
                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
            }
        }

        /* Ports trie. */
        if (subtable->ports_mask_len) {
            /* We mask the value to be inserted to always have the wildcarded
             * bits in known (zero) state, so we can include them in comparison
             * and they will always match (== their original value does not
             * matter). */
            ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
                               subtable->ports_mask_len);
        }
    } else {
        /* Identical rule replaced: hand its cls_rule back to the caller and
         * inherit its partition.  Tries need no update (same match). */
        old_cls_rule = old_rule->cls_rule;
        rule->cls_match->partition = old_rule->partition;
        old_cls_rule->cls_match = NULL;

        /* 'old_rule' contains a cmap_node, which may not be freed
         * immediately. */
        ovsrcu_postpone(free, old_rule);
    }
    ovs_mutex_unlock(&cls->mutex);
    return old_cls_rule;
}
761 | ||
08944c1d BP |
762 | /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller |
763 | * must not modify or free it. | |
764 | * | |
765 | * 'cls' must not contain an identical rule (including wildcards, values of | |
766 | * fixed fields, and priority). Use classifier_find_rule_exactly() to find | |
767 | * such a rule. */ | |
768 | void | |
769 | classifier_insert(struct classifier *cls, struct cls_rule *rule) | |
770 | { | |
771 | struct cls_rule *displaced_rule = classifier_replace(cls, rule); | |
cb22974d | 772 | ovs_assert(!displaced_rule); |
08944c1d BP |
773 | } |
774 | ||
48d28ac1 BP |
775 | /* Removes 'rule' from 'cls'. It is the caller's responsibility to destroy |
776 | * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule' | |
777 | * resides, etc., as necessary. */ | |
064af421 | 778 | void |
cabd4c43 | 779 | classifier_remove(struct classifier *cls_, struct cls_rule *rule) |
e65413ab | 780 | OVS_EXCLUDED(cls_->cls->mutex) |
064af421 | 781 | { |
cabd4c43 | 782 | struct cls_classifier *cls = cls_->cls; |
c906cedf | 783 | struct cls_partition *partition; |
627fb667 JR |
784 | struct cls_match *cls_match = rule->cls_match; |
785 | struct cls_match *head; | |
03868246 | 786 | struct cls_subtable *subtable; |
476f36e8 | 787 | int i; |
f2c21402 JR |
788 | uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES]; |
789 | uint8_t prev_be32ofs = 0; | |
064af421 | 790 | |
627fb667 JR |
791 | ovs_assert(cls_match); |
792 | ||
e65413ab | 793 | ovs_mutex_lock(&cls->mutex); |
03868246 | 794 | subtable = find_subtable(cls, &rule->match.mask); |
627fb667 JR |
795 | ovs_assert(subtable); |
796 | ||
69d6040e JR |
797 | if (subtable->ports_mask_len) { |
798 | ovs_be32 masked_ports = minimatch_get_ports(&rule->match); | |
799 | ||
800 | trie_remove_prefix(&subtable->ports_trie, | |
801 | &masked_ports, subtable->ports_mask_len); | |
802 | } | |
13751fd8 JR |
803 | for (i = 0; i < cls->n_tries; i++) { |
804 | if (subtable->trie_plen[i]) { | |
805 | trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]); | |
806 | } | |
807 | } | |
808 | ||
476f36e8 JR |
809 | /* Remove rule node from indices. */ |
810 | for (i = 0; i < subtable->n_indices; i++) { | |
f2c21402 JR |
811 | ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs, |
812 | subtable->index_ofs[i], &basis); | |
813 | cmap_remove(&subtable->indices[i], &cls_match->index_nodes[i], | |
814 | ihash[i]); | |
815 | prev_be32ofs = subtable->index_ofs[i]; | |
476f36e8 | 816 | } |
f2c21402 | 817 | hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis); |
476f36e8 | 818 | |
f2c21402 | 819 | head = find_equal(subtable, &rule->match.flow, hash); |
627fb667 JR |
820 | if (head != cls_match) { |
821 | list_remove(&cls_match->list); | |
822 | } else if (list_is_empty(&cls_match->list)) { | |
f2c21402 | 823 | cmap_remove(&subtable->rules, &cls_match->cmap_node, hash); |
b5d97350 | 824 | } else { |
627fb667 JR |
825 | struct cls_match *next = CONTAINER_OF(cls_match->list.next, |
826 | struct cls_match, list); | |
064af421 | 827 | |
627fb667 | 828 | list_remove(&cls_match->list); |
f2c21402 JR |
829 | cmap_replace(&subtable->rules, &cls_match->cmap_node, |
830 | &next->cmap_node, hash); | |
b5d97350 | 831 | } |
064af421 | 832 | |
627fb667 | 833 | partition = cls_match->partition; |
183126a1 BP |
834 | if (partition) { |
835 | tag_tracker_subtract(&partition->tracker, &partition->tags, | |
03868246 | 836 | subtable->tag); |
183126a1 | 837 | if (!partition->tags) { |
f2c21402 JR |
838 | cmap_remove(&cls->partitions, &partition->cmap_node, |
839 | hash_metadata(partition->metadata)); | |
840 | ovsrcu_postpone(free, partition); | |
183126a1 | 841 | } |
c906cedf BP |
842 | } |
843 | ||
03868246 JR |
844 | if (--subtable->n_rules == 0) { |
845 | destroy_subtable(cls, subtable); | |
fe7cfa5c JR |
846 | } else if (subtable->max_priority == cls_match->priority |
847 | && --subtable->max_count == 0) { | |
848 | /* Find the new 'max_priority' and 'max_count'. */ | |
849 | struct cls_match *head; | |
850 | unsigned int max_priority = 0; | |
851 | ||
f2c21402 | 852 | CMAP_FOR_EACH (head, cmap_node, &subtable->rules) { |
fe7cfa5c JR |
853 | if (head->priority > max_priority) { |
854 | max_priority = head->priority; | |
855 | subtable->max_count = 1; | |
856 | } else if (head->priority == max_priority) { | |
857 | ++subtable->max_count; | |
858 | } | |
859 | } | |
860 | subtable->max_priority = max_priority; | |
861 | pvector_change_priority(&cls->subtables, subtable, max_priority); | |
4d935a6b | 862 | } |
13751fd8 | 863 | |
b5d97350 | 864 | cls->n_rules--; |
627fb667 JR |
865 | |
866 | rule->cls_match = NULL; | |
f2c21402 | 867 | ovsrcu_postpone(free, cls_match); |
e65413ab | 868 | ovs_mutex_unlock(&cls->mutex); |
064af421 BP |
869 | } |
870 | ||
13751fd8 JR |
871 | /* Prefix tree context. Valid when 'lookup_done' is true. Can skip all |
872 | * subtables which have more than 'match_plen' bits in their corresponding | |
873 | * field at offset 'be32ofs'. If skipped, 'maskbits' prefix bits should be | |
874 | * unwildcarded to quarantee datapath flow matches only packets it should. */ | |
875 | struct trie_ctx { | |
876 | const struct cls_trie *trie; | |
877 | bool lookup_done; /* Status of the lookup. */ | |
878 | uint8_t be32ofs; /* U32 offset of the field in question. */ | |
879 | unsigned int match_plen; /* Longest prefix than could possibly match. */ | |
880 | unsigned int maskbits; /* Prefix length needed to avoid false matches. */ | |
881 | }; | |
882 | ||
883 | static void | |
884 | trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) | |
885 | { | |
886 | ctx->trie = trie; | |
887 | ctx->be32ofs = trie->field->flow_be32ofs; | |
888 | ctx->lookup_done = false; | |
889 | } | |
890 | ||
48c3de13 BP |
891 | /* Finds and returns the highest-priority rule in 'cls' that matches 'flow'. |
892 | * Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules | |
74f74083 EJ |
893 | * of equal priority match 'flow', returns one arbitrarily. |
894 | * | |
895 | * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the | |
896 | * set of bits that were significant in the lookup. At some point | |
897 | * earlier, 'wc' should have been initialized (e.g., by | |
898 | * flow_wildcards_init_catchall()). */ | |
48c3de13 | 899 | struct cls_rule * |
cabd4c43 | 900 | classifier_lookup(const struct classifier *cls_, const struct flow *flow, |
74f74083 | 901 | struct flow_wildcards *wc) |
48c3de13 | 902 | { |
cabd4c43 | 903 | struct cls_classifier *cls = cls_->cls; |
c906cedf | 904 | const struct cls_partition *partition; |
c906cedf | 905 | tag_type tags; |
ec988646 | 906 | int64_t best_priority = -1; |
fe7cfa5c JR |
907 | const struct cls_match *best; |
908 | struct trie_ctx trie_ctx[CLS_MAX_TRIES]; | |
909 | struct cls_subtable *subtable; | |
c906cedf | 910 | |
03868246 JR |
911 | /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them, |
912 | * then 'flow' cannot possibly match in 'subtable': | |
c906cedf BP |
913 | * |
914 | * - If flow->metadata maps to a given 'partition', then we can use | |
915 | * 'tags' for 'partition->tags'. | |
916 | * | |
917 | * - If flow->metadata has no partition, then no rule in 'cls' has an | |
918 | * exact-match for flow->metadata. That means that we don't need to | |
03868246 | 919 | * search any subtable that includes flow->metadata in its mask. |
c906cedf | 920 | * |
03868246 | 921 | * In either case, we always need to search any cls_subtables that do not |
c906cedf | 922 | * include flow->metadata in its mask. One way to do that would be to |
03868246 JR |
923 | * check the "cls_subtable"s explicitly for that, but that would require an |
924 | * extra branch per subtable. Instead, we mark such a cls_subtable's | |
925 | * 'tags' as TAG_ALL and make sure that 'tags' is never empty. This means | |
926 | * that 'tags' always intersects such a cls_subtable's 'tags', so we don't | |
927 | * need a special case. | |
c906cedf | 928 | */ |
f2c21402 | 929 | partition = (cmap_is_empty(&cls->partitions) |
c906cedf BP |
930 | ? NULL |
931 | : find_partition(cls, flow->metadata, | |
932 | hash_metadata(flow->metadata))); | |
933 | tags = partition ? partition->tags : TAG_ARBITRARY; | |
48c3de13 | 934 | |
13751fd8 | 935 | /* Initialize trie contexts for match_find_wc(). */ |
fe7cfa5c | 936 | for (int i = 0; i < cls->n_tries; i++) { |
13751fd8 JR |
937 | trie_ctx_init(&trie_ctx[i], &cls->tries[i]); |
938 | } | |
ec988646 | 939 | |
b5d97350 | 940 | best = NULL; |
fe7cfa5c JR |
941 | PVECTOR_FOR_EACH_PRIORITY(subtable, best_priority, 2, |
942 | sizeof(struct cls_subtable), &cls->subtables) { | |
627fb667 | 943 | struct cls_match *rule; |
c906cedf | 944 | |
fe7cfa5c | 945 | if (!tag_intersects(tags, subtable->tag)) { |
c906cedf BP |
946 | continue; |
947 | } | |
74f74083 | 948 | |
fe7cfa5c | 949 | rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries, wc); |
ec988646 JR |
950 | if (rule && (int64_t)rule->priority > best_priority) { |
951 | best_priority = (int64_t)rule->priority; | |
1f3c5efc | 952 | best = rule; |
b5d97350 | 953 | } |
48c3de13 | 954 | } |
13751fd8 | 955 | |
627fb667 | 956 | return best ? best->cls_rule : NULL; |
48c3de13 BP |
957 | } |
958 | ||
2abf78ff | 959 | /* Returns true if 'target' satisifies 'match', that is, if each bit for which |
3016f3e4 JR |
960 | * 'match' specifies a particular value has the correct value in 'target'. |
961 | * | |
962 | * 'flow' and 'mask' have the same mask! */ | |
2abf78ff | 963 | static bool |
3016f3e4 JR |
964 | miniflow_and_mask_matches_miniflow(const struct miniflow *flow, |
965 | const struct minimask *mask, | |
966 | const struct miniflow *target) | |
2abf78ff | 967 | { |
3016f3e4 JR |
968 | const uint32_t *flowp = miniflow_get_u32_values(flow); |
969 | const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); | |
2abf78ff JR |
970 | uint32_t target_u32; |
971 | ||
3016f3e4 | 972 | MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) { |
2abf78ff JR |
973 | if ((*flowp++ ^ target_u32) & *maskp++) { |
974 | return false; | |
975 | } | |
976 | } | |
977 | ||
978 | return true; | |
979 | } | |
980 | ||
627fb667 | 981 | static inline struct cls_match * |
2abf78ff JR |
982 | find_match_miniflow(const struct cls_subtable *subtable, |
983 | const struct miniflow *flow, | |
984 | uint32_t hash) | |
985 | { | |
627fb667 | 986 | struct cls_match *rule; |
2abf78ff | 987 | |
f2c21402 | 988 | CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) { |
3016f3e4 JR |
989 | if (miniflow_and_mask_matches_miniflow(&rule->flow, &subtable->mask, |
990 | flow)) { | |
2abf78ff JR |
991 | return rule; |
992 | } | |
993 | } | |
994 | ||
995 | return NULL; | |
996 | } | |
997 | ||
b7648634 EJ |
998 | /* For each miniflow in 'flows' performs a classifier lookup writing the result |
999 | * into the corresponding slot in 'rules'. If a particular entry in 'flows' is | |
1000 | * NULL it is skipped. | |
2abf78ff | 1001 | * |
b7648634 EJ |
1002 | * This function is optimized for use in the userspace datapath and therefore |
1003 | * does not implement a lot of features available in the standard | |
1004 | * classifier_lookup() function. Specifically, it does not implement | |
1005 | * priorities, instead returning any rule which matches the flow. */ | |
1006 | void | |
1007 | classifier_lookup_miniflow_batch(const struct classifier *cls_, | |
1008 | const struct miniflow **flows, | |
1009 | struct cls_rule **rules, size_t len) | |
2abf78ff | 1010 | { |
cabd4c43 | 1011 | struct cls_classifier *cls = cls_->cls; |
2abf78ff | 1012 | struct cls_subtable *subtable; |
b7648634 | 1013 | size_t i, begin = 0; |
2abf78ff | 1014 | |
b7648634 | 1015 | memset(rules, 0, len * sizeof *rules); |
fe7cfa5c | 1016 | PVECTOR_FOR_EACH (subtable, &cls->subtables) { |
b7648634 EJ |
1017 | for (i = begin; i < len; i++) { |
1018 | struct cls_match *match; | |
1019 | uint32_t hash; | |
2abf78ff | 1020 | |
b7648634 EJ |
1021 | if (OVS_UNLIKELY(rules[i] || !flows[i])) { |
1022 | continue; | |
1023 | } | |
1024 | ||
1025 | hash = miniflow_hash_in_minimask(flows[i], &subtable->mask, 0); | |
1026 | match = find_match_miniflow(subtable, flows[i], hash); | |
1027 | if (OVS_UNLIKELY(match)) { | |
1028 | rules[i] = match->cls_rule; | |
1029 | } | |
2abf78ff | 1030 | } |
2abf78ff | 1031 | |
b7648634 EJ |
1032 | while (begin < len && (rules[begin] || !flows[begin])) { |
1033 | begin++; | |
1034 | } | |
1035 | if (begin >= len) { | |
1036 | break; | |
1037 | } | |
1038 | } | |
2abf78ff JR |
1039 | } |
1040 | ||
b5d97350 BP |
1041 | /* Finds and returns a rule in 'cls' with exactly the same priority and |
1042 | * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
c084ce1d | 1043 | * contain an exact match. */ |
064af421 | 1044 | struct cls_rule * |
cabd4c43 | 1045 | classifier_find_rule_exactly(const struct classifier *cls_, |
76ecc721 | 1046 | const struct cls_rule *target) |
e65413ab | 1047 | OVS_EXCLUDED(cls_->cls->mutex) |
064af421 | 1048 | { |
cabd4c43 | 1049 | struct cls_classifier *cls = cls_->cls; |
627fb667 | 1050 | struct cls_match *head, *rule; |
03868246 | 1051 | struct cls_subtable *subtable; |
064af421 | 1052 | |
e65413ab | 1053 | ovs_mutex_lock(&cls->mutex); |
03868246 JR |
1054 | subtable = find_subtable(cls, &target->match.mask); |
1055 | if (!subtable) { | |
e65413ab | 1056 | goto out; |
064af421 BP |
1057 | } |
1058 | ||
4d935a6b | 1059 | /* Skip if there is no hope. */ |
03868246 | 1060 | if (target->priority > subtable->max_priority) { |
e65413ab | 1061 | goto out; |
4d935a6b JR |
1062 | } |
1063 | ||
03868246 | 1064 | head = find_equal(subtable, &target->match.flow, |
5cb7a798 BP |
1065 | miniflow_hash_in_minimask(&target->match.flow, |
1066 | &target->match.mask, 0)); | |
b5d97350 BP |
1067 | FOR_EACH_RULE_IN_LIST (rule, head) { |
1068 | if (target->priority >= rule->priority) { | |
e65413ab | 1069 | ovs_mutex_unlock(&cls->mutex); |
627fb667 | 1070 | return target->priority == rule->priority ? rule->cls_rule : NULL; |
064af421 BP |
1071 | } |
1072 | } | |
e65413ab JR |
1073 | out: |
1074 | ovs_mutex_unlock(&cls->mutex); | |
064af421 BP |
1075 | return NULL; |
1076 | } | |
1077 | ||
81a76618 BP |
1078 | /* Finds and returns a rule in 'cls' with priority 'priority' and exactly the |
1079 | * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
1080 | * contain an exact match. */ | |
1081 | struct cls_rule * | |
1082 | classifier_find_match_exactly(const struct classifier *cls, | |
1083 | const struct match *target, | |
1084 | unsigned int priority) | |
1085 | { | |
1086 | struct cls_rule *retval; | |
1087 | struct cls_rule cr; | |
1088 | ||
1089 | cls_rule_init(&cr, target, priority); | |
1090 | retval = classifier_find_rule_exactly(cls, &cr); | |
48d28ac1 | 1091 | cls_rule_destroy(&cr); |
81a76618 BP |
1092 | |
1093 | return retval; | |
1094 | } | |
1095 | ||
faa50f40 BP |
1096 | /* Checks if 'target' would overlap any other rule in 'cls'. Two rules are |
1097 | * considered to overlap if both rules have the same priority and a packet | |
1098 | * could match both. */ | |
49bdc010 | 1099 | bool |
cabd4c43 | 1100 | classifier_rule_overlaps(const struct classifier *cls_, |
faa50f40 | 1101 | const struct cls_rule *target) |
e65413ab | 1102 | OVS_EXCLUDED(cls_->cls->mutex) |
49bdc010 | 1103 | { |
cabd4c43 | 1104 | struct cls_classifier *cls = cls_->cls; |
03868246 | 1105 | struct cls_subtable *subtable; |
fe7cfa5c | 1106 | int64_t stop_at_priority = (int64_t)target->priority - 1; |
49bdc010 | 1107 | |
e65413ab | 1108 | ovs_mutex_lock(&cls->mutex); |
03868246 | 1109 | /* Iterate subtables in the descending max priority order. */ |
fe7cfa5c JR |
1110 | PVECTOR_FOR_EACH_PRIORITY (subtable, stop_at_priority, 2, |
1111 | sizeof(struct cls_subtable), &cls->subtables) { | |
5cb7a798 BP |
1112 | uint32_t storage[FLOW_U32S]; |
1113 | struct minimask mask; | |
627fb667 | 1114 | struct cls_match *head; |
49bdc010 | 1115 | |
03868246 | 1116 | minimask_combine(&mask, &target->match.mask, &subtable->mask, storage); |
f2c21402 | 1117 | CMAP_FOR_EACH (head, cmap_node, &subtable->rules) { |
627fb667 | 1118 | struct cls_match *rule; |
49bdc010 | 1119 | |
b5d97350 | 1120 | FOR_EACH_RULE_IN_LIST (rule, head) { |
4d935a6b JR |
1121 | if (rule->priority < target->priority) { |
1122 | break; /* Rules in descending priority order. */ | |
1123 | } | |
faa50f40 | 1124 | if (rule->priority == target->priority |
5cb7a798 | 1125 | && miniflow_equal_in_minimask(&target->match.flow, |
3016f3e4 | 1126 | &rule->flow, &mask)) { |
e65413ab | 1127 | ovs_mutex_unlock(&cls->mutex); |
49bdc010 JP |
1128 | return true; |
1129 | } | |
1130 | } | |
1131 | } | |
1132 | } | |
1133 | ||
e65413ab | 1134 | ovs_mutex_unlock(&cls->mutex); |
49bdc010 JP |
1135 | return false; |
1136 | } | |
6ceeaa92 BP |
1137 | |
1138 | /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more | |
1139 | * specific than 'criteria'. That is, 'rule' matches 'criteria' and this | |
1140 | * function returns true if, for every field: | |
1141 | * | |
1142 | * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the | |
1143 | * field, or | |
1144 | * | |
1145 | * - 'criteria' wildcards the field, | |
1146 | * | |
1147 | * Conversely, 'rule' does not match 'criteria' and this function returns false | |
1148 | * if, for at least one field: | |
1149 | * | |
1150 | * - 'criteria' and 'rule' specify different values for the field, or | |
1151 | * | |
1152 | * - 'criteria' specifies a value for the field but 'rule' wildcards it. | |
1153 | * | |
1154 | * Equivalently, the truth table for whether a field matches is: | |
1155 | * | |
1156 | * rule | |
1157 | * | |
1158 | * c wildcard exact | |
1159 | * r +---------+---------+ | |
1160 | * i wild | yes | yes | | |
1161 | * t card | | | | |
1162 | * e +---------+---------+ | |
1163 | * r exact | no |if values| | |
1164 | * i | |are equal| | |
1165 | * a +---------+---------+ | |
1166 | * | |
1167 | * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD | |
1168 | * commands and by OpenFlow 1.0 aggregate and flow stats. | |
1169 | * | |
81a76618 | 1170 | * Ignores rule->priority. */ |
6ceeaa92 BP |
1171 | bool |
1172 | cls_rule_is_loose_match(const struct cls_rule *rule, | |
5cb7a798 | 1173 | const struct minimatch *criteria) |
6ceeaa92 | 1174 | { |
5cb7a798 BP |
1175 | return (!minimask_has_extra(&rule->match.mask, &criteria->mask) |
1176 | && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow, | |
1177 | &criteria->mask)); | |
6ceeaa92 | 1178 | } |
b5d97350 | 1179 | \f |
5ecc9d81 BP |
1180 | /* Iteration. */ |
1181 | ||
1182 | static bool | |
627fb667 | 1183 | rule_matches(const struct cls_match *rule, const struct cls_rule *target) |
5ecc9d81 BP |
1184 | { |
1185 | return (!target | |
3016f3e4 | 1186 | || miniflow_equal_in_minimask(&rule->flow, |
5cb7a798 BP |
1187 | &target->match.flow, |
1188 | &target->match.mask)); | |
5ecc9d81 BP |
1189 | } |
1190 | ||
627fb667 | 1191 | static struct cls_match * |
03868246 | 1192 | search_subtable(const struct cls_subtable *subtable, |
f2c21402 | 1193 | struct cls_cursor *cursor) |
5ecc9d81 | 1194 | { |
f2c21402 JR |
1195 | if (!cursor->target |
1196 | || !minimask_has_extra(&subtable->mask, &cursor->target->match.mask)) { | |
627fb667 | 1197 | struct cls_match *rule; |
5ecc9d81 | 1198 | |
f2c21402 JR |
1199 | CMAP_CURSOR_FOR_EACH (rule, cmap_node, &cursor->rules, |
1200 | &subtable->rules) { | |
1201 | if (rule_matches(rule, cursor->target)) { | |
5ecc9d81 BP |
1202 | return rule; |
1203 | } | |
1204 | } | |
1205 | } | |
1206 | return NULL; | |
1207 | } | |
1208 | ||
5f0476ce JR |
1209 | /* Initializes 'cursor' for iterating through rules in 'cls', and returns the |
1210 | * first matching cls_rule via '*pnode', or NULL if there are no matches. | |
5ecc9d81 | 1211 | * |
6ceeaa92 | 1212 | * - If 'target' is null, the cursor will visit every rule in 'cls'. |
5ecc9d81 | 1213 | * |
6ceeaa92 BP |
1214 | * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls' |
1215 | * such that cls_rule_is_loose_match(rule, target) returns true. | |
5ecc9d81 | 1216 | * |
6ceeaa92 | 1217 | * Ignores target->priority. */ |
5f0476ce JR |
1218 | struct cls_cursor cls_cursor_init(const struct classifier *cls, |
1219 | const struct cls_rule *target, | |
1220 | void **pnode, const void *offset, bool safe) | |
1221 | OVS_NO_THREAD_SAFETY_ANALYSIS | |
5ecc9d81 | 1222 | { |
5f0476ce | 1223 | struct cls_cursor cursor; |
03868246 | 1224 | struct cls_subtable *subtable; |
5f0476ce | 1225 | struct cls_rule *cls_rule = NULL; |
5ecc9d81 | 1226 | |
5f0476ce | 1227 | cursor.safe = safe; |
e65413ab | 1228 | cursor.cls = cls->cls; |
5f0476ce JR |
1229 | cursor.target = target && !cls_rule_is_catchall(target) ? target : NULL; |
1230 | ||
1231 | /* Find first rule. */ | |
e65413ab | 1232 | ovs_mutex_lock(&cursor.cls->mutex); |
5f0476ce | 1233 | CMAP_CURSOR_FOR_EACH (subtable, cmap_node, &cursor.subtables, |
e65413ab | 1234 | &cursor.cls->subtables_map) { |
5f0476ce | 1235 | struct cls_match *rule = search_subtable(subtable, &cursor); |
f2c21402 | 1236 | |
5ecc9d81 | 1237 | if (rule) { |
5f0476ce JR |
1238 | cursor.subtable = subtable; |
1239 | cls_rule = rule->cls_rule; | |
1240 | break; | |
5ecc9d81 BP |
1241 | } |
1242 | } | |
5f0476ce | 1243 | *pnode = (char *)cls_rule + (ptrdiff_t)offset; |
5ecc9d81 | 1244 | |
5f0476ce JR |
1245 | /* Leave locked if requested and have a rule. */ |
1246 | if (safe || !cls_rule) { | |
e65413ab | 1247 | ovs_mutex_unlock(&cursor.cls->mutex); |
5f0476ce JR |
1248 | } |
1249 | return cursor; | |
1250 | } | |
1251 | ||
1252 | static void | |
1253 | cls_cursor_next_unlock(struct cls_cursor *cursor, struct cls_rule *rule) | |
1254 | OVS_NO_THREAD_SAFETY_ANALYSIS | |
1255 | { | |
e65413ab | 1256 | /* Release the mutex if no rule, or 'safe' mode. */ |
5f0476ce | 1257 | if (!rule || cursor->safe) { |
e65413ab | 1258 | ovs_mutex_unlock(&cursor->cls->mutex); |
5f0476ce | 1259 | } |
5ecc9d81 BP |
1260 | } |
1261 | ||
1262 | /* Returns the next matching cls_rule in 'cursor''s iteration, or a null | |
1263 | * pointer if there are no more matches. */ | |
1264 | struct cls_rule * | |
9850cd0f | 1265 | cls_cursor_next(struct cls_cursor *cursor, const struct cls_rule *rule_) |
5f0476ce | 1266 | OVS_NO_THREAD_SAFETY_ANALYSIS |
5ecc9d81 | 1267 | { |
627fb667 | 1268 | struct cls_match *rule = CONST_CAST(struct cls_match *, rule_->cls_match); |
03868246 | 1269 | const struct cls_subtable *subtable; |
627fb667 | 1270 | struct cls_match *next; |
5ecc9d81 | 1271 | |
5f0476ce JR |
1272 | /* Lock if not locked already. */ |
1273 | if (cursor->safe) { | |
e65413ab | 1274 | ovs_mutex_lock(&cursor->cls->mutex); |
5f0476ce JR |
1275 | } |
1276 | ||
955f579d BP |
1277 | next = next_rule_in_list__(rule); |
1278 | if (next->priority < rule->priority) { | |
5f0476ce | 1279 | cls_cursor_next_unlock(cursor, next->cls_rule); |
627fb667 | 1280 | return next->cls_rule; |
5ecc9d81 BP |
1281 | } |
1282 | ||
955f579d | 1283 | /* 'next' is the head of the list, that is, the rule that is included in |
f2c21402 | 1284 | * the subtable's map. (This is important when the classifier contains |
03868246 | 1285 | * rules that differ only in priority.) */ |
955f579d | 1286 | rule = next; |
f2c21402 | 1287 | CMAP_CURSOR_FOR_EACH_CONTINUE (rule, cmap_node, &cursor->rules) { |
5ecc9d81 | 1288 | if (rule_matches(rule, cursor->target)) { |
5f0476ce | 1289 | cls_cursor_next_unlock(cursor, rule->cls_rule); |
627fb667 | 1290 | return rule->cls_rule; |
5ecc9d81 BP |
1291 | } |
1292 | } | |
1293 | ||
03868246 | 1294 | subtable = cursor->subtable; |
f2c21402 JR |
1295 | CMAP_CURSOR_FOR_EACH_CONTINUE (subtable, cmap_node, &cursor->subtables) { |
1296 | rule = search_subtable(subtable, cursor); | |
5ecc9d81 | 1297 | if (rule) { |
03868246 | 1298 | cursor->subtable = subtable; |
5f0476ce | 1299 | cls_cursor_next_unlock(cursor, rule->cls_rule); |
627fb667 | 1300 | return rule->cls_rule; |
5ecc9d81 BP |
1301 | } |
1302 | } | |
1303 | ||
e65413ab | 1304 | ovs_mutex_unlock(&cursor->cls->mutex); |
5ecc9d81 BP |
1305 | return NULL; |
1306 | } | |
1307 | \f | |
03868246 | 1308 | static struct cls_subtable * |
cabd4c43 | 1309 | find_subtable(const struct cls_classifier *cls, const struct minimask *mask) |
e65413ab | 1310 | OVS_REQUIRES(cls->mutex) |
b5d97350 | 1311 | { |
03868246 | 1312 | struct cls_subtable *subtable; |
064af421 | 1313 | |
f2c21402 | 1314 | CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0), |
5a87054c | 1315 | &cls->subtables_map) { |
03868246 JR |
1316 | if (minimask_equal(mask, &subtable->mask)) { |
1317 | return subtable; | |
064af421 BP |
1318 | } |
1319 | } | |
b5d97350 | 1320 | return NULL; |
064af421 | 1321 | } |
064af421 | 1322 | |
e65413ab | 1323 | /* The new subtable will be visible to the readers only after this. */ |
03868246 | 1324 | static struct cls_subtable * |
cabd4c43 | 1325 | insert_subtable(struct cls_classifier *cls, const struct minimask *mask) |
e65413ab | 1326 | OVS_REQUIRES(cls->mutex) |
b5d97350 | 1327 | { |
c906cedf | 1328 | uint32_t hash = minimask_hash(mask, 0); |
03868246 | 1329 | struct cls_subtable *subtable; |
476f36e8 JR |
1330 | int i, index = 0; |
1331 | struct flow_wildcards old, new; | |
1332 | uint8_t prev; | |
3016f3e4 | 1333 | int count = count_1bits(mask->masks.map); |
064af421 | 1334 | |
3016f3e4 JR |
1335 | subtable = xzalloc(sizeof *subtable - sizeof mask->masks.inline_values |
1336 | + MINIFLOW_VALUES_SIZE(count)); | |
f2c21402 | 1337 | cmap_init(&subtable->rules); |
3016f3e4 | 1338 | miniflow_clone_inline(&subtable->mask.masks, &mask->masks, count); |
476f36e8 JR |
1339 | |
1340 | /* Init indices for segmented lookup, if any. */ | |
1341 | flow_wildcards_init_catchall(&new); | |
1342 | old = new; | |
1343 | prev = 0; | |
1344 | for (i = 0; i < cls->n_flow_segments; i++) { | |
1345 | flow_wildcards_fold_minimask_range(&new, mask, prev, | |
1346 | cls->flow_segments[i]); | |
1347 | /* Add an index if it adds mask bits. */ | |
1348 | if (!flow_wildcards_equal(&new, &old)) { | |
f2c21402 | 1349 | cmap_init(&subtable->indices[index]); |
476f36e8 JR |
1350 | subtable->index_ofs[index] = cls->flow_segments[i]; |
1351 | index++; | |
1352 | old = new; | |
1353 | } | |
1354 | prev = cls->flow_segments[i]; | |
1355 | } | |
1356 | /* Check if the rest of the subtable's mask adds any bits, | |
1357 | * and remove the last index if it doesn't. */ | |
1358 | if (index > 0) { | |
1359 | flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S); | |
1360 | if (flow_wildcards_equal(&new, &old)) { | |
1361 | --index; | |
1362 | subtable->index_ofs[index] = 0; | |
f2c21402 | 1363 | cmap_destroy(&subtable->indices[index]); |
476f36e8 JR |
1364 | } |
1365 | } | |
1366 | subtable->n_indices = index; | |
1367 | ||
03868246 JR |
1368 | subtable->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX |
1369 | ? tag_create_deterministic(hash) | |
1370 | : TAG_ALL); | |
064af421 | 1371 | |
13751fd8 JR |
1372 | for (i = 0; i < cls->n_tries; i++) { |
1373 | subtable->trie_plen[i] = minimask_get_prefix_len(mask, | |
1374 | cls->tries[i].field); | |
1375 | } | |
1376 | ||
69d6040e JR |
1377 | /* Ports trie. */ |
1378 | subtable->ports_trie = NULL; | |
1379 | subtable->ports_mask_len | |
1380 | = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src))); | |
1381 | ||
f2c21402 | 1382 | cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash); |
ec988646 | 1383 | |
03868246 | 1384 | return subtable; |
064af421 BP |
1385 | } |
1386 | ||
b5d97350 | 1387 | static void |
cabd4c43 | 1388 | destroy_subtable(struct cls_classifier *cls, struct cls_subtable *subtable) |
e65413ab | 1389 | OVS_REQUIRES(cls->mutex) |
b5d97350 | 1390 | { |
476f36e8 JR |
1391 | int i; |
1392 | ||
fe7cfa5c | 1393 | pvector_remove(&cls->subtables, subtable); |
69d6040e JR |
1394 | trie_destroy(subtable->ports_trie); |
1395 | ||
476f36e8 | 1396 | for (i = 0; i < subtable->n_indices; i++) { |
f2c21402 | 1397 | cmap_destroy(&subtable->indices[i]); |
476f36e8 | 1398 | } |
f2c21402 JR |
1399 | cmap_remove(&cls->subtables_map, &subtable->cmap_node, |
1400 | minimask_hash(&subtable->mask, 0)); | |
fe7cfa5c | 1401 | minimask_destroy(&subtable->mask); |
f2c21402 | 1402 | cmap_destroy(&subtable->rules); |
fe7cfa5c | 1403 | ovsrcu_postpone(free, subtable); |
4aacd02d BP |
1404 | } |
1405 | ||
13751fd8 JR |
1406 | struct range { |
1407 | uint8_t start; | |
1408 | uint8_t end; | |
1409 | }; | |
1410 | ||
1411 | /* Return 'true' if can skip rest of the subtable based on the prefix trie | |
1412 | * lookup results. */ | |
1413 | static inline bool | |
1414 | check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, | |
1415 | const unsigned int field_plen[CLS_MAX_TRIES], | |
1416 | const struct range ofs, const struct flow *flow, | |
1417 | struct flow_wildcards *wc) | |
1418 | { | |
1419 | int j; | |
1420 | ||
1421 | /* Check if we could avoid fully unwildcarding the next level of | |
1422 | * fields using the prefix tries. The trie checks are done only as | |
1423 | * needed to avoid folding in additional bits to the wildcards mask. */ | |
1424 | for (j = 0; j < n_tries; j++) { | |
1425 | /* Is the trie field relevant for this subtable? */ | |
1426 | if (field_plen[j]) { | |
1427 | struct trie_ctx *ctx = &trie_ctx[j]; | |
1428 | uint8_t be32ofs = ctx->be32ofs; | |
1429 | ||
1430 | /* Is the trie field within the current range of fields? */ | |
1431 | if (be32ofs >= ofs.start && be32ofs < ofs.end) { | |
1432 | /* On-demand trie lookup. */ | |
1433 | if (!ctx->lookup_done) { | |
1434 | ctx->match_plen = trie_lookup(ctx->trie, flow, | |
1435 | &ctx->maskbits); | |
1436 | ctx->lookup_done = true; | |
1437 | } | |
1438 | /* Possible to skip the rest of the subtable if subtable's | |
1439 | * prefix on the field is longer than what is known to match | |
1440 | * based on the trie lookup. */ | |
1441 | if (field_plen[j] > ctx->match_plen) { | |
1442 | /* RFC: We want the trie lookup to never result in | |
1443 | * unwildcarding any bits that would not be unwildcarded | |
1444 | * otherwise. Since the trie is shared by the whole | |
1445 | * classifier, it is possible that the 'maskbits' contain | |
1446 | * bits that are irrelevant for the partition of the | |
1447 | * classifier relevant for the current flow. */ | |
1448 | ||
1449 | /* Can skip if the field is already unwildcarded. */ | |
1450 | if (mask_prefix_bits_set(wc, be32ofs, ctx->maskbits)) { | |
1451 | return true; | |
1452 | } | |
1453 | /* Check that the trie result will not unwildcard more bits | |
1454 | * than this stage will. */ | |
1455 | if (ctx->maskbits <= field_plen[j]) { | |
1456 | /* Unwildcard the bits and skip the rest. */ | |
1457 | mask_set_prefix_bits(wc, be32ofs, ctx->maskbits); | |
1458 | /* Note: Prerequisite already unwildcarded, as the only | |
1459 | * prerequisite of the supported trie lookup fields is | |
1460 | * the ethertype, which is currently always | |
1461 | * unwildcarded. | |
1462 | */ | |
1463 | return true; | |
1464 | } | |
1465 | } | |
1466 | } | |
1467 | } | |
1468 | } | |
1469 | return false; | |
1470 | } | |
1471 | ||
3016f3e4 JR |
1472 | /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit |
1473 | * for which 'flow', for which 'mask' has a bit set, specifies a particular | |
1474 | * value has the correct value in 'target'. | |
1475 | * | |
1476 | * This function is equivalent to miniflow_equal_flow_in_minimask(flow, | |
a64759f0 JR |
1477 | * target, mask) but this is faster because of the invariant that |
1478 | * flow->map and mask->masks.map are the same, and that this version | |
1479 | * takes the 'wc'. */ | |
3016f3e4 JR |
1480 | static inline bool |
1481 | miniflow_and_mask_matches_flow(const struct miniflow *flow, | |
1482 | const struct minimask *mask, | |
e9319757 | 1483 | const struct flow *target) |
3016f3e4 JR |
1484 | { |
1485 | const uint32_t *flowp = miniflow_get_u32_values(flow); | |
1486 | const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); | |
a64759f0 | 1487 | uint32_t idx; |
3016f3e4 | 1488 | |
a64759f0 JR |
1489 | MAP_FOR_EACH_INDEX(idx, mask->masks.map) { |
1490 | uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & *maskp++; | |
1491 | ||
1492 | if (diff) { | |
3016f3e4 JR |
1493 | return false; |
1494 | } | |
1495 | } | |
1496 | ||
1497 | return true; | |
1498 | } | |
1499 | ||
627fb667 | 1500 | static inline struct cls_match * |
476f36e8 JR |
1501 | find_match(const struct cls_subtable *subtable, const struct flow *flow, |
1502 | uint32_t hash) | |
b5d97350 | 1503 | { |
627fb667 | 1504 | struct cls_match *rule; |
b5d97350 | 1505 | |
f2c21402 | 1506 | CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) { |
3016f3e4 | 1507 | if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask, |
e9319757 | 1508 | flow)) { |
b5d97350 | 1509 | return rule; |
064af421 BP |
1510 | } |
1511 | } | |
c23740be | 1512 | |
064af421 BP |
1513 | return NULL; |
1514 | } | |
1515 | ||
e9319757 JR |
1516 | /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit |
1517 | * for which 'flow', for which 'mask' has a bit set, specifies a particular | |
1518 | * value has the correct value in 'target'. | |
1519 | * | |
1520 | * This function is equivalent to miniflow_and_mask_matches_flow() but this | |
1521 | * version fills in the mask bits in 'wc'. */ | |
1522 | static inline bool | |
1523 | miniflow_and_mask_matches_flow_wc(const struct miniflow *flow, | |
1524 | const struct minimask *mask, | |
1525 | const struct flow *target, | |
1526 | struct flow_wildcards *wc) | |
1527 | { | |
1528 | const uint32_t *flowp = miniflow_get_u32_values(flow); | |
1529 | const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); | |
1530 | uint32_t idx; | |
1531 | ||
1532 | MAP_FOR_EACH_INDEX(idx, mask->masks.map) { | |
1533 | uint32_t mask = *maskp++; | |
1534 | uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & mask; | |
1535 | ||
1536 | if (diff) { | |
1537 | /* Only unwildcard if none of the differing bits is already | |
1538 | * exact-matched. */ | |
1539 | if (!(flow_u32_value(&wc->masks, idx) & diff)) { | |
1540 | /* Keep one bit of the difference. */ | |
1541 | *flow_u32_lvalue(&wc->masks, idx) |= rightmost_1bit(diff); | |
1542 | } | |
1543 | return false; | |
1544 | } | |
1545 | /* Fill in the bits that were looked at. */ | |
1546 | *flow_u32_lvalue(&wc->masks, idx) |= mask; | |
1547 | } | |
1548 | ||
1549 | return true; | |
1550 | } | |
1551 | ||
386cb9f7 JR |
1552 | /* Unwildcard the fields looked up so far, if any. */ |
1553 | static void | |
1554 | fill_range_wc(const struct cls_subtable *subtable, struct flow_wildcards *wc, | |
1555 | uint8_t to) | |
1556 | { | |
1557 | if (to) { | |
1558 | flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, to); | |
1559 | } | |
1560 | } | |
1561 | ||
627fb667 | 1562 | static struct cls_match * |
476f36e8 | 1563 | find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, |
13751fd8 JR |
1564 | struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, |
1565 | struct flow_wildcards *wc) | |
476f36e8 JR |
1566 | { |
1567 | uint32_t basis = 0, hash; | |
a64759f0 | 1568 | struct cls_match *rule; |
476f36e8 | 1569 | int i; |
13751fd8 | 1570 | struct range ofs; |
476f36e8 | 1571 | |
ec988646 | 1572 | if (OVS_UNLIKELY(!wc)) { |
476f36e8 JR |
1573 | return find_match(subtable, flow, |
1574 | flow_hash_in_minimask(flow, &subtable->mask, 0)); | |
1575 | } | |
1576 | ||
13751fd8 | 1577 | ofs.start = 0; |
476f36e8 JR |
1578 | /* Try to finish early by checking fields in segments. */ |
1579 | for (i = 0; i < subtable->n_indices; i++) { | |
f2c21402 JR |
1580 | struct cmap_node *inode; |
1581 | ||
13751fd8 | 1582 | ofs.end = subtable->index_ofs[i]; |
476f36e8 | 1583 | |
13751fd8 JR |
1584 | if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, |
1585 | wc)) { | |
386cb9f7 JR |
1586 | /* 'wc' bits for the trie field set, now unwildcard the preceding |
1587 | * bits used so far. */ | |
1588 | fill_range_wc(subtable, wc, ofs.start); | |
1589 | return NULL; | |
13751fd8 JR |
1590 | } |
1591 | hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start, | |
1592 | ofs.end, &basis); | |
f2c21402 | 1593 | inode = cmap_find(&subtable->indices[i], hash); |
476f36e8 | 1594 | if (!inode) { |
386cb9f7 JR |
1595 | /* No match, can stop immediately, but must fold in the bits |
1596 | * used in lookup so far. */ | |
1597 | fill_range_wc(subtable, wc, ofs.end); | |
1598 | return NULL; | |
476f36e8 JR |
1599 | } |
1600 | ||
1601 | /* If we have narrowed down to a single rule already, check whether | |
a64759f0 | 1602 | * that rule matches. Either way, we're done. |
476f36e8 JR |
1603 | * |
1604 | * (Rare) hash collisions may cause us to miss the opportunity for this | |
1605 | * optimization. */ | |
f2c21402 | 1606 | if (!cmap_node_next(inode)) { |
476f36e8 | 1607 | ASSIGN_CONTAINER(rule, inode - i, index_nodes); |
e9319757 JR |
1608 | if (miniflow_and_mask_matches_flow_wc(&rule->flow, &subtable->mask, |
1609 | flow, wc)) { | |
1610 | return rule; | |
476f36e8 | 1611 | } |
e9319757 | 1612 | return NULL; |
476f36e8 | 1613 | } |
386cb9f7 | 1614 | ofs.start = ofs.end; |
476f36e8 | 1615 | } |
13751fd8 JR |
1616 | ofs.end = FLOW_U32S; |
1617 | /* Trie check for the final range. */ | |
1618 | if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) { | |
386cb9f7 JR |
1619 | fill_range_wc(subtable, wc, ofs.start); |
1620 | return NULL; | |
13751fd8 | 1621 | } |
a64759f0 JR |
1622 | hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start, |
1623 | ofs.end, &basis); | |
1624 | rule = find_match(subtable, flow, hash); | |
69d6040e JR |
1625 | if (!rule && subtable->ports_mask_len) { |
1626 | /* Ports are always part of the final range, if any. | |
1627 | * No match was found for the ports. Use the ports trie to figure out | |
1628 | * which ports bits to unwildcard. */ | |
1629 | unsigned int mbits; | |
1630 | ovs_be32 value, mask; | |
1631 | ||
1632 | mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src); | |
1633 | value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask; | |
f0e5aa11 | 1634 | trie_lookup_value(subtable->ports_trie, &value, 32, &mbits); |
69d6040e JR |
1635 | |
1636 | ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |= | |
1637 | mask & htonl(~0 << (32 - mbits)); | |
1638 | ||
386cb9f7 JR |
1639 | /* Unwildcard all bits in the mask upto the ports, as they were used |
1640 | * to determine there is no match. */ | |
1641 | fill_range_wc(subtable, wc, TP_PORTS_OFS32); | |
1642 | return NULL; | |
69d6040e | 1643 | } |
e9319757 | 1644 | |
13751fd8 | 1645 | /* Must unwildcard all the fields, as they were looked at. */ |
476f36e8 JR |
1646 | flow_wildcards_fold_minimask(wc, &subtable->mask); |
1647 | return rule; | |
1648 | } | |
1649 | ||
627fb667 | 1650 | static struct cls_match * |
03868246 JR |
1651 | find_equal(struct cls_subtable *subtable, const struct miniflow *flow, |
1652 | uint32_t hash) | |
064af421 | 1653 | { |
627fb667 | 1654 | struct cls_match *head; |
064af421 | 1655 | |
f2c21402 | 1656 | CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) { |
3016f3e4 | 1657 | if (miniflow_equal(&head->flow, flow)) { |
b5d97350 | 1658 | return head; |
064af421 BP |
1659 | } |
1660 | } | |
1661 | return NULL; | |
1662 | } | |
1663 | ||
627fb667 | 1664 | static struct cls_match * |
cabd4c43 | 1665 | insert_rule(struct cls_classifier *cls, struct cls_subtable *subtable, |
f2c21402 | 1666 | struct cls_rule *new_rule) |
e65413ab | 1667 | OVS_REQUIRES(cls->mutex) |
064af421 | 1668 | { |
627fb667 | 1669 | struct cls_match *old = NULL; |
f2c21402 JR |
1670 | struct cls_match *new = cls_match_alloc(new_rule); |
1671 | struct cls_match *head; | |
476f36e8 | 1672 | int i; |
f2c21402 | 1673 | uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES]; |
13751fd8 | 1674 | uint8_t prev_be32ofs = 0; |
476f36e8 JR |
1675 | |
1676 | /* Add new node to segment indices. */ | |
1677 | for (i = 0; i < subtable->n_indices; i++) { | |
f2c21402 JR |
1678 | ihash[i] = minimatch_hash_range(&new_rule->match, prev_be32ofs, |
1679 | subtable->index_ofs[i], &basis); | |
1680 | cmap_insert(&subtable->indices[i], &new->index_nodes[i], ihash[i]); | |
13751fd8 | 1681 | prev_be32ofs = subtable->index_ofs[i]; |
476f36e8 | 1682 | } |
f2c21402 JR |
1683 | hash = minimatch_hash_range(&new_rule->match, prev_be32ofs, FLOW_U32S, |
1684 | &basis); | |
1685 | head = find_equal(subtable, &new_rule->match.flow, hash); | |
b5d97350 | 1686 | if (!head) { |
f2c21402 JR |
1687 | cmap_insert(&subtable->rules, &new->cmap_node, hash); |
1688 | list_init(&new->list); | |
4aacd02d | 1689 | goto out; |
b5d97350 BP |
1690 | } else { |
1691 | /* Scan the list for the insertion point that will keep the list in | |
1692 | * order of decreasing priority. */ | |
627fb667 | 1693 | struct cls_match *rule; |
476f36e8 | 1694 | |
b5d97350 | 1695 | FOR_EACH_RULE_IN_LIST (rule, head) { |
f2c21402 | 1696 | if (new->priority >= rule->priority) { |
b5d97350 | 1697 | if (rule == head) { |
f2c21402 JR |
1698 | /* 'new' is the new highest-priority flow in the list. */ |
1699 | cmap_replace(&subtable->rules, &rule->cmap_node, | |
1700 | &new->cmap_node, hash); | |
b5d97350 | 1701 | } |
064af421 | 1702 | |
f2c21402 JR |
1703 | if (new->priority == rule->priority) { |
1704 | list_replace(&new->list, &rule->list); | |
4aacd02d | 1705 | old = rule; |
b5d97350 | 1706 | } else { |
f2c21402 | 1707 | list_insert(&rule->list, &new->list); |
b5d97350 | 1708 | } |
fe7cfa5c | 1709 | goto out; |
b5d97350 BP |
1710 | } |
1711 | } | |
064af421 | 1712 | |
b5d97350 | 1713 | /* Insert 'new' at the end of the list. */ |
f2c21402 | 1714 | list_push_back(&head->list, &new->list); |
064af421 | 1715 | } |
4aacd02d BP |
1716 | |
1717 | out: | |
1718 | if (!old) { | |
fe7cfa5c JR |
1719 | subtable->n_rules++; |
1720 | ||
1721 | /* Rule was added, not replaced. Update 'subtable's 'max_priority' | |
1722 | * and 'max_count', if necessary. */ | |
1723 | if (subtable->n_rules == 1) { | |
f2c21402 | 1724 | subtable->max_priority = new->priority; |
fe7cfa5c | 1725 | subtable->max_count = 1; |
f2c21402 JR |
1726 | pvector_insert(&cls->subtables, subtable, new->priority); |
1727 | } else if (subtable->max_priority == new->priority) { | |
fe7cfa5c | 1728 | ++subtable->max_count; |
f2c21402 JR |
1729 | } else if (new->priority > subtable->max_priority) { |
1730 | subtable->max_priority = new->priority; | |
fe7cfa5c | 1731 | subtable->max_count = 1; |
f2c21402 | 1732 | pvector_change_priority(&cls->subtables, subtable, new->priority); |
fe7cfa5c | 1733 | } |
476f36e8 JR |
1734 | } else { |
1735 | /* Remove old node from indices. */ | |
1736 | for (i = 0; i < subtable->n_indices; i++) { | |
f2c21402 | 1737 | cmap_remove(&subtable->indices[i], &old->index_nodes[i], ihash[i]); |
476f36e8 | 1738 | } |
4aacd02d BP |
1739 | } |
1740 | return old; | |
064af421 BP |
1741 | } |
1742 | ||
627fb667 JR |
1743 | static struct cls_match * |
1744 | next_rule_in_list__(struct cls_match *rule) | |
e65413ab | 1745 | OVS_NO_THREAD_SAFETY_ANALYSIS |
064af421 | 1746 | { |
627fb667 | 1747 | struct cls_match *next = OBJECT_CONTAINING(rule->list.next, next, list); |
955f579d BP |
1748 | return next; |
1749 | } | |
1750 | ||
627fb667 JR |
1751 | static struct cls_match * |
1752 | next_rule_in_list(struct cls_match *rule) | |
955f579d | 1753 | { |
627fb667 | 1754 | struct cls_match *next = next_rule_in_list__(rule); |
b5d97350 | 1755 | return next->priority < rule->priority ? next : NULL; |
064af421 | 1756 | } |
13751fd8 JR |
1757 | \f |
1758 | /* A longest-prefix match tree. */ | |
1759 | struct trie_node { | |
1760 | uint32_t prefix; /* Prefix bits for this node, MSB first. */ | |
c30cfa6b | 1761 | uint8_t n_bits; /* Never zero, except for the root node. */ |
13751fd8 JR |
1762 | unsigned int n_rules; /* Number of rules that have this prefix. */ |
1763 | struct trie_node *edges[2]; /* Both NULL if leaf. */ | |
1764 | }; | |
1765 | ||
1766 | /* Max bits per node. Must fit in struct trie_node's 'prefix'. | |
1767 | * Also tested with 16, 8, and 5 to stress the implementation. */ | |
1768 | #define TRIE_PREFIX_BITS 32 | |
1769 | ||
1770 | /* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'. | |
1771 | * Prefixes are in the network byte order, and the offset 0 corresponds to | |
1772 | * the most significant bit of the first byte. The offset can be read as | |
1773 | * "how many bits to skip from the start of the prefix starting at 'pr'". */ | |
1774 | static uint32_t | |
1775 | raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) | |
1776 | { | |
1777 | uint32_t prefix; | |
1778 | ||
1779 | pr += ofs / 32; /* Where to start. */ | |
1780 | ofs %= 32; /* How many bits to skip at 'pr'. */ | |
1781 | ||
1782 | prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */ | |
1783 | if (plen > 32 - ofs) { /* Need more than we have already? */ | |
1784 | prefix |= ntohl(*++pr) >> (32 - ofs); | |
1785 | } | |
1786 | /* Return with possible unwanted bits at the end. */ | |
1787 | return prefix; | |
1788 | } | |
1789 | ||
1790 | /* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit | |
1791 | * offset 'ofs'. Prefixes are in the network byte order, and the offset 0 | |
1792 | * corresponds to the most significant bit of the first byte. The offset can | |
1793 | * be read as "how many bits to skip from the start of the prefix starting at | |
1794 | * 'pr'". */ | |
1795 | static uint32_t | |
1796 | trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) | |
1797 | { | |
1798 | if (!plen) { | |
1799 | return 0; | |
1800 | } | |
1801 | if (plen > TRIE_PREFIX_BITS) { | |
1802 | plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */ | |
1803 | } | |
1804 | /* Return with unwanted bits cleared. */ | |
1805 | return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen); | |
1806 | } | |
1807 | ||
c30cfa6b | 1808 | /* Return the number of equal bits in 'n_bits' of 'prefix's MSBs and a 'value' |
13751fd8 JR |
1809 | * starting at "MSB 0"-based offset 'ofs'. */ |
1810 | static unsigned int | |
c30cfa6b | 1811 | prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[], |
13751fd8 JR |
1812 | unsigned int ofs) |
1813 | { | |
c30cfa6b | 1814 | uint64_t diff = prefix ^ raw_get_prefix(value, ofs, n_bits); |
13751fd8 | 1815 | /* Set the bit after the relevant bits to limit the result. */ |
c30cfa6b | 1816 | return raw_clz64(diff << 32 | UINT64_C(1) << (63 - n_bits)); |
13751fd8 JR |
1817 | } |
1818 | ||
1819 | /* Return the number of equal bits in 'node' prefix and a 'prefix' of length | |
1820 | * 'plen', starting at "MSB 0"-based offset 'ofs'. */ | |
1821 | static unsigned int | |
1822 | trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[], | |
1823 | unsigned int ofs, unsigned int plen) | |
1824 | { | |
c30cfa6b | 1825 | return prefix_equal_bits(node->prefix, MIN(node->n_bits, plen - ofs), |
13751fd8 JR |
1826 | prefix, ofs); |
1827 | } | |
1828 | ||
1829 | /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can | |
1830 | * be greater than 31. */ | |
1831 | static unsigned int | |
1832 | be_get_bit_at(const ovs_be32 value[], unsigned int ofs) | |
1833 | { | |
1834 | return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u; | |
1835 | } | |
1836 | ||
/* Returns the bit of 'prefix' at "MSB 0"-based offset 'ofs' as 0 or 1.
 * 'ofs' must be between 0 and 31, inclusive. */
static unsigned int
get_bit_at(const uint32_t prefix, unsigned int ofs)
{
    return 1u & (prefix >> (31 - ofs));
}
1844 | ||
1845 | /* Create new branch. */ | |
1846 | static struct trie_node * | |
1847 | trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen, | |
1848 | unsigned int n_rules) | |
1849 | { | |
1850 | struct trie_node *node = xmalloc(sizeof *node); | |
1851 | ||
1852 | node->prefix = trie_get_prefix(prefix, ofs, plen); | |
1853 | ||
1854 | if (plen <= TRIE_PREFIX_BITS) { | |
c30cfa6b | 1855 | node->n_bits = plen; |
13751fd8 JR |
1856 | node->edges[0] = NULL; |
1857 | node->edges[1] = NULL; | |
1858 | node->n_rules = n_rules; | |
1859 | } else { /* Need intermediate nodes. */ | |
1860 | struct trie_node *subnode = trie_branch_create(prefix, | |
1861 | ofs + TRIE_PREFIX_BITS, | |
1862 | plen - TRIE_PREFIX_BITS, | |
1863 | n_rules); | |
1864 | int bit = get_bit_at(subnode->prefix, 0); | |
c30cfa6b | 1865 | node->n_bits = TRIE_PREFIX_BITS; |
13751fd8 JR |
1866 | node->edges[bit] = subnode; |
1867 | node->edges[!bit] = NULL; | |
1868 | node->n_rules = 0; | |
1869 | } | |
1870 | return node; | |
1871 | } | |
1872 | ||
/* Frees a single trie node, not its children. */
static void
trie_node_destroy(struct trie_node *node)
{
    free(node);
}
1878 | ||
1879 | static void | |
1880 | trie_destroy(struct trie_node *node) | |
1881 | { | |
1882 | if (node) { | |
1883 | trie_destroy(node->edges[0]); | |
1884 | trie_destroy(node->edges[1]); | |
1885 | free(node); | |
1886 | } | |
1887 | } | |
1888 | ||
1889 | static bool | |
1890 | trie_is_leaf(const struct trie_node *trie) | |
1891 | { | |
1892 | return !trie->edges[0] && !trie->edges[1]; /* No children. */ | |
1893 | } | |
1894 | ||
1895 | static void | |
1896 | mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs, | |
c30cfa6b | 1897 | unsigned int n_bits) |
13751fd8 JR |
1898 | { |
1899 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; | |
1900 | unsigned int i; | |
1901 | ||
c30cfa6b | 1902 | for (i = 0; i < n_bits / 32; i++) { |
13751fd8 JR |
1903 | mask[i] = OVS_BE32_MAX; |
1904 | } | |
c30cfa6b JR |
1905 | if (n_bits % 32) { |
1906 | mask[i] |= htonl(~0u << (32 - n_bits % 32)); | |
13751fd8 JR |
1907 | } |
1908 | } | |
1909 | ||
1910 | static bool | |
1911 | mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs, | |
c30cfa6b | 1912 | unsigned int n_bits) |
13751fd8 JR |
1913 | { |
1914 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; | |
1915 | unsigned int i; | |
1916 | ovs_be32 zeroes = 0; | |
1917 | ||
c30cfa6b | 1918 | for (i = 0; i < n_bits / 32; i++) { |
13751fd8 JR |
1919 | zeroes |= ~mask[i]; |
1920 | } | |
c30cfa6b JR |
1921 | if (n_bits % 32) { |
1922 | zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32)); | |
13751fd8 JR |
1923 | } |
1924 | ||
c30cfa6b | 1925 | return !zeroes; /* All 'n_bits' bits set. */ |
13751fd8 JR |
1926 | } |
1927 | ||
1928 | static struct trie_node ** | |
1929 | trie_next_edge(struct trie_node *node, const ovs_be32 value[], | |
1930 | unsigned int ofs) | |
1931 | { | |
1932 | return node->edges + be_get_bit_at(value, ofs); | |
1933 | } | |
1934 | ||
1935 | static const struct trie_node * | |
1936 | trie_next_node(const struct trie_node *node, const ovs_be32 value[], | |
1937 | unsigned int ofs) | |
1938 | { | |
1939 | return node->edges[be_get_bit_at(value, ofs)]; | |
1940 | } | |
1941 | ||
1942 | /* Return the prefix mask length necessary to find the longest-prefix match for | |
1943 | * the '*value' in the prefix tree 'node'. | |
1944 | * '*checkbits' is set to the number of bits in the prefix mask necessary to | |
1945 | * determine a mismatch, in case there are longer prefixes in the tree below | |
1946 | * the one that matched. | |
1947 | */ | |
1948 | static unsigned int | |
1949 | trie_lookup_value(const struct trie_node *node, const ovs_be32 value[], | |
f0e5aa11 | 1950 | unsigned int n_bits, unsigned int *checkbits) |
13751fd8 | 1951 | { |
197573ac | 1952 | unsigned int ofs = 0, match_len = 0; |
13751fd8 JR |
1953 | const struct trie_node *prev = NULL; |
1954 | ||
197573ac | 1955 | for (; node; prev = node, node = trie_next_node(node, value, ofs)) { |
13751fd8 JR |
1956 | unsigned int eqbits; |
1957 | /* Check if this edge can be followed. */ | |
c30cfa6b | 1958 | eqbits = prefix_equal_bits(node->prefix, node->n_bits, value, ofs); |
197573ac | 1959 | ofs += eqbits; |
c30cfa6b | 1960 | if (eqbits < node->n_bits) { /* Mismatch, nothing more to be found. */ |
197573ac JR |
1961 | /* Bit at offset 'ofs' differed. */ |
1962 | *checkbits = ofs + 1; /* Includes the first mismatching bit. */ | |
13751fd8 JR |
1963 | return match_len; |
1964 | } | |
1965 | /* Full match, check if rules exist at this prefix length. */ | |
1966 | if (node->n_rules > 0) { | |
197573ac | 1967 | match_len = ofs; |
13751fd8 | 1968 | } |
f0e5aa11 JR |
1969 | if (ofs >= n_bits) { |
1970 | *checkbits = n_bits; /* Full prefix. */ | |
1971 | return match_len; | |
1972 | } | |
13751fd8 | 1973 | } |
f0e5aa11 JR |
1974 | /* node == NULL. Full match so far, but we came to a dead end. |
1975 | * need to exclude the other branch if it exists. */ | |
197573ac | 1976 | *checkbits = !prev || trie_is_leaf(prev) ? ofs : ofs + 1; |
13751fd8 JR |
1977 | return match_len; |
1978 | } | |
1979 | ||
1980 | static unsigned int | |
1981 | trie_lookup(const struct cls_trie *trie, const struct flow *flow, | |
1982 | unsigned int *checkbits) | |
1983 | { | |
1984 | const struct mf_field *mf = trie->field; | |
1985 | ||
1986 | /* Check that current flow matches the prerequisites for the trie | |
1987 | * field. Some match fields are used for multiple purposes, so we | |
1988 | * must check that the trie is relevant for this flow. */ | |
1989 | if (mf_are_prereqs_ok(mf, flow)) { | |
1990 | return trie_lookup_value(trie->root, | |
1991 | &((ovs_be32 *)flow)[mf->flow_be32ofs], | |
f0e5aa11 | 1992 | mf->n_bits, checkbits); |
13751fd8 JR |
1993 | } |
1994 | *checkbits = 0; /* Value not used in this case. */ | |
1995 | return UINT_MAX; | |
1996 | } | |
1997 | ||
1998 | /* Returns the length of a prefix match mask for the field 'mf' in 'minimask'. | |
1999 | * Returns the u32 offset to the miniflow data in '*miniflow_index', if | |
2000 | * 'miniflow_index' is not NULL. */ | |
2001 | static unsigned int | |
2002 | minimask_get_prefix_len(const struct minimask *minimask, | |
2003 | const struct mf_field *mf) | |
2004 | { | |
c30cfa6b | 2005 | unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */ |
13751fd8 JR |
2006 | uint8_t u32_ofs = mf->flow_be32ofs; |
2007 | uint8_t u32_end = u32_ofs + mf->n_bytes / 4; | |
2008 | ||
2009 | for (; u32_ofs < u32_end; ++u32_ofs) { | |
2010 | uint32_t mask; | |
2011 | mask = ntohl((OVS_FORCE ovs_be32)minimask_get(minimask, u32_ofs)); | |
2012 | ||
2013 | /* Validate mask, count the mask length. */ | |
2014 | if (mask_tz) { | |
2015 | if (mask) { | |
2016 | return 0; /* No bits allowed after mask ended. */ | |
2017 | } | |
2018 | } else { | |
2019 | if (~mask & (~mask + 1)) { | |
2020 | return 0; /* Mask not contiguous. */ | |
2021 | } | |
2022 | mask_tz = ctz32(mask); | |
c30cfa6b | 2023 | n_bits += 32 - mask_tz; |
13751fd8 JR |
2024 | } |
2025 | } | |
2026 | ||
c30cfa6b | 2027 | return n_bits; |
13751fd8 JR |
2028 | } |
2029 | ||
2030 | /* | |
2031 | * This is called only when mask prefix is known to be CIDR and non-zero. | |
2032 | * Relies on the fact that the flow and mask have the same map, and since | |
2033 | * the mask is CIDR, the storage for the flow field exists even if it | |
2034 | * happened to be zeros. | |
2035 | */ | |
2036 | static const ovs_be32 * | |
2037 | minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf) | |
2038 | { | |
27bbe15d | 2039 | return miniflow_get_be32_values(&match->flow) + |
13751fd8 JR |
2040 | count_1bits(match->flow.map & ((UINT64_C(1) << mf->flow_be32ofs) - 1)); |
2041 | } | |
2042 | ||
2043 | /* Insert rule in to the prefix tree. | |
2044 | * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask | |
2045 | * in 'rule'. */ | |
2046 | static void | |
2047 | trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) | |
2048 | { | |
69d6040e JR |
2049 | trie_insert_prefix(&trie->root, |
2050 | minimatch_get_prefix(&rule->match, trie->field), mlen); | |
2051 | } | |
2052 | ||
2053 | static void | |
2054 | trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int mlen) | |
2055 | { | |
13751fd8 | 2056 | struct trie_node *node; |
13751fd8 JR |
2057 | int ofs = 0; |
2058 | ||
2059 | /* Walk the tree. */ | |
69d6040e | 2060 | for (; (node = *edge) != NULL; |
13751fd8 JR |
2061 | edge = trie_next_edge(node, prefix, ofs)) { |
2062 | unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen); | |
2063 | ofs += eqbits; | |
c30cfa6b | 2064 | if (eqbits < node->n_bits) { |
13751fd8 JR |
2065 | /* Mismatch, new node needs to be inserted above. */ |
2066 | int old_branch = get_bit_at(node->prefix, eqbits); | |
2067 | ||
2068 | /* New parent node. */ | |
2069 | *edge = trie_branch_create(prefix, ofs - eqbits, eqbits, | |
2070 | ofs == mlen ? 1 : 0); | |
2071 | ||
2072 | /* Adjust old node for its new position in the tree. */ | |
2073 | node->prefix <<= eqbits; | |
c30cfa6b | 2074 | node->n_bits -= eqbits; |
13751fd8 JR |
2075 | (*edge)->edges[old_branch] = node; |
2076 | ||
2077 | /* Check if need a new branch for the new rule. */ | |
2078 | if (ofs < mlen) { | |
2079 | (*edge)->edges[!old_branch] | |
2080 | = trie_branch_create(prefix, ofs, mlen - ofs, 1); | |
2081 | } | |
2082 | return; | |
2083 | } | |
2084 | /* Full match so far. */ | |
2085 | ||
2086 | if (ofs == mlen) { | |
2087 | /* Full match at the current node, rule needs to be added here. */ | |
2088 | node->n_rules++; | |
2089 | return; | |
2090 | } | |
2091 | } | |
2092 | /* Must insert a new tree branch for the new rule. */ | |
2093 | *edge = trie_branch_create(prefix, ofs, mlen - ofs, 1); | |
2094 | } | |
2095 | ||
2096 | /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask | |
2097 | * in 'rule'. */ | |
2098 | static void | |
2099 | trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) | |
2100 | { | |
69d6040e JR |
2101 | trie_remove_prefix(&trie->root, |
2102 | minimatch_get_prefix(&rule->match, trie->field), mlen); | |
2103 | } | |
2104 | ||
2105 | /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask | |
2106 | * in 'rule'. */ | |
2107 | static void | |
2108 | trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int mlen) | |
2109 | { | |
13751fd8 JR |
2110 | struct trie_node *node; |
2111 | struct trie_node **edges[sizeof(union mf_value) * 8]; | |
2112 | int depth = 0, ofs = 0; | |
2113 | ||
2114 | /* Walk the tree. */ | |
69d6040e | 2115 | for (edges[0] = root; |
13751fd8 JR |
2116 | (node = *edges[depth]) != NULL; |
2117 | edges[++depth] = trie_next_edge(node, prefix, ofs)) { | |
2118 | unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen); | |
69d6040e | 2119 | |
c30cfa6b | 2120 | if (eqbits < node->n_bits) { |
13751fd8 JR |
2121 | /* Mismatch, nothing to be removed. This should never happen, as |
2122 | * only rules in the classifier are ever removed. */ | |
2123 | break; /* Log a warning. */ | |
2124 | } | |
2125 | /* Full match so far. */ | |
2126 | ofs += eqbits; | |
2127 | ||
2128 | if (ofs == mlen) { | |
2129 | /* Full prefix match at the current node, remove rule here. */ | |
2130 | if (!node->n_rules) { | |
2131 | break; /* Log a warning. */ | |
2132 | } | |
2133 | node->n_rules--; | |
2134 | ||
2135 | /* Check if can prune the tree. */ | |
2136 | while (!node->n_rules && !(node->edges[0] && node->edges[1])) { | |
2137 | /* No rules and at most one child node, remove this node. */ | |
2138 | struct trie_node *next; | |
2139 | next = node->edges[0] ? node->edges[0] : node->edges[1]; | |
2140 | ||
2141 | if (next) { | |
c30cfa6b | 2142 | if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) { |
13751fd8 JR |
2143 | break; /* Cannot combine. */ |
2144 | } | |
2145 | /* Combine node with next. */ | |
c30cfa6b JR |
2146 | next->prefix = node->prefix | next->prefix >> node->n_bits; |
2147 | next->n_bits += node->n_bits; | |
13751fd8 JR |
2148 | } |
2149 | trie_node_destroy(node); | |
2150 | /* Update the parent's edge. */ | |
2151 | *edges[depth] = next; | |
2152 | if (next || !depth) { | |
2153 | /* Branch not pruned or at root, nothing more to do. */ | |
2154 | break; | |
2155 | } | |
2156 | node = *edges[--depth]; | |
2157 | } | |
2158 | return; | |
2159 | } | |
2160 | } | |
2161 | /* Cannot go deeper. This should never happen, since only rules | |
2162 | * that actually exist in the classifier are ever removed. */ | |
2163 | VLOG_WARN("Trying to remove non-existing rule from a prefix trie."); | |
2164 | } |