]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
18080541 | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "classifier.h" | |
38c449e0 | 19 | #include "classifier-private.h" |
064af421 BP |
20 | #include <errno.h> |
21 | #include <netinet/in.h> | |
844dff32 | 22 | #include "byte-order.h" |
68d1c8c3 | 23 | #include "dynamic-string.h" |
07b37e8f | 24 | #include "odp-util.h" |
d8ae4d67 | 25 | #include "ofp-util.h" |
13751fd8 | 26 | #include "packets.h" |
52054c15 | 27 | #include "util.h" |
e6211adc | 28 | #include "openvswitch/vlog.h" |
13751fd8 JR |
29 | |
30 | VLOG_DEFINE_THIS_MODULE(classifier); | |
064af421 | 31 | |
69d6040e JR |
32 | struct trie_ctx; |
33 | ||
/* A collection of "struct cls_conjunction"s currently embedded into a
 * cls_match. */
struct cls_conjunction_set {
    /* Link back to the cls_match.
     *
     * cls_conjunction_set is mostly used during classifier lookup, and, in
     * turn, during classifier lookup the most used member of
     * cls_conjunction_set is the rule's priority, so we cache it here for fast
     * access. */
    struct cls_match *match;
    int priority;               /* Cached copy of match->priority. */

    /* Conjunction information.
     *
     * 'min_n_clauses' allows some optimization during classifier lookup. */
    unsigned int n;             /* Number of elements in 'conj'. */
    unsigned int min_n_clauses; /* Smallest 'n_clauses' among elements of
                                 * 'conj'. */
    struct cls_conjunction conj[]; /* Flexible array member; allocated with
                                    * cls_conjunction_set_size(). */
};
53 | ||
/* Ports trie depends on both ports sharing the same ovs_be32. */
#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
/* tp_src and tp_dst must live in the same 32-bit word of struct flow. */
BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
/* The 32-bit offset must be even so a 64-bit offset can be derived. */
BUILD_ASSERT_DECL(TP_PORTS_OFS32 % 2 == 0);
#define TP_PORTS_OFS64 (TP_PORTS_OFS32 / 2)
cabd4c43 | 59 | |
18080541 BP |
60 | static size_t |
61 | cls_conjunction_set_size(size_t n) | |
62 | { | |
63 | return (sizeof(struct cls_conjunction_set) | |
64 | + n * sizeof(struct cls_conjunction)); | |
65 | } | |
66 | ||
67 | static struct cls_conjunction_set * | |
68 | cls_conjunction_set_alloc(struct cls_match *match, | |
69 | const struct cls_conjunction conj[], size_t n) | |
70 | { | |
71 | if (n) { | |
72 | size_t min_n_clauses = conj[0].n_clauses; | |
73 | for (size_t i = 1; i < n; i++) { | |
74 | min_n_clauses = MIN(min_n_clauses, conj[i].n_clauses); | |
75 | } | |
76 | ||
77 | struct cls_conjunction_set *set = xmalloc(cls_conjunction_set_size(n)); | |
78 | set->match = match; | |
79 | set->priority = match->priority; | |
80 | set->n = n; | |
81 | set->min_n_clauses = min_n_clauses; | |
82 | memcpy(set->conj, conj, n * sizeof *conj); | |
83 | return set; | |
84 | } else { | |
85 | return NULL; | |
86 | } | |
87 | } | |
88 | ||
/* Allocates a new cls_match for 'rule', with the conjunction set built from
 * 'conj'/'n'.  The miniflow data is inlined after the struct, so the
 * allocation is sized for exactly 'count' miniflow values.  The new match is
 * initially invisible to lookups (visibility 0); the caller makes it visible
 * later via cls_match_set_visibility(). */
static struct cls_match *
cls_match_alloc(const struct cls_rule *rule,
                const struct cls_conjunction conj[], size_t n)
{
    int count = count_1bits(rule->match.flow.map);

    /* Size the allocation so the inline miniflow values replace the
     * fixed-size 'inline_values' member. */
    struct cls_match *cls_match
        = xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values
                  + MINIFLOW_VALUES_SIZE(count));

    ovsrcu_init(&cls_match->next, NULL);
    /* 'cls_rule' and 'priority' are const members; CONST_CAST is used only
     * here during initialization. */
    *CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule;
    *CONST_CAST(int *, &cls_match->priority) = rule->priority;
    atomic_init(&cls_match->visibility, 0);   /* Initially invisible. */
    miniflow_clone_inline(CONST_CAST(struct miniflow *, &cls_match->flow),
                          &rule->match.flow, count);
    ovsrcu_set_hidden(&cls_match->conj_set,
                      cls_conjunction_set_alloc(cls_match, conj, n));

    return cls_match;
}
cabd4c43 | 110 | |
e48eccd1 | 111 | static struct cls_subtable *find_subtable(const struct classifier *cls, |
dfea28b3 | 112 | const struct minimask *); |
e48eccd1 | 113 | static struct cls_subtable *insert_subtable(struct classifier *cls, |
fccd7c09 JR |
114 | const struct minimask *); |
115 | static void destroy_subtable(struct classifier *cls, struct cls_subtable *); | |
b5d97350 | 116 | |
dfea28b3 | 117 | static const struct cls_match *find_match_wc(const struct cls_subtable *, |
2b7b1427 | 118 | long long version, |
dfea28b3 JR |
119 | const struct flow *, |
120 | struct trie_ctx *, | |
121 | unsigned int n_tries, | |
122 | struct flow_wildcards *); | |
123 | static struct cls_match *find_equal(const struct cls_subtable *, | |
627fb667 | 124 | const struct miniflow *, uint32_t hash); |
b5d97350 | 125 | |
/* Return the next visible (lower-priority) rule in the list.  Multiple
 * identical rules with the same priority may exist transitionally, but when
 * versioning is used at most one of them is ever visible for lookups on any
 * given 'version'.
 *
 * Returns NULL if no visible rule follows 'rule' in the list. */
static inline const struct cls_match *
next_visible_rule_in_list(const struct cls_match *rule, long long version)
{
    do {
        rule = cls_match_next(rule);
        if (!rule) {
            /* We have reached the head of the list, stop. */
            break;
        }
    } while (!cls_match_visible_in_version(rule, version));

    return rule;
}
143 | ||
13751fd8 JR |
144 | static unsigned int minimask_get_prefix_len(const struct minimask *, |
145 | const struct mf_field *); | |
e48eccd1 | 146 | static void trie_init(struct classifier *cls, int trie_idx, |
fccd7c09 | 147 | const struct mf_field *); |
13751fd8 | 148 | static unsigned int trie_lookup(const struct cls_trie *, const struct flow *, |
c0bfb650 | 149 | union mf_value *plens); |
f358a2cb | 150 | static unsigned int trie_lookup_value(const rcu_trie_ptr *, |
c0bfb650 JR |
151 | const ovs_be32 value[], ovs_be32 plens[], |
152 | unsigned int value_bits); | |
f358a2cb | 153 | static void trie_destroy(rcu_trie_ptr *); |
13751fd8 | 154 | static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen); |
f358a2cb | 155 | static void trie_insert_prefix(rcu_trie_ptr *, const ovs_be32 *prefix, |
69d6040e | 156 | int mlen); |
13751fd8 | 157 | static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen); |
f358a2cb | 158 | static void trie_remove_prefix(rcu_trie_ptr *, const ovs_be32 *prefix, |
69d6040e | 159 | int mlen); |
13751fd8 | 160 | static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs, |
c30cfa6b | 161 | unsigned int n_bits); |
13751fd8 | 162 | static bool mask_prefix_bits_set(const struct flow_wildcards *, |
c30cfa6b | 163 | uint8_t be32ofs, unsigned int n_bits); |
81a76618 BP |
164 | \f |
165 | /* cls_rule. */ | |
b5d97350 | 166 | |
/* Common initialization for cls_rule: sets 'priority' and 'version' (both
 * const members, hence the CONST_CASTs) and marks the rule as not being in
 * any classifier.  'version' must be positive; 0 means "never visible". */
static inline void
cls_rule_init__(struct cls_rule *rule, unsigned int priority,
                long long version)
{
    ovs_assert(version > 0);

    rculist_init(&rule->node);
    *CONST_CAST(int *, &rule->priority) = priority;
    *CONST_CAST(long long *, &rule->version) = version;
    rule->cls_match = NULL;
}
178 | ||
/* Initializes 'rule' to match packets specified by 'match' at the given
 * 'priority'.  'match' must satisfy the invariant described in the comment at
 * the definition of struct match.
 *
 * The caller must eventually destroy 'rule' with cls_rule_destroy().
 *
 * Clients should not use priority INT_MIN.  (OpenFlow uses priorities between
 * 0 and UINT16_MAX, inclusive.) */
void
cls_rule_init(struct cls_rule *rule, const struct match *match, int priority,
              long long version)
{
    cls_rule_init__(rule, priority, version);
    /* 'rule->match' is const; initialized exactly once here. */
    minimatch_init(CONST_CAST(struct minimatch *, &rule->match), match);
}
194 | ||
/* Same as cls_rule_init() for initialization from a "struct minimatch". */
void
cls_rule_init_from_minimatch(struct cls_rule *rule,
                             const struct minimatch *match, int priority,
                             long long version)
{
    cls_rule_init__(rule, priority, version);
    /* Clones (deep-copies) 'match'; 'rule' owns the copy. */
    minimatch_clone(CONST_CAST(struct minimatch *, &rule->match), match);
}
204 | ||
/* Initializes 'dst' as a copy of 'src', but with 'version'.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_clone_in_version(struct cls_rule *dst, const struct cls_rule *src,
                          long long version)
{
    cls_rule_init__(dst, src->priority, version);
    minimatch_clone(CONST_CAST(struct minimatch *, &dst->match), &src->match);
}
215 | ||
/* Initializes 'dst' as a copy of 'src', keeping 'src''s version.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src)
{
    cls_rule_clone_in_version(dst, src, src->version);
}
224 | ||
/* Initializes 'dst' with the data in 'src', destroying 'src'.
 *
 * 'src' must be a cls_rule NOT in a classifier.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_move(struct cls_rule *dst, struct cls_rule *src)
{
    cls_rule_init__(dst, src->priority, src->version);
    /* Transfers ownership of 'src''s minimatch data to 'dst'; 'src' is left
     * destroyed. */
    minimatch_move(CONST_CAST(struct minimatch *, &dst->match),
                   CONST_CAST(struct minimatch *, &src->match));
}
237 | ||
/* Frees memory referenced by 'rule'.  Doesn't free 'rule' itself (it's
 * normally embedded into a larger structure).
 *
 * ('rule' must not currently be in a classifier.) */
void
cls_rule_destroy(struct cls_rule *rule)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    ovs_assert(!rule->cls_match);   /* Must not be in a classifier. */

    /* Check that the rule has been properly removed from the classifier. */
    ovs_assert(rule->node.prev == RCULIST_POISON
               || rculist_is_empty(&rule->node));
    rculist_poison__(&rule->node);   /* Poisons also the next pointer. */

    minimatch_destroy(CONST_CAST(struct minimatch *, &rule->match));
}
255 | ||
/* Replaces 'cr''s conjunction set with a copy of the 'n' conjunctions in
 * 'conj'.  No-op if the new set is identical to the current one.  The old
 * set, if any, is freed via RCU after all readers have quiesced.
 *
 * 'cr' must be in a classifier (its cls_match must be non-NULL). */
void
cls_rule_set_conjunctions(struct cls_rule *cr,
                          const struct cls_conjunction *conj, size_t n)
{
    struct cls_match *match = cr->cls_match;
    struct cls_conjunction_set *old
        = ovsrcu_get_protected(struct cls_conjunction_set *, &match->conj_set);
    struct cls_conjunction *old_conj = old ? old->conj : NULL;
    unsigned int old_n = old ? old->n : 0;

    /* Only publish a new set if it differs from the old one. */
    if (old_n != n || (n && memcmp(old_conj, conj, n * sizeof *conj))) {
        if (old) {
            ovsrcu_postpone(free, old);
        }
        ovsrcu_set(&match->conj_set,
                   cls_conjunction_set_alloc(match, conj, n));
    }
}
274 | ||
275 | ||
81a76618 BP |
276 | /* Returns true if 'a' and 'b' match the same packets at the same priority, |
277 | * false if they differ in some way. */ | |
193eb874 BP |
278 | bool |
279 | cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b) | |
280 | { | |
5cb7a798 | 281 | return a->priority == b->priority && minimatch_equal(&a->match, &b->match); |
193eb874 BP |
282 | } |
283 | ||
81a76618 | 284 | /* Returns a hash value for 'rule', folding in 'basis'. */ |
57452fdc BP |
285 | uint32_t |
286 | cls_rule_hash(const struct cls_rule *rule, uint32_t basis) | |
287 | { | |
5cb7a798 | 288 | return minimatch_hash(&rule->match, hash_int(rule->priority, basis)); |
73f33563 BP |
289 | } |
290 | ||
81a76618 | 291 | /* Appends a string describing 'rule' to 's'. */ |
07b37e8f BP |
292 | void |
293 | cls_rule_format(const struct cls_rule *rule, struct ds *s) | |
294 | { | |
5cb7a798 | 295 | minimatch_format(&rule->match, s, rule->priority); |
064af421 | 296 | } |
3ca1de08 BP |
297 | |
298 | /* Returns true if 'rule' matches every packet, false otherwise. */ | |
299 | bool | |
300 | cls_rule_is_catchall(const struct cls_rule *rule) | |
301 | { | |
5cb7a798 | 302 | return minimask_is_catchall(&rule->match.mask); |
3ca1de08 | 303 | } |
fc02ecc7 | 304 | |
/* Makes rule invisible after 'version'.  Once that version is made invisible
 * (by changing the version parameter used in lookups), the rule should be
 * actually removed via ovsrcu_postpone().
 *
 * 'rule_' must be in a classifier. */
void
cls_rule_make_invisible_in_version(const struct cls_rule *rule_,
                                   long long version, long long lookup_version)
{
    struct cls_match *rule = rule_->cls_match;

    /* XXX: Adjust when versioning is actually used. */
    ovs_assert(version >= rule_->version && version >= lookup_version);

    /* Normally, we call this when deleting a rule that is already visible to
     * lookups.  However, sometimes a bundle transaction will add a rule and
     * then delete it before the rule has ever become visible.  If we set such
     * a rule to become invisible in a future 'version', it would become
     * visible to all prior versions.  So, in this case we must set the rule
     * visibility to 0 (== never visible). */
    if (cls_match_visible_in_version(rule, lookup_version)) {
        /* Make invisible starting at 'version' (negative visibility encodes
         * "invisible from this version on"). */
        atomic_store_relaxed(&rule->visibility, -version);
    } else {
        /* Rule has not yet been visible to lookups, make invisible in all
         * versions. */
        atomic_store_relaxed(&rule->visibility, 0);
    }
}
334 | ||
/* This undoes the change made by cls_rule_make_invisible_in_version().
 *
 * 'rule' must be in a classifier. */
void
cls_rule_restore_visibility(const struct cls_rule *rule)
{
    /* Restore visibility to the rule's own version. */
    atomic_store_relaxed(&rule->cls_match->visibility, rule->version);
}
343 | ||
/* Return true if 'rule' is visible in 'version'.
 *
 * 'rule' must be in a classifier. */
bool
cls_rule_visible_in_version(const struct cls_rule *rule, long long version)
{
    return cls_match_visible_in_version(rule->cls_match, version);
}
064af421 BP |
/* Initializes 'cls' as a classifier that initially contains no classification
 * rules.  'flow_segments', if nonnull, is a FLOW_U64S-bounded, 0-terminated
 * array of flow offsets used to set up staged lookup indices (at most
 * CLS_MAX_INDICES of them are used). */
void
classifier_init(struct classifier *cls, const uint8_t *flow_segments)
{
    cls->n_rules = 0;
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);
    cmap_init(&cls->partitions);
    cls->n_flow_segments = 0;
    if (flow_segments) {
        /* Copy segment boundaries until the limit or an out-of-range
         * value is reached. */
        while (cls->n_flow_segments < CLS_MAX_INDICES
               && *flow_segments < FLOW_U64S) {
            cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
        }
    }
    cls->n_tries = 0;
    for (int i = 0; i < CLS_MAX_TRIES; i++) {
        trie_init(cls, i, NULL);
    }
    cls->publish = true;
}
375 | ||
/* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
 * caller's responsibility.
 * May only be called after all the readers have been terminated. */
void
classifier_destroy(struct classifier *cls)
{
    if (cls) {
        struct cls_partition *partition;
        struct cls_subtable *subtable;
        int i;

        for (i = 0; i < cls->n_tries; i++) {
            trie_destroy(&cls->tries[i].root);
        }

        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            destroy_subtable(cls, subtable);
        }
        cmap_destroy(&cls->subtables_map);

        /* Partitions are freed only after an RCU grace period, in case
         * lingering readers still reference them. */
        CMAP_FOR_EACH (partition, cmap_node, &cls->partitions) {
            ovsrcu_postpone(free, partition);
        }
        cmap_destroy(&cls->partitions);

        pvector_destroy(&cls->subtables);
    }
}
404 | ||
/* Set the fields for which prefix lookup should be performed.
 * Returns true if the trie configuration changed, false otherwise. */
bool
classifier_set_prefix_fields(struct classifier *cls,
                             const enum mf_field_id *trie_fields,
                             unsigned int n_fields)
{
    const struct mf_field * new_fields[CLS_MAX_TRIES];
    struct mf_bitmap fields = MF_BITMAP_INITIALIZER;
    int i, n_tries = 0;
    bool changed = false;

    for (i = 0; i < n_fields && n_tries < CLS_MAX_TRIES; i++) {
        const struct mf_field *field = mf_from_id(trie_fields[i]);
        if (field->flow_be32ofs < 0 || field->n_bits % 32) {
            /* Incompatible field.  This is the only place where we
             * enforce these requirements, but the rest of the trie code
             * depends on the flow_be32ofs to be non-negative and the
             * field length to be a multiple of 32 bits. */
            continue;
        }

        if (bitmap_is_set(fields.bm, trie_fields[i])) {
            /* Duplicate field, there is no need to build more than
             * one index for any one field. */
            continue;
        }
        bitmap_set1(fields.bm, trie_fields[i]);

        /* NULL in 'new_fields' means "keep the existing trie for this
         * slot"; non-NULL means it must be (re)built. */
        new_fields[n_tries] = NULL;
        if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) {
            new_fields[n_tries] = field;
            changed = true;
        }
        n_tries++;
    }

    if (changed || n_tries < cls->n_tries) {
        struct cls_subtable *subtable;

        /* Trie configuration needs to change.  Disable trie lookups
         * for the tries that are changing and wait all the current readers
         * with the old configuration to be done. */
        changed = false;
        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            for (i = 0; i < cls->n_tries; i++) {
                if ((i < n_tries && new_fields[i]) || i >= n_tries) {
                    if (subtable->trie_plen[i]) {
                        subtable->trie_plen[i] = 0;
                        changed = true;
                    }
                }
            }
        }
        /* Synchronize if any readers were using tries.  The readers may
         * temporarily function without the trie lookup based optimizations. */
        if (changed) {
            /* ovsrcu_synchronize() functions as a memory barrier, so it does
             * not matter that subtable->trie_plen is not atomic. */
            ovsrcu_synchronize();
        }

        /* Now set up the tries. */
        for (i = 0; i < n_tries; i++) {
            if (new_fields[i]) {
                trie_init(cls, i, new_fields[i]);
            }
        }
        /* Destroy the rest, if any. */
        for (; i < cls->n_tries; i++) {
            trie_init(cls, i, NULL);
        }

        cls->n_tries = n_tries;
        return true;
    }

    return false; /* No change. */
}
483 | ||
/* (Re)initializes trie slot 'trie_idx' of 'cls' for 'field'.  A NULL 'field'
 * disables the trie.  Existing rules from all subtables are inserted into the
 * new trie before readers are allowed to use it. */
static void
trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
{
    struct cls_trie *trie = &cls->tries[trie_idx];
    struct cls_subtable *subtable;

    if (trie_idx < cls->n_tries) {
        /* Slot was in use; tear down the old trie first. */
        trie_destroy(&trie->root);
    } else {
        ovsrcu_set_hidden(&trie->root, NULL);
    }
    trie->field = field;

    /* Add existing rules to the new trie. */
    CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
        unsigned int plen;

        plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
        if (plen) {
            struct cls_match *head;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                trie_insert(trie, head->cls_rule, plen);
            }
        }
        /* Initialize subtable's prefix length on this field.  This will
         * allow readers to use the trie. */
        atomic_thread_fence(memory_order_release);
        subtable->trie_plen[trie_idx] = plen;
    }
}
515 | ||
5f0476ce JR |
516 | /* Returns true if 'cls' contains no classification rules, false otherwise. |
517 | * Checking the cmap requires no locking. */ | |
064af421 BP |
518 | bool |
519 | classifier_is_empty(const struct classifier *cls) | |
520 | { | |
e48eccd1 | 521 | return cmap_is_empty(&cls->subtables_map); |
064af421 BP |
522 | } |
523 | ||
/* Returns the number of rules in 'cls'. */
int
classifier_count(const struct classifier *cls)
{
    /* n_rules is an int, so in the presence of concurrent writers this will
     * return either the old or a new value. */
    return cls->n_rules;
}
532 | ||
c906cedf | 533 | static uint32_t |
d70e8c28 | 534 | hash_metadata(ovs_be64 metadata) |
c906cedf | 535 | { |
d70e8c28 | 536 | return hash_uint64((OVS_FORCE uint64_t) metadata); |
c906cedf BP |
537 | } |
538 | ||
/* Returns the partition for 'metadata' in 'cls', or NULL if none exists.
 * 'hash' must be hash_metadata(metadata). */
static struct cls_partition *
find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash)
{
    struct cls_partition *partition;

    CMAP_FOR_EACH_WITH_HASH (partition, cmap_node, hash, &cls->partitions) {
        if (partition->metadata == metadata) {
            return partition;
        }
    }

    return NULL;
}
552 | ||
/* Returns the partition for 'metadata' in 'cls', creating it if it does not
 * already exist, and records 'subtable''s tag in the partition's tag
 * tracker. */
static struct cls_partition *
create_partition(struct classifier *cls, struct cls_subtable *subtable,
                 ovs_be64 metadata)
{
    uint32_t hash = hash_metadata(metadata);
    struct cls_partition *partition = find_partition(cls, metadata, hash);
    if (!partition) {
        partition = xmalloc(sizeof *partition);
        partition->metadata = metadata;
        partition->tags = 0;
        tag_tracker_init(&partition->tracker);
        cmap_insert(&cls->partitions, &partition->cmap_node, hash);
    }
    /* Track the subtable's tag whether the partition is new or existing. */
    tag_tracker_add(&partition->tracker, &partition->tags, subtable->tag);
    return partition;
}
569 | ||
69d6040e JR |
570 | static inline ovs_be32 minimatch_get_ports(const struct minimatch *match) |
571 | { | |
572 | /* Could optimize to use the same map if needed for fast path. */ | |
573 | return MINIFLOW_GET_BE32(&match->flow, tp_src) | |
574 | & MINIFLOW_GET_BE32(&match->mask.masks, tp_src); | |
575 | } | |
576 | ||
/* Replaces 'head' with 'new' as the head rule in 'subtable''s hash maps:
 * every staged index and the main 'rules' cmap.  'hash' and 'ihash[]' are the
 * precomputed hashes for the main map and each index, respectively. */
static void
subtable_replace_head_rule(struct classifier *cls OVS_UNUSED,
                           struct cls_subtable *subtable,
                           struct cls_match *head, struct cls_match *new,
                           uint32_t hash, uint32_t ihash[CLS_MAX_INDICES])
{
    /* Rule's data is already in the tries. */

    new->partition = head->partition; /* Steal partition, if any. */
    head->partition = NULL;

    for (int i = 0; i < subtable->n_indices; i++) {
        cmap_replace(&subtable->indices[i], &head->index_nodes[i],
                     &new->index_nodes[i], ihash[i]);
    }
    cmap_replace(&subtable->rules, &head->cmap_node, &new->cmap_node, hash);
}
594 | ||
b5d97350 BP |
595 | /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller |
596 | * must not modify or free it. | |
064af421 BP |
597 | * |
598 | * If 'cls' already contains an identical rule (including wildcards, values of | |
599 | * fixed fields, and priority), replaces the old rule by 'rule' and returns the | |
600 | * rule that was replaced. The caller takes ownership of the returned rule and | |
f47eef15 JR |
601 | * is thus responsible for destroying it with cls_rule_destroy(), after RCU |
602 | * grace period has passed (see ovsrcu_postpone()). | |
064af421 BP |
603 | * |
604 | * Returns NULL if 'cls' does not contain a rule with an identical key, after | |
605 | * inserting the new rule. In this case, no rules are displaced by the new | |
606 | * rule, even rules that cannot have any effect because the new rule matches a | |
886af6ea JR |
607 | * superset of their flows and has higher priority. |
608 | */ | |
dfea28b3 | 609 | const struct cls_rule * |
18080541 BP |
610 | classifier_replace(struct classifier *cls, const struct cls_rule *rule, |
611 | const struct cls_conjunction *conjs, size_t n_conjs) | |
064af421 | 612 | { |
2b7b1427 | 613 | struct cls_match *new; |
03868246 | 614 | struct cls_subtable *subtable; |
886af6ea | 615 | uint32_t ihash[CLS_MAX_INDICES]; |
d70e8c28 | 616 | uint8_t prev_be64ofs = 0; |
886af6ea | 617 | struct cls_match *head; |
f47eef15 | 618 | size_t n_rules = 0; |
886af6ea JR |
619 | uint32_t basis; |
620 | uint32_t hash; | |
621 | int i; | |
b5d97350 | 622 | |
2b7b1427 JR |
623 | ovs_assert(rule->version > 0); |
624 | ||
625 | /* 'new' is initially invisible to lookups. */ | |
626 | new = cls_match_alloc(rule, conjs, n_conjs); | |
627 | ||
d0999f1b | 628 | CONST_CAST(struct cls_rule *, rule)->cls_match = new; |
f47eef15 | 629 | |
03868246 JR |
630 | subtable = find_subtable(cls, &rule->match.mask); |
631 | if (!subtable) { | |
632 | subtable = insert_subtable(cls, &rule->match.mask); | |
b5d97350 BP |
633 | } |
634 | ||
f47eef15 | 635 | /* Compute hashes in segments. */ |
886af6ea JR |
636 | basis = 0; |
637 | for (i = 0; i < subtable->n_indices; i++) { | |
d70e8c28 | 638 | ihash[i] = minimatch_hash_range(&rule->match, prev_be64ofs, |
886af6ea | 639 | subtable->index_ofs[i], &basis); |
d70e8c28 | 640 | prev_be64ofs = subtable->index_ofs[i]; |
886af6ea | 641 | } |
d70e8c28 | 642 | hash = minimatch_hash_range(&rule->match, prev_be64ofs, FLOW_U64S, &basis); |
f47eef15 | 643 | |
886af6ea JR |
644 | head = find_equal(subtable, &rule->match.flow, hash); |
645 | if (!head) { | |
886af6ea JR |
646 | /* Add rule to tries. |
647 | * | |
648 | * Concurrent readers might miss seeing the rule until this update, | |
649 | * which might require being fixed up by revalidation later. */ | |
f47eef15 | 650 | for (i = 0; i < cls->n_tries; i++) { |
13751fd8 JR |
651 | if (subtable->trie_plen[i]) { |
652 | trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]); | |
653 | } | |
654 | } | |
69d6040e | 655 | |
886af6ea | 656 | /* Add rule to ports trie. */ |
69d6040e JR |
657 | if (subtable->ports_mask_len) { |
658 | /* We mask the value to be inserted to always have the wildcarded | |
659 | * bits in known (zero) state, so we can include them in comparison | |
660 | * and they will always match (== their original value does not | |
661 | * matter). */ | |
662 | ovs_be32 masked_ports = minimatch_get_ports(&rule->match); | |
663 | ||
664 | trie_insert_prefix(&subtable->ports_trie, &masked_ports, | |
665 | subtable->ports_mask_len); | |
666 | } | |
886af6ea | 667 | |
f47eef15 | 668 | /* Add rule to partitions. |
886af6ea | 669 | * |
f47eef15 JR |
670 | * Concurrent readers might miss seeing the rule until this update, |
671 | * which might require being fixed up by revalidation later. */ | |
672 | new->partition = NULL; | |
673 | if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) { | |
674 | ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow); | |
675 | ||
676 | new->partition = create_partition(cls, subtable, metadata); | |
677 | } | |
678 | ||
f47eef15 JR |
679 | /* Add new node to segment indices. |
680 | * | |
681 | * Readers may find the rule in the indices before the rule is visible | |
682 | * in the subtables 'rules' map. This may result in us losing the | |
683 | * opportunity to quit lookups earlier, resulting in sub-optimal | |
684 | * wildcarding. This will be fixed later by revalidation (always | |
685 | * scheduled after flow table changes). */ | |
886af6ea | 686 | for (i = 0; i < subtable->n_indices; i++) { |
f47eef15 JR |
687 | cmap_insert(&subtable->indices[i], &new->index_nodes[i], ihash[i]); |
688 | } | |
689 | n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash); | |
690 | } else { /* Equal rules exist in the classifier already. */ | |
8f8023b3 | 691 | struct cls_match *prev, *iter; |
f47eef15 JR |
692 | |
693 | /* Scan the list for the insertion point that will keep the list in | |
2b7b1427 JR |
694 | * order of decreasing priority. Insert after rules marked invisible |
695 | * in any version of the same priority. */ | |
8f8023b3 | 696 | FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) { |
186120da JR |
697 | if (rule->priority > iter->priority |
698 | || (rule->priority == iter->priority | |
2b7b1427 | 699 | && !cls_match_is_eventually_invisible(iter))) { |
f47eef15 JR |
700 | break; |
701 | } | |
886af6ea JR |
702 | } |
703 | ||
8f8023b3 JR |
704 | /* Replace 'iter' with 'new' or insert 'new' between 'prev' and |
705 | * 'iter'. */ | |
f47eef15 JR |
706 | if (iter) { |
707 | struct cls_rule *old; | |
708 | ||
709 | if (rule->priority == iter->priority) { | |
8f8023b3 | 710 | cls_match_replace(prev, iter, new); |
f47eef15 JR |
711 | old = CONST_CAST(struct cls_rule *, iter->cls_rule); |
712 | } else { | |
8f8023b3 | 713 | cls_match_insert(prev, iter, new); |
f47eef15 JR |
714 | old = NULL; |
715 | } | |
716 | ||
717 | /* Replace the existing head in data structures, if rule is the new | |
718 | * head. */ | |
719 | if (iter == head) { | |
720 | subtable_replace_head_rule(cls, subtable, head, new, hash, | |
721 | ihash); | |
722 | } | |
723 | ||
724 | if (old) { | |
18080541 BP |
725 | struct cls_conjunction_set *conj_set; |
726 | ||
727 | conj_set = ovsrcu_get_protected(struct cls_conjunction_set *, | |
728 | &iter->conj_set); | |
729 | if (conj_set) { | |
730 | ovsrcu_postpone(free, conj_set); | |
731 | } | |
732 | ||
8f8023b3 | 733 | ovsrcu_postpone(cls_match_free_cb, iter); |
f47eef15 | 734 | old->cls_match = NULL; |
f2c21402 | 735 | |
f47eef15 JR |
736 | /* No change in subtable's max priority or max count. */ |
737 | ||
2b7b1427 JR |
738 | /* Make 'new' visible to lookups in the appropriate version. */ |
739 | cls_match_set_visibility(new, rule->version); | |
fc02ecc7 JR |
740 | |
741 | /* Make rule visible to iterators (immediately). */ | |
d0999f1b JR |
742 | rculist_replace(CONST_CAST(struct rculist *, &rule->node), |
743 | &old->node); | |
de4ad4a2 | 744 | |
f47eef15 JR |
745 | /* Return displaced rule. Caller is responsible for keeping it |
746 | * around until all threads quiesce. */ | |
f47eef15 JR |
747 | return old; |
748 | } | |
749 | } else { | |
8f8023b3 JR |
750 | /* 'new' is new node after 'prev' */ |
751 | cls_match_insert(prev, iter, new); | |
f47eef15 | 752 | } |
064af421 | 753 | } |
886af6ea | 754 | |
2b7b1427 JR |
755 | /* Make 'new' visible to lookups in the appropriate version. */ |
756 | cls_match_set_visibility(new, rule->version); | |
fc02ecc7 JR |
757 | |
758 | /* Make rule visible to iterators (immediately). */ | |
d0999f1b JR |
759 | rculist_push_back(&subtable->rules_list, |
760 | CONST_CAST(struct rculist *, &rule->node)); | |
de4ad4a2 | 761 | |
f47eef15 JR |
762 | /* Rule was added, not replaced. Update 'subtable's 'max_priority' and |
763 | * 'max_count', if necessary. | |
764 | * | |
765 | * The rule was already inserted, but concurrent readers may not see the | |
766 | * rule yet as the subtables vector is not updated yet. This will have to | |
767 | * be fixed by revalidation later. */ | |
768 | if (n_rules == 1) { | |
769 | subtable->max_priority = rule->priority; | |
770 | subtable->max_count = 1; | |
771 | pvector_insert(&cls->subtables, subtable, rule->priority); | |
772 | } else if (rule->priority == subtable->max_priority) { | |
773 | ++subtable->max_count; | |
774 | } else if (rule->priority > subtable->max_priority) { | |
775 | subtable->max_priority = rule->priority; | |
776 | subtable->max_count = 1; | |
777 | pvector_change_priority(&cls->subtables, subtable, rule->priority); | |
778 | } | |
779 | ||
780 | /* Nothing was replaced. */ | |
781 | cls->n_rules++; | |
802f84ff JR |
782 | |
783 | if (cls->publish) { | |
784 | pvector_publish(&cls->subtables); | |
785 | } | |
786 | ||
f47eef15 | 787 | return NULL; |
064af421 BP |
788 | } |
789 | ||
08944c1d BP |
790 | /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller |
791 | * must not modify or free it. | |
792 | * | |
793 | * 'cls' must not contain an identical rule (including wildcards, values of | |
794 | * fixed fields, and priority). Use classifier_find_rule_exactly() to find | |
795 | * such a rule. */ | |
796 | void | |
18080541 BP |
797 | classifier_insert(struct classifier *cls, const struct cls_rule *rule, |
798 | const struct cls_conjunction conj[], size_t n_conj) | |
08944c1d | 799 | { |
18080541 BP |
800 | const struct cls_rule *displaced_rule |
801 | = classifier_replace(cls, rule, conj, n_conj); | |
cb22974d | 802 | ovs_assert(!displaced_rule); |
08944c1d BP |
803 | } |
804 | ||
/* Removes 'rule' from 'cls'.  It is the caller's responsibility to destroy
 * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
 * resides, etc., as necessary.
 *
 * Does nothing if 'rule' has been already removed, or was never inserted.
 *
 * Returns the removed rule, or NULL, if it was already removed.
 */
const struct cls_rule *
classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule)
{
    struct cls_match *rule, *prev, *next, *head;
    struct cls_partition *partition;
    struct cls_conjunction_set *conj_set;
    struct cls_subtable *subtable;
    int i;
    uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
    uint8_t prev_be64ofs = 0;
    size_t n_rules;

    rule = cls_rule->cls_match;
    if (!rule) {
        /* Already removed (or never inserted). */
        return NULL;
    }
    /* Mark as removed. */
    CONST_CAST(struct cls_rule *, cls_rule)->cls_match = NULL;

    /* Remove 'cls_rule' from the subtable's rules list.  This makes the rule
     * invisible to iterators immediately; lookups may still find the
     * cls_match until the RCU-deferred free below. */
    rculist_remove(CONST_CAST(struct rculist *, &cls_rule->node));

    subtable = find_subtable(cls, &cls_rule->match.mask);
    ovs_assert(subtable);

    /* Recompute the staged hashes used at insertion time: one per segment
     * index, plus the full-flow hash for the subtable's 'rules' cmap. */
    for (i = 0; i < subtable->n_indices; i++) {
        ihash[i] = minimatch_hash_range(&cls_rule->match, prev_be64ofs,
                                        subtable->index_ofs[i], &basis);
        prev_be64ofs = subtable->index_ofs[i];
    }
    hash = minimatch_hash_range(&cls_rule->match, prev_be64ofs, FLOW_U64S,
                                &basis);

    head = find_equal(subtable, &cls_rule->match.flow, hash);

    /* Check if the rule is not the head rule. */
    if (rule != head) {
        struct cls_match *iter;

        /* Not the head rule, but potentially one with the same priority. */
        /* Remove from the list of equal rules. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
            if (rule == iter) {
                break;
            }
        }
        ovs_assert(iter == rule);

        cls_match_remove(prev, rule);

        /* A non-head rule is not in the indices or 'rules' cmap; only the
         * subtable priority bookkeeping may need updating. */
        goto check_priority;
    }

    /* 'rule' is the head rule.  Check if there is another rule to
     * replace 'rule' in the data structures. */
    next = cls_match_next_protected(rule);
    if (next) {
        subtable_replace_head_rule(cls, subtable, rule, next, hash, ihash);
        goto check_priority;
    }

    /* 'rule' is last of the kind in the classifier, must remove from all the
     * data structures. */

    if (subtable->ports_mask_len) {
        ovs_be32 masked_ports = minimatch_get_ports(&cls_rule->match);

        trie_remove_prefix(&subtable->ports_trie,
                           &masked_ports, subtable->ports_mask_len);
    }
    for (i = 0; i < cls->n_tries; i++) {
        if (subtable->trie_plen[i]) {
            trie_remove(&cls->tries[i], cls_rule, subtable->trie_plen[i]);
        }
    }

    /* Remove rule node from indices. */
    for (i = 0; i < subtable->n_indices; i++) {
        cmap_remove(&subtable->indices[i], &rule->index_nodes[i], ihash[i]);
    }
    n_rules = cmap_remove(&subtable->rules, &rule->cmap_node, hash);

    /* Drop this subtable's reference on the metadata partition, freeing the
     * partition (RCU-deferred) when no subtable references it any more. */
    partition = rule->partition;
    if (partition) {
        tag_tracker_subtract(&partition->tracker, &partition->tags,
                             subtable->tag);
        if (!partition->tags) {
            cmap_remove(&cls->partitions, &partition->cmap_node,
                        hash_metadata(partition->metadata));
            ovsrcu_postpone(free, partition);
        }
    }

    if (n_rules == 0) {
        destroy_subtable(cls, subtable);
    } else {
check_priority:
        if (subtable->max_priority == rule->priority
            && --subtable->max_count == 0) {
            /* Find the new 'max_priority' and 'max_count'. */
            int max_priority = INT_MIN;
            struct cls_match *head;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                if (head->priority > max_priority) {
                    max_priority = head->priority;
                    subtable->max_count = 1;
                } else if (head->priority == max_priority) {
                    ++subtable->max_count;
                }
            }
            subtable->max_priority = max_priority;
            pvector_change_priority(&cls->subtables, subtable, max_priority);
        }
    }

    if (cls->publish) {
        pvector_publish(&cls->subtables);
    }

    /* Free the rule (deferred until all current readers quiesce). */
    conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
                                    &rule->conj_set);
    if (conj_set) {
        ovsrcu_postpone(free, conj_set);
    }
    ovsrcu_postpone(cls_match_free_cb, rule);
    cls->n_rules--;

    return cls_rule;
}
944 | ||
/* Prefix tree context.  Valid when 'lookup_done' is true.  Can skip all
 * subtables which have a prefix match on the trie field, but whose prefix
 * length is not indicated in 'match_plens'.  For example, a subtable that
 * has a 8-bit trie field prefix match can be skipped if
 * !be_get_bit_at(&match_plens, 8 - 1).  If skipped, 'maskbits' prefix bits
 * must be unwildcarded to make datapath flow only match packets it should. */
struct trie_ctx {
    const struct cls_trie *trie;   /* The trie this context caches results
                                    * for. */
    bool lookup_done;              /* Status of the lookup: results below are
                                    * valid only when true. */
    uint8_t be32ofs;               /* U32 offset of the field in question. */
    unsigned int maskbits;         /* Prefix length needed to avoid false
                                    * matches. */
    union mf_value match_plens;    /* Bitmask of prefix lengths with possible
                                    * matches. */
};
959 | ||
960 | static void | |
961 | trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) | |
962 | { | |
963 | ctx->trie = trie; | |
964 | ctx->be32ofs = trie->field->flow_be32ofs; | |
965 | ctx->lookup_done = false; | |
966 | } | |
967 | ||
/* Tracks how many clauses of one conjunctive flow have been matched during a
 * single lookup.  Lives in a per-lookup hmap keyed on 'id'. */
struct conjunctive_match {
    struct hmap_node hmap_node; /* In the per-lookup 'matches' hmap. */
    uint32_t id;                /* Conjunction id shared by the clauses. */
    uint64_t clauses;           /* Bit i set when clause i has matched; bits
                                 * at and above 'n_clauses' are pre-set, so a
                                 * value of UINT64_MAX means "all clauses
                                 * matched". */
};
973 | ||
/* Returns the conjunctive_match with the given 'id' in 'matches', or NULL if
 * none exists.  'hash' must be hash_int(id, 0), precomputed by the caller. */
static struct conjunctive_match *
find_conjunctive_match__(struct hmap *matches, uint64_t id, uint32_t hash)
{
    struct conjunctive_match *m;

    HMAP_FOR_EACH_IN_BUCKET (m, hmap_node, hash, matches) {
        if (m->id == id) {
            return m;
        }
    }
    return NULL;
}
986 | ||
/* Folds the conjunction clauses in 'set' into 'matches' and reports whether
 * any conjunction thereby becomes fully satisfied.
 *
 * 'max_n_clauses' is the number of soft matches at the current priority;
 * a conjunction needing more clauses than that cannot possibly complete, so
 * it is skipped.  'cm_stubs'/'n_cm_stubs' provide stack storage to avoid
 * malloc() in the common case of few conjunctions.
 *
 * On success stores the satisfied conjunction's id in '*idp' and returns
 * true; otherwise returns false. */
static bool
find_conjunctive_match(const struct cls_conjunction_set *set,
                       unsigned int max_n_clauses, struct hmap *matches,
                       struct conjunctive_match *cm_stubs, size_t n_cm_stubs,
                       uint32_t *idp)
{
    const struct cls_conjunction *c;

    if (max_n_clauses < set->min_n_clauses) {
        /* Even the smallest conjunction in 'set' needs more clauses than
         * there are candidate matches. */
        return false;
    }

    for (c = set->conj; c < &set->conj[set->n]; c++) {
        struct conjunctive_match *cm;
        uint32_t hash;

        if (c->n_clauses > max_n_clauses) {
            continue;
        }

        hash = hash_int(c->id, 0);
        cm = find_conjunctive_match__(matches, c->id, hash);
        if (!cm) {
            size_t n = hmap_count(matches);

            /* Use a stack stub while they last; otherwise heap-allocate. */
            cm = n < n_cm_stubs ? &cm_stubs[n] : xmalloc(sizeof *cm);
            hmap_insert(matches, &cm->hmap_node, hash);
            cm->id = c->id;
            /* Pre-set all bits except the low 'n_clauses' ones, so that
             * 'clauses' reaches UINT64_MAX exactly when every clause has
             * matched.  (The "& 63" avoids UB for n_clauses == 64.) */
            cm->clauses = UINT64_MAX << (c->n_clauses & 63);
        }
        cm->clauses |= UINT64_C(1) << c->clause;
        if (cm->clauses == UINT64_MAX) {
            *idp = cm->id;
            return true;
        }
    }
    return false;
}
1025 | ||
/* Releases the storage behind 'matches' built by find_conjunctive_match().
 * Entries located inside the caller's 'cm_stubs' stack array are not freed;
 * only heap-allocated overflow entries are.  If everything fit in the stubs
 * (count <= n_cm_stubs), nothing was heap-allocated at all. */
static void
free_conjunctive_matches(struct hmap *matches,
                         struct conjunctive_match *cm_stubs, size_t n_cm_stubs)
{
    if (hmap_count(matches) > n_cm_stubs) {
        struct conjunctive_match *cm, *next;

        HMAP_FOR_EACH_SAFE (cm, next, hmap_node, matches) {
            if (!(cm >= cm_stubs && cm < &cm_stubs[n_cm_stubs])) {
                free(cm);
            }
        }
    }
    hmap_destroy(matches);
}
1041 | ||
/* Like classifier_lookup(), except that support for conjunctive matches can be
 * configured with 'allow_conjunctive_matches'.  That feature is not exposed
 * externally because turning off conjunctive matches is only useful to avoid
 * recursion within this function itself.
 *
 * 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning. */
static const struct cls_rule *
classifier_lookup__(const struct classifier *cls, long long version,
                    struct flow *flow, struct flow_wildcards *wc,
                    bool allow_conjunctive_matches)
{
    const struct cls_partition *partition;
    struct trie_ctx trie_ctx[CLS_MAX_TRIES];
    const struct cls_match *match;
    tag_type tags;

    /* Highest-priority flow in 'cls' that certainly matches 'flow'. */
    const struct cls_match *hard = NULL;
    int hard_pri = INT_MIN;     /* hard ? hard->priority : INT_MIN. */

    /* Highest-priority conjunctive flows in 'cls' matching 'flow'.  Since
     * these are (components of) conjunctive flows, we can only know whether
     * the full conjunctive flow matches after seeing multiple of them.  Thus,
     * we refer to these as "soft matches". */
    struct cls_conjunction_set *soft_stub[64];
    struct cls_conjunction_set **soft = soft_stub;
    size_t n_soft = 0, allocated_soft = ARRAY_SIZE(soft_stub);
    int soft_pri = INT_MIN;     /* n_soft ? MAX(soft[*]->priority) : INT_MIN. */

    /* Synchronize for cls->n_tries and subtable->trie_plen.  They can change
     * when table configuration changes, which happens typically only on
     * startup. */
    atomic_thread_fence(memory_order_acquire);

    /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
     * then 'flow' cannot possibly match in 'subtable':
     *
     *     - If flow->metadata maps to a given 'partition', then we can use
     *       'tags' for 'partition->tags'.
     *
     *     - If flow->metadata has no partition, then no rule in 'cls' has an
     *       exact-match for flow->metadata.  That means that we don't need to
     *       search any subtable that includes flow->metadata in its mask.
     *
     * In either case, we always need to search any cls_subtables that do not
     * include flow->metadata in its mask.  One way to do that would be to
     * check the "cls_subtable"s explicitly for that, but that would require an
     * extra branch per subtable.  Instead, we mark such a cls_subtable's
     * 'tags' as TAG_ALL and make sure that 'tags' is never empty.  This means
     * that 'tags' always intersects such a cls_subtable's 'tags', so we don't
     * need a special case.
     */
    partition = (cmap_is_empty(&cls->partitions)
                 ? NULL
                 : find_partition(cls, flow->metadata,
                                  hash_metadata(flow->metadata)));
    tags = partition ? partition->tags : TAG_ARBITRARY;

    /* Initialize trie contexts for find_match_wc(). */
    for (int i = 0; i < cls->n_tries; i++) {
        trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
    }

    /* Main loop.  Subtables are visited in descending max-priority order, so
     * iteration can stop once remaining subtables cannot beat 'hard_pri'. */
    struct cls_subtable *subtable;
    PVECTOR_FOR_EACH_PRIORITY (subtable, hard_pri, 2, sizeof *subtable,
                               &cls->subtables) {
        struct cls_conjunction_set *conj_set;

        /* Skip subtables not in our partition. */
        if (!tag_intersects(tags, subtable->tag)) {
            continue;
        }

        /* Skip subtables with no match, or where the match is lower-priority
         * than some certain match we've already found. */
        match = find_match_wc(subtable, version, flow, trie_ctx, cls->n_tries,
                              wc);
        if (!match || match->priority <= hard_pri) {
            continue;
        }

        conj_set = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set);
        if (!conj_set) {
            /* 'match' isn't part of a conjunctive match.  It's the best
             * certain match we've got so far, since we know that it's
             * higher-priority than hard_pri.
             *
             * (There might be a higher-priority conjunctive match.  We can't
             * tell yet.) */
            hard = match;
            hard_pri = hard->priority;
        } else if (allow_conjunctive_matches) {
            /* 'match' is part of a conjunctive match.  Add it to the list,
             * growing from the stack stub to the heap if needed. */
            if (OVS_UNLIKELY(n_soft >= allocated_soft)) {
                struct cls_conjunction_set **old_soft = soft;

                allocated_soft *= 2;
                soft = xmalloc(allocated_soft * sizeof *soft);
                memcpy(soft, old_soft, n_soft * sizeof *soft);
                if (old_soft != soft_stub) {
                    free(old_soft);
                }
            }
            soft[n_soft++] = conj_set;

            /* Keep track of the highest-priority soft match. */
            if (soft_pri < match->priority) {
                soft_pri = match->priority;
            }
        }
    }

    /* In the common case, at this point we have no soft matches and we can
     * return immediately.  (We do the same thing if we have potential soft
     * matches but none of them are higher-priority than our hard match.) */
    if (hard_pri >= soft_pri) {
        if (soft != soft_stub) {
            free(soft);
        }
        return hard ? hard->cls_rule : NULL;
    }

    /* At this point, we have some soft matches.  We might also have a hard
     * match; if so, its priority is lower than the highest-priority soft
     * match. */

    /* Soft match loop.
     *
     * Check whether soft matches are real matches. */
    for (;;) {
        /* Delete soft matches that are null.  This only happens in second and
         * subsequent iterations of the soft match loop, when we drop back from
         * a high-priority soft match to a lower-priority one.
         *
         * Also, delete soft matches whose priority is less than or equal to
         * the hard match's priority.  In the first iteration of the soft
         * match, these can be in 'soft' because the earlier main loop found
         * the soft match before the hard match.  In second and later iteration
         * of the soft match loop, these can be in 'soft' because we dropped
         * back from a high-priority soft match to a lower-priority soft match.
         *
         * It is tempting to delete soft matches that cannot be satisfied
         * because there are fewer soft matches than required to satisfy any of
         * their conjunctions, but we cannot do that because there might be
         * lower priority soft or hard matches with otherwise identical
         * matches.  (We could special case those here, but there's no
         * need--we'll do so at the bottom of the soft match loop anyway and
         * this duplicates less code.)
         *
         * It's also tempting to break out of the soft match loop if 'n_soft ==
         * 1' but that would also miss lower-priority hard matches.  We could
         * special case that also but again there's no need. */
        for (int i = 0; i < n_soft; ) {
            if (!soft[i] || soft[i]->priority <= hard_pri) {
                soft[i] = soft[--n_soft];
            } else {
                i++;
            }
        }
        if (!n_soft) {
            break;
        }

        /* Find the highest priority among the soft matches.  (We know this
         * must be higher than the hard match's priority; otherwise we would
         * have deleted all of the soft matches in the previous loop.)  Count
         * the number of soft matches that have that priority. */
        soft_pri = INT_MIN;
        int n_soft_pri = 0;
        for (int i = 0; i < n_soft; i++) {
            if (soft[i]->priority > soft_pri) {
                soft_pri = soft[i]->priority;
                n_soft_pri = 1;
            } else if (soft[i]->priority == soft_pri) {
                n_soft_pri++;
            }
        }
        ovs_assert(soft_pri > hard_pri);

        /* Look for a real match among the highest-priority soft matches.
         *
         * It's unusual to have many conjunctive matches, so we use stubs to
         * avoid calling malloc() in the common case.  An hmap has a built-in
         * stub for up to 2 hmap_nodes; possibly, we would benefit a variant
         * with a bigger stub. */
        struct conjunctive_match cm_stubs[16];
        struct hmap matches;

        hmap_init(&matches);
        for (int i = 0; i < n_soft; i++) {
            uint32_t id;

            if (soft[i]->priority == soft_pri
                && find_conjunctive_match(soft[i], n_soft_pri, &matches,
                                          cm_stubs, ARRAY_SIZE(cm_stubs),
                                          &id)) {
                /* A conjunction is fully satisfied: re-run the lookup with
                 * flow->conj_id set, with conjunctive matching disabled to
                 * bound the recursion to one level.  'flow' is restored
                 * before any return. */
                uint32_t saved_conj_id = flow->conj_id;
                const struct cls_rule *rule;

                flow->conj_id = id;
                rule = classifier_lookup__(cls, version, flow, wc, false);
                flow->conj_id = saved_conj_id;

                if (rule) {
                    free_conjunctive_matches(&matches,
                                             cm_stubs, ARRAY_SIZE(cm_stubs));
                    if (soft != soft_stub) {
                        free(soft);
                    }
                    return rule;
                }
            }
        }
        free_conjunctive_matches(&matches, cm_stubs, ARRAY_SIZE(cm_stubs));

        /* There's no real match among the highest-priority soft matches.
         * However, if any of those soft matches has a lower-priority but
         * otherwise identical flow match, then we need to consider those for
         * soft or hard matches.
         *
         * The next iteration of the soft match loop will delete any null
         * pointers we put into 'soft' (and some others too). */
        for (int i = 0; i < n_soft; i++) {
            if (soft[i]->priority != soft_pri) {
                continue;
            }

            /* Find next-lower-priority flow with identical flow match. */
            match = next_visible_rule_in_list(soft[i]->match, version);
            if (match) {
                soft[i] = ovsrcu_get(struct cls_conjunction_set *,
                                     &match->conj_set);
                if (!soft[i]) {
                    /* The flow is a hard match; don't treat as a soft
                     * match. */
                    if (match->priority > hard_pri) {
                        hard = match;
                        hard_pri = hard->priority;
                    }
                }
            } else {
                /* No such lower-priority flow (probably the common case). */
                soft[i] = NULL;
            }
        }
    }

    if (soft != soft_stub) {
        free(soft);
    }
    return hard ? hard->cls_rule : NULL;
}
1296 | ||
2b7b1427 JR |
1297 | /* Finds and returns the highest-priority rule in 'cls' that matches 'flow' and |
1298 | * that is visible in 'version'. Returns a null pointer if no rules in 'cls' | |
1299 | * match 'flow'. If multiple rules of equal priority match 'flow', returns one | |
1300 | * arbitrarily. | |
18080541 BP |
1301 | * |
1302 | * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the | |
1303 | * set of bits that were significant in the lookup. At some point | |
1304 | * earlier, 'wc' should have been initialized (e.g., by | |
1305 | * flow_wildcards_init_catchall()). | |
1306 | * | |
1307 | * 'flow' is non-const to allow for temporary modifications during the lookup. | |
1308 | * Any changes are restored before returning. */ | |
1309 | const struct cls_rule * | |
2b7b1427 JR |
1310 | classifier_lookup(const struct classifier *cls, long long version, |
1311 | struct flow *flow, struct flow_wildcards *wc) | |
18080541 | 1312 | { |
2b7b1427 | 1313 | return classifier_lookup__(cls, version, flow, wc, true); |
48c3de13 BP |
1314 | } |
1315 | ||
/* Finds and returns a rule in 'cls' with exactly the same priority and
 * matching criteria as 'target', and that is visible in 'target->version'.
 * Only one such rule may ever exist.  Returns a null pointer if 'cls' doesn't
 * contain an exact match. */
const struct cls_rule *
classifier_find_rule_exactly(const struct classifier *cls,
                             const struct cls_rule *target)
{
    const struct cls_match *head, *rule;
    const struct cls_subtable *subtable;

    /* All rules with the same mask live in the same subtable. */
    subtable = find_subtable(cls, &target->match.mask);
    if (!subtable) {
        return NULL;
    }

    head = find_equal(subtable, &target->match.flow,
                      miniflow_hash_in_minimask(&target->match.flow,
                                                &target->match.mask, 0));
    if (!head) {
        return NULL;
    }
    /* The list of identical-match rules is kept in order of decreasing
     * priority, so the scan can stop once priorities drop below target's. */
    CLS_MATCH_FOR_EACH (rule, head) {
        if (rule->priority < target->priority) {
            break; /* Not found. */
        }
        if (rule->priority == target->priority
            && cls_match_visible_in_version(rule, target->version)) {
            return rule->cls_rule;
        }
    }
    return NULL;
}
1349 | ||
/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
 * same matching criteria as 'target', and that is visible in 'version'.
 * Returns a null pointer if 'cls' doesn't contain an exact match visible in
 * 'version'. */
const struct cls_rule *
classifier_find_match_exactly(const struct classifier *cls,
                              const struct match *target, int priority,
                              long long version)
{
    const struct cls_rule *retval;
    struct cls_rule cr;

    /* Wrap 'target' in a temporary cls_rule so the generic exact-match
     * search can be reused; the temporary is destroyed before returning. */
    cls_rule_init(&cr, target, priority, version);
    retval = classifier_find_rule_exactly(cls, &cr);
    cls_rule_destroy(&cr);

    return retval;
}
1368 | ||
/* Checks if 'target' would overlap any other rule in 'cls'.  Two rules are
 * considered to overlap if both rules have the same priority and a packet
 * could match both, and if both rules are visible in the same version.
 *
 * A trivial example of overlapping rules is two rules matching disjoint sets
 * of fields. E.g., if one rule matches only on port number, while another only
 * on dl_type, any packet from that specific port and with that specific
 * dl_type could match both, if the rules also have the same priority. */
bool
classifier_rule_overlaps(const struct classifier *cls,
                         const struct cls_rule *target)
{
    struct cls_subtable *subtable;

    /* Iterate subtables in the descending max priority order.  Subtables
     * whose max priority is below target's cannot contain an equal-priority
     * rule, so they are skipped by the priority-bounded iteration. */
    PVECTOR_FOR_EACH_PRIORITY (subtable, target->priority - 1, 2,
                               sizeof(struct cls_subtable), &cls->subtables) {
        uint64_t storage[FLOW_U64S];
        struct minimask mask;
        const struct cls_rule *rule;

        /* A packet can match both rules only in the fields that both rules
         * constrain, i.e. the combined (OR'd) mask. */
        minimask_combine(&mask, &target->match.mask, &subtable->mask, storage);

        RCULIST_FOR_EACH (rule, node, &subtable->rules_list) {
            if (rule->priority == target->priority
                && miniflow_equal_in_minimask(&target->match.flow,
                                              &rule->match.flow, &mask)
                && cls_match_visible_in_version(rule->cls_match,
                                                target->version)) {
                return true;
            }
        }
    }
    return false;
}
6ceeaa92 BP |
1404 | |
1405 | /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more | |
1406 | * specific than 'criteria'. That is, 'rule' matches 'criteria' and this | |
1407 | * function returns true if, for every field: | |
1408 | * | |
1409 | * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the | |
1410 | * field, or | |
1411 | * | |
1412 | * - 'criteria' wildcards the field, | |
1413 | * | |
1414 | * Conversely, 'rule' does not match 'criteria' and this function returns false | |
1415 | * if, for at least one field: | |
1416 | * | |
1417 | * - 'criteria' and 'rule' specify different values for the field, or | |
1418 | * | |
1419 | * - 'criteria' specifies a value for the field but 'rule' wildcards it. | |
1420 | * | |
1421 | * Equivalently, the truth table for whether a field matches is: | |
1422 | * | |
1423 | * rule | |
1424 | * | |
1425 | * c wildcard exact | |
1426 | * r +---------+---------+ | |
1427 | * i wild | yes | yes | | |
1428 | * t card | | | | |
1429 | * e +---------+---------+ | |
1430 | * r exact | no |if values| | |
1431 | * i | |are equal| | |
1432 | * a +---------+---------+ | |
1433 | * | |
1434 | * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD | |
1435 | * commands and by OpenFlow 1.0 aggregate and flow stats. | |
1436 | * | |
81a76618 | 1437 | * Ignores rule->priority. */ |
6ceeaa92 BP |
1438 | bool |
1439 | cls_rule_is_loose_match(const struct cls_rule *rule, | |
5cb7a798 | 1440 | const struct minimatch *criteria) |
6ceeaa92 | 1441 | { |
5cb7a798 BP |
1442 | return (!minimask_has_extra(&rule->match.mask, &criteria->mask) |
1443 | && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow, | |
1444 | &criteria->mask)); | |
6ceeaa92 | 1445 | } |
b5d97350 | 1446 | \f |
5ecc9d81 BP |
/* Iteration. */

/* Rule may only match a target if it is visible in target's version.  For NULL
 * target we only return rules that are not invisible in any version. */
static bool
rule_matches(const struct cls_rule *rule, const struct cls_rule *target)
{
    /* Iterators never see duplicate rules with the same priority. */
    return target
        ? (miniflow_equal_in_minimask(&rule->match.flow, &target->match.flow,
                                      &target->match.mask)
           && cls_match_visible_in_version(rule->cls_match, target->version))
        : !cls_match_is_eventually_invisible(rule->cls_match);
}
1461 | ||
de4ad4a2 | 1462 | static const struct cls_rule * |
03868246 | 1463 | search_subtable(const struct cls_subtable *subtable, |
f2c21402 | 1464 | struct cls_cursor *cursor) |
5ecc9d81 | 1465 | { |
f2c21402 JR |
1466 | if (!cursor->target |
1467 | || !minimask_has_extra(&subtable->mask, &cursor->target->match.mask)) { | |
de4ad4a2 | 1468 | const struct cls_rule *rule; |
5ecc9d81 | 1469 | |
de4ad4a2 | 1470 | RCULIST_FOR_EACH (rule, node, &subtable->rules_list) { |
f2c21402 | 1471 | if (rule_matches(rule, cursor->target)) { |
5ecc9d81 BP |
1472 | return rule; |
1473 | } | |
1474 | } | |
1475 | } | |
1476 | return NULL; | |
1477 | } | |
1478 | ||
/* Initializes 'cursor' for iterating through rules in 'cls', and returns the
 * first matching cls_rule via '*pnode', or NULL if there are no matches.
 *
 *     - If 'target' is null, or if the 'target' is a catchall target and the
 *       target's version is CLS_NO_VERSION, the cursor will visit every rule
 *       in 'cls' that is not invisible in any version.
 *
 *     - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls'
 *       such that cls_rule_is_loose_match(rule, target) returns true and that
 *       the rule is visible in 'target->version'.
 *
 * Ignores target->priority.
 *
 * NOTE(review): the text above says CLS_NO_VERSION but the code below compares
 * against CLS_MAX_VERSION -- confirm which constant is intended. */
struct cls_cursor
cls_cursor_start(const struct classifier *cls, const struct cls_rule *target)
{
    struct cls_cursor cursor;
    struct cls_subtable *subtable;

    cursor.cls = cls;
    /* A catchall target at CLS_MAX_VERSION is equivalent to no target at
     * all: every rule is a loose match for it. */
    cursor.target = target && (!cls_rule_is_catchall(target)
                               || target->version != CLS_MAX_VERSION)
        ? target : NULL;
    cursor.rule = NULL;

    /* Find first rule.  The pvector cursor remembers the subtable position so
     * that cls_cursor_next() can continue from where we stop. */
    PVECTOR_CURSOR_FOR_EACH (subtable, &cursor.subtables,
                             &cursor.cls->subtables) {
        const struct cls_rule *rule = search_subtable(subtable, &cursor);

        if (rule) {
            cursor.subtable = subtable;
            cursor.rule = rule;
            break;
        }
    }

    return cursor;
}
1517 | ||
/* Returns the next rule matching 'cursor->target' after 'cursor->rule', or
 * NULL when the iteration is exhausted.  First continues within the current
 * subtable's rule list, then moves on to the remaining subtables. */
static const struct cls_rule *
cls_cursor_next(struct cls_cursor *cursor)
{
    const struct cls_rule *rule;
    const struct cls_subtable *subtable;

    /* Continue within the current subtable from the last returned rule. */
    rule = cursor->rule;
    subtable = cursor->subtable;
    RCULIST_FOR_EACH_CONTINUE (rule, node, &subtable->rules_list) {
        if (rule_matches(rule, cursor->target)) {
            return rule;
        }
    }

    /* Current subtable exhausted; scan the remaining subtables. */
    PVECTOR_CURSOR_FOR_EACH_CONTINUE (subtable, &cursor->subtables) {
        rule = search_subtable(subtable, cursor);
        if (rule) {
            cursor->subtable = subtable;
            return rule;
        }
    }

    return NULL;
}
1542 | ||
/* Sets 'cursor->rule' to the next matching cls_rule in 'cursor''s iteration,
 * or to null if all matching rules have been visited. */
void
cls_cursor_advance(struct cls_cursor *cursor)
{
    cursor->rule = cls_cursor_next(cursor);
}
1550 | \f | |
03868246 | 1551 | static struct cls_subtable * |
e48eccd1 | 1552 | find_subtable(const struct classifier *cls, const struct minimask *mask) |
b5d97350 | 1553 | { |
03868246 | 1554 | struct cls_subtable *subtable; |
064af421 | 1555 | |
f2c21402 | 1556 | CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0), |
5a87054c | 1557 | &cls->subtables_map) { |
03868246 JR |
1558 | if (minimask_equal(mask, &subtable->mask)) { |
1559 | return subtable; | |
064af421 BP |
1560 | } |
1561 | } | |
b5d97350 | 1562 | return NULL; |
064af421 | 1563 | } |
064af421 | 1564 | |
/* Creates a new subtable for 'mask' and inserts it into 'cls'.
 *
 * The new subtable will be visible to the readers only after this.  Fields
 * declared const in struct cls_subtable are initialized through CONST_CAST
 * here, before publication, and must not be written afterwards. */
static struct cls_subtable *
insert_subtable(struct classifier *cls, const struct minimask *mask)
{
    uint32_t hash = minimask_hash(mask, 0);
    struct cls_subtable *subtable;
    int i, index = 0;
    struct flow_wildcards old, new;
    uint8_t prev;
    int count = count_1bits(mask->masks.map);

    /* The subtable is allocated with its mask's miniflow data inline; adjust
     * the size for the actual number of miniflow values. */
    subtable = xzalloc(sizeof *subtable - sizeof mask->masks.inline_values
                       + MINIFLOW_VALUES_SIZE(count));
    cmap_init(&subtable->rules);
    miniflow_clone_inline(CONST_CAST(struct miniflow *, &subtable->mask.masks),
                          &mask->masks, count);

    /* Init indices for segmented lookup, if any. */
    flow_wildcards_init_catchall(&new);
    old = new;
    prev = 0;
    for (i = 0; i < cls->n_flow_segments; i++) {
        flow_wildcards_fold_minimask_range(&new, mask, prev,
                                           cls->flow_segments[i]);
        /* Add an index if it adds mask bits. */
        if (!flow_wildcards_equal(&new, &old)) {
            cmap_init(&subtable->indices[index]);
            *CONST_CAST(uint8_t *, &subtable->index_ofs[index])
                = cls->flow_segments[i];
            index++;
            old = new;
        }
        prev = cls->flow_segments[i];
    }
    /* Check if the rest of the subtable's mask adds any bits,
     * and remove the last index if it doesn't. */
    if (index > 0) {
        flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U64S);
        if (flow_wildcards_equal(&new, &old)) {
            --index;
            *CONST_CAST(uint8_t *, &subtable->index_ofs[index]) = 0;
            cmap_destroy(&subtable->indices[index]);
        }
    }
    *CONST_CAST(uint8_t *, &subtable->n_indices) = index;

    /* A deterministic tag can be used only when the subtable matches on the
     * full metadata field; otherwise any metadata value may match. */
    *CONST_CAST(tag_type *, &subtable->tag) =
        (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
         ? tag_create_deterministic(hash)
         : TAG_ALL);

    /* Per-trie prefix lengths of this subtable's mask. */
    for (i = 0; i < cls->n_tries; i++) {
        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
                                                         cls->tries[i].field);
    }

    /* Ports trie. */
    ovsrcu_set_hidden(&subtable->ports_trie, NULL);
    *CONST_CAST(int *, &subtable->ports_mask_len)
        = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src)));

    /* List of rules. */
    rculist_init(&subtable->rules_list);

    /* Publish: the subtable becomes visible to RCU readers here. */
    cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash);

    return subtable;
}
1633 | ||
/* Removes 'subtable' from 'cls' and schedules it for freeing.
 *
 * RCU readers may still access the subtable before it is actually freed,
 * which is why the memory is released via ovsrcu_postpone() rather than
 * free(). */
static void
destroy_subtable(struct classifier *cls, struct cls_subtable *subtable)
{
    int i;

    /* Unpublish first so new readers cannot find the subtable. */
    pvector_remove(&cls->subtables, subtable);
    cmap_remove(&cls->subtables_map, &subtable->cmap_node,
                minimask_hash(&subtable->mask, 0));

    /* The caller must have emptied the subtable already. */
    ovs_assert(ovsrcu_get_protected(struct trie_node *, &subtable->ports_trie)
               == NULL);
    ovs_assert(cmap_is_empty(&subtable->rules));
    ovs_assert(rculist_is_empty(&subtable->rules_list));

    for (i = 0; i < subtable->n_indices; i++) {
        cmap_destroy(&subtable->indices[i]);
    }
    cmap_destroy(&subtable->rules);
    ovsrcu_postpone(free, subtable);
}
1655 | ||
/* A range [start, end) of offsets within a flow, used to delimit the flow
 * segment covered by one staged-lookup step (used half-open: see the
 * 'be64ofs < ofs.end' test in check_tries()). */
struct range {
    uint8_t start;
    uint8_t end;
};

static unsigned int be_get_bit_at(const ovs_be32 value[], unsigned int ofs);
1662 | ||
13751fd8 JR |
/* Return 'true' if can skip rest of the subtable based on the prefix trie
 * lookup results. */
static inline bool
check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
            const unsigned int field_plen[CLS_MAX_TRIES],
            const struct range ofs, const struct flow *flow,
            struct flow_wildcards *wc)
{
    int j;

    /* Check if we could avoid fully unwildcarding the next level of
     * fields using the prefix tries.  The trie checks are done only as
     * needed to avoid folding in additional bits to the wildcards mask. */
    for (j = 0; j < n_tries; j++) {
        /* Is the trie field relevant for this subtable? */
        if (field_plen[j]) {
            struct trie_ctx *ctx = &trie_ctx[j];
            uint8_t be32ofs = ctx->be32ofs;
            uint8_t be64ofs = be32ofs / 2;

            /* Is the trie field within the current range of fields? */
            if (be64ofs >= ofs.start && be64ofs < ofs.end) {
                /* On-demand trie lookup.  Results are cached in 'ctx' so the
                 * trie is walked at most once per classifier lookup. */
                if (!ctx->lookup_done) {
                    memset(&ctx->match_plens, 0, sizeof ctx->match_plens);
                    ctx->maskbits = trie_lookup(ctx->trie, flow,
                                                &ctx->match_plens);
                    ctx->lookup_done = true;
                }
                /* Possible to skip the rest of the subtable if subtable's
                 * prefix on the field is not included in the lookup result. */
                if (!be_get_bit_at(&ctx->match_plens.be32, field_plen[j] - 1)) {
                    /* We want the trie lookup to never result in unwildcarding
                     * any bits that would not be unwildcarded otherwise.
                     * Since the trie is shared by the whole classifier, it is
                     * possible that the 'maskbits' contain bits that are
                     * irrelevant for the partition relevant for the current
                     * packet.  Hence the checks below. */

                    /* Check that the trie result will not unwildcard more bits
                     * than this subtable would otherwise. */
                    if (ctx->maskbits <= field_plen[j]) {
                        /* Unwildcard the bits and skip the rest. */
                        mask_set_prefix_bits(wc, be32ofs, ctx->maskbits);
                        /* Note: Prerequisite already unwildcarded, as the only
                         * prerequisite of the supported trie lookup fields is
                         * the ethertype, which is always unwildcarded. */
                        return true;
                    }
                    /* Can skip if the field is already unwildcarded. */
                    if (mask_prefix_bits_set(wc, be32ofs, ctx->maskbits)) {
                        return true;
                    }
                }
            }
        }
    }
    return false;
}
1722 | ||
3016f3e4 JR |
/* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
 * for which 'flow', for which 'mask' has a bit set, specifies a particular
 * value has the correct value in 'target'.
 *
 * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
 * target, mask) but this is faster because of the invariant that
 * flow->map and mask->masks.map are the same, and that this version
 * takes the 'wc'. */
static inline bool
miniflow_and_mask_matches_flow(const struct miniflow *flow,
                               const struct minimask *mask,
                               const struct flow *target)
{
    /* 'flowp' and 'maskp' advance in lockstep: both miniflows have the same
     * map, so their value arrays are indexed identically. */
    const uint64_t *flowp = miniflow_get_values(flow);
    const uint64_t *maskp = miniflow_get_values(&mask->masks);
    int idx;

    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
        uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & *maskp++;

        if (diff) {
            return false;
        }
    }

    return true;
}
1750 | ||
/* Looks up 'flow' in 'subtable' using the precomputed 'hash'.  Returns the
 * highest-priority matching rule that is visible in 'version', or NULL. */
static inline const struct cls_match *
find_match(const struct cls_subtable *subtable, long long version,
           const struct flow *flow, uint32_t hash)
{
    const struct cls_match *head, *rule;

    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
        if (OVS_LIKELY(miniflow_and_mask_matches_flow(&head->flow,
                                                      &subtable->mask,
                                                      flow))) {
            /* Return highest priority rule that is visible. */
            CLS_MATCH_FOR_EACH (rule, head) {
                if (OVS_LIKELY(cls_match_visible_in_version(rule, version))) {
                    return rule;
                }
            }
        }
    }

    return NULL;
}
1772 | ||
e9319757 JR |
/* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
 * for which 'flow', for which 'mask' has a bit set, specifies a particular
 * value has the correct value in 'target'.
 *
 * This function is equivalent to miniflow_and_mask_matches_flow() but this
 * version fills in the mask bits in 'wc'. */
static inline bool
miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
                                  const struct minimask *mask,
                                  const struct flow *target,
                                  struct flow_wildcards *wc)
{
    const uint64_t *flowp = miniflow_get_values(flow);
    const uint64_t *maskp = miniflow_get_values(&mask->masks);
    int idx;

    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
        uint64_t mask = *maskp++;
        uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & mask;

        if (diff) {
            /* Only unwildcard if none of the differing bits is already
             * exact-matched. */
            if (!(flow_u64_value(&wc->masks, idx) & diff)) {
                /* Keep one bit of the difference.  The selected bit may be
                 * different in big-endian v.s. little-endian systems. */
                *flow_u64_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
            }
            return false;
        }
        /* Fill in the bits that were looked at. */
        *flow_u64_lvalue(&wc->masks, idx) |= mask;
    }

    return true;
}
1809 | ||
386cb9f7 JR |
1810 | /* Unwildcard the fields looked up so far, if any. */ |
1811 | static void | |
1812 | fill_range_wc(const struct cls_subtable *subtable, struct flow_wildcards *wc, | |
1813 | uint8_t to) | |
1814 | { | |
1815 | if (to) { | |
1816 | flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, to); | |
1817 | } | |
1818 | } | |
1819 | ||
/* Looks up 'flow' in 'subtable', filling in 'wc' with the bits that were
 * examined during the lookup.  Returns the highest-priority matching rule
 * visible in 'version', or NULL.
 *
 * The lookup proceeds in stages over the subtable's segment indices, checking
 * the prefix tries ('trie_ctx') before each stage to bail out early without
 * unwildcarding more bits than necessary. */
static const struct cls_match *
find_match_wc(const struct cls_subtable *subtable, long long version,
              const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES],
              unsigned int n_tries, struct flow_wildcards *wc)
{
    uint32_t basis = 0, hash;
    const struct cls_match *rule = NULL;
    int i;
    struct range ofs;

    if (OVS_UNLIKELY(!wc)) {
        /* No wildcards to fill in; use the simpler full-mask lookup. */
        return find_match(subtable, version, flow,
                          flow_hash_in_minimask(flow, &subtable->mask, 0));
    }

    ofs.start = 0;
    /* Try to finish early by checking fields in segments. */
    for (i = 0; i < subtable->n_indices; i++) {
        const struct cmap_node *inode;

        ofs.end = subtable->index_ofs[i];

        if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow,
                        wc)) {
            /* 'wc' bits for the trie field set, now unwildcard the preceding
             * bits used so far. */
            fill_range_wc(subtable, wc, ofs.start);
            return NULL;
        }
        /* 'basis' accumulates across stages so the final hash covers the
         * whole mask. */
        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                           ofs.end, &basis);
        inode = cmap_find(&subtable->indices[i], hash);
        if (!inode) {
            /* No match, can stop immediately, but must fold in the bits
             * used in lookup so far. */
            fill_range_wc(subtable, wc, ofs.end);
            return NULL;
        }

        /* If we have narrowed down to a single rule already, check whether
         * that rule matches.  Either way, we're done.
         *
         * (Rare) hash collisions may cause us to miss the opportunity for this
         * optimization. */
        if (!cmap_node_next(inode)) {
            const struct cls_match *head;

            /* 'inode' is the i:th index node of the match; step back to the
             * start of the index_nodes array to recover the cls_match. */
            ASSIGN_CONTAINER(head, inode - i, index_nodes);
            if (miniflow_and_mask_matches_flow_wc(&head->flow, &subtable->mask,
                                                  flow, wc)) {
                /* Return highest priority rule that is visible. */
                CLS_MATCH_FOR_EACH (rule, head) {
                    if (OVS_LIKELY(cls_match_visible_in_version(rule,
                                                                version))) {
                        return rule;
                    }
                }
            }
            return NULL;
        }
        ofs.start = ofs.end;
    }
    ofs.end = FLOW_U64S;
    /* Trie check for the final range. */
    if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
        fill_range_wc(subtable, wc, ofs.start);
        return NULL;
    }
    hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                       ofs.end, &basis);
    rule = find_match(subtable, version, flow, hash);
    if (!rule && subtable->ports_mask_len) {
        /* Ports are always part of the final range, if any.
         * No match was found for the ports.  Use the ports trie to figure out
         * which ports bits to unwildcard. */
        unsigned int mbits;
        ovs_be32 value, plens, mask;

        mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);
        value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
        mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32);

        ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
            mask & be32_prefix_mask(mbits);

        /* Unwildcard all bits in the mask upto the ports, as they were used
         * to determine there is no match. */
        fill_range_wc(subtable, wc, TP_PORTS_OFS64);
        return NULL;
    }

    /* Must unwildcard all the fields, as they were looked at. */
    flow_wildcards_fold_minimask(wc, &subtable->mask);
    return rule;
}
1915 | ||
/* Returns the rule head in 'subtable' whose miniflow equals 'flow' exactly
 * (same hash 'hash'), or NULL if there is none. */
static struct cls_match *
find_equal(const struct cls_subtable *subtable, const struct miniflow *flow,
           uint32_t hash)
{
    struct cls_match *head;

    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
        if (miniflow_equal(&head->flow, flow)) {
            return head;
        }
    }
    return NULL;
}
13751fd8 JR |
1929 | \f |
1930 | /* A longest-prefix match tree. */ | |
13751fd8 JR |
1931 | |
1932 | /* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'. | |
1933 | * Prefixes are in the network byte order, and the offset 0 corresponds to | |
1934 | * the most significant bit of the first byte. The offset can be read as | |
1935 | * "how many bits to skip from the start of the prefix starting at 'pr'". */ | |
1936 | static uint32_t | |
1937 | raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) | |
1938 | { | |
1939 | uint32_t prefix; | |
1940 | ||
1941 | pr += ofs / 32; /* Where to start. */ | |
1942 | ofs %= 32; /* How many bits to skip at 'pr'. */ | |
1943 | ||
1944 | prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */ | |
1945 | if (plen > 32 - ofs) { /* Need more than we have already? */ | |
1946 | prefix |= ntohl(*++pr) >> (32 - ofs); | |
1947 | } | |
1948 | /* Return with possible unwanted bits at the end. */ | |
1949 | return prefix; | |
1950 | } | |
1951 | ||
1952 | /* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit | |
1953 | * offset 'ofs'. Prefixes are in the network byte order, and the offset 0 | |
1954 | * corresponds to the most significant bit of the first byte. The offset can | |
1955 | * be read as "how many bits to skip from the start of the prefix starting at | |
1956 | * 'pr'". */ | |
1957 | static uint32_t | |
1958 | trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) | |
1959 | { | |
1960 | if (!plen) { | |
1961 | return 0; | |
1962 | } | |
1963 | if (plen > TRIE_PREFIX_BITS) { | |
1964 | plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */ | |
1965 | } | |
1966 | /* Return with unwanted bits cleared. */ | |
1967 | return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen); | |
1968 | } | |
1969 | ||
/* Return the number of equal bits in 'n_bits' of 'prefix's MSBs and a 'value'
 * starting at "MSB 0"-based offset 'ofs'. */
static unsigned int
prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[],
                  unsigned int ofs)
{
    uint64_t diff = prefix ^ raw_get_prefix(value, ofs, n_bits);
    /* Set the bit after the relevant bits to limit the result: the count of
     * leading zeros of 'diff' (shifted into the high half) is then at most
     * 'n_bits'. */
    return raw_clz64(diff << 32 | UINT64_C(1) << (63 - n_bits));
}
1980 | ||
/* Return the number of equal bits in 'node' prefix and a 'prefix' of length
 * 'plen', starting at "MSB 0"-based offset 'ofs'. */
static unsigned int
trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[],
                       unsigned int ofs, unsigned int plen)
{
    /* Compare at most the remaining 'plen - ofs' bits of the prefix. */
    return prefix_equal_bits(node->prefix, MIN(node->n_bits, plen - ofs),
                             prefix, ofs);
}
1990 | ||
1991 | /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can | |
1992 | * be greater than 31. */ | |
1993 | static unsigned int | |
1994 | be_get_bit_at(const ovs_be32 value[], unsigned int ofs) | |
1995 | { | |
1996 | return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u; | |
1997 | } | |
1998 | ||
/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int.  'ofs' must
 * be between 0 and 31, inclusive. */
static unsigned int
get_bit_at(const uint32_t prefix, unsigned int ofs)
{
    return 1u & (prefix >> (31 - ofs));
}
2006 | ||
/* Create new branch.
 *
 * Builds a chain of trie nodes holding 'plen' bits of 'prefix' starting at
 * bit offset 'ofs'; intermediate nodes carry TRIE_PREFIX_BITS each and only
 * the final node records 'n_rules'.  Child pointers are set with
 * ovsrcu_set_hidden because the branch is not yet visible to readers. */
static struct trie_node *
trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen,
                   unsigned int n_rules)
{
    struct trie_node *node = xmalloc(sizeof *node);

    node->prefix = trie_get_prefix(prefix, ofs, plen);

    if (plen <= TRIE_PREFIX_BITS) {
        /* Fits in one node; this is the leaf of the branch. */
        node->n_bits = plen;
        ovsrcu_set_hidden(&node->edges[0], NULL);
        ovsrcu_set_hidden(&node->edges[1], NULL);
        node->n_rules = n_rules;
    } else { /* Need intermediate nodes. */
        struct trie_node *subnode = trie_branch_create(prefix,
                                                       ofs + TRIE_PREFIX_BITS,
                                                       plen - TRIE_PREFIX_BITS,
                                                       n_rules);
        /* The subnode hangs off the edge selected by its first bit. */
        int bit = get_bit_at(subnode->prefix, 0);
        node->n_bits = TRIE_PREFIX_BITS;
        ovsrcu_set_hidden(&node->edges[bit], subnode);
        ovsrcu_set_hidden(&node->edges[!bit], NULL);
        node->n_rules = 0;
    }
    return node;
}
2034 | ||
/* Schedules 'node' to be freed after the current RCU grace period. */
static void
trie_node_destroy(const struct trie_node *node)
{
    ovsrcu_postpone(free, CONST_CAST(struct trie_node *, node));
}
2040 | ||
/* Copy a trie node for modification and postpone delete the old one. */
static struct trie_node *
trie_node_rcu_realloc(const struct trie_node *node)
{
    struct trie_node *new_node = xmalloc(sizeof *node);

    *new_node = *node;
    /* The old copy stays readable to RCU readers until the grace period. */
    trie_node_destroy(node);

    return new_node;
}
2051 | } |
2052 | ||
/* Recursively destroys the subtree rooted at '*trie', hiding each node from
 * readers before postponing its free. */
static void
trie_destroy(rcu_trie_ptr *trie)
{
    struct trie_node *node = ovsrcu_get_protected(struct trie_node *, trie);

    if (node) {
        /* Unlink first so no new reader can reach the node. */
        ovsrcu_set_hidden(trie, NULL);
        trie_destroy(&node->edges[0]);
        trie_destroy(&node->edges[1]);
        trie_node_destroy(node);
    }
}
2065 | ||
2066 | static bool | |
2067 | trie_is_leaf(const struct trie_node *trie) | |
2068 | { | |
f358a2cb JR |
2069 | /* No children? */ |
2070 | return !ovsrcu_get(struct trie_node *, &trie->edges[0]) | |
2071 | && !ovsrcu_get(struct trie_node *, &trie->edges[1]); | |
13751fd8 JR |
2072 | } |
2073 | ||
/* Sets the first 'n_bits' bits of the mask starting at 32-bit word offset
 * 'be32ofs' in 'wc->masks' (exact-matching that prefix). */
static void
mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs,
                     unsigned int n_bits)
{
    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
    unsigned int i;

    /* Whole 32-bit words first. */
    for (i = 0; i < n_bits / 32; i++) {
        mask[i] = OVS_BE32_MAX;
    }
    /* Then the remaining high-order bits of the last partial word. */
    if (n_bits % 32) {
        mask[i] |= htonl(~0u << (32 - n_bits % 32));
    }
}
2088 | ||
2089 | static bool | |
2090 | mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs, | |
c30cfa6b | 2091 | unsigned int n_bits) |
13751fd8 JR |
2092 | { |
2093 | ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs]; | |
2094 | unsigned int i; | |
2095 | ovs_be32 zeroes = 0; | |
2096 | ||
c30cfa6b | 2097 | for (i = 0; i < n_bits / 32; i++) { |
13751fd8 JR |
2098 | zeroes |= ~mask[i]; |
2099 | } | |
c30cfa6b JR |
2100 | if (n_bits % 32) { |
2101 | zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32)); | |
13751fd8 JR |
2102 | } |
2103 | ||
c30cfa6b | 2104 | return !zeroes; /* All 'n_bits' bits set. */ |
13751fd8 JR |
2105 | } |
2106 | ||
/* Returns a writable pointer to the child edge of 'node' selected by the bit
 * of 'value' at offset 'ofs'. */
static rcu_trie_ptr *
trie_next_edge(struct trie_node *node, const ovs_be32 value[],
               unsigned int ofs)
{
    return node->edges + be_get_bit_at(value, ofs);
}
2113 | ||
/* Returns the child of 'node' selected by the bit of 'value' at offset 'ofs'
 * (an RCU-protected read; may be NULL). */
static const struct trie_node *
trie_next_node(const struct trie_node *node, const ovs_be32 value[],
               unsigned int ofs)
{
    return ovsrcu_get(struct trie_node *,
                      &node->edges[be_get_bit_at(value, ofs)]);
}
2121 | ||
c0bfb650 JR |
2122 | /* Set the bit at ("MSB 0"-based) offset 'ofs'. 'ofs' can be greater than 31. |
2123 | */ | |
2124 | static void | |
2125 | be_set_bit_at(ovs_be32 value[], unsigned int ofs) | |
2126 | { | |
2127 | ((uint8_t *)value)[ofs / 8] |= 1u << (7 - ofs % 8); | |
2128 | } | |
2129 | ||
/* Returns the number of bits in the prefix mask necessary to determine a
 * mismatch, in case there are longer prefixes in the tree below the one that
 * matched.
 * '*plens' will have a bit set for each prefix length that may have matching
 * rules.  The caller is responsible for clearing the '*plens' prior to
 * calling this.
 */
static unsigned int
trie_lookup_value(const rcu_trie_ptr *trie, const ovs_be32 value[],
                  ovs_be32 plens[], unsigned int n_bits)
{
    const struct trie_node *prev = NULL;
    const struct trie_node *node = ovsrcu_get(struct trie_node *, trie);
    unsigned int match_len = 0; /* Number of matching bits. */

    for (; node; prev = node, node = trie_next_node(node, value, match_len)) {
        unsigned int eqbits;
        /* Check if this edge can be followed. */
        eqbits = prefix_equal_bits(node->prefix, node->n_bits, value,
                                   match_len);
        match_len += eqbits;
        if (eqbits < node->n_bits) { /* Mismatch, nothing more to be found. */
            /* Bit at offset 'match_len' differed. */
            return match_len + 1; /* Includes the first mismatching bit. */
        }
        /* Full match, check if rules exist at this prefix length. */
        if (node->n_rules > 0) {
            be_set_bit_at(plens, match_len - 1);
        }
        if (match_len >= n_bits) {
            return n_bits; /* Full prefix. */
        }
    }
    /* node == NULL.  Full match so far, but we tried to follow an
     * non-existing branch.  Need to exclude the other branch if it exists
     * (it does not if we were called on an empty trie or 'prev' is a leaf
     * node). */
    return !prev || trie_is_leaf(prev) ? match_len : match_len + 1;
}
2169 | ||
/* Looks up the trie field of 'flow' in 'trie'.  Returns the number of mask
 * bits needed to determine a mismatch (see trie_lookup_value()) and fills in
 * '*plens' with the candidate prefix lengths. */
static unsigned int
trie_lookup(const struct cls_trie *trie, const struct flow *flow,
            union mf_value *plens)
{
    const struct mf_field *mf = trie->field;

    /* Check that current flow matches the prerequisites for the trie
     * field.  Some match fields are used for multiple purposes, so we
     * must check that the trie is relevant for this flow. */
    if (mf_are_prereqs_ok(mf, flow)) {
        return trie_lookup_value(&trie->root,
                                 &((ovs_be32 *)flow)[mf->flow_be32ofs],
                                 &plens->be32, mf->n_bits);
    }
    memset(plens, 0xff, sizeof *plens); /* All prefixes, no skipping. */
    return 0; /* Value not used in this case. */
}
2187 | ||
2188 | /* Returns the length of a prefix match mask for the field 'mf' in 'minimask'. | |
2189 | * Returns the u32 offset to the miniflow data in '*miniflow_index', if | |
2190 | * 'miniflow_index' is not NULL. */ | |
2191 | static unsigned int | |
2192 | minimask_get_prefix_len(const struct minimask *minimask, | |
2193 | const struct mf_field *mf) | |
2194 | { | |
c30cfa6b | 2195 | unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */ |
d70e8c28 JR |
2196 | uint8_t be32_ofs = mf->flow_be32ofs; |
2197 | uint8_t be32_end = be32_ofs + mf->n_bytes / 4; | |
13751fd8 | 2198 | |
d70e8c28 JR |
2199 | for (; be32_ofs < be32_end; ++be32_ofs) { |
2200 | uint32_t mask = ntohl(minimask_get_be32(minimask, be32_ofs)); | |
13751fd8 JR |
2201 | |
2202 | /* Validate mask, count the mask length. */ | |
2203 | if (mask_tz) { | |
2204 | if (mask) { | |
2205 | return 0; /* No bits allowed after mask ended. */ | |
2206 | } | |
2207 | } else { | |
2208 | if (~mask & (~mask + 1)) { | |
2209 | return 0; /* Mask not contiguous. */ | |
2210 | } | |
2211 | mask_tz = ctz32(mask); | |
c30cfa6b | 2212 | n_bits += 32 - mask_tz; |
13751fd8 JR |
2213 | } |
2214 | } | |
2215 | ||
c30cfa6b | 2216 | return n_bits; |
13751fd8 JR |
2217 | } |
2218 | ||
2219 | /* | |
2220 | * This is called only when mask prefix is known to be CIDR and non-zero. | |
2221 | * Relies on the fact that the flow and mask have the same map, and since | |
2222 | * the mask is CIDR, the storage for the flow field exists even if it | |
2223 | * happened to be zeros. | |
2224 | */ | |
2225 | static const ovs_be32 * | |
2226 | minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf) | |
2227 | { | |
d70e8c28 JR |
2228 | return (OVS_FORCE const ovs_be32 *) |
2229 | (miniflow_get_values(&match->flow) | |
2230 | + count_1bits(match->flow.map & | |
2231 | ((UINT64_C(1) << mf->flow_be32ofs / 2) - 1))) | |
2232 | + (mf->flow_be32ofs & 1); | |
13751fd8 JR |
2233 | } |
2234 | ||
2235 | /* Insert rule in to the prefix tree. | |
2236 | * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask | |
2237 | * in 'rule'. */ | |
2238 | static void | |
2239 | trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) | |
2240 | { | |
69d6040e JR |
2241 | trie_insert_prefix(&trie->root, |
2242 | minimatch_get_prefix(&rule->match, trie->field), mlen); | |
2243 | } | |
2244 | ||
2245 | static void | |
f358a2cb | 2246 | trie_insert_prefix(rcu_trie_ptr *edge, const ovs_be32 *prefix, int mlen) |
69d6040e | 2247 | { |
13751fd8 | 2248 | struct trie_node *node; |
13751fd8 JR |
2249 | int ofs = 0; |
2250 | ||
2251 | /* Walk the tree. */ | |
f358a2cb | 2252 | for (; (node = ovsrcu_get_protected(struct trie_node *, edge)); |
13751fd8 JR |
2253 | edge = trie_next_edge(node, prefix, ofs)) { |
2254 | unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen); | |
2255 | ofs += eqbits; | |
c30cfa6b | 2256 | if (eqbits < node->n_bits) { |
13751fd8 JR |
2257 | /* Mismatch, new node needs to be inserted above. */ |
2258 | int old_branch = get_bit_at(node->prefix, eqbits); | |
f358a2cb | 2259 | struct trie_node *new_parent; |
13751fd8 | 2260 | |
f358a2cb JR |
2261 | new_parent = trie_branch_create(prefix, ofs - eqbits, eqbits, |
2262 | ofs == mlen ? 1 : 0); | |
2263 | /* Copy the node to modify it. */ | |
2264 | node = trie_node_rcu_realloc(node); | |
2265 | /* Adjust the new node for its new position in the tree. */ | |
13751fd8 | 2266 | node->prefix <<= eqbits; |
c30cfa6b | 2267 | node->n_bits -= eqbits; |
f358a2cb | 2268 | ovsrcu_set_hidden(&new_parent->edges[old_branch], node); |
13751fd8 JR |
2269 | |
2270 | /* Check if need a new branch for the new rule. */ | |
2271 | if (ofs < mlen) { | |
f358a2cb JR |
2272 | ovsrcu_set_hidden(&new_parent->edges[!old_branch], |
2273 | trie_branch_create(prefix, ofs, mlen - ofs, | |
2274 | 1)); | |
13751fd8 | 2275 | } |
f358a2cb | 2276 | ovsrcu_set(edge, new_parent); /* Publish changes. */ |
13751fd8 JR |
2277 | return; |
2278 | } | |
2279 | /* Full match so far. */ | |
2280 | ||
2281 | if (ofs == mlen) { | |
2282 | /* Full match at the current node, rule needs to be added here. */ | |
2283 | node->n_rules++; | |
2284 | return; | |
2285 | } | |
2286 | } | |
2287 | /* Must insert a new tree branch for the new rule. */ | |
f358a2cb | 2288 | ovsrcu_set(edge, trie_branch_create(prefix, ofs, mlen - ofs, 1)); |
13751fd8 JR |
2289 | } |
2290 | ||
2291 | /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask | |
2292 | * in 'rule'. */ | |
2293 | static void | |
2294 | trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) | |
2295 | { | |
69d6040e JR |
2296 | trie_remove_prefix(&trie->root, |
2297 | minimatch_get_prefix(&rule->match, trie->field), mlen); | |
2298 | } | |
2299 | ||
2300 | /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask | |
2301 | * in 'rule'. */ | |
2302 | static void | |
f358a2cb | 2303 | trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 *prefix, int mlen) |
69d6040e | 2304 | { |
13751fd8 | 2305 | struct trie_node *node; |
f358a2cb | 2306 | rcu_trie_ptr *edges[sizeof(union mf_value) * 8]; |
13751fd8 JR |
2307 | int depth = 0, ofs = 0; |
2308 | ||
2309 | /* Walk the tree. */ | |
69d6040e | 2310 | for (edges[0] = root; |
f358a2cb | 2311 | (node = ovsrcu_get_protected(struct trie_node *, edges[depth])); |
13751fd8 JR |
2312 | edges[++depth] = trie_next_edge(node, prefix, ofs)) { |
2313 | unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen); | |
69d6040e | 2314 | |
c30cfa6b | 2315 | if (eqbits < node->n_bits) { |
13751fd8 JR |
2316 | /* Mismatch, nothing to be removed. This should never happen, as |
2317 | * only rules in the classifier are ever removed. */ | |
2318 | break; /* Log a warning. */ | |
2319 | } | |
2320 | /* Full match so far. */ | |
2321 | ofs += eqbits; | |
2322 | ||
2323 | if (ofs == mlen) { | |
2324 | /* Full prefix match at the current node, remove rule here. */ | |
2325 | if (!node->n_rules) { | |
2326 | break; /* Log a warning. */ | |
2327 | } | |
2328 | node->n_rules--; | |
2329 | ||
2330 | /* Check if can prune the tree. */ | |
f358a2cb JR |
2331 | while (!node->n_rules) { |
2332 | struct trie_node *next, | |
2333 | *edge0 = ovsrcu_get_protected(struct trie_node *, | |
2334 | &node->edges[0]), | |
2335 | *edge1 = ovsrcu_get_protected(struct trie_node *, | |
2336 | &node->edges[1]); | |
2337 | ||
2338 | if (edge0 && edge1) { | |
2339 | break; /* A branching point, cannot prune. */ | |
2340 | } | |
2341 | ||
2342 | /* Else have at most one child node, remove this node. */ | |
2343 | next = edge0 ? edge0 : edge1; | |
13751fd8 JR |
2344 | |
2345 | if (next) { | |
c30cfa6b | 2346 | if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) { |
13751fd8 JR |
2347 | break; /* Cannot combine. */ |
2348 | } | |
f358a2cb JR |
2349 | next = trie_node_rcu_realloc(next); /* Modify. */ |
2350 | ||
13751fd8 | 2351 | /* Combine node with next. */ |
c30cfa6b JR |
2352 | next->prefix = node->prefix | next->prefix >> node->n_bits; |
2353 | next->n_bits += node->n_bits; | |
13751fd8 | 2354 | } |
13751fd8 | 2355 | /* Update the parent's edge. */ |
f358a2cb JR |
2356 | ovsrcu_set(edges[depth], next); /* Publish changes. */ |
2357 | trie_node_destroy(node); | |
2358 | ||
13751fd8 JR |
2359 | if (next || !depth) { |
2360 | /* Branch not pruned or at root, nothing more to do. */ | |
2361 | break; | |
2362 | } | |
f358a2cb JR |
2363 | node = ovsrcu_get_protected(struct trie_node *, |
2364 | edges[--depth]); | |
13751fd8 JR |
2365 | } |
2366 | return; | |
2367 | } | |
2368 | } | |
2369 | /* Cannot go deeper. This should never happen, since only rules | |
2370 | * that actually exist in the classifier are ever removed. */ | |
2371 | VLOG_WARN("Trying to remove non-existing rule from a prefix trie."); | |
2372 | } | |
8f8023b3 JR |
2373 | \f |
2374 | ||
2375 | #define CLS_MATCH_POISON (struct cls_match *)(UINTPTR_MAX / 0xf * 0xb) | |
2376 | ||
2377 | void | |
2378 | cls_match_free_cb(struct cls_match *rule) | |
2379 | { | |
2380 | ovsrcu_set_hidden(&rule->next, CLS_MATCH_POISON); | |
2381 | free(rule); | |
2382 | } |