]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
78c8df12 | 2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "classifier.h" | |
38c449e0 | 19 | #include "classifier-private.h" |
064af421 BP |
20 | #include <errno.h> |
21 | #include <netinet/in.h> | |
844dff32 | 22 | #include "byte-order.h" |
68d1c8c3 | 23 | #include "dynamic-string.h" |
07b37e8f | 24 | #include "odp-util.h" |
d8ae4d67 | 25 | #include "ofp-util.h" |
13751fd8 | 26 | #include "packets.h" |
52054c15 | 27 | #include "util.h" |
13751fd8 JR |
28 | #include "vlog.h" |
29 | ||
30 | VLOG_DEFINE_THIS_MODULE(classifier); | |
064af421 | 31 | |
69d6040e JR |
32 | struct trie_ctx; |
33 | ||
34 | /* Ports trie depends on both ports sharing the same ovs_be32. */ | |
35 | #define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4) | |
36 | BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4); | |
cabd4c43 | 37 | |
627fb667 JR |
38 | static struct cls_match * |
39 | cls_match_alloc(struct cls_rule *rule) | |
40 | { | |
3016f3e4 JR |
41 | int count = count_1bits(rule->match.flow.map); |
42 | ||
43 | struct cls_match *cls_match | |
44 | = xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values | |
45 | + MINIFLOW_VALUES_SIZE(count)); | |
627fb667 | 46 | |
f47eef15 | 47 | rculist_init(&cls_match->list); |
f80028fe JR |
48 | *CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule; |
49 | *CONST_CAST(int *, &cls_match->priority) = rule->priority; | |
50 | miniflow_clone_inline(CONST_CAST(struct miniflow *, &cls_match->flow), | |
51 | &rule->match.flow, count); | |
627fb667 JR |
52 | |
53 | return cls_match; | |
54 | } | |
cabd4c43 | 55 | |
e48eccd1 | 56 | static struct cls_subtable *find_subtable(const struct classifier *cls, |
dfea28b3 | 57 | const struct minimask *); |
e48eccd1 | 58 | static struct cls_subtable *insert_subtable(struct classifier *cls, |
e65413ab JR |
59 | const struct minimask *) |
60 | OVS_REQUIRES(cls->mutex); | |
e48eccd1 | 61 | static void destroy_subtable(struct classifier *cls, struct cls_subtable *) |
e65413ab | 62 | OVS_REQUIRES(cls->mutex); |
b5d97350 | 63 | |
dfea28b3 JR |
64 | static const struct cls_match *find_match_wc(const struct cls_subtable *, |
65 | const struct flow *, | |
66 | struct trie_ctx *, | |
67 | unsigned int n_tries, | |
68 | struct flow_wildcards *); | |
69 | static struct cls_match *find_equal(const struct cls_subtable *, | |
627fb667 | 70 | const struct miniflow *, uint32_t hash); |
b5d97350 | 71 | |
dfea28b3 JR |
/* Returns the cls_match that follows 'rule' in its rculist.  The list is
 * circular, so this never returns NULL; the caller decides whether the
 * returned node wrapped around (see next_rule_in_list()).  'next' is
 * pre-declared because OBJECT_CONTAINING needs an object of the target
 * type to compute the member offset. */
static inline const struct cls_match *
next_rule_in_list__(const struct cls_match *rule)
{
    const struct cls_match *next = NULL;
    next = OBJECT_CONTAINING(rculist_next(&rule->list), next, list);
    return next;
}
79 | ||
80 | static inline const struct cls_match * | |
81 | next_rule_in_list(const struct cls_match *rule) | |
82 | { | |
83 | const struct cls_match *next = next_rule_in_list__(rule); | |
84 | return next->priority < rule->priority ? next : NULL; | |
85 | } | |
86 | ||
c501b427 | 87 | static inline struct cls_match * |
dfea28b3 | 88 | next_rule_in_list_protected__(struct cls_match *rule) |
c501b427 JR |
89 | { |
90 | struct cls_match *next = NULL; | |
dfea28b3 | 91 | next = OBJECT_CONTAINING(rculist_next_protected(&rule->list), next, list); |
c501b427 JR |
92 | return next; |
93 | } | |
94 | ||
95 | static inline struct cls_match * | |
dfea28b3 | 96 | next_rule_in_list_protected(struct cls_match *rule) |
c501b427 | 97 | { |
dfea28b3 | 98 | struct cls_match *next = next_rule_in_list_protected__(rule); |
c501b427 JR |
99 | return next->priority < rule->priority ? next : NULL; |
100 | } | |
101 | ||
e65413ab JR |
102 | /* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. |
103 | * Classifier's mutex must be held while iterating, as the list is | |
104 | * protected by it. */ | |
b5d97350 BP |
105 | #define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \ |
106 | for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE)) | |
dfea28b3 JR |
107 | #define FOR_EACH_RULE_IN_LIST_PROTECTED(RULE, HEAD) \ |
108 | for ((RULE) = (HEAD); (RULE) != NULL; \ | |
109 | (RULE) = next_rule_in_list_protected(RULE)) | |
13751fd8 JR |
110 | |
111 | static unsigned int minimask_get_prefix_len(const struct minimask *, | |
112 | const struct mf_field *); | |
e48eccd1 | 113 | static void trie_init(struct classifier *cls, int trie_idx, |
e65413ab JR |
114 | const struct mf_field *) |
115 | OVS_REQUIRES(cls->mutex); | |
13751fd8 | 116 | static unsigned int trie_lookup(const struct cls_trie *, const struct flow *, |
c0bfb650 | 117 | union mf_value *plens); |
f358a2cb | 118 | static unsigned int trie_lookup_value(const rcu_trie_ptr *, |
c0bfb650 JR |
119 | const ovs_be32 value[], ovs_be32 plens[], |
120 | unsigned int value_bits); | |
f358a2cb | 121 | static void trie_destroy(rcu_trie_ptr *); |
13751fd8 | 122 | static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen); |
f358a2cb | 123 | static void trie_insert_prefix(rcu_trie_ptr *, const ovs_be32 *prefix, |
69d6040e | 124 | int mlen); |
13751fd8 | 125 | static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen); |
f358a2cb | 126 | static void trie_remove_prefix(rcu_trie_ptr *, const ovs_be32 *prefix, |
69d6040e | 127 | int mlen); |
13751fd8 | 128 | static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs, |
c30cfa6b | 129 | unsigned int n_bits); |
13751fd8 | 130 | static bool mask_prefix_bits_set(const struct flow_wildcards *, |
c30cfa6b | 131 | uint8_t be32ofs, unsigned int n_bits); |
81a76618 BP |
132 | \f |
133 | /* cls_rule. */ | |
b5d97350 | 134 | |
81a76618 | 135 | /* Initializes 'rule' to match packets specified by 'match' at the given |
5cb7a798 BP |
136 | * 'priority'. 'match' must satisfy the invariant described in the comment at |
137 | * the definition of struct match. | |
66642cb4 | 138 | * |
48d28ac1 BP |
139 | * The caller must eventually destroy 'rule' with cls_rule_destroy(). |
140 | * | |
eb391b76 BP |
141 | * Clients should not use priority INT_MIN. (OpenFlow uses priorities between |
142 | * 0 and UINT16_MAX, inclusive.) */ | |
47284b1f | 143 | void |
eb391b76 | 144 | cls_rule_init(struct cls_rule *rule, const struct match *match, int priority) |
47284b1f | 145 | { |
5cb7a798 BP |
146 | minimatch_init(&rule->match, match); |
147 | rule->priority = priority; | |
627fb667 | 148 | rule->cls_match = NULL; |
5cb7a798 BP |
149 | } |
150 | ||
151 | /* Same as cls_rule_init() for initialization from a "struct minimatch". */ | |
152 | void | |
153 | cls_rule_init_from_minimatch(struct cls_rule *rule, | |
eb391b76 | 154 | const struct minimatch *match, int priority) |
5cb7a798 BP |
155 | { |
156 | minimatch_clone(&rule->match, match); | |
81a76618 | 157 | rule->priority = priority; |
627fb667 | 158 | rule->cls_match = NULL; |
685a51a5 JP |
159 | } |
160 | ||
48d28ac1 BP |
161 | /* Initializes 'dst' as a copy of 'src'. |
162 | * | |
b2c1f00b | 163 | * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ |
48d28ac1 BP |
164 | void |
165 | cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src) | |
166 | { | |
5cb7a798 BP |
167 | minimatch_clone(&dst->match, &src->match); |
168 | dst->priority = src->priority; | |
627fb667 | 169 | dst->cls_match = NULL; |
48d28ac1 BP |
170 | } |
171 | ||
b2c1f00b BP |
172 | /* Initializes 'dst' with the data in 'src', destroying 'src'. |
173 | * | |
174 | * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ | |
175 | void | |
176 | cls_rule_move(struct cls_rule *dst, struct cls_rule *src) | |
177 | { | |
178 | minimatch_move(&dst->match, &src->match); | |
179 | dst->priority = src->priority; | |
627fb667 | 180 | dst->cls_match = NULL; |
b2c1f00b BP |
181 | } |
182 | ||
48d28ac1 BP |
/* Frees memory referenced by 'rule'.  Doesn't free 'rule' itself (it's
 * normally embedded into a larger structure).
 *
 * ('rule' must not currently be in a classifier: 'cls_match' is set back
 * to NULL by classifier_remove(), so a non-NULL value here means the rule
 * is still inserted.) */
void
cls_rule_destroy(struct cls_rule *rule)
{
    ovs_assert(!rule->cls_match);
    minimatch_destroy(&rule->match);
}
193 | ||
81a76618 BP |
194 | /* Returns true if 'a' and 'b' match the same packets at the same priority, |
195 | * false if they differ in some way. */ | |
193eb874 BP |
196 | bool |
197 | cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b) | |
198 | { | |
5cb7a798 | 199 | return a->priority == b->priority && minimatch_equal(&a->match, &b->match); |
193eb874 BP |
200 | } |
201 | ||
81a76618 | 202 | /* Returns a hash value for 'rule', folding in 'basis'. */ |
57452fdc BP |
203 | uint32_t |
204 | cls_rule_hash(const struct cls_rule *rule, uint32_t basis) | |
205 | { | |
5cb7a798 | 206 | return minimatch_hash(&rule->match, hash_int(rule->priority, basis)); |
73f33563 BP |
207 | } |
208 | ||
81a76618 | 209 | /* Appends a string describing 'rule' to 's'. */ |
07b37e8f BP |
210 | void |
211 | cls_rule_format(const struct cls_rule *rule, struct ds *s) | |
212 | { | |
5cb7a798 | 213 | minimatch_format(&rule->match, s, rule->priority); |
064af421 | 214 | } |
3ca1de08 BP |
215 | |
/* Returns true if 'rule' matches every packet, false otherwise.  A rule
 * is a catchall exactly when its mask wildcards every field. */
bool
cls_rule_is_catchall(const struct cls_rule *rule)
{
    return minimask_is_catchall(&rule->match.mask);
}
064af421 BP |
222 | \f |
/* Initializes 'cls' as a classifier that initially contains no classification
 * rules.
 *
 * 'flow_segments', if nonnull, is a FLOW_U32S-terminated-by-bound array of
 * increasing offsets used to build up to CLS_MAX_INDICES staged lookup
 * indices per subtable.  The mutex is created here and held for the rest of
 * initialization so that trie_init()'s OVS_REQUIRES annotation is satisfied. */
void
classifier_init(struct classifier *cls, const uint8_t *flow_segments)
    OVS_EXCLUDED(cls->mutex)
{
    ovs_mutex_init(&cls->mutex);
    ovs_mutex_lock(&cls->mutex);
    cls->n_rules = 0;
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);
    cmap_init(&cls->partitions);
    cls->n_flow_segments = 0;
    if (flow_segments) {
        /* Accept at most CLS_MAX_INDICES segment boundaries, all below
         * FLOW_U32S. */
        while (cls->n_flow_segments < CLS_MAX_INDICES
               && *flow_segments < FLOW_U32S) {
            cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
        }
    }
    cls->n_tries = 0;
    /* Start with all tries empty/disabled. */
    for (int i = 0; i < CLS_MAX_TRIES; i++) {
        trie_init(cls, i, NULL);
    }
    ovs_mutex_unlock(&cls->mutex);
}
248 | ||
/* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
 * caller's responsibility.
 * May only be called after all the readers have been terminated. */
void
classifier_destroy(struct classifier *cls)
    OVS_EXCLUDED(cls->mutex)
{
    if (cls) {
        struct cls_partition *partition;
        struct cls_subtable *subtable;
        int i;

        ovs_mutex_lock(&cls->mutex);
        /* Tear down the prefix tries first; their nodes reference nothing
         * in the subtables. */
        for (i = 0; i < cls->n_tries; i++) {
            trie_destroy(&cls->tries[i].root);
        }

        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            destroy_subtable(cls, subtable);
        }
        cmap_destroy(&cls->subtables_map);

        /* Partitions are freed via RCU, as late readers may still be
         * traversing the cmap. */
        CMAP_FOR_EACH (partition, cmap_node, &cls->partitions) {
            ovsrcu_postpone(free, partition);
        }
        cmap_destroy(&cls->partitions);

        pvector_destroy(&cls->subtables);
        ovs_mutex_unlock(&cls->mutex);
        ovs_mutex_destroy(&cls->mutex);
    }
}
281 | ||
13751fd8 | 282 | /* Set the fields for which prefix lookup should be performed. */ |
f358a2cb | 283 | bool |
e48eccd1 | 284 | classifier_set_prefix_fields(struct classifier *cls, |
13751fd8 JR |
285 | const enum mf_field_id *trie_fields, |
286 | unsigned int n_fields) | |
e48eccd1 | 287 | OVS_EXCLUDED(cls->mutex) |
13751fd8 | 288 | { |
f358a2cb | 289 | const struct mf_field * new_fields[CLS_MAX_TRIES]; |
abadfcb0 | 290 | struct mf_bitmap fields = MF_BITMAP_INITIALIZER; |
f358a2cb JR |
291 | int i, n_tries = 0; |
292 | bool changed = false; | |
13751fd8 | 293 | |
e65413ab | 294 | ovs_mutex_lock(&cls->mutex); |
f358a2cb | 295 | for (i = 0; i < n_fields && n_tries < CLS_MAX_TRIES; i++) { |
13751fd8 JR |
296 | const struct mf_field *field = mf_from_id(trie_fields[i]); |
297 | if (field->flow_be32ofs < 0 || field->n_bits % 32) { | |
298 | /* Incompatible field. This is the only place where we | |
299 | * enforce these requirements, but the rest of the trie code | |
300 | * depends on the flow_be32ofs to be non-negative and the | |
301 | * field length to be a multiple of 32 bits. */ | |
302 | continue; | |
303 | } | |
304 | ||
abadfcb0 | 305 | if (bitmap_is_set(fields.bm, trie_fields[i])) { |
13751fd8 JR |
306 | /* Duplicate field, there is no need to build more than |
307 | * one index for any one field. */ | |
308 | continue; | |
309 | } | |
abadfcb0 | 310 | bitmap_set1(fields.bm, trie_fields[i]); |
13751fd8 | 311 | |
f358a2cb JR |
312 | new_fields[n_tries] = NULL; |
313 | if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) { | |
314 | new_fields[n_tries] = field; | |
315 | changed = true; | |
316 | } | |
317 | n_tries++; | |
318 | } | |
319 | ||
320 | if (changed || n_tries < cls->n_tries) { | |
321 | struct cls_subtable *subtable; | |
322 | ||
323 | /* Trie configuration needs to change. Disable trie lookups | |
324 | * for the tries that are changing and wait all the current readers | |
325 | * with the old configuration to be done. */ | |
326 | changed = false; | |
327 | CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) { | |
328 | for (i = 0; i < cls->n_tries; i++) { | |
329 | if ((i < n_tries && new_fields[i]) || i >= n_tries) { | |
330 | if (subtable->trie_plen[i]) { | |
331 | subtable->trie_plen[i] = 0; | |
332 | changed = true; | |
333 | } | |
334 | } | |
335 | } | |
336 | } | |
337 | /* Synchronize if any readers were using tries. The readers may | |
338 | * temporarily function without the trie lookup based optimizations. */ | |
339 | if (changed) { | |
340 | /* ovsrcu_synchronize() functions as a memory barrier, so it does | |
341 | * not matter that subtable->trie_plen is not atomic. */ | |
342 | ovsrcu_synchronize(); | |
13751fd8 | 343 | } |
13751fd8 | 344 | |
f358a2cb JR |
345 | /* Now set up the tries. */ |
346 | for (i = 0; i < n_tries; i++) { | |
347 | if (new_fields[i]) { | |
348 | trie_init(cls, i, new_fields[i]); | |
349 | } | |
350 | } | |
351 | /* Destroy the rest, if any. */ | |
352 | for (; i < cls->n_tries; i++) { | |
353 | trie_init(cls, i, NULL); | |
354 | } | |
355 | ||
356 | cls->n_tries = n_tries; | |
357 | ovs_mutex_unlock(&cls->mutex); | |
358 | return true; | |
13751fd8 | 359 | } |
f358a2cb | 360 | |
e65413ab | 361 | ovs_mutex_unlock(&cls->mutex); |
f358a2cb | 362 | return false; /* No change. */ |
13751fd8 JR |
363 | } |
364 | ||
/* (Re)initializes trie 'trie_idx' of 'cls' to index 'field'.  A NULL
 * 'field' disables the trie.  Existing rules whose masks cover a prefix of
 * 'field' are inserted into the new trie, and each subtable's
 * 'trie_plen[trie_idx]' is then published so readers may start using it. */
static void
trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
    OVS_REQUIRES(cls->mutex)
{
    struct cls_trie *trie = &cls->tries[trie_idx];
    struct cls_subtable *subtable;

    if (trie_idx < cls->n_tries) {
        /* An old trie exists at this index; free its nodes. */
        trie_destroy(&trie->root);
    } else {
        ovsrcu_set_hidden(&trie->root, NULL);
    }
    trie->field = field;

    /* Add existing rules to the new trie. */
    CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
        unsigned int plen;

        plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
        if (plen) {
            struct cls_match *head;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                trie_insert(trie, head->cls_rule, plen);
            }
        }
        /* Initialize subtable's prefix length on this field.  This will
         * allow readers to use the trie.  The release fence orders the
         * trie insertions above before the publication of 'plen'. */
        atomic_thread_fence(memory_order_release);
        subtable->trie_plen[trie_idx] = plen;
    }
}
397 | ||
5f0476ce JR |
/* Returns true if 'cls' contains no classification rules, false otherwise.
 * Checking the cmap requires no locking: an empty subtables map implies no
 * rules, since every rule lives in some subtable. */
bool
classifier_is_empty(const struct classifier *cls)
{
    return cmap_is_empty(&cls->subtables_map);
}
405 | ||
dbda2960 | 406 | /* Returns the number of rules in 'cls'. */ |
064af421 BP |
407 | int |
408 | classifier_count(const struct classifier *cls) | |
afae68b1 | 409 | OVS_NO_THREAD_SAFETY_ANALYSIS |
064af421 | 410 | { |
afae68b1 JR |
411 | /* n_rules is an int, so in the presence of concurrent writers this will |
412 | * return either the old or a new value. */ | |
e48eccd1 | 413 | return cls->n_rules; |
064af421 BP |
414 | } |
415 | ||
c906cedf BP |
416 | static uint32_t |
417 | hash_metadata(ovs_be64 metadata_) | |
418 | { | |
419 | uint64_t metadata = (OVS_FORCE uint64_t) metadata_; | |
965607c8 | 420 | return hash_uint64(metadata); |
c906cedf BP |
421 | } |
422 | ||
423 | static struct cls_partition * | |
e48eccd1 | 424 | find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash) |
c906cedf BP |
425 | { |
426 | struct cls_partition *partition; | |
427 | ||
f2c21402 | 428 | CMAP_FOR_EACH_WITH_HASH (partition, cmap_node, hash, &cls->partitions) { |
c906cedf BP |
429 | if (partition->metadata == metadata) { |
430 | return partition; | |
431 | } | |
432 | } | |
433 | ||
434 | return NULL; | |
435 | } | |
436 | ||
/* Returns the partition for 'metadata', creating and inserting it into
 * 'cls''s partition cmap if it does not yet exist, and records
 * 'subtable''s tag in the partition's tag tracker either way. */
static struct cls_partition *
create_partition(struct classifier *cls, struct cls_subtable *subtable,
                 ovs_be64 metadata)
    OVS_REQUIRES(cls->mutex)
{
    uint32_t hash = hash_metadata(metadata);
    struct cls_partition *partition = find_partition(cls, metadata, hash);
    if (!partition) {
        partition = xmalloc(sizeof *partition);
        partition->metadata = metadata;
        partition->tags = 0;    /* Filled in by tag_tracker_add() below. */
        tag_tracker_init(&partition->tracker);
        cmap_insert(&cls->partitions, &partition->cmap_node, hash);
    }
    tag_tracker_add(&partition->tracker, &partition->tags, subtable->tag);
    return partition;
}
454 | ||
69d6040e JR |
/* Returns the masked transport-port pair of 'match' as a single ovs_be32
 * (tp_src and tp_dst share one 32-bit word; see TP_PORTS_OFS32). */
static inline ovs_be32 minimatch_get_ports(const struct minimatch *match)
{
    /* Could optimize to use the same map if needed for fast path. */
    return MINIFLOW_GET_BE32(&match->flow, tp_src)
        & MINIFLOW_GET_BE32(&match->mask.masks, tp_src);
}
461 | ||
f47eef15 JR |
/* Replaces 'head' with 'new' as the head rule in 'subtable''s rules cmap
 * and in every staged index.  'hash'/'ihash' must be the hashes previously
 * computed for the (identical) match.  Trie data is shared by all rules of
 * the same match and so needs no update here. */
static void
subtable_replace_head_rule(struct classifier *cls OVS_UNUSED,
                           struct cls_subtable *subtable,
                           struct cls_match *head, struct cls_match *new,
                           uint32_t hash, uint32_t ihash[CLS_MAX_INDICES])
    OVS_REQUIRES(cls->mutex)
{
    /* Rule's data is already in the tries. */

    new->partition = head->partition; /* Steal partition, if any. */
    head->partition = NULL;

    for (int i = 0; i < subtable->n_indices; i++) {
        cmap_replace(&subtable->indices[i], &head->index_nodes[i],
                     &new->index_nodes[i], ihash[i]);
    }
    cmap_replace(&subtable->rules, &head->cmap_node, &new->cmap_node, hash);
}
480 | ||
b5d97350 BP |
/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * If 'cls' already contains an identical rule (including wildcards, values of
 * fixed fields, and priority), replaces the old rule by 'rule' and returns the
 * rule that was replaced.  The caller takes ownership of the returned rule and
 * is thus responsible for destroying it with cls_rule_destroy(), after RCU
 * grace period has passed (see ovsrcu_postpone()).
 *
 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 * inserting the new rule.  In this case, no rules are displaced by the new
 * rule, even rules that cannot have any effect because the new rule matches a
 * superset of their flows and has higher priority.
 */
const struct cls_rule *
classifier_replace(struct classifier *cls, struct cls_rule *rule)
    OVS_EXCLUDED(cls->mutex)
{
    struct cls_match *new = cls_match_alloc(rule);
    struct cls_subtable *subtable;
    uint32_t ihash[CLS_MAX_INDICES];
    uint8_t prev_be32ofs = 0;
    struct cls_match *head;
    size_t n_rules = 0;
    uint32_t basis;
    uint32_t hash;
    int i;

    ovs_mutex_lock(&cls->mutex);
    rule->cls_match = new;

    subtable = find_subtable(cls, &rule->match.mask);
    if (!subtable) {
        subtable = insert_subtable(cls, &rule->match.mask);
    }

    /* Compute hashes in segments. */
    basis = 0;
    for (i = 0; i < subtable->n_indices; i++) {
        ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs,
                                        subtable->index_ofs[i], &basis);
        prev_be32ofs = subtable->index_ofs[i];
    }
    hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis);

    head = find_equal(subtable, &rule->match.flow, hash);
    if (!head) {
        /* Add rule to tries.
         *
         * Concurrent readers might miss seeing the rule until this update,
         * which might require being fixed up by revalidation later. */
        for (i = 0; i < cls->n_tries; i++) {
            if (subtable->trie_plen[i]) {
                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
            }
        }

        /* Add rule to ports trie. */
        if (subtable->ports_mask_len) {
            /* We mask the value to be inserted to always have the wildcarded
             * bits in known (zero) state, so we can include them in comparison
             * and they will always match (== their original value does not
             * matter). */
            ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
                               subtable->ports_mask_len);
        }

        /* Add rule to partitions.
         *
         * Concurrent readers might miss seeing the rule until this update,
         * which might require being fixed up by revalidation later. */
        new->partition = NULL;
        if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
            ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);

            new->partition = create_partition(cls, subtable, metadata);
        }

        /* Make rule visible to lookups. */

        /* Add new node to segment indices.
         *
         * Readers may find the rule in the indices before the rule is visible
         * in the subtables 'rules' map.  This may result in us losing the
         * opportunity to quit lookups earlier, resulting in sub-optimal
         * wildcarding.  This will be fixed later by revalidation (always
         * scheduled after flow table changes). */
        for (i = 0; i < subtable->n_indices; i++) {
            cmap_insert(&subtable->indices[i], &new->index_nodes[i], ihash[i]);
        }
        n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash);
    } else {   /* Equal rules exist in the classifier already. */
        struct cls_match *iter;

        /* Scan the list for the insertion point that will keep the list in
         * order of decreasing priority. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, head) {
            if (rule->priority >= iter->priority) {
                break;
            }
        }

        /* 'iter' now at the insertion point or NULL if at end. */
        if (iter) {
            struct cls_rule *old;

            if (rule->priority == iter->priority) {
                /* Exact priority match: 'new' displaces 'iter'. */
                rculist_replace(&new->list, &iter->list);
                old = CONST_CAST(struct cls_rule *, iter->cls_rule);
            } else {
                rculist_insert(&iter->list, &new->list);
                old = NULL;
            }

            /* Replace the existing head in data structures, if rule is the new
             * head. */
            if (iter == head) {
                subtable_replace_head_rule(cls, subtable, head, new, hash,
                                           ihash);
            }

            if (old) {
                ovsrcu_postpone(free, iter);
                old->cls_match = NULL;

                /* No change in subtable's max priority or max count. */

                /* Return displaced rule.  Caller is responsible for keeping it
                 * around until all threads quiesce. */
                ovs_mutex_unlock(&cls->mutex);
                return old;
            }
        } else {
            /* Lowest priority of its kind: append at the tail. */
            rculist_push_back(&head->list, &new->list);
        }
    }

    /* Rule was added, not replaced.  Update 'subtable's 'max_priority' and
     * 'max_count', if necessary.
     *
     * The rule was already inserted, but concurrent readers may not see the
     * rule yet as the subtables vector is not updated yet.  This will have to
     * be fixed by revalidation later. */
    if (n_rules == 1) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_insert(&cls->subtables, subtable, rule->priority);
    } else if (rule->priority == subtable->max_priority) {
        ++subtable->max_count;
    } else if (rule->priority > subtable->max_priority) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_change_priority(&cls->subtables, subtable, rule->priority);
    }

    /* Nothing was replaced. */
    cls->n_rules++;
    ovs_mutex_unlock(&cls->mutex);
    return NULL;
}
643 | ||
08944c1d BP |
/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * 'cls' must not contain an identical rule (including wildcards, values of
 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 * such a rule.  (Asserts that classifier_replace() displaced nothing.) */
void
classifier_insert(struct classifier *cls, struct cls_rule *rule)
{
    const struct cls_rule *displaced_rule = classifier_replace(cls, rule);
    ovs_assert(!displaced_rule);
}
656 | ||
48d28ac1 BP |
/* Removes 'rule' from 'cls'.  It is the caller's responsibility to destroy
 * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
 * resides, etc., as necessary.
 *
 * Does nothing if 'rule' has been already removed, or was never inserted.
 *
 * Returns the removed rule, or NULL, if it was already removed.
 */
const struct cls_rule *
classifier_remove(struct classifier *cls, const struct cls_rule *rule)
    OVS_EXCLUDED(cls->mutex)
{
    struct cls_partition *partition;
    struct cls_match *cls_match;
    struct cls_subtable *subtable;
    struct cls_match *prev;
    struct cls_match *next;
    int i;
    uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
    uint8_t prev_be32ofs = 0;
    size_t n_rules;

    ovs_mutex_lock(&cls->mutex);
    cls_match = rule->cls_match;
    if (!cls_match) {
        rule = NULL;
        goto unlock;            /* Already removed. */
    }
    /* Mark as removed. */
    CONST_CAST(struct cls_rule *, rule)->cls_match = NULL;

    /* Capture neighbors before unlinking; the list is circular, so these
     * may be the head or 'cls_match' itself for a singleton list. */
    INIT_CONTAINER(prev, rculist_back_protected(&cls_match->list), list);
    INIT_CONTAINER(next, rculist_next(&cls_match->list), list);

    /* Remove from the list of equal rules. */
    rculist_remove(&cls_match->list);

    /* Check if this is NOT a head rule. */
    if (prev->priority > rule->priority) {
        /* Not the highest priority rule, no need to check subtable's
         * 'max_priority'. */
        goto free;
    }

    subtable = find_subtable(cls, &rule->match.mask);
    ovs_assert(subtable);

    /* Recompute the segment hashes needed to find the rule's cmap nodes. */
    for (i = 0; i < subtable->n_indices; i++) {
        ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs,
                                        subtable->index_ofs[i], &basis);
        prev_be32ofs = subtable->index_ofs[i];
    }
    hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis);

    /* Head rule.  Check if 'next' is an identical, lower-priority rule that
     * will replace 'rule' in the data structures. */
    if (next->priority < rule->priority) {
        subtable_replace_head_rule(cls, subtable, cls_match, next, hash,
                                   ihash);
        goto check_priority;
    }

    /* 'rule' is last of the kind in the classifier, must remove from all the
     * data structures. */

    if (subtable->ports_mask_len) {
        ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

        trie_remove_prefix(&subtable->ports_trie,
                           &masked_ports, subtable->ports_mask_len);
    }
    for (i = 0; i < cls->n_tries; i++) {
        if (subtable->trie_plen[i]) {
            trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
        }
    }

    /* Remove rule node from indices. */
    for (i = 0; i < subtable->n_indices; i++) {
        cmap_remove(&subtable->indices[i], &cls_match->index_nodes[i],
                    ihash[i]);
    }
    n_rules = cmap_remove(&subtable->rules, &cls_match->cmap_node, hash);

    partition = cls_match->partition;
    if (partition) {
        tag_tracker_subtract(&partition->tracker, &partition->tags,
                             subtable->tag);
        if (!partition->tags) {
            /* Last subtable using this partition: retire it via RCU. */
            cmap_remove(&cls->partitions, &partition->cmap_node,
                        hash_metadata(partition->metadata));
            ovsrcu_postpone(free, partition);
        }
    }

    if (n_rules == 0) {
        destroy_subtable(cls, subtable);
    } else {
check_priority:
        if (subtable->max_priority == rule->priority
            && --subtable->max_count == 0) {
            /* Find the new 'max_priority' and 'max_count'. */
            struct cls_match *head;
            int max_priority = INT_MIN;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                if (head->priority > max_priority) {
                    max_priority = head->priority;
                    subtable->max_count = 1;
                } else if (head->priority == max_priority) {
                    ++subtable->max_count;
                }
            }
            subtable->max_priority = max_priority;
            pvector_change_priority(&cls->subtables, subtable, max_priority);
        }
    }
free:
    /* Free via RCU: readers may still hold references to 'cls_match'. */
    ovsrcu_postpone(free, cls_match);
    cls->n_rules--;
unlock:
    ovs_mutex_unlock(&cls->mutex);

    return rule;
}
782 | ||
13751fd8 | 783 | /* Prefix tree context. Valid when 'lookup_done' is true. Can skip all |
c0bfb650 JR |
784 | * subtables which have a prefix match on the trie field, but whose prefix |
785 | * length is not indicated in 'match_plens'. For example, a subtable that | |
786 | * has a 8-bit trie field prefix match can be skipped if | |
787 | * !be_get_bit_at(&match_plens, 8 - 1). If skipped, 'maskbits' prefix bits | |
788 | * must be unwildcarded to make datapath flow only match packets it should. */ | |
13751fd8 JR |
789 | struct trie_ctx { |
790 | const struct cls_trie *trie; | |
791 | bool lookup_done; /* Status of the lookup. */ | |
792 | uint8_t be32ofs; /* U32 offset of the field in question. */ | |
13751fd8 | 793 | unsigned int maskbits; /* Prefix length needed to avoid false matches. */ |
c0bfb650 JR |
794 | union mf_value match_plens; /* Bitmask of prefix lengths with possible |
795 | * matches. */ | |
13751fd8 JR |
796 | }; |
797 | ||
798 | static void | |
799 | trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) | |
800 | { | |
801 | ctx->trie = trie; | |
802 | ctx->be32ofs = trie->field->flow_be32ofs; | |
803 | ctx->lookup_done = false; | |
804 | } | |
805 | ||
48c3de13 BP |
806 | /* Finds and returns the highest-priority rule in 'cls' that matches 'flow'. |
807 | * Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules | |
74f74083 EJ |
808 | * of equal priority match 'flow', returns one arbitrarily. |
809 | * | |
810 | * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the | |
811 | * set of bits that were significant in the lookup. At some point | |
812 | * earlier, 'wc' should have been initialized (e.g., by | |
813 | * flow_wildcards_init_catchall()). */ | |
dfea28b3 | 814 | const struct cls_rule * |
e48eccd1 | 815 | classifier_lookup(const struct classifier *cls, const struct flow *flow, |
74f74083 | 816 | struct flow_wildcards *wc) |
48c3de13 | 817 | { |
c906cedf | 818 | const struct cls_partition *partition; |
c906cedf | 819 | tag_type tags; |
eb391b76 | 820 | int best_priority = INT_MIN; |
fe7cfa5c JR |
821 | const struct cls_match *best; |
822 | struct trie_ctx trie_ctx[CLS_MAX_TRIES]; | |
823 | struct cls_subtable *subtable; | |
c906cedf | 824 | |
f358a2cb JR |
825 | /* Synchronize for cls->n_tries and subtable->trie_plen. They can change |
826 | * when table configuration changes, which happens typically only on | |
827 | * startup. */ | |
828 | atomic_thread_fence(memory_order_acquire); | |
829 | ||
03868246 JR |
830 | /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them, |
831 | * then 'flow' cannot possibly match in 'subtable': | |
c906cedf BP |
832 | * |
833 | * - If flow->metadata maps to a given 'partition', then we can use | |
834 | * 'tags' for 'partition->tags'. | |
835 | * | |
836 | * - If flow->metadata has no partition, then no rule in 'cls' has an | |
837 | * exact-match for flow->metadata. That means that we don't need to | |
03868246 | 838 | * search any subtable that includes flow->metadata in its mask. |
c906cedf | 839 | * |
03868246 | 840 | * In either case, we always need to search any cls_subtables that do not |
c906cedf | 841 | * include flow->metadata in its mask. One way to do that would be to |
03868246 JR |
842 | * check the "cls_subtable"s explicitly for that, but that would require an |
843 | * extra branch per subtable. Instead, we mark such a cls_subtable's | |
844 | * 'tags' as TAG_ALL and make sure that 'tags' is never empty. This means | |
845 | * that 'tags' always intersects such a cls_subtable's 'tags', so we don't | |
846 | * need a special case. | |
c906cedf | 847 | */ |
f2c21402 | 848 | partition = (cmap_is_empty(&cls->partitions) |
c906cedf BP |
849 | ? NULL |
850 | : find_partition(cls, flow->metadata, | |
851 | hash_metadata(flow->metadata))); | |
852 | tags = partition ? partition->tags : TAG_ARBITRARY; | |
48c3de13 | 853 | |
ff8241db | 854 | /* Initialize trie contexts for find_match_wc(). */ |
fe7cfa5c | 855 | for (int i = 0; i < cls->n_tries; i++) { |
13751fd8 JR |
856 | trie_ctx_init(&trie_ctx[i], &cls->tries[i]); |
857 | } | |
ec988646 | 858 | |
b5d97350 | 859 | best = NULL; |
fe7cfa5c JR |
860 | PVECTOR_FOR_EACH_PRIORITY(subtable, best_priority, 2, |
861 | sizeof(struct cls_subtable), &cls->subtables) { | |
dfea28b3 | 862 | const struct cls_match *rule; |
c906cedf | 863 | |
fe7cfa5c | 864 | if (!tag_intersects(tags, subtable->tag)) { |
c906cedf BP |
865 | continue; |
866 | } | |
74f74083 | 867 | |
fe7cfa5c | 868 | rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries, wc); |
eb391b76 BP |
869 | if (rule && rule->priority > best_priority) { |
870 | best_priority = rule->priority; | |
1f3c5efc | 871 | best = rule; |
b5d97350 | 872 | } |
48c3de13 | 873 | } |
13751fd8 | 874 | |
627fb667 | 875 | return best ? best->cls_rule : NULL; |
48c3de13 BP |
876 | } |
877 | ||
b5d97350 BP |
878 | /* Finds and returns a rule in 'cls' with exactly the same priority and |
879 | * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
c084ce1d | 880 | * contain an exact match. */ |
dfea28b3 | 881 | const struct cls_rule * |
e48eccd1 | 882 | classifier_find_rule_exactly(const struct classifier *cls, |
76ecc721 | 883 | const struct cls_rule *target) |
064af421 | 884 | { |
dfea28b3 JR |
885 | const struct cls_match *head, *rule; |
886 | const struct cls_subtable *subtable; | |
064af421 | 887 | |
03868246 | 888 | subtable = find_subtable(cls, &target->match.mask); |
0722ee5c | 889 | if (!subtable) { |
98abae4a | 890 | return NULL; |
4d935a6b JR |
891 | } |
892 | ||
03868246 | 893 | head = find_equal(subtable, &target->match.flow, |
5cb7a798 BP |
894 | miniflow_hash_in_minimask(&target->match.flow, |
895 | &target->match.mask, 0)); | |
98abae4a JR |
896 | if (!head) { |
897 | return NULL; | |
898 | } | |
b5d97350 BP |
899 | FOR_EACH_RULE_IN_LIST (rule, head) { |
900 | if (target->priority >= rule->priority) { | |
627fb667 | 901 | return target->priority == rule->priority ? rule->cls_rule : NULL; |
064af421 BP |
902 | } |
903 | } | |
904 | return NULL; | |
905 | } | |
906 | ||
81a76618 BP |
907 | /* Finds and returns a rule in 'cls' with priority 'priority' and exactly the |
908 | * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't | |
909 | * contain an exact match. */ | |
dfea28b3 | 910 | const struct cls_rule * |
81a76618 | 911 | classifier_find_match_exactly(const struct classifier *cls, |
eb391b76 | 912 | const struct match *target, int priority) |
81a76618 | 913 | { |
dfea28b3 | 914 | const struct cls_rule *retval; |
81a76618 BP |
915 | struct cls_rule cr; |
916 | ||
917 | cls_rule_init(&cr, target, priority); | |
918 | retval = classifier_find_rule_exactly(cls, &cr); | |
48d28ac1 | 919 | cls_rule_destroy(&cr); |
81a76618 BP |
920 | |
921 | return retval; | |
922 | } | |
923 | ||
faa50f40 BP |
924 | /* Checks if 'target' would overlap any other rule in 'cls'. Two rules are |
925 | * considered to overlap if both rules have the same priority and a packet | |
926 | * could match both. */ | |
49bdc010 | 927 | bool |
e48eccd1 | 928 | classifier_rule_overlaps(const struct classifier *cls, |
faa50f40 | 929 | const struct cls_rule *target) |
e48eccd1 | 930 | OVS_EXCLUDED(cls->mutex) |
49bdc010 | 931 | { |
03868246 | 932 | struct cls_subtable *subtable; |
49bdc010 | 933 | |
e65413ab | 934 | ovs_mutex_lock(&cls->mutex); |
03868246 | 935 | /* Iterate subtables in the descending max priority order. */ |
eb391b76 | 936 | PVECTOR_FOR_EACH_PRIORITY (subtable, target->priority - 1, 2, |
fe7cfa5c | 937 | sizeof(struct cls_subtable), &cls->subtables) { |
5cb7a798 BP |
938 | uint32_t storage[FLOW_U32S]; |
939 | struct minimask mask; | |
627fb667 | 940 | struct cls_match *head; |
49bdc010 | 941 | |
03868246 | 942 | minimask_combine(&mask, &target->match.mask, &subtable->mask, storage); |
f2c21402 | 943 | CMAP_FOR_EACH (head, cmap_node, &subtable->rules) { |
627fb667 | 944 | struct cls_match *rule; |
49bdc010 | 945 | |
dfea28b3 | 946 | FOR_EACH_RULE_IN_LIST_PROTECTED (rule, head) { |
4d935a6b JR |
947 | if (rule->priority < target->priority) { |
948 | break; /* Rules in descending priority order. */ | |
949 | } | |
faa50f40 | 950 | if (rule->priority == target->priority |
5cb7a798 | 951 | && miniflow_equal_in_minimask(&target->match.flow, |
3016f3e4 | 952 | &rule->flow, &mask)) { |
e65413ab | 953 | ovs_mutex_unlock(&cls->mutex); |
49bdc010 JP |
954 | return true; |
955 | } | |
956 | } | |
957 | } | |
958 | } | |
959 | ||
e65413ab | 960 | ovs_mutex_unlock(&cls->mutex); |
49bdc010 JP |
961 | return false; |
962 | } | |
6ceeaa92 BP |
963 | |
964 | /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more | |
965 | * specific than 'criteria'. That is, 'rule' matches 'criteria' and this | |
966 | * function returns true if, for every field: | |
967 | * | |
968 | * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the | |
969 | * field, or | |
970 | * | |
971 | * - 'criteria' wildcards the field, | |
972 | * | |
973 | * Conversely, 'rule' does not match 'criteria' and this function returns false | |
974 | * if, for at least one field: | |
975 | * | |
976 | * - 'criteria' and 'rule' specify different values for the field, or | |
977 | * | |
978 | * - 'criteria' specifies a value for the field but 'rule' wildcards it. | |
979 | * | |
980 | * Equivalently, the truth table for whether a field matches is: | |
981 | * | |
982 | * rule | |
983 | * | |
984 | * c wildcard exact | |
985 | * r +---------+---------+ | |
986 | * i wild | yes | yes | | |
987 | * t card | | | | |
988 | * e +---------+---------+ | |
989 | * r exact | no |if values| | |
990 | * i | |are equal| | |
991 | * a +---------+---------+ | |
992 | * | |
993 | * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD | |
994 | * commands and by OpenFlow 1.0 aggregate and flow stats. | |
995 | * | |
81a76618 | 996 | * Ignores rule->priority. */ |
6ceeaa92 BP |
997 | bool |
998 | cls_rule_is_loose_match(const struct cls_rule *rule, | |
5cb7a798 | 999 | const struct minimatch *criteria) |
6ceeaa92 | 1000 | { |
5cb7a798 BP |
1001 | return (!minimask_has_extra(&rule->match.mask, &criteria->mask) |
1002 | && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow, | |
1003 | &criteria->mask)); | |
6ceeaa92 | 1004 | } |
b5d97350 | 1005 | \f |
5ecc9d81 BP |
1006 | /* Iteration. */ |
1007 | ||
1008 | static bool | |
627fb667 | 1009 | rule_matches(const struct cls_match *rule, const struct cls_rule *target) |
5ecc9d81 BP |
1010 | { |
1011 | return (!target | |
3016f3e4 | 1012 | || miniflow_equal_in_minimask(&rule->flow, |
5cb7a798 BP |
1013 | &target->match.flow, |
1014 | &target->match.mask)); | |
5ecc9d81 BP |
1015 | } |
1016 | ||
dfea28b3 | 1017 | static const struct cls_match * |
03868246 | 1018 | search_subtable(const struct cls_subtable *subtable, |
f2c21402 | 1019 | struct cls_cursor *cursor) |
5ecc9d81 | 1020 | { |
f2c21402 JR |
1021 | if (!cursor->target |
1022 | || !minimask_has_extra(&subtable->mask, &cursor->target->match.mask)) { | |
dfea28b3 | 1023 | const struct cls_match *rule; |
5ecc9d81 | 1024 | |
f2c21402 JR |
1025 | CMAP_CURSOR_FOR_EACH (rule, cmap_node, &cursor->rules, |
1026 | &subtable->rules) { | |
1027 | if (rule_matches(rule, cursor->target)) { | |
5ecc9d81 BP |
1028 | return rule; |
1029 | } | |
1030 | } | |
1031 | } | |
1032 | return NULL; | |
1033 | } | |
1034 | ||
5f0476ce JR |
1035 | /* Initializes 'cursor' for iterating through rules in 'cls', and returns the |
1036 | * first matching cls_rule via '*pnode', or NULL if there are no matches. | |
5ecc9d81 | 1037 | * |
6ceeaa92 | 1038 | * - If 'target' is null, the cursor will visit every rule in 'cls'. |
5ecc9d81 | 1039 | * |
6ceeaa92 BP |
1040 | * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls' |
1041 | * such that cls_rule_is_loose_match(rule, target) returns true. | |
5ecc9d81 | 1042 | * |
6ceeaa92 | 1043 | * Ignores target->priority. */ |
78c8df12 BP |
1044 | struct cls_cursor cls_cursor_start(const struct classifier *cls, |
1045 | const struct cls_rule *target, | |
1046 | bool safe) | |
5f0476ce | 1047 | OVS_NO_THREAD_SAFETY_ANALYSIS |
5ecc9d81 | 1048 | { |
5f0476ce | 1049 | struct cls_cursor cursor; |
03868246 | 1050 | struct cls_subtable *subtable; |
5ecc9d81 | 1051 | |
5f0476ce | 1052 | cursor.safe = safe; |
e48eccd1 | 1053 | cursor.cls = cls; |
5f0476ce | 1054 | cursor.target = target && !cls_rule_is_catchall(target) ? target : NULL; |
78c8df12 | 1055 | cursor.rule = NULL; |
5f0476ce JR |
1056 | |
1057 | /* Find first rule. */ | |
e65413ab | 1058 | ovs_mutex_lock(&cursor.cls->mutex); |
5f0476ce | 1059 | CMAP_CURSOR_FOR_EACH (subtable, cmap_node, &cursor.subtables, |
e65413ab | 1060 | &cursor.cls->subtables_map) { |
dfea28b3 | 1061 | const struct cls_match *rule = search_subtable(subtable, &cursor); |
f2c21402 | 1062 | |
5ecc9d81 | 1063 | if (rule) { |
5f0476ce | 1064 | cursor.subtable = subtable; |
78c8df12 | 1065 | cursor.rule = rule->cls_rule; |
5f0476ce | 1066 | break; |
5ecc9d81 BP |
1067 | } |
1068 | } | |
1069 | ||
5f0476ce | 1070 | /* Leave locked if requested and have a rule. */ |
78c8df12 | 1071 | if (safe || !cursor.rule) { |
e65413ab | 1072 | ovs_mutex_unlock(&cursor.cls->mutex); |
5f0476ce JR |
1073 | } |
1074 | return cursor; | |
1075 | } | |
1076 | ||
dfea28b3 | 1077 | static const struct cls_rule * |
1caa1561 | 1078 | cls_cursor_next(struct cls_cursor *cursor) |
5f0476ce | 1079 | OVS_NO_THREAD_SAFETY_ANALYSIS |
5ecc9d81 | 1080 | { |
dfea28b3 | 1081 | const struct cls_match *rule = cursor->rule->cls_match; |
03868246 | 1082 | const struct cls_subtable *subtable; |
dfea28b3 | 1083 | const struct cls_match *next; |
5ecc9d81 | 1084 | |
955f579d BP |
1085 | next = next_rule_in_list__(rule); |
1086 | if (next->priority < rule->priority) { | |
1caa1561 | 1087 | return next->cls_rule; |
5ecc9d81 BP |
1088 | } |
1089 | ||
955f579d | 1090 | /* 'next' is the head of the list, that is, the rule that is included in |
f2c21402 | 1091 | * the subtable's map. (This is important when the classifier contains |
03868246 | 1092 | * rules that differ only in priority.) */ |
955f579d | 1093 | rule = next; |
f2c21402 | 1094 | CMAP_CURSOR_FOR_EACH_CONTINUE (rule, cmap_node, &cursor->rules) { |
5ecc9d81 | 1095 | if (rule_matches(rule, cursor->target)) { |
1caa1561 | 1096 | return rule->cls_rule; |
5ecc9d81 BP |
1097 | } |
1098 | } | |
1099 | ||
03868246 | 1100 | subtable = cursor->subtable; |
f2c21402 JR |
1101 | CMAP_CURSOR_FOR_EACH_CONTINUE (subtable, cmap_node, &cursor->subtables) { |
1102 | rule = search_subtable(subtable, cursor); | |
5ecc9d81 | 1103 | if (rule) { |
03868246 | 1104 | cursor->subtable = subtable; |
1caa1561 | 1105 | return rule->cls_rule; |
5ecc9d81 BP |
1106 | } |
1107 | } | |
1108 | ||
1caa1561 BP |
1109 | return NULL; |
1110 | } | |
1111 | ||
1112 | /* Sets 'cursor->rule' to the next matching cls_rule in 'cursor''s iteration, | |
1113 | * or to null if all matching rules have been visited. */ | |
1114 | void | |
1115 | cls_cursor_advance(struct cls_cursor *cursor) | |
1116 | OVS_NO_THREAD_SAFETY_ANALYSIS | |
1117 | { | |
1118 | if (cursor->safe) { | |
1119 | ovs_mutex_lock(&cursor->cls->mutex); | |
1120 | } | |
1121 | cursor->rule = cls_cursor_next(cursor); | |
1122 | if (cursor->safe || !cursor->rule) { | |
1123 | ovs_mutex_unlock(&cursor->cls->mutex); | |
1124 | } | |
5ecc9d81 BP |
1125 | } |
1126 | \f | |
03868246 | 1127 | static struct cls_subtable * |
e48eccd1 | 1128 | find_subtable(const struct classifier *cls, const struct minimask *mask) |
b5d97350 | 1129 | { |
03868246 | 1130 | struct cls_subtable *subtable; |
064af421 | 1131 | |
f2c21402 | 1132 | CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0), |
5a87054c | 1133 | &cls->subtables_map) { |
03868246 JR |
1134 | if (minimask_equal(mask, &subtable->mask)) { |
1135 | return subtable; | |
064af421 BP |
1136 | } |
1137 | } | |
b5d97350 | 1138 | return NULL; |
064af421 | 1139 | } |
064af421 | 1140 | |
e65413ab | 1141 | /* The new subtable will be visible to the readers only after this. */ |
03868246 | 1142 | static struct cls_subtable * |
e48eccd1 | 1143 | insert_subtable(struct classifier *cls, const struct minimask *mask) |
e65413ab | 1144 | OVS_REQUIRES(cls->mutex) |
b5d97350 | 1145 | { |
c906cedf | 1146 | uint32_t hash = minimask_hash(mask, 0); |
03868246 | 1147 | struct cls_subtable *subtable; |
476f36e8 JR |
1148 | int i, index = 0; |
1149 | struct flow_wildcards old, new; | |
1150 | uint8_t prev; | |
3016f3e4 | 1151 | int count = count_1bits(mask->masks.map); |
064af421 | 1152 | |
3016f3e4 JR |
1153 | subtable = xzalloc(sizeof *subtable - sizeof mask->masks.inline_values |
1154 | + MINIFLOW_VALUES_SIZE(count)); | |
f2c21402 | 1155 | cmap_init(&subtable->rules); |
f80028fe JR |
1156 | miniflow_clone_inline(CONST_CAST(struct miniflow *, &subtable->mask.masks), |
1157 | &mask->masks, count); | |
476f36e8 JR |
1158 | |
1159 | /* Init indices for segmented lookup, if any. */ | |
1160 | flow_wildcards_init_catchall(&new); | |
1161 | old = new; | |
1162 | prev = 0; | |
1163 | for (i = 0; i < cls->n_flow_segments; i++) { | |
1164 | flow_wildcards_fold_minimask_range(&new, mask, prev, | |
1165 | cls->flow_segments[i]); | |
1166 | /* Add an index if it adds mask bits. */ | |
1167 | if (!flow_wildcards_equal(&new, &old)) { | |
f2c21402 | 1168 | cmap_init(&subtable->indices[index]); |
f80028fe JR |
1169 | *CONST_CAST(uint8_t *, &subtable->index_ofs[index]) |
1170 | = cls->flow_segments[i]; | |
476f36e8 JR |
1171 | index++; |
1172 | old = new; | |
1173 | } | |
1174 | prev = cls->flow_segments[i]; | |
1175 | } | |
1176 | /* Check if the rest of the subtable's mask adds any bits, | |
1177 | * and remove the last index if it doesn't. */ | |
1178 | if (index > 0) { | |
1179 | flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S); | |
1180 | if (flow_wildcards_equal(&new, &old)) { | |
1181 | --index; | |
f80028fe | 1182 | *CONST_CAST(uint8_t *, &subtable->index_ofs[index]) = 0; |
f2c21402 | 1183 | cmap_destroy(&subtable->indices[index]); |
476f36e8 JR |
1184 | } |
1185 | } | |
f80028fe | 1186 | *CONST_CAST(uint8_t *, &subtable->n_indices) = index; |
476f36e8 | 1187 | |
f80028fe JR |
1188 | *CONST_CAST(tag_type *, &subtable->tag) = |
1189 | (minimask_get_metadata_mask(mask) == OVS_BE64_MAX | |
1190 | ? tag_create_deterministic(hash) | |
1191 | : TAG_ALL); | |
064af421 | 1192 | |
13751fd8 JR |
1193 | for (i = 0; i < cls->n_tries; i++) { |
1194 | subtable->trie_plen[i] = minimask_get_prefix_len(mask, | |
1195 | cls->tries[i].field); | |
1196 | } | |
1197 | ||
69d6040e | 1198 | /* Ports trie. */ |
f358a2cb | 1199 | ovsrcu_set_hidden(&subtable->ports_trie, NULL); |
f80028fe | 1200 | *CONST_CAST(int *, &subtable->ports_mask_len) |
69d6040e JR |
1201 | = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src))); |
1202 | ||
f2c21402 | 1203 | cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash); |
ec988646 | 1204 | |
03868246 | 1205 | return subtable; |
064af421 BP |
1206 | } |
1207 | ||
01c0f83a | 1208 | /* RCU readers may still access the subtable before it is actually freed. */ |
b5d97350 | 1209 | static void |
e48eccd1 | 1210 | destroy_subtable(struct classifier *cls, struct cls_subtable *subtable) |
e65413ab | 1211 | OVS_REQUIRES(cls->mutex) |
b5d97350 | 1212 | { |
476f36e8 JR |
1213 | int i; |
1214 | ||
fe7cfa5c | 1215 | pvector_remove(&cls->subtables, subtable); |
01c0f83a JR |
1216 | cmap_remove(&cls->subtables_map, &subtable->cmap_node, |
1217 | minimask_hash(&subtable->mask, 0)); | |
1218 | ||
1219 | ovs_assert(ovsrcu_get_protected(struct trie_node *, &subtable->ports_trie) | |
1220 | == NULL); | |
1221 | ovs_assert(cmap_is_empty(&subtable->rules)); | |
69d6040e | 1222 | |
476f36e8 | 1223 | for (i = 0; i < subtable->n_indices; i++) { |
f2c21402 | 1224 | cmap_destroy(&subtable->indices[i]); |
476f36e8 | 1225 | } |
f2c21402 | 1226 | cmap_destroy(&subtable->rules); |
fe7cfa5c | 1227 | ovsrcu_postpone(free, subtable); |
4aacd02d BP |
1228 | } |
1229 | ||
13751fd8 JR |
1230 | struct range { |
1231 | uint8_t start; | |
1232 | uint8_t end; | |
1233 | }; | |
1234 | ||
c0bfb650 JR |
1235 | static unsigned int be_get_bit_at(const ovs_be32 value[], unsigned int ofs); |
1236 | ||
13751fd8 JR |
1237 | /* Return 'true' if can skip rest of the subtable based on the prefix trie |
1238 | * lookup results. */ | |
1239 | static inline bool | |
1240 | check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, | |
1241 | const unsigned int field_plen[CLS_MAX_TRIES], | |
1242 | const struct range ofs, const struct flow *flow, | |
1243 | struct flow_wildcards *wc) | |
1244 | { | |
1245 | int j; | |
1246 | ||
1247 | /* Check if we could avoid fully unwildcarding the next level of | |
1248 | * fields using the prefix tries. The trie checks are done only as | |
1249 | * needed to avoid folding in additional bits to the wildcards mask. */ | |
1250 | for (j = 0; j < n_tries; j++) { | |
1251 | /* Is the trie field relevant for this subtable? */ | |
1252 | if (field_plen[j]) { | |
1253 | struct trie_ctx *ctx = &trie_ctx[j]; | |
1254 | uint8_t be32ofs = ctx->be32ofs; | |
1255 | ||
1256 | /* Is the trie field within the current range of fields? */ | |
1257 | if (be32ofs >= ofs.start && be32ofs < ofs.end) { | |
1258 | /* On-demand trie lookup. */ | |
1259 | if (!ctx->lookup_done) { | |
c0bfb650 JR |
1260 | memset(&ctx->match_plens, 0, sizeof ctx->match_plens); |
1261 | ctx->maskbits = trie_lookup(ctx->trie, flow, | |
1262 | &ctx->match_plens); | |
13751fd8 JR |
1263 | ctx->lookup_done = true; |
1264 | } | |
1265 | /* Possible to skip the rest of the subtable if subtable's | |
c0bfb650 JR |
1266 | * prefix on the field is not included in the lookup result. */ |
1267 | if (!be_get_bit_at(&ctx->match_plens.be32, field_plen[j] - 1)) { | |
1817dcea JR |
1268 | /* We want the trie lookup to never result in unwildcarding |
1269 | * any bits that would not be unwildcarded otherwise. | |
1270 | * Since the trie is shared by the whole classifier, it is | |
1271 | * possible that the 'maskbits' contain bits that are | |
1272 | * irrelevant for the partition relevant for the current | |
1273 | * packet. Hence the checks below. */ | |
13751fd8 | 1274 | |
13751fd8 | 1275 | /* Check that the trie result will not unwildcard more bits |
1817dcea | 1276 | * than this subtable would otherwise. */ |
13751fd8 JR |
1277 | if (ctx->maskbits <= field_plen[j]) { |
1278 | /* Unwildcard the bits and skip the rest. */ | |
1279 | mask_set_prefix_bits(wc, be32ofs, ctx->maskbits); | |
1280 | /* Note: Prerequisite already unwildcarded, as the only | |
1281 | * prerequisite of the supported trie lookup fields is | |
1817dcea JR |
1282 | * the ethertype, which is always unwildcarded. */ |
1283 | return true; | |
1284 | } | |
1285 | /* Can skip if the field is already unwildcarded. */ | |
1286 | if (mask_prefix_bits_set(wc, be32ofs, ctx->maskbits)) { | |
13751fd8 JR |
1287 | return true; |
1288 | } | |
1289 | } | |
1290 | } | |
1291 | } | |
1292 | } | |
1293 | return false; | |
1294 | } | |
1295 | ||
3016f3e4 JR |
1296 | /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit |
1297 | * for which 'flow', for which 'mask' has a bit set, specifies a particular | |
1298 | * value has the correct value in 'target'. | |
1299 | * | |
1300 | * This function is equivalent to miniflow_equal_flow_in_minimask(flow, | |
a64759f0 JR |
1301 | * target, mask) but this is faster because of the invariant that |
1302 | * flow->map and mask->masks.map are the same, and that this version | |
1303 | * takes the 'wc'. */ | |
3016f3e4 JR |
1304 | static inline bool |
1305 | miniflow_and_mask_matches_flow(const struct miniflow *flow, | |
1306 | const struct minimask *mask, | |
e9319757 | 1307 | const struct flow *target) |
3016f3e4 JR |
1308 | { |
1309 | const uint32_t *flowp = miniflow_get_u32_values(flow); | |
1310 | const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); | |
a64759f0 | 1311 | uint32_t idx; |
3016f3e4 | 1312 | |
a64759f0 JR |
1313 | MAP_FOR_EACH_INDEX(idx, mask->masks.map) { |
1314 | uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & *maskp++; | |
1315 | ||
1316 | if (diff) { | |
3016f3e4 JR |
1317 | return false; |
1318 | } | |
1319 | } | |
1320 | ||
1321 | return true; | |
1322 | } | |
1323 | ||
dfea28b3 | 1324 | static inline const struct cls_match * |
476f36e8 JR |
1325 | find_match(const struct cls_subtable *subtable, const struct flow *flow, |
1326 | uint32_t hash) | |
b5d97350 | 1327 | { |
dfea28b3 | 1328 | const struct cls_match *rule; |
b5d97350 | 1329 | |
f2c21402 | 1330 | CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) { |
3016f3e4 | 1331 | if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask, |
e9319757 | 1332 | flow)) { |
b5d97350 | 1333 | return rule; |
064af421 BP |
1334 | } |
1335 | } | |
c23740be | 1336 | |
064af421 BP |
1337 | return NULL; |
1338 | } | |
1339 | ||
e9319757 JR |
1340 | /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit |
1341 | * for which 'flow', for which 'mask' has a bit set, specifies a particular | |
1342 | * value has the correct value in 'target'. | |
1343 | * | |
1344 | * This function is equivalent to miniflow_and_mask_matches_flow() but this | |
1345 | * version fills in the mask bits in 'wc'. */ | |
1346 | static inline bool | |
1347 | miniflow_and_mask_matches_flow_wc(const struct miniflow *flow, | |
1348 | const struct minimask *mask, | |
1349 | const struct flow *target, | |
1350 | struct flow_wildcards *wc) | |
1351 | { | |
1352 | const uint32_t *flowp = miniflow_get_u32_values(flow); | |
1353 | const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); | |
1354 | uint32_t idx; | |
1355 | ||
1356 | MAP_FOR_EACH_INDEX(idx, mask->masks.map) { | |
1357 | uint32_t mask = *maskp++; | |
1358 | uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & mask; | |
1359 | ||
1360 | if (diff) { | |
1361 | /* Only unwildcard if none of the differing bits is already | |
1362 | * exact-matched. */ | |
1363 | if (!(flow_u32_value(&wc->masks, idx) & diff)) { | |
1364 | /* Keep one bit of the difference. */ | |
1365 | *flow_u32_lvalue(&wc->masks, idx) |= rightmost_1bit(diff); | |
1366 | } | |
1367 | return false; | |
1368 | } | |
1369 | /* Fill in the bits that were looked at. */ | |
1370 | *flow_u32_lvalue(&wc->masks, idx) |= mask; | |
1371 | } | |
1372 | ||
1373 | return true; | |
1374 | } | |
1375 | ||
386cb9f7 JR |
1376 | /* Unwildcard the fields looked up so far, if any. */ |
1377 | static void | |
1378 | fill_range_wc(const struct cls_subtable *subtable, struct flow_wildcards *wc, | |
1379 | uint8_t to) | |
1380 | { | |
1381 | if (to) { | |
1382 | flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, to); | |
1383 | } | |
1384 | } | |
1385 | ||
dfea28b3 | 1386 | static const struct cls_match * |
476f36e8 | 1387 | find_match_wc(const struct cls_subtable *subtable, const struct flow *flow, |
13751fd8 JR |
1388 | struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, |
1389 | struct flow_wildcards *wc) | |
476f36e8 JR |
1390 | { |
1391 | uint32_t basis = 0, hash; | |
dfea28b3 | 1392 | const struct cls_match *rule = NULL; |
476f36e8 | 1393 | int i; |
13751fd8 | 1394 | struct range ofs; |
476f36e8 | 1395 | |
ec988646 | 1396 | if (OVS_UNLIKELY(!wc)) { |
476f36e8 JR |
1397 | return find_match(subtable, flow, |
1398 | flow_hash_in_minimask(flow, &subtable->mask, 0)); | |
1399 | } | |
1400 | ||
13751fd8 | 1401 | ofs.start = 0; |
476f36e8 JR |
1402 | /* Try to finish early by checking fields in segments. */ |
1403 | for (i = 0; i < subtable->n_indices; i++) { | |
55847abe | 1404 | const struct cmap_node *inode; |
f2c21402 | 1405 | |
13751fd8 | 1406 | ofs.end = subtable->index_ofs[i]; |
476f36e8 | 1407 | |
13751fd8 JR |
1408 | if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, |
1409 | wc)) { | |
386cb9f7 JR |
1410 | /* 'wc' bits for the trie field set, now unwildcard the preceding |
1411 | * bits used so far. */ | |
1412 | fill_range_wc(subtable, wc, ofs.start); | |
1413 | return NULL; | |
13751fd8 JR |
1414 | } |
1415 | hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start, | |
1416 | ofs.end, &basis); | |
f2c21402 | 1417 | inode = cmap_find(&subtable->indices[i], hash); |
476f36e8 | 1418 | if (!inode) { |
386cb9f7 JR |
1419 | /* No match, can stop immediately, but must fold in the bits |
1420 | * used in lookup so far. */ | |
1421 | fill_range_wc(subtable, wc, ofs.end); | |
1422 | return NULL; | |
476f36e8 JR |
1423 | } |
1424 | ||
1425 | /* If we have narrowed down to a single rule already, check whether | |
a64759f0 | 1426 | * that rule matches. Either way, we're done. |
476f36e8 JR |
1427 | * |
1428 | * (Rare) hash collisions may cause us to miss the opportunity for this | |
1429 | * optimization. */ | |
f2c21402 | 1430 | if (!cmap_node_next(inode)) { |
476f36e8 | 1431 | ASSIGN_CONTAINER(rule, inode - i, index_nodes); |
e9319757 JR |
1432 | if (miniflow_and_mask_matches_flow_wc(&rule->flow, &subtable->mask, |
1433 | flow, wc)) { | |
1434 | return rule; | |
476f36e8 | 1435 | } |
e9319757 | 1436 | return NULL; |
476f36e8 | 1437 | } |
386cb9f7 | 1438 | ofs.start = ofs.end; |
476f36e8 | 1439 | } |
13751fd8 JR |
1440 | ofs.end = FLOW_U32S; |
1441 | /* Trie check for the final range. */ | |
1442 | if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) { | |
386cb9f7 JR |
1443 | fill_range_wc(subtable, wc, ofs.start); |
1444 | return NULL; | |
13751fd8 | 1445 | } |
a64759f0 JR |
1446 | hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start, |
1447 | ofs.end, &basis); | |
1448 | rule = find_match(subtable, flow, hash); | |
69d6040e JR |
1449 | if (!rule && subtable->ports_mask_len) { |
1450 | /* Ports are always part of the final range, if any. | |
1451 | * No match was found for the ports. Use the ports trie to figure out | |
1452 | * which ports bits to unwildcard. */ | |
1453 | unsigned int mbits; | |
c0bfb650 | 1454 | ovs_be32 value, plens, mask; |
69d6040e JR |
1455 | |
1456 | mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src); | |
1457 | value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask; | |
c0bfb650 | 1458 | mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32); |
69d6040e JR |
1459 | |
1460 | ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |= | |
86f35fb5 | 1461 | mask & be32_prefix_mask(mbits); |
69d6040e | 1462 | |
386cb9f7 JR |
1463 | /* Unwildcard all bits in the mask upto the ports, as they were used |
1464 | * to determine there is no match. */ | |
1465 | fill_range_wc(subtable, wc, TP_PORTS_OFS32); | |
1466 | return NULL; | |
69d6040e | 1467 | } |
e9319757 | 1468 | |
13751fd8 | 1469 | /* Must unwildcard all the fields, as they were looked at. */ |
476f36e8 JR |
1470 | flow_wildcards_fold_minimask(wc, &subtable->mask); |
1471 | return rule; | |
1472 | } | |
1473 | ||
/* Returns a rule from 'subtable' whose miniflow is bitwise equal to 'flow',
 * or NULL if there is none.  'hash' must be the hash of 'flow' under the same
 * hash function used when rules were inserted into 'subtable->rules', since
 * only the matching hash bucket is searched. */
static struct cls_match *
find_equal(const struct cls_subtable *subtable, const struct miniflow *flow,
           uint32_t hash)
{
    struct cls_match *head;

    /* Walk only the rules colliding with 'hash'. */
    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
        if (miniflow_equal(&head->flow, flow)) {
            return head;
        }
    }
    return NULL;
}
13751fd8 JR |
1487 | \f |
1488 | /* A longest-prefix match tree. */ | |
13751fd8 JR |
1489 | |
/* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'.
 * Prefixes are in the network byte order, and the offset 0 corresponds to
 * the most significant bit of the first byte.  The offset can be read as
 * "how many bits to skip from the start of the prefix starting at 'pr'".
 *
 * Note: bits past 'plen' in the return value are unspecified; callers that
 * need them cleared must mask them off (see trie_get_prefix()). */
static uint32_t
raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
{
    uint32_t prefix;

    pr += ofs / 32; /* Where to start. */
    ofs %= 32; /* How many bits to skip at 'pr'. */

    prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */
    if (plen > 32 - ofs) { /* Need more than we have already? */
        prefix |= ntohl(*++pr) >> (32 - ofs);
    }
    /* Return with possible unwanted bits at the end. */
    return prefix;
}
1509 | ||
1510 | /* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit | |
1511 | * offset 'ofs'. Prefixes are in the network byte order, and the offset 0 | |
1512 | * corresponds to the most significant bit of the first byte. The offset can | |
1513 | * be read as "how many bits to skip from the start of the prefix starting at | |
1514 | * 'pr'". */ | |
1515 | static uint32_t | |
1516 | trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen) | |
1517 | { | |
1518 | if (!plen) { | |
1519 | return 0; | |
1520 | } | |
1521 | if (plen > TRIE_PREFIX_BITS) { | |
1522 | plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */ | |
1523 | } | |
1524 | /* Return with unwanted bits cleared. */ | |
1525 | return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen); | |
1526 | } | |
1527 | ||
/* Return the number of equal bits in 'n_bits' of 'prefix's MSBs and a 'value'
 * starting at "MSB 0"-based offset 'ofs'.
 *
 * NOTE(review): assumes 'n_bits' <= 32 (callers pass at most
 * TRIE_PREFIX_BITS); larger values would make the sentinel shift below
 * meaningless -- confirm if new callers are added. */
static unsigned int
prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[],
                  unsigned int ofs)
{
    /* XOR leaves a 1-bit wherever the operands differ. */
    uint64_t diff = prefix ^ raw_get_prefix(value, ofs, n_bits);
    /* Set the bit after the relevant bits to limit the result, so that
     * the count of leading zeros never exceeds 'n_bits'. */
    return raw_clz64(diff << 32 | UINT64_C(1) << (63 - n_bits));
}
1538 | ||
/* Return the number of equal bits in 'node' prefix and a 'prefix' of length
 * 'plen', starting at "MSB 0"-based offset 'ofs'.  The result is capped at
 * the number of bits stored in 'node' and at the bits remaining in 'prefix'
 * ('plen' - 'ofs'). */
static unsigned int
trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[],
                       unsigned int ofs, unsigned int plen)
{
    return prefix_equal_bits(node->prefix, MIN(node->n_bits, plen - ofs),
                             prefix, ofs);
}
1548 | ||
1549 | /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can | |
1550 | * be greater than 31. */ | |
1551 | static unsigned int | |
1552 | be_get_bit_at(const ovs_be32 value[], unsigned int ofs) | |
1553 | { | |
1554 | return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u; | |
1555 | } | |
1556 | ||
/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int.  'ofs' must
 * be between 0 and 31, inclusive. */
static unsigned int
get_bit_at(const uint32_t prefix, unsigned int ofs)
{
    return (prefix & (1u << (31 - ofs))) ? 1u : 0u;
}
1564 | ||
/* Create new branch.
 *
 * Builds a chain of nodes holding the 'plen' bits of 'prefix' that start at
 * bit offset 'ofs'.  Each node stores at most TRIE_PREFIX_BITS bits; the
 * final node of the chain gets 'n_rules' as its rule count, intermediate
 * nodes hold no rules.  The new branch is fully initialized before being
 * returned, so it may be published to concurrent readers with a single
 * pointer store. */
static struct trie_node *
trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen,
                   unsigned int n_rules)
{
    struct trie_node *node = xmalloc(sizeof *node);

    node->prefix = trie_get_prefix(prefix, ofs, plen);

    if (plen <= TRIE_PREFIX_BITS) {
        /* The remaining prefix fits entirely in this node: a leaf. */
        node->n_bits = plen;
        ovsrcu_set_hidden(&node->edges[0], NULL);
        ovsrcu_set_hidden(&node->edges[1], NULL);
        node->n_rules = n_rules;
    } else { /* Need intermediate nodes. */
        struct trie_node *subnode = trie_branch_create(prefix,
                                                       ofs + TRIE_PREFIX_BITS,
                                                       plen - TRIE_PREFIX_BITS,
                                                       n_rules);
        /* The child hangs off the edge selected by its first prefix bit. */
        int bit = get_bit_at(subnode->prefix, 0);
        node->n_bits = TRIE_PREFIX_BITS;
        ovsrcu_set_hidden(&node->edges[bit], subnode);
        ovsrcu_set_hidden(&node->edges[!bit], NULL);
        node->n_rules = 0;
    }
    return node;
}
1592 | ||
/* Schedules 'node' to be freed after the current RCU grace period, since
 * lockless readers may still be traversing it.  The node must already have
 * been unlinked from the trie (or replaced) by the caller. */
static void
trie_node_destroy(const struct trie_node *node)
{
    ovsrcu_postpone(free, CONST_CAST(struct trie_node *, node));
}
1598 | ||
/* Copy a trie node for modification and postpone delete the old one.
 *
 * Returns a fresh, writable copy of 'node'; the original is RCU-postponed
 * for freeing, so concurrent readers holding it remain safe.  The caller is
 * responsible for publishing the copy in place of the original. */
static struct trie_node *
trie_node_rcu_realloc(const struct trie_node *node)
{
    struct trie_node *new_node = xmalloc(sizeof *node);

    *new_node = *node;
    trie_node_destroy(node);

    return new_node;
}
1610 | ||
/* Recursively destroys the subtree rooted at '*trie'.
 *
 * May only be called while holding the classifier mutex.  The root pointer
 * is cleared before the children are visited, so concurrent readers stop
 * seeing the subtree before any node in it is scheduled for freeing. */
static void
trie_destroy(rcu_trie_ptr *trie)
{
    struct trie_node *node = ovsrcu_get_protected(struct trie_node *, trie);

    if (node) {
        ovsrcu_set_hidden(trie, NULL);
        trie_destroy(&node->edges[0]);
        trie_destroy(&node->edges[1]);
        trie_node_destroy(node);
    }
}
1624 | ||
/* Returns true if 'trie' has no children, i.e., it is a leaf node. */
static bool
trie_is_leaf(const struct trie_node *trie)
{
    /* No children? */
    return !ovsrcu_get(struct trie_node *, &trie->edges[0])
        && !ovsrcu_get(struct trie_node *, &trie->edges[1]);
}
1632 | ||
/* Sets the leading 'n_bits' mask bits, in network bit order, in 'wc',
 * starting at the u32 word at offset 'be32ofs' into the flow.  Whole words
 * are set to all-ones; a partial final word gets its top 'n_bits % 32' bits
 * OR'd in, preserving any bits already set there. */
static void
mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs,
                     unsigned int n_bits)
{
    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
    unsigned int i;

    for (i = 0; i < n_bits / 32; i++) {
        mask[i] = OVS_BE32_MAX;
    }
    if (n_bits % 32) {
        mask[i] |= htonl(~0u << (32 - n_bits % 32));
    }
}
1647 | ||
/* Returns true if the leading 'n_bits' mask bits, in network bit order,
 * starting at the u32 word at offset 'be32ofs' into 'wc', are all set. */
static bool
mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs,
                     unsigned int n_bits)
{
    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
    unsigned int i;
    ovs_be32 zeroes = 0;   /* Accumulates any unset bit among the first
                            * 'n_bits'. */

    for (i = 0; i < n_bits / 32; i++) {
        zeroes |= ~mask[i];
    }
    if (n_bits % 32) {
        zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32));
    }

    return !zeroes; /* All 'n_bits' bits set. */
}
1665 | ||
f358a2cb | 1666 | static rcu_trie_ptr * |
13751fd8 JR |
1667 | trie_next_edge(struct trie_node *node, const ovs_be32 value[], |
1668 | unsigned int ofs) | |
1669 | { | |
1670 | return node->edges + be_get_bit_at(value, ofs); | |
1671 | } | |
1672 | ||
/* Returns the child of 'node' selected by the bit of 'value' at "MSB 0"-based
 * offset 'ofs', or NULL if there is no such child.  Safe for lockless
 * readers (uses an RCU read of the edge pointer). */
static const struct trie_node *
trie_next_node(const struct trie_node *node, const ovs_be32 value[],
               unsigned int ofs)
{
    return ovsrcu_get(struct trie_node *,
                      &node->edges[be_get_bit_at(value, ofs)]);
}
1680 | ||
c0bfb650 JR |
1681 | /* Set the bit at ("MSB 0"-based) offset 'ofs'. 'ofs' can be greater than 31. |
1682 | */ | |
1683 | static void | |
1684 | be_set_bit_at(ovs_be32 value[], unsigned int ofs) | |
1685 | { | |
1686 | ((uint8_t *)value)[ofs / 8] |= 1u << (7 - ofs % 8); | |
1687 | } | |
1688 | ||
/* Returns the number of bits in the prefix mask necessary to determine a
 * mismatch, in case there are longer prefixes in the tree below the one that
 * matched.
 * '*plens' will have a bit set for each prefix length that may have matching
 * rules.  The caller is responsible for clearing the '*plens' prior to
 * calling this.
 *
 * Safe for lockless readers: the trie is traversed only through RCU-read
 * pointers (trie_next_node()). */
static unsigned int
trie_lookup_value(const rcu_trie_ptr *trie, const ovs_be32 value[],
                  ovs_be32 plens[], unsigned int n_bits)
{
    const struct trie_node *prev = NULL;
    const struct trie_node *node = ovsrcu_get(struct trie_node *, trie);
    unsigned int match_len = 0; /* Number of matching bits. */

    for (; node; prev = node, node = trie_next_node(node, value, match_len)) {
        unsigned int eqbits;
        /* Check if this edge can be followed. */
        eqbits = prefix_equal_bits(node->prefix, node->n_bits, value,
                                   match_len);
        match_len += eqbits;
        if (eqbits < node->n_bits) { /* Mismatch, nothing more to be found. */
            /* Bit at offset 'match_len' differed. */
            return match_len + 1; /* Includes the first mismatching bit. */
        }
        /* Full match, check if rules exist at this prefix length. */
        if (node->n_rules > 0) {
            be_set_bit_at(plens, match_len - 1);
        }
        if (match_len >= n_bits) {
            return n_bits; /* Full prefix. */
        }
    }
    /* node == NULL.  Full match so far, but we tried to follow a
     * non-existing branch.  Need to exclude the other branch if it exists
     * (it does not if we were called on an empty trie or 'prev' is a leaf
     * node). */
    return !prev || trie_is_leaf(prev) ? match_len : match_len + 1;
}
1728 | ||
/* Looks up the value of 'trie''s field in 'flow' in the trie, filling in
 * '*plens' with the prefix lengths that may have matching rules (see
 * trie_lookup_value()).  If the flow does not satisfy the field's
 * prerequisites, '*plens' is set to all-ones (no prefix length can be
 * excluded) and the return value is meaningless. */
static unsigned int
trie_lookup(const struct cls_trie *trie, const struct flow *flow,
            union mf_value *plens)
{
    const struct mf_field *mf = trie->field;

    /* Check that current flow matches the prerequisites for the trie
     * field.  Some match fields are used for multiple purposes, so we
     * must check that the trie is relevant for this flow. */
    if (mf_are_prereqs_ok(mf, flow)) {
        return trie_lookup_value(&trie->root,
                                 &((ovs_be32 *)flow)[mf->flow_be32ofs],
                                 &plens->be32, mf->n_bits);
    }
    memset(plens, 0xff, sizeof *plens); /* All prefixes, no skipping. */
    return 0; /* Value not used in this case. */
}
1746 | ||
/* Returns the length of a prefix match mask for the field 'mf' in 'minimask'.
 * Returns 0 if the mask is not a valid, contiguous (CIDR-like) prefix --
 * i.e., if set bits appear after the first zero bit -- or if the mask is
 * all zeroes. */
static unsigned int
minimask_get_prefix_len(const struct minimask *minimask,
                        const struct mf_field *mf)
{
    unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
    uint8_t u32_ofs = mf->flow_be32ofs;
    uint8_t u32_end = u32_ofs + mf->n_bytes / 4;

    for (; u32_ofs < u32_end; ++u32_ofs) {
        uint32_t mask;
        mask = ntohl((OVS_FORCE ovs_be32)minimask_get(minimask, u32_ofs));

        /* Validate mask, count the mask length. */
        if (mask_tz) {
            if (mask) {
                return 0; /* No bits allowed after mask ended. */
            }
        } else {
            if (~mask & (~mask + 1)) {
                return 0; /* Mask not contiguous. */
            }
            mask_tz = ctz32(mask);
            n_bits += 32 - mask_tz;
        }
    }

    return n_bits;
}
1778 | ||
/*
 * Returns a pointer to the start of the field 'mf''s data within the packed
 * miniflow values of 'match'.
 *
 * This is called only when mask prefix is known to be CIDR and non-zero.
 * Relies on the fact that the flow and mask have the same map, and since
 * the mask is CIDR, the storage for the flow field exists even if it
 * happened to be zeros.
 */
static const ovs_be32 *
minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
{
    /* Count the miniflow values present before the field's u32 offset to
     * find its position in the packed value array. */
    return miniflow_get_be32_values(&match->flow) +
        count_1bits(match->flow.map & ((UINT64_C(1) << mf->flow_be32ofs) - 1));
}
1791 | ||
/* Insert rule into the prefix tree.
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    trie_insert_prefix(&trie->root,
                       minimatch_get_prefix(&rule->match, trie->field), mlen);
}
1801 | ||
/* Inserts the first 'mlen' bits of 'prefix' into the trie rooted at 'edge',
 * incrementing the rule count at the matching node.
 *
 * Modifications are made copy-on-write and published with a single RCU
 * pointer store, so concurrent lockless readers always see either the old
 * or the new consistent tree. */
static void
trie_insert_prefix(rcu_trie_ptr *edge, const ovs_be32 *prefix, int mlen)
{
    struct trie_node *node;
    int ofs = 0;

    /* Walk the tree. */
    for (; (node = ovsrcu_get_protected(struct trie_node *, edge));
         edge = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
        ofs += eqbits;
        if (eqbits < node->n_bits) {
            /* Mismatch, new node needs to be inserted above. */
            int old_branch = get_bit_at(node->prefix, eqbits);
            struct trie_node *new_parent;

            /* The new parent holds the common bits; it owns the rule if the
             * whole prefix ends exactly here. */
            new_parent = trie_branch_create(prefix, ofs - eqbits, eqbits,
                                            ofs == mlen ? 1 : 0);
            /* Copy the node to modify it. */
            node = trie_node_rcu_realloc(node);
            /* Adjust the new node for its new position in the tree. */
            node->prefix <<= eqbits;
            node->n_bits -= eqbits;
            ovsrcu_set_hidden(&new_parent->edges[old_branch], node);

            /* Check if need a new branch for the new rule. */
            if (ofs < mlen) {
                ovsrcu_set_hidden(&new_parent->edges[!old_branch],
                                  trie_branch_create(prefix, ofs, mlen - ofs,
                                                     1));
            }
            ovsrcu_set(edge, new_parent); /* Publish changes. */
            return;
        }
        /* Full match so far. */

        if (ofs == mlen) {
            /* Full match at the current node, rule needs to be added here. */
            node->n_rules++;
            return;
        }
    }
    /* Must insert a new tree branch for the new rule. */
    ovsrcu_set(edge, trie_branch_create(prefix, ofs, mlen - ofs, 1));
}
1847 | ||
/* Removes rule from the prefix tree.
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    trie_remove_prefix(&trie->root,
                       minimatch_get_prefix(&rule->match, trie->field), mlen);
}
1856 | ||
/* Removes one rule with the first 'mlen' bits of 'prefix' from the trie
 * rooted at 'root', pruning and recombining nodes that no longer carry rules.
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'.
 *
 * Node updates are made copy-on-write and published with RCU pointer stores,
 * keeping the tree consistent for concurrent lockless readers.  Logs a
 * warning if the prefix is not found, which should never happen since only
 * rules present in the classifier are removed. */
static void
trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 *prefix, int mlen)
{
    struct trie_node *node;
    /* Parent edges on the path from the root, for pruning upwards.  The
     * maximum depth is one level per prefix bit. */
    rcu_trie_ptr *edges[sizeof(union mf_value) * 8];
    int depth = 0, ofs = 0;

    /* Walk the tree. */
    for (edges[0] = root;
         (node = ovsrcu_get_protected(struct trie_node *, edges[depth]));
         edges[++depth] = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);

        if (eqbits < node->n_bits) {
            /* Mismatch, nothing to be removed.  This should never happen, as
             * only rules in the classifier are ever removed. */
            break; /* Log a warning. */
        }
        /* Full match so far. */
        ofs += eqbits;

        if (ofs == mlen) {
            /* Full prefix match at the current node, remove rule here. */
            if (!node->n_rules) {
                break; /* Log a warning. */
            }
            node->n_rules--;

            /* Check if can prune the tree. */
            while (!node->n_rules) {
                struct trie_node *next,
                    *edge0 = ovsrcu_get_protected(struct trie_node *,
                                                  &node->edges[0]),
                    *edge1 = ovsrcu_get_protected(struct trie_node *,
                                                  &node->edges[1]);

                if (edge0 && edge1) {
                    break; /* A branching point, cannot prune. */
                }

                /* Else have at most one child node, remove this node. */
                next = edge0 ? edge0 : edge1;

                if (next) {
                    if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) {
                        break;   /* Cannot combine. */
                    }
                    next = trie_node_rcu_realloc(next); /* Modify. */

                    /* Combine node with next. */
                    next->prefix = node->prefix | next->prefix >> node->n_bits;
                    next->n_bits += node->n_bits;
                }
                /* Update the parent's edge. */
                ovsrcu_set(edges[depth], next); /* Publish changes. */
                trie_node_destroy(node);

                if (next || !depth) {
                    /* Branch not pruned or at root, nothing more to do. */
                    break;
                }
                /* Pruned an entire branch; continue pruning at the parent. */
                node = ovsrcu_get_protected(struct trie_node *,
                                            edges[--depth]);
            }
            return;
        }
    }
    /* Cannot go deeper.  This should never happen, since only rules
     * that actually exist in the classifier are ever removed. */
    VLOG_WARN("Trying to remove non-existing rule from a prefix trie.");
}
1929 | } |