/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "classifier.h"
#include "classifier-private.h"
#include <errno.h>
#include <netinet/in.h>
#include "byte-order.h"
#include "dynamic-string.h"
#include "odp-util.h"
#include "ofp-util.h"
#include "packets.h"
#include "util.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(classifier);

struct trie_ctx;

/* A collection of "struct cls_conjunction"s currently embedded into a
 * cls_match. */
struct cls_conjunction_set {
    /* Link back to the cls_match.
     *
     * cls_conjunction_set is mostly used during classifier lookup, and, in
     * turn, during classifier lookup the most used member of
     * cls_conjunction_set is the rule's priority, so we cache it here for fast
     * access. */
    struct cls_match *match;
    int priority;               /* Cached copy of match->priority. */

    /* Conjunction information.
     *
     * 'min_n_clauses' allows some optimization during classifier lookup. */
    unsigned int n;             /* Number of elements in 'conj'. */
    unsigned int min_n_clauses; /* Smallest 'n_clauses' among elements of
                                 * 'conj'. */
    struct cls_conjunction conj[];
};
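
/* An illustrative sketch (not taken from the tree): a flow participating as
 * clause 1 of 2 in conjunction 5 (OpenFlow counts clauses from 1; internally
 * they are 0-based) might carry a one-element 'conj' array along the lines
 * of:
 *
 *     struct cls_conjunction conj = { .id = 5, .clause = 0, .n_clauses = 2 };
 *
 * See struct cls_conjunction in classifier.h for the authoritative member
 * list. */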

/* Ports trie depends on both ports sharing the same ovs_be32. */
#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
BUILD_ASSERT_DECL(TP_PORTS_OFS32 % 2 == 0);
#define TP_PORTS_OFS64 (TP_PORTS_OFS32 / 2)

static size_t
cls_conjunction_set_size(size_t n)
{
    return (sizeof(struct cls_conjunction_set)
            + n * sizeof(struct cls_conjunction));
}

static struct cls_conjunction_set *
cls_conjunction_set_alloc(struct cls_match *match,
                          const struct cls_conjunction conj[], size_t n)
{
    if (n) {
        size_t min_n_clauses = conj[0].n_clauses;
        for (size_t i = 1; i < n; i++) {
            min_n_clauses = MIN(min_n_clauses, conj[i].n_clauses);
        }

        struct cls_conjunction_set *set = xmalloc(cls_conjunction_set_size(n));
        set->match = match;
        set->priority = match->priority;
        set->n = n;
        set->min_n_clauses = min_n_clauses;
        memcpy(set->conj, conj, n * sizeof *conj);
        return set;
    } else {
        return NULL;
    }
}

static struct cls_match *
cls_match_alloc(const struct cls_rule *rule,
                const struct cls_conjunction conj[], size_t n)
{
    int count = count_1bits(rule->match.flow.map);

    struct cls_match *cls_match
        = xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values
                  + MINIFLOW_VALUES_SIZE(count));

    ovsrcu_init(&cls_match->next, NULL);
    *CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule;
    *CONST_CAST(int *, &cls_match->priority) = rule->priority;
    atomic_init(&cls_match->visibility, 0);   /* Initially invisible. */
    miniflow_clone_inline(CONST_CAST(struct miniflow *, &cls_match->flow),
                          &rule->match.flow, count);
    ovsrcu_set_hidden(&cls_match->conj_set,
                      cls_conjunction_set_alloc(cls_match, conj, n));

    return cls_match;
}
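
/* Note on the allocation above: 'struct cls_match' ends in a miniflow whose
 * inline data is variable length, so its size is computed from the number of
 * 1-bits in the flow map rather than taken from sizeof alone.
 * insert_subtable() below uses the same pattern for 'struct cls_subtable'. */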

static struct cls_subtable *find_subtable(const struct classifier *cls,
                                          const struct minimask *);
static struct cls_subtable *insert_subtable(struct classifier *cls,
                                            const struct minimask *);
static void destroy_subtable(struct classifier *cls, struct cls_subtable *);

static const struct cls_match *find_match_wc(const struct cls_subtable *,
                                             long long version,
                                             const struct flow *,
                                             struct trie_ctx *,
                                             unsigned int n_tries,
                                             struct flow_wildcards *);
static struct cls_match *find_equal(const struct cls_subtable *,
                                    const struct miniflow *, uint32_t hash);

/* Return the next visible (lower-priority) rule in the list.  Multiple
 * identical rules with the same priority may exist transitionally, but when
 * versioning is used at most one of them is ever visible for lookups on any
 * given 'version'. */
static inline const struct cls_match *
next_visible_rule_in_list(const struct cls_match *rule, long long version)
{
    do {
        rule = cls_match_next(rule);
        if (!rule) {
            /* We have reached the head of the list, stop. */
            break;
        }
    } while (!cls_match_visible_in_version(rule, version));

    return rule;
}

static unsigned int minimask_get_prefix_len(const struct minimask *,
                                            const struct mf_field *);
static void trie_init(struct classifier *cls, int trie_idx,
                      const struct mf_field *);
static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
                                union mf_value *plens);
static unsigned int trie_lookup_value(const rcu_trie_ptr *,
                                      const ovs_be32 value[], ovs_be32 plens[],
                                      unsigned int value_bits);
static void trie_destroy(rcu_trie_ptr *);
static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_insert_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
                               int mlen);
static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_remove_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
                               int mlen);
static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
                                 unsigned int n_bits);
static bool mask_prefix_bits_set(const struct flow_wildcards *,
                                 uint8_t be32ofs, unsigned int n_bits);
\f
/* cls_rule. */

static inline void
cls_rule_init__(struct cls_rule *rule, unsigned int priority,
                long long version)
{
    ovs_assert(version > 0);

    rculist_init(&rule->node);
    *CONST_CAST(int *, &rule->priority) = priority;
    *CONST_CAST(long long *, &rule->version) = version;
    rule->cls_match = NULL;
}

/* Initializes 'rule' to match packets specified by 'match' at the given
 * 'priority'.  'match' must satisfy the invariant described in the comment at
 * the definition of struct match.
 *
 * The caller must eventually destroy 'rule' with cls_rule_destroy().
 *
 * Clients should not use priority INT_MIN.  (OpenFlow uses priorities between
 * 0 and UINT16_MAX, inclusive.) */
void
cls_rule_init(struct cls_rule *rule, const struct match *match, int priority,
              long long version)
{
    cls_rule_init__(rule, priority, version);
    minimatch_init(CONST_CAST(struct minimatch *, &rule->match), match);
}
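
/* A minimal usage sketch (illustrative; 'version' is whatever versioning
 * scheme the caller uses, any value greater than 0):
 *
 *     struct match match;
 *     struct cls_rule rule;
 *
 *     match_init_catchall(&match);
 *     match_set_nw_proto(&match, IPPROTO_TCP);
 *     cls_rule_init(&rule, &match, 100, version);
 *     ...
 *     cls_rule_destroy(&rule);
 */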

/* Same as cls_rule_init() for initialization from a "struct minimatch". */
void
cls_rule_init_from_minimatch(struct cls_rule *rule,
                             const struct minimatch *match, int priority,
                             long long version)
{
    cls_rule_init__(rule, priority, version);
    minimatch_clone(CONST_CAST(struct minimatch *, &rule->match), match);
}

/* Initializes 'dst' as a copy of 'src', but with 'version'.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_clone_in_version(struct cls_rule *dst, const struct cls_rule *src,
                          long long version)
{
    cls_rule_init__(dst, src->priority, version);
    minimatch_clone(CONST_CAST(struct minimatch *, &dst->match), &src->match);
}

/* Initializes 'dst' as a copy of 'src'.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src)
{
    cls_rule_clone_in_version(dst, src, src->version);
}

/* Initializes 'dst' with the data in 'src', destroying 'src'.
 *
 * 'src' must be a cls_rule NOT in a classifier.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_move(struct cls_rule *dst, struct cls_rule *src)
{
    cls_rule_init__(dst, src->priority, src->version);
    minimatch_move(CONST_CAST(struct minimatch *, &dst->match),
                   CONST_CAST(struct minimatch *, &src->match));
}

/* Frees memory referenced by 'rule'.  Doesn't free 'rule' itself (it's
 * normally embedded into a larger structure).
 *
 * ('rule' must not currently be in a classifier.) */
void
cls_rule_destroy(struct cls_rule *rule)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    ovs_assert(!rule->cls_match);   /* Must not be in a classifier. */

    /* Check that the rule has been properly removed from the classifier. */
    ovs_assert(rule->node.prev == RCULIST_POISON
               || rculist_is_empty(&rule->node));
    rculist_poison__(&rule->node);   /* Poisons also the next pointer. */

    minimatch_destroy(CONST_CAST(struct minimatch *, &rule->match));
}

void
cls_rule_set_conjunctions(struct cls_rule *cr,
                          const struct cls_conjunction *conj, size_t n)
{
    struct cls_match *match = cr->cls_match;
    struct cls_conjunction_set *old
        = ovsrcu_get_protected(struct cls_conjunction_set *, &match->conj_set);
    struct cls_conjunction *old_conj = old ? old->conj : NULL;
    unsigned int old_n = old ? old->n : 0;

    if (old_n != n || (n && memcmp(old_conj, conj, n * sizeof *conj))) {
        if (old) {
            ovsrcu_postpone(free, old);
        }
        ovsrcu_set(&match->conj_set,
                   cls_conjunction_set_alloc(match, conj, n));
    }
}


/* Returns true if 'a' and 'b' match the same packets at the same priority,
 * false if they differ in some way. */
bool
cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b)
{
    return a->priority == b->priority && minimatch_equal(&a->match, &b->match);
}

/* Returns a hash value for 'rule', folding in 'basis'. */
uint32_t
cls_rule_hash(const struct cls_rule *rule, uint32_t basis)
{
    return minimatch_hash(&rule->match, hash_int(rule->priority, basis));
}

/* Appends a string describing 'rule' to 's'. */
void
cls_rule_format(const struct cls_rule *rule, struct ds *s)
{
    minimatch_format(&rule->match, s, rule->priority);
}

/* Returns true if 'rule' matches every packet, false otherwise. */
bool
cls_rule_is_catchall(const struct cls_rule *rule)
{
    return minimask_is_catchall(&rule->match.mask);
}

/* Makes 'rule_' invisible after 'version'.  Once that version is made
 * invisible (by changing the version parameter used in lookups), the rule
 * should be actually removed via ovsrcu_postpone().
 *
 * 'rule_' must be in a classifier. */
void
cls_rule_make_invisible_in_version(const struct cls_rule *rule_,
                                   long long version, long long lookup_version)
{
    struct cls_match *rule = rule_->cls_match;

    /* XXX: Adjust when versioning is actually used. */
    ovs_assert(version >= rule_->version && version >= lookup_version);

    /* Normally, we call this when deleting a rule that is already visible to
     * lookups.  However, sometimes a bundle transaction will add a rule and
     * then delete it before the rule has ever become visible.  If we set such
     * a rule to become invisible in a future 'version', it would become
     * visible to all prior versions.  So, in this case we must set the rule
     * visibility to 0 (== never visible). */
    if (cls_match_visible_in_version(rule, lookup_version)) {
        /* Make invisible starting at 'version'. */
        atomic_store_relaxed(&rule->visibility, -version);
    } else {
        /* Rule has not yet been visible to lookups, make invisible in all
         * versions. */
        atomic_store_relaxed(&rule->visibility, 0);
    }
}
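
/* A typical delete under versioning might look like the following sketch
 * (the names 'next_version', 'cur_version' and 'rule_destroy_cb' are
 * illustrative; the caller owns the versioning scheme):
 *
 *     cls_rule_make_invisible_in_version(rule, next_version, cur_version);
 *     ... publish 'next_version' as the version used by lookups ...
 *     classifier_remove(cls, rule);
 *     ovsrcu_postpone(rule_destroy_cb, rule);
 *
 * where 'rule_destroy_cb' eventually calls cls_rule_destroy() and free(). */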

/* This undoes the change made by cls_rule_make_invisible_in_version().
 *
 * 'rule' must be in a classifier. */
void
cls_rule_restore_visibility(const struct cls_rule *rule)
{
    atomic_store_relaxed(&rule->cls_match->visibility, rule->version);
}

/* Return true if 'rule' is visible in 'version'.
 *
 * 'rule' must be in a classifier. */
bool
cls_rule_visible_in_version(const struct cls_rule *rule, long long version)
{
    return cls_match_visible_in_version(rule->cls_match, version);
}
\f
/* Initializes 'cls' as a classifier that initially contains no classification
 * rules. */
void
classifier_init(struct classifier *cls, const uint8_t *flow_segments)
{
    cls->n_rules = 0;
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);
    cmap_init(&cls->partitions);
    cls->n_flow_segments = 0;
    if (flow_segments) {
        while (cls->n_flow_segments < CLS_MAX_INDICES
               && *flow_segments < FLOW_U64S) {
            cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
        }
    }
    cls->n_tries = 0;
    for (int i = 0; i < CLS_MAX_TRIES; i++) {
        trie_init(cls, i, NULL);
    }
    cls->publish = true;
}

/* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
 * caller's responsibility.
 * May only be called after all the readers have been terminated. */
void
classifier_destroy(struct classifier *cls)
{
    if (cls) {
        struct cls_partition *partition;
        struct cls_subtable *subtable;
        int i;

        for (i = 0; i < cls->n_tries; i++) {
            trie_destroy(&cls->tries[i].root);
        }

        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            destroy_subtable(cls, subtable);
        }
        cmap_destroy(&cls->subtables_map);

        CMAP_FOR_EACH (partition, cmap_node, &cls->partitions) {
            ovsrcu_postpone(free, partition);
        }
        cmap_destroy(&cls->partitions);

        pvector_destroy(&cls->subtables);
    }
}
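
/* Usage sketch (illustrative): a classifier with no staged lookup indices
 * passes NULL for 'flow_segments':
 *
 *     struct classifier cls;
 *
 *     classifier_init(&cls, NULL);
 *     ...
 *     classifier_destroy(&cls);
 */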

/* Set the fields for which prefix lookup should be performed. */
bool
classifier_set_prefix_fields(struct classifier *cls,
                             const enum mf_field_id *trie_fields,
                             unsigned int n_fields)
{
    const struct mf_field *new_fields[CLS_MAX_TRIES];
    struct mf_bitmap fields = MF_BITMAP_INITIALIZER;
    int i, n_tries = 0;
    bool changed = false;

    for (i = 0; i < n_fields && n_tries < CLS_MAX_TRIES; i++) {
        const struct mf_field *field = mf_from_id(trie_fields[i]);
        if (field->flow_be32ofs < 0 || field->n_bits % 32) {
            /* Incompatible field.  This is the only place where we
             * enforce these requirements, but the rest of the trie code
             * depends on the flow_be32ofs to be non-negative and the
             * field length to be a multiple of 32 bits. */
            continue;
        }

        if (bitmap_is_set(fields.bm, trie_fields[i])) {
            /* Duplicate field, there is no need to build more than
             * one index for any one field. */
            continue;
        }
        bitmap_set1(fields.bm, trie_fields[i]);

        new_fields[n_tries] = NULL;
        if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) {
            new_fields[n_tries] = field;
            changed = true;
        }
        n_tries++;
    }

    if (changed || n_tries < cls->n_tries) {
        struct cls_subtable *subtable;

        /* Trie configuration needs to change.  Disable trie lookups
         * for the tries that are changing and wait for all the current
         * readers with the old configuration to be done. */
        changed = false;
        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            for (i = 0; i < cls->n_tries; i++) {
                if ((i < n_tries && new_fields[i]) || i >= n_tries) {
                    if (subtable->trie_plen[i]) {
                        subtable->trie_plen[i] = 0;
                        changed = true;
                    }
                }
            }
        }
        /* Synchronize if any readers were using tries.  The readers may
         * temporarily function without the trie lookup based optimizations. */
        if (changed) {
            /* ovsrcu_synchronize() functions as a memory barrier, so it does
             * not matter that subtable->trie_plen is not atomic. */
            ovsrcu_synchronize();
        }

        /* Now set up the tries. */
        for (i = 0; i < n_tries; i++) {
            if (new_fields[i]) {
                trie_init(cls, i, new_fields[i]);
            }
        }
        /* Destroy the rest, if any. */
        for (; i < cls->n_tries; i++) {
            trie_init(cls, i, NULL);
        }

        cls->n_tries = n_tries;
        return true;
    }

    return false; /* No change. */
}
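
/* Example (a sketch of how a caller might configure IPv4 prefix lookup;
 * ofproto uses a configuration along these lines):
 *
 *     static const enum mf_field_id trie_fields[2] = {
 *         MFF_IPV4_DST, MFF_IPV4_SRC
 *     };
 *
 *     classifier_set_prefix_fields(&cls, trie_fields,
 *                                  ARRAY_SIZE(trie_fields));
 */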

static void
trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
{
    struct cls_trie *trie = &cls->tries[trie_idx];
    struct cls_subtable *subtable;

    if (trie_idx < cls->n_tries) {
        trie_destroy(&trie->root);
    } else {
        ovsrcu_set_hidden(&trie->root, NULL);
    }
    trie->field = field;

    /* Add existing rules to the new trie. */
    CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
        unsigned int plen;

        plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
        if (plen) {
            struct cls_match *head;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                trie_insert(trie, head->cls_rule, plen);
            }
        }
        /* Initialize subtable's prefix length on this field.  This will
         * allow readers to use the trie. */
        atomic_thread_fence(memory_order_release);
        subtable->trie_plen[trie_idx] = plen;
    }
}

/* Returns true if 'cls' contains no classification rules, false otherwise.
 * Checking the cmap requires no locking. */
bool
classifier_is_empty(const struct classifier *cls)
{
    return cmap_is_empty(&cls->subtables_map);
}

/* Returns the number of rules in 'cls'. */
int
classifier_count(const struct classifier *cls)
{
    /* n_rules is an int, so in the presence of concurrent writers this will
     * return either the old or a new value. */
    return cls->n_rules;
}

static uint32_t
hash_metadata(ovs_be64 metadata)
{
    return hash_uint64((OVS_FORCE uint64_t) metadata);
}

static struct cls_partition *
find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash)
{
    struct cls_partition *partition;

    CMAP_FOR_EACH_WITH_HASH (partition, cmap_node, hash, &cls->partitions) {
        if (partition->metadata == metadata) {
            return partition;
        }
    }

    return NULL;
}

static struct cls_partition *
create_partition(struct classifier *cls, struct cls_subtable *subtable,
                 ovs_be64 metadata)
{
    uint32_t hash = hash_metadata(metadata);
    struct cls_partition *partition = find_partition(cls, metadata, hash);
    if (!partition) {
        partition = xmalloc(sizeof *partition);
        partition->metadata = metadata;
        partition->tags = 0;
        tag_tracker_init(&partition->tracker);
        cmap_insert(&cls->partitions, &partition->cmap_node, hash);
    }
    tag_tracker_add(&partition->tracker, &partition->tags, subtable->tag);
    return partition;
}

static inline ovs_be32 minimatch_get_ports(const struct minimatch *match)
{
    /* Could optimize to use the same map if needed for fast path. */
    return MINIFLOW_GET_BE32(&match->flow, tp_src)
        & MINIFLOW_GET_BE32(&match->mask.masks, tp_src);
}

static void
subtable_replace_head_rule(struct classifier *cls OVS_UNUSED,
                           struct cls_subtable *subtable,
                           struct cls_match *head, struct cls_match *new,
                           uint32_t hash, uint32_t ihash[CLS_MAX_INDICES])
{
    /* Rule's data is already in the tries. */

    new->partition = head->partition; /* Steal partition, if any. */
    head->partition = NULL;

    for (int i = 0; i < subtable->n_indices; i++) {
        cmap_replace(&subtable->indices[i], &head->index_nodes[i],
                     &new->index_nodes[i], ihash[i]);
    }
    cmap_replace(&subtable->rules, &head->cmap_node, &new->cmap_node, hash);
}

/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * If 'cls' already contains an identical rule (including wildcards, values of
 * fixed fields, and priority), replaces the old rule by 'rule' and returns the
 * rule that was replaced.  The caller takes ownership of the returned rule and
 * is thus responsible for destroying it with cls_rule_destroy(), after RCU
 * grace period has passed (see ovsrcu_postpone()).
 *
 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 * inserting the new rule.  In this case, no rules are displaced by the new
 * rule, even rules that cannot have any effect because the new rule matches a
 * superset of their flows and has higher priority.
 */
const struct cls_rule *
classifier_replace(struct classifier *cls, const struct cls_rule *rule,
                   const struct cls_conjunction *conjs, size_t n_conjs)
{
    struct cls_match *new;
    struct cls_subtable *subtable;
    uint32_t ihash[CLS_MAX_INDICES];
    uint8_t prev_be64ofs = 0;
    struct cls_match *head;
    size_t n_rules = 0;
    uint32_t basis;
    uint32_t hash;
    int i;

    ovs_assert(rule->version > 0);

    /* 'new' is initially invisible to lookups. */
    new = cls_match_alloc(rule, conjs, n_conjs);

    CONST_CAST(struct cls_rule *, rule)->cls_match = new;

    subtable = find_subtable(cls, &rule->match.mask);
    if (!subtable) {
        subtable = insert_subtable(cls, &rule->match.mask);
    }

    /* Compute hashes in segments. */
    basis = 0;
    for (i = 0; i < subtable->n_indices; i++) {
        ihash[i] = minimatch_hash_range(&rule->match, prev_be64ofs,
                                        subtable->index_ofs[i], &basis);
        prev_be64ofs = subtable->index_ofs[i];
    }
    hash = minimatch_hash_range(&rule->match, prev_be64ofs, FLOW_U64S, &basis);

    head = find_equal(subtable, &rule->match.flow, hash);
    if (!head) {
        /* Add rule to tries.
         *
         * Concurrent readers might miss seeing the rule until this update,
         * which might require being fixed up by revalidation later. */
        for (i = 0; i < cls->n_tries; i++) {
            if (subtable->trie_plen[i]) {
                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
            }
        }

        /* Add rule to ports trie. */
        if (subtable->ports_mask_len) {
            /* We mask the value to be inserted to always have the wildcarded
             * bits in known (zero) state, so we can include them in comparison
             * and they will always match (== their original value does not
             * matter). */
            ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
                               subtable->ports_mask_len);
        }

        /* Add rule to partitions.
         *
         * Concurrent readers might miss seeing the rule until this update,
         * which might require being fixed up by revalidation later. */
        new->partition = NULL;
        if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
            ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);

            new->partition = create_partition(cls, subtable, metadata);
        }

        /* Add new node to segment indices.
         *
         * Readers may find the rule in the indices before the rule is visible
         * in the subtable's 'rules' map.  This may result in us losing the
         * opportunity to quit lookups earlier, resulting in sub-optimal
         * wildcarding.  This will be fixed later by revalidation (always
         * scheduled after flow table changes). */
        for (i = 0; i < subtable->n_indices; i++) {
            cmap_insert(&subtable->indices[i], &new->index_nodes[i], ihash[i]);
        }
        n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash);
    } else {   /* Equal rules exist in the classifier already. */
        struct cls_match *prev, *iter;

        /* Scan the list for the insertion point that will keep the list in
         * order of decreasing priority.  Insert after rules marked invisible
         * in any version of the same priority. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
            if (rule->priority > iter->priority
                || (rule->priority == iter->priority
                    && !cls_match_is_eventually_invisible(iter))) {
                break;
            }
        }

        /* Replace 'iter' with 'new' or insert 'new' between 'prev' and
         * 'iter'. */
        if (iter) {
            struct cls_rule *old;

            if (rule->priority == iter->priority) {
                cls_match_replace(prev, iter, new);
                old = CONST_CAST(struct cls_rule *, iter->cls_rule);
            } else {
                cls_match_insert(prev, iter, new);
                old = NULL;
            }

            /* Replace the existing head in data structures, if rule is the
             * new head. */
            if (iter == head) {
                subtable_replace_head_rule(cls, subtable, head, new, hash,
                                           ihash);
            }

            if (old) {
                struct cls_conjunction_set *conj_set;

                conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
                                                &iter->conj_set);
                if (conj_set) {
                    ovsrcu_postpone(free, conj_set);
                }

                ovsrcu_postpone(cls_match_free_cb, iter);
                old->cls_match = NULL;

                /* No change in subtable's max priority or max count. */

                /* Make 'new' visible to lookups in the appropriate version. */
                cls_match_set_visibility(new, rule->version);

                /* Make rule visible to iterators (immediately). */
                rculist_replace(CONST_CAST(struct rculist *, &rule->node),
                                &old->node);

                /* Return displaced rule.  Caller is responsible for keeping it
                 * around until all threads quiesce. */
                return old;
            }
        } else {
            /* 'new' is the new node after 'prev'. */
            cls_match_insert(prev, iter, new);
        }
    }

    /* Make 'new' visible to lookups in the appropriate version. */
    cls_match_set_visibility(new, rule->version);

    /* Make rule visible to iterators (immediately). */
    rculist_push_back(&subtable->rules_list,
                      CONST_CAST(struct rculist *, &rule->node));

    /* Rule was added, not replaced.  Update 'subtable's 'max_priority' and
     * 'max_count', if necessary.
     *
     * The rule was already inserted, but concurrent readers may not see the
     * rule yet as the subtables vector is not updated yet.  This will have to
     * be fixed by revalidation later. */
    if (n_rules == 1) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_insert(&cls->subtables, subtable, rule->priority);
    } else if (rule->priority == subtable->max_priority) {
        ++subtable->max_count;
    } else if (rule->priority > subtable->max_priority) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_change_priority(&cls->subtables, subtable, rule->priority);
    }

    /* Nothing was replaced. */
    cls->n_rules++;

    if (cls->publish) {
        pvector_publish(&cls->subtables);
    }

    return NULL;
}

/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * 'cls' must not contain an identical rule (including wildcards, values of
 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 * such a rule. */
void
classifier_insert(struct classifier *cls, const struct cls_rule *rule,
                  const struct cls_conjunction conj[], size_t n_conj)
{
    const struct cls_rule *displaced_rule
        = classifier_replace(cls, rule, conj, n_conj);
    ovs_assert(!displaced_rule);
}
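
/* Usage sketch (illustrative): a caller that must not displace an existing
 * rule typically checks for a duplicate first:
 *
 *     cls_rule_init(&rule->cr, &match, priority, version);
 *     if (classifier_find_rule_exactly(&cls, &rule->cr)) {
 *         ... handle the duplicate ...
 *     }
 *     classifier_insert(&cls, &rule->cr, NULL, 0);
 */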

/* Removes 'rule' from 'cls'.  It is the caller's responsibility to destroy
 * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
 * resides, etc., as necessary.
 *
 * Does nothing if 'rule' has been already removed, or was never inserted.
 *
 * Returns the removed rule, or NULL if it was already removed. */
const struct cls_rule *
classifier_remove(struct classifier *cls, const struct cls_rule *cls_rule)
{
    struct cls_match *rule, *prev, *next, *head;
    struct cls_partition *partition;
    struct cls_conjunction_set *conj_set;
    struct cls_subtable *subtable;
    int i;
    uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
    uint8_t prev_be64ofs = 0;
    size_t n_rules;

    rule = cls_rule->cls_match;
    if (!rule) {
        return NULL;
    }
    /* Mark as removed. */
    CONST_CAST(struct cls_rule *, cls_rule)->cls_match = NULL;

    /* Remove 'cls_rule' from the subtable's rules list. */
    rculist_remove(CONST_CAST(struct rculist *, &cls_rule->node));

    subtable = find_subtable(cls, &cls_rule->match.mask);
    ovs_assert(subtable);

    for (i = 0; i < subtable->n_indices; i++) {
        ihash[i] = minimatch_hash_range(&cls_rule->match, prev_be64ofs,
                                        subtable->index_ofs[i], &basis);
        prev_be64ofs = subtable->index_ofs[i];
    }
    hash = minimatch_hash_range(&cls_rule->match, prev_be64ofs, FLOW_U64S,
                                &basis);

    head = find_equal(subtable, &cls_rule->match.flow, hash);

    /* Check if the rule is not the head rule. */
    if (rule != head) {
        struct cls_match *iter;

        /* Not the head rule, but potentially one with the same priority.
         * Remove from the list of equal rules. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
            if (rule == iter) {
                break;
            }
        }
        ovs_assert(iter == rule);

        cls_match_remove(prev, rule);

        goto check_priority;
    }

    /* 'rule' is the head rule.  Check if there is another rule to
     * replace 'rule' in the data structures. */
    next = cls_match_next_protected(rule);
    if (next) {
        subtable_replace_head_rule(cls, subtable, rule, next, hash, ihash);
        goto check_priority;
    }

    /* 'rule' is last of the kind in the classifier, must remove from all the
     * data structures. */

    if (subtable->ports_mask_len) {
        ovs_be32 masked_ports = minimatch_get_ports(&cls_rule->match);

        trie_remove_prefix(&subtable->ports_trie,
                           &masked_ports, subtable->ports_mask_len);
    }
    for (i = 0; i < cls->n_tries; i++) {
        if (subtable->trie_plen[i]) {
            trie_remove(&cls->tries[i], cls_rule, subtable->trie_plen[i]);
        }
    }

    /* Remove rule node from indices. */
    for (i = 0; i < subtable->n_indices; i++) {
        cmap_remove(&subtable->indices[i], &rule->index_nodes[i], ihash[i]);
    }
    n_rules = cmap_remove(&subtable->rules, &rule->cmap_node, hash);

    partition = rule->partition;
    if (partition) {
        tag_tracker_subtract(&partition->tracker, &partition->tags,
                             subtable->tag);
        if (!partition->tags) {
            cmap_remove(&cls->partitions, &partition->cmap_node,
                        hash_metadata(partition->metadata));
            ovsrcu_postpone(free, partition);
        }
    }

    if (n_rules == 0) {
        destroy_subtable(cls, subtable);
    } else {
check_priority:
        if (subtable->max_priority == rule->priority
            && --subtable->max_count == 0) {
            /* Find the new 'max_priority' and 'max_count'. */
            int max_priority = INT_MIN;
            struct cls_match *head;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                if (head->priority > max_priority) {
                    max_priority = head->priority;
                    subtable->max_count = 1;
                } else if (head->priority == max_priority) {
                    ++subtable->max_count;
                }
            }
            subtable->max_priority = max_priority;
            pvector_change_priority(&cls->subtables, subtable, max_priority);
        }
    }

    if (cls->publish) {
        pvector_publish(&cls->subtables);
    }

    /* Free the rule. */
    conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
                                    &rule->conj_set);
    if (conj_set) {
        ovsrcu_postpone(free, conj_set);
    }
    ovsrcu_postpone(cls_match_free_cb, rule);
    cls->n_rules--;

    return cls_rule;
}
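
/* Usage sketch (illustrative): freeing a removed rule only after the RCU
 * grace period, via a caller-provided 'rule_destroy_cb' that ends with
 * cls_rule_destroy() and free():
 *
 *     if (classifier_remove(&cls, &rule->cr)) {
 *         ovsrcu_postpone(rule_destroy_cb, rule);
 *     }
 */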

/* Prefix tree context.  Valid when 'lookup_done' is true.  Can skip all
 * subtables which have a prefix match on the trie field, but whose prefix
 * length is not indicated in 'match_plens'.  For example, a subtable that
 * has an 8-bit trie field prefix match can be skipped if
 * !be_get_bit_at(&match_plens, 8 - 1).  If skipped, 'maskbits' prefix bits
 * must be unwildcarded to make the datapath flow only match packets it
 * should. */
struct trie_ctx {
    const struct cls_trie *trie;
    bool lookup_done;           /* Status of the lookup. */
    uint8_t be32ofs;            /* U32 offset of the field in question. */
    unsigned int maskbits;      /* Prefix length needed to avoid false
                                 * matches. */
    union mf_value match_plens; /* Bitmask of prefix lengths with possible
                                 * matches. */
};

static void
trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
{
    ctx->trie = trie;
    ctx->be32ofs = trie->field->flow_be32ofs;
    ctx->lookup_done = false;
}

struct conjunctive_match {
    struct hmap_node hmap_node;
    uint32_t id;
    uint64_t clauses;
};

static struct conjunctive_match *
find_conjunctive_match__(struct hmap *matches, uint64_t id, uint32_t hash)
{
    struct conjunctive_match *m;

    HMAP_FOR_EACH_IN_BUCKET (m, hmap_node, hash, matches) {
        if (m->id == id) {
            return m;
        }
    }
    return NULL;
}

static bool
find_conjunctive_match(const struct cls_conjunction_set *set,
                       unsigned int max_n_clauses, struct hmap *matches,
                       struct conjunctive_match *cm_stubs, size_t n_cm_stubs,
                       uint32_t *idp)
{
    const struct cls_conjunction *c;

    if (max_n_clauses < set->min_n_clauses) {
        return false;
    }

    for (c = set->conj; c < &set->conj[set->n]; c++) {
        struct conjunctive_match *cm;
        uint32_t hash;

        if (c->n_clauses > max_n_clauses) {
            continue;
        }

        hash = hash_int(c->id, 0);
        cm = find_conjunctive_match__(matches, c->id, hash);
        if (!cm) {
            size_t n = hmap_count(matches);

            cm = n < n_cm_stubs ? &cm_stubs[n] : xmalloc(sizeof *cm);
            hmap_insert(matches, &cm->hmap_node, hash);
            cm->id = c->id;
            cm->clauses = UINT64_MAX << (c->n_clauses & 63);
        }
        cm->clauses |= UINT64_C(1) << c->clause;
        if (cm->clauses == UINT64_MAX) {
            *idp = cm->id;
            return true;
        }
    }
    return false;
}

static void
free_conjunctive_matches(struct hmap *matches,
                         struct conjunctive_match *cm_stubs, size_t n_cm_stubs)
{
    if (hmap_count(matches) > n_cm_stubs) {
        struct conjunctive_match *cm, *next;

        HMAP_FOR_EACH_SAFE (cm, next, hmap_node, matches) {
            if (!(cm >= cm_stubs && cm < &cm_stubs[n_cm_stubs])) {
                free(cm);
            }
        }
    }
    hmap_destroy(matches);
}
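
/* A worked example of the clause bookkeeping in find_conjunctive_match()
 * (illustrative): for a conjunction with n_clauses == 3, 'clauses' starts
 * out as UINT64_MAX << 3, i.e. all-ones except for the low three bits.
 * Each clause 0, 1 or 2 that matches sets its own bit, so 'clauses' reaches
 * UINT64_MAX exactly when every clause of the conjunction has matched, at
 * which point the conjunction's 'id' is reported via '*idp'. */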

/* Like classifier_lookup(), except that support for conjunctive matches can be
 * configured with 'allow_conjunctive_matches'.  That feature is not exposed
 * externally because turning off conjunctive matches is only useful to avoid
 * recursion within this function itself.
 *
 * 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning. */
static const struct cls_rule *
classifier_lookup__(const struct classifier *cls, long long version,
                    struct flow *flow, struct flow_wildcards *wc,
                    bool allow_conjunctive_matches)
{
    const struct cls_partition *partition;
    struct trie_ctx trie_ctx[CLS_MAX_TRIES];
    const struct cls_match *match;
    tag_type tags;

    /* Highest-priority flow in 'cls' that certainly matches 'flow'. */
    const struct cls_match *hard = NULL;
    int hard_pri = INT_MIN;     /* hard ? hard->priority : INT_MIN. */

    /* Highest-priority conjunctive flows in 'cls' matching 'flow'.  Since
     * these are (components of) conjunctive flows, we can only know whether
     * the full conjunctive flow matches after seeing multiple of them.  Thus,
     * we refer to these as "soft matches". */
    struct cls_conjunction_set *soft_stub[64];
    struct cls_conjunction_set **soft = soft_stub;
    size_t n_soft = 0, allocated_soft = ARRAY_SIZE(soft_stub);
    int soft_pri = INT_MIN;    /* n_soft ? MAX(soft[*]->priority) : INT_MIN. */

    /* Synchronize for cls->n_tries and subtable->trie_plen.  They can change
     * when table configuration changes, which happens typically only on
     * startup. */
    atomic_thread_fence(memory_order_acquire);

    /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
     * then 'flow' cannot possibly match in 'subtable':
     *
     *     - If flow->metadata maps to a given 'partition', then we can use
     *       'tags' for 'partition->tags'.
     *
     *     - If flow->metadata has no partition, then no rule in 'cls' has an
     *       exact-match for flow->metadata.  That means that we don't need to
     *       search any subtable that includes flow->metadata in its mask.
     *
     * In either case, we always need to search any cls_subtables that do not
     * include flow->metadata in their mask.  One way to do that would be to
     * check the "cls_subtable"s explicitly for that, but that would require an
     * extra branch per subtable.  Instead, we mark such a cls_subtable's
     * 'tags' as TAG_ALL and make sure that 'tags' is never empty.  This means
     * that 'tags' always intersects such a cls_subtable's 'tags', so we don't
     * need a special case.
     */
    partition = (cmap_is_empty(&cls->partitions)
                 ? NULL
                 : find_partition(cls, flow->metadata,
                                  hash_metadata(flow->metadata)));
    tags = partition ? partition->tags : TAG_ARBITRARY;

    /* Initialize trie contexts for find_match_wc(). */
    for (int i = 0; i < cls->n_tries; i++) {
        trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
    }

    /* Main loop. */
    struct cls_subtable *subtable;
    PVECTOR_FOR_EACH_PRIORITY (subtable, hard_pri, 2, sizeof *subtable,
                               &cls->subtables) {
        struct cls_conjunction_set *conj_set;

        /* Skip subtables not in our partition. */
        if (!tag_intersects(tags, subtable->tag)) {
            continue;
        }

        /* Skip subtables with no match, or where the match is lower-priority
         * than some certain match we've already found. */
        match = find_match_wc(subtable, version, flow, trie_ctx, cls->n_tries,
                              wc);
        if (!match || match->priority <= hard_pri) {
            continue;
        }

        conj_set = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set);
        if (!conj_set) {
            /* 'match' isn't part of a conjunctive match.  It's the best
             * certain match we've got so far, since we know that it's
             * higher-priority than hard_pri.
             *
             * (There might be a higher-priority conjunctive match.  We can't
             * tell yet.) */
            hard = match;
            hard_pri = hard->priority;
        } else if (allow_conjunctive_matches) {
            /* 'match' is part of a conjunctive match.  Add it to the list. */
            if (OVS_UNLIKELY(n_soft >= allocated_soft)) {
                struct cls_conjunction_set **old_soft = soft;

                allocated_soft *= 2;
                soft = xmalloc(allocated_soft * sizeof *soft);
                memcpy(soft, old_soft, n_soft * sizeof *soft);
                if (old_soft != soft_stub) {
                    free(old_soft);
                }
            }
            soft[n_soft++] = conj_set;

            /* Keep track of the highest-priority soft match. */
            if (soft_pri < match->priority) {
                soft_pri = match->priority;
            }
        }
    }

    /* In the common case, at this point we have no soft matches and we can
     * return immediately.  (We do the same thing if we have potential soft
     * matches but none of them are higher-priority than our hard match.) */
    if (hard_pri >= soft_pri) {
        if (soft != soft_stub) {
            free(soft);
        }
        return hard ? hard->cls_rule : NULL;
    }

    /* At this point, we have some soft matches.  We might also have a hard
     * match; if so, its priority is lower than the highest-priority soft
     * match. */

    /* Soft match loop.
     *
     * Check whether soft matches are real matches. */
    for (;;) {
        /* Delete soft matches that are null.  This only happens in second and
         * subsequent iterations of the soft match loop, when we drop back from
         * a high-priority soft match to a lower-priority one.
         *
         * Also, delete soft matches whose priority is less than or equal to
         * the hard match's priority.  In the first iteration of the soft
         * match loop, these can be in 'soft' because the earlier main loop
         * found the soft match before the hard match.  In second and later
         * iterations of the soft match loop, these can be in 'soft' because we
         * dropped back from a high-priority soft match to a lower-priority
         * soft match.
         *
         * It is tempting to delete soft matches that cannot be satisfied
         * because there are fewer soft matches than required to satisfy any of
         * their conjunctions, but we cannot do that because there might be
         * lower priority soft or hard matches with otherwise identical
         * matches.  (We could special case those here, but there's no
         * need--we'll do so at the bottom of the soft match loop anyway and
         * this duplicates less code.)
         *
         * It's also tempting to break out of the soft match loop if 'n_soft ==
         * 1' but that would also miss lower-priority hard matches.  We could
         * special case that also but again there's no need. */
        for (int i = 0; i < n_soft; ) {
            if (!soft[i] || soft[i]->priority <= hard_pri) {
                soft[i] = soft[--n_soft];
            } else {
                i++;
            }
        }
        if (!n_soft) {
            break;
        }

        /* Find the highest priority among the soft matches.  (We know this
         * must be higher than the hard match's priority; otherwise we would
         * have deleted all of the soft matches in the previous loop.)  Count
         * the number of soft matches that have that priority. */
        soft_pri = INT_MIN;
        int n_soft_pri = 0;
        for (int i = 0; i < n_soft; i++) {
            if (soft[i]->priority > soft_pri) {
                soft_pri = soft[i]->priority;
                n_soft_pri = 1;
            } else if (soft[i]->priority == soft_pri) {
                n_soft_pri++;
            }
        }
        ovs_assert(soft_pri > hard_pri);

        /* Look for a real match among the highest-priority soft matches.
         *
         * It's unusual to have many conjunctive matches, so we use stubs to
         * avoid calling malloc() in the common case.  An hmap has a built-in
         * stub for up to 2 hmap_nodes; possibly, we would benefit from a
         * variant with a bigger stub. */
        struct conjunctive_match cm_stubs[16];
        struct hmap matches;

        hmap_init(&matches);
        for (int i = 0; i < n_soft; i++) {
            uint32_t id;

            if (soft[i]->priority == soft_pri
                && find_conjunctive_match(soft[i], n_soft_pri, &matches,
                                          cm_stubs, ARRAY_SIZE(cm_stubs),
                                          &id)) {
                uint32_t saved_conj_id = flow->conj_id;
                const struct cls_rule *rule;

                flow->conj_id = id;
                rule = classifier_lookup__(cls, version, flow, wc, false);
                flow->conj_id = saved_conj_id;

                if (rule) {
                    free_conjunctive_matches(&matches,
                                             cm_stubs, ARRAY_SIZE(cm_stubs));
                    if (soft != soft_stub) {
                        free(soft);
                    }
                    return rule;
                }
            }
        }
        free_conjunctive_matches(&matches, cm_stubs, ARRAY_SIZE(cm_stubs));

        /* There's no real match among the highest-priority soft matches.
         * However, if any of those soft matches has a lower-priority but
         * otherwise identical flow match, then we need to consider those for
         * soft or hard matches.
         *
         * The next iteration of the soft match loop will delete any null
         * pointers we put into 'soft' (and some others too). */
        for (int i = 0; i < n_soft; i++) {
            if (soft[i]->priority != soft_pri) {
                continue;
            }

            /* Find next-lower-priority flow with identical flow match. */
            match = next_visible_rule_in_list(soft[i]->match, version);
            if (match) {
                soft[i] = ovsrcu_get(struct cls_conjunction_set *,
                                     &match->conj_set);
                if (!soft[i]) {
                    /* The flow is a hard match; don't treat as a soft
                     * match. */
                    if (match->priority > hard_pri) {
                        hard = match;
                        hard_pri = hard->priority;
                    }
                }
            } else {
                /* No such lower-priority flow (probably the common case). */
                soft[i] = NULL;
            }
        }
    }

    if (soft != soft_stub) {
        free(soft);
    }
    return hard ? hard->cls_rule : NULL;
}

/* Finds and returns the highest-priority rule in 'cls' that matches 'flow' and
 * that is visible in 'version'.  Returns a null pointer if no rules in 'cls'
 * match 'flow'.  If multiple rules of equal priority match 'flow', returns one
 * arbitrarily.
 *
 * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
 * set of bits that were significant in the lookup.  At some point
 * earlier, 'wc' should have been initialized (e.g., by
 * flow_wildcards_init_catchall()).
 *
 * 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning. */
const struct cls_rule *
classifier_lookup(const struct classifier *cls, long long version,
                  struct flow *flow, struct flow_wildcards *wc)
{
    return classifier_lookup__(cls, version, flow, wc, true);
}
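
/* Usage sketch (illustrative):
 *
 *     struct flow_wildcards wc;
 *     const struct cls_rule *rule;
 *
 *     flow_wildcards_init_catchall(&wc);
 *     rule = classifier_lookup(&cls, version, &flow, &wc);
 *     if (rule) {
 *         ... act on 'rule'; 'wc' now covers the bits the lookup used ...
 *     }
 */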
1315
b5d97350 1316/* Finds and returns a rule in 'cls' with exactly the same priority and
2b7b1427
JR
1317 * matching criteria as 'target', and that is visible in 'target->version.
1318 * Only one such rule may ever exist. Returns a null pointer if 'cls' doesn't
1319 * contain an exact match. */
dfea28b3 1320const struct cls_rule *
e48eccd1 1321classifier_find_rule_exactly(const struct classifier *cls,
76ecc721 1322 const struct cls_rule *target)
064af421 1323{
dfea28b3
JR
1324 const struct cls_match *head, *rule;
1325 const struct cls_subtable *subtable;
064af421 1326
03868246 1327 subtable = find_subtable(cls, &target->match.mask);
0722ee5c 1328 if (!subtable) {
98abae4a 1329 return NULL;
4d935a6b
JR
1330 }
1331
03868246 1332 head = find_equal(subtable, &target->match.flow,
5cb7a798
BP
1333 miniflow_hash_in_minimask(&target->match.flow,
1334 &target->match.mask, 0));
98abae4a
JR
1335 if (!head) {
1336 return NULL;
1337 }
8f8023b3 1338 CLS_MATCH_FOR_EACH (rule, head) {
186120da
JR
1339 if (rule->priority < target->priority) {
1340 break; /* Not found. */
1341 }
1342 if (rule->priority == target->priority
2b7b1427 1343 && cls_match_visible_in_version(rule, target->version)) {
186120da 1344 return rule->cls_rule;
064af421
BP
1345 }
1346 }
1347 return NULL;
1348}
1349
81a76618 1350/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
2b7b1427
JR
1351 * same matching criteria as 'target', and that is visible in 'version'.
1352 * Returns a null pointer if 'cls' doesn't contain an exact match visible in
1353 * 'version'. */
dfea28b3 1354const struct cls_rule *
81a76618 1355classifier_find_match_exactly(const struct classifier *cls,
2b7b1427
JR
1356 const struct match *target, int priority,
1357 long long version)
81a76618 1358{
dfea28b3 1359 const struct cls_rule *retval;
81a76618
BP
1360 struct cls_rule cr;
1361
2b7b1427 1362 cls_rule_init(&cr, target, priority, version);
81a76618 1363 retval = classifier_find_rule_exactly(cls, &cr);
48d28ac1 1364 cls_rule_destroy(&cr);
81a76618
BP
1365
1366 return retval;
1367}
1368
faa50f40
BP
1369/* Checks if 'target' would overlap any other rule in 'cls'. Two rules are
1370 * considered to overlap if both rules have the same priority and a packet
2b7b1427 1371 * could match both, and if both rules are visible in the same version.
de4ad4a2
JR
1372 *
1373 * A trivial example of overlapping rules is two rules matching disjoint sets
1374 * of fields. E.g., if one rule matches only on port number, while another only
1375 * on dl_type, any packet from that specific port and with that specific
2b7b1427 1376 * dl_type could match both, if the rules also have the same priority. */
49bdc010 1377bool
e48eccd1 1378classifier_rule_overlaps(const struct classifier *cls,
faa50f40 1379 const struct cls_rule *target)
49bdc010 1380{
03868246 1381 struct cls_subtable *subtable;
49bdc010 1382
03868246 1383 /* Iterate subtables in the descending max priority order. */
eb391b76 1384 PVECTOR_FOR_EACH_PRIORITY (subtable, target->priority - 1, 2,
fe7cfa5c 1385 sizeof(struct cls_subtable), &cls->subtables) {
d70e8c28 1386 uint64_t storage[FLOW_U64S];
5cb7a798 1387 struct minimask mask;
de4ad4a2 1388 const struct cls_rule *rule;
49bdc010 1389
03868246 1390 minimask_combine(&mask, &target->match.mask, &subtable->mask, storage);
49bdc010 1391
de4ad4a2
JR
1392 RCULIST_FOR_EACH (rule, node, &subtable->rules_list) {
1393 if (rule->priority == target->priority
1394 && miniflow_equal_in_minimask(&target->match.flow,
2b7b1427
JR
1395 &rule->match.flow, &mask)
1396 && cls_match_visible_in_version(rule->cls_match,
1397 target->version)) {
de4ad4a2 1398 return true;
49bdc010
JP
1399 }
1400 }
1401 }
49bdc010
JP
1402 return false;
1403}
6ceeaa92
BP
1404
1405/* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more
1406 * specific than 'criteria'. That is, 'rule' matches 'criteria' and this
1407 * function returns true if, for every field:
1408 *
1409 * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the
1410 * field, or
1411 *
1412 * - 'criteria' wildcards the field,
1413 *
1414 * Conversely, 'rule' does not match 'criteria' and this function returns false
1415 * if, for at least one field:
1416 *
1417 * - 'criteria' and 'rule' specify different values for the field, or
1418 *
1419 * - 'criteria' specifies a value for the field but 'rule' wildcards it.
1420 *
1421 * Equivalently, the truth table for whether a field matches is:
1422 *
1423 * rule
1424 *
1425 * c wildcard exact
1426 * r +---------+---------+
1427 * i wild | yes | yes |
1428 * t card | | |
1429 * e +---------+---------+
1430 * r exact | no |if values|
1431 * i | |are equal|
1432 * a +---------+---------+
1433 *
1434 * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD
1435 * commands and by OpenFlow 1.0 aggregate and flow stats.
1436 *
81a76618 1437 * Ignores rule->priority. */
6ceeaa92
BP
1438bool
1439cls_rule_is_loose_match(const struct cls_rule *rule,
5cb7a798 1440 const struct minimatch *criteria)
6ceeaa92 1441{
5cb7a798
BP
1442 return (!minimask_has_extra(&rule->match.mask, &criteria->mask)
1443 && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow,
1444 &criteria->mask));
6ceeaa92 1445}
b5d97350 1446\f
5ecc9d81
BP
1447/* Iteration. */
1448
2b7b1427
JR
1449/* Rule may only match a target if it is visible in target's version. For NULL
1450 * target we only return rules that are not invisible in any version. */
5ecc9d81 1451static bool
de4ad4a2 1452rule_matches(const struct cls_rule *rule, const struct cls_rule *target)
5ecc9d81 1453{
2b7b1427
JR
1454 /* Iterators never see duplicate rules with the same priority. */
1455 return target
1456 ? (miniflow_equal_in_minimask(&rule->match.flow, &target->match.flow,
1457 &target->match.mask)
1458 && cls_match_visible_in_version(rule->cls_match, target->version))
1459 : !cls_match_is_eventually_invisible(rule->cls_match);
5ecc9d81
BP
1460}
1461
de4ad4a2 1462static const struct cls_rule *
03868246 1463search_subtable(const struct cls_subtable *subtable,
f2c21402 1464 struct cls_cursor *cursor)
5ecc9d81 1465{
f2c21402
JR
1466 if (!cursor->target
1467 || !minimask_has_extra(&subtable->mask, &cursor->target->match.mask)) {
de4ad4a2 1468 const struct cls_rule *rule;
5ecc9d81 1469
de4ad4a2 1470 RCULIST_FOR_EACH (rule, node, &subtable->rules_list) {
f2c21402 1471 if (rule_matches(rule, cursor->target)) {
5ecc9d81
BP
1472 return rule;
1473 }
1474 }
1475 }
1476 return NULL;
1477}

/* Initializes a cursor for iterating through rules in 'cls', and returns it.
 * The cursor's 'rule' member points to the first matching rule, or is NULL
 * if there are no matches.
 *
 *     - If 'target' is null, or if 'target' is a catchall target whose
 *       version is CLS_MAX_VERSION, the cursor will visit every rule in
 *       'cls' that is not invisible in any version.
 *
 *     - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls'
 *       such that cls_rule_is_loose_match(rule, target) returns true and
 *       the rule is visible in 'target->version'.
 *
 * Ignores target->priority. */
struct cls_cursor
cls_cursor_start(const struct classifier *cls, const struct cls_rule *target)
{
    struct cls_cursor cursor;
    struct cls_subtable *subtable;

    cursor.cls = cls;
    cursor.target = target && (!cls_rule_is_catchall(target)
                               || target->version != CLS_MAX_VERSION)
        ? target : NULL;
    cursor.rule = NULL;

    /* Find first rule. */
    PVECTOR_CURSOR_FOR_EACH (subtable, &cursor.subtables,
                             &cursor.cls->subtables) {
        const struct cls_rule *rule = search_subtable(subtable, &cursor);

        if (rule) {
            cursor.subtable = subtable;
            cursor.rule = rule;
            break;
        }
    }

    return cursor;
}

static const struct cls_rule *
cls_cursor_next(struct cls_cursor *cursor)
{
    const struct cls_rule *rule;
    const struct cls_subtable *subtable;

    rule = cursor->rule;
    subtable = cursor->subtable;
    RCULIST_FOR_EACH_CONTINUE (rule, node, &subtable->rules_list) {
        if (rule_matches(rule, cursor->target)) {
            return rule;
        }
    }

    PVECTOR_CURSOR_FOR_EACH_CONTINUE (subtable, &cursor->subtables) {
        rule = search_subtable(subtable, cursor);
        if (rule) {
            cursor->subtable = subtable;
            return rule;
        }
    }

    return NULL;
}

/* Sets 'cursor->rule' to the next matching cls_rule in 'cursor''s iteration,
 * or to null if all matching rules have been visited. */
void
cls_cursor_advance(struct cls_cursor *cursor)
{
    cursor->rule = cls_cursor_next(cursor);
}
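
/* Typical use of the cursor API, as an illustrative sketch ('cls' and
 * 'target' are hypothetical; in-tree callers typically use the
 * CLS_FOR_EACH*() iteration macros from classifier.h, which wrap this
 * sequence):
 *
 *     struct cls_cursor cursor = cls_cursor_start(cls, target);
 *
 *     for (; cursor.rule; cls_cursor_advance(&cursor)) {
 *         const struct cls_rule *rule = cursor.rule;
 *
 *         ...use 'rule'...
 *     }
 */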
\f
static struct cls_subtable *
find_subtable(const struct classifier *cls, const struct minimask *mask)
{
    struct cls_subtable *subtable;

    CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0),
                             &cls->subtables_map) {
        if (minimask_equal(mask, &subtable->mask)) {
            return subtable;
        }
    }
    return NULL;
}

/* The new subtable will be visible to the readers only after this. */
static struct cls_subtable *
insert_subtable(struct classifier *cls, const struct minimask *mask)
{
    uint32_t hash = minimask_hash(mask, 0);
    struct cls_subtable *subtable;
    int i, index = 0;
    struct flow_wildcards old, new;
    uint8_t prev;
    int count = count_1bits(mask->masks.map);

    subtable = xzalloc(sizeof *subtable - sizeof mask->masks.inline_values
                       + MINIFLOW_VALUES_SIZE(count));
    cmap_init(&subtable->rules);
    miniflow_clone_inline(CONST_CAST(struct miniflow *, &subtable->mask.masks),
                          &mask->masks, count);

    /* Init indices for segmented lookup, if any. */
    flow_wildcards_init_catchall(&new);
    old = new;
    prev = 0;
    for (i = 0; i < cls->n_flow_segments; i++) {
        flow_wildcards_fold_minimask_range(&new, mask, prev,
                                           cls->flow_segments[i]);
        /* Add an index if it adds mask bits. */
        if (!flow_wildcards_equal(&new, &old)) {
            cmap_init(&subtable->indices[index]);
            *CONST_CAST(uint8_t *, &subtable->index_ofs[index])
                = cls->flow_segments[i];
            index++;
            old = new;
        }
        prev = cls->flow_segments[i];
    }
    /* Check if the rest of the subtable's mask adds any bits,
     * and remove the last index if it doesn't. */
    if (index > 0) {
        flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U64S);
        if (flow_wildcards_equal(&new, &old)) {
            --index;
            *CONST_CAST(uint8_t *, &subtable->index_ofs[index]) = 0;
            cmap_destroy(&subtable->indices[index]);
        }
    }
    *CONST_CAST(uint8_t *, &subtable->n_indices) = index;

    *CONST_CAST(tag_type *, &subtable->tag) =
        (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
         ? tag_create_deterministic(hash)
         : TAG_ALL);

    for (i = 0; i < cls->n_tries; i++) {
        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
                                                         cls->tries[i].field);
    }

    /* Ports trie. */
    ovsrcu_set_hidden(&subtable->ports_trie, NULL);
    *CONST_CAST(int *, &subtable->ports_mask_len)
        = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src)));

    /* List of rules. */
    rculist_init(&subtable->rules_list);

    cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash);

    return subtable;
}
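
/* As an illustrative example: with three flow segments ending after the
 * metadata, L2, and L3 portions of struct flow (the way ofproto typically
 * initializes the classifier), a subtable whose mask has bits only in L3
 * and L4 fields ends up with a single index: the first two segments add no
 * mask bits, the range up to the end of L3 becomes indices[0], and the
 * trailing L4 range is left to the subtable's own 'rules' cmap. */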

/* RCU readers may still access the subtable before it is actually freed. */
static void
destroy_subtable(struct classifier *cls, struct cls_subtable *subtable)
{
    int i;

    pvector_remove(&cls->subtables, subtable);
    cmap_remove(&cls->subtables_map, &subtable->cmap_node,
                minimask_hash(&subtable->mask, 0));

    ovs_assert(ovsrcu_get_protected(struct trie_node *, &subtable->ports_trie)
               == NULL);
    ovs_assert(cmap_is_empty(&subtable->rules));
    ovs_assert(rculist_is_empty(&subtable->rules_list));

    for (i = 0; i < subtable->n_indices; i++) {
        cmap_destroy(&subtable->indices[i]);
    }
    cmap_destroy(&subtable->rules);
    ovsrcu_postpone(free, subtable);
}

struct range {
    uint8_t start;
    uint8_t end;
};

static unsigned int be_get_bit_at(const ovs_be32 value[], unsigned int ofs);

/* Return 'true' if we can skip the rest of the subtable based on the prefix
 * trie lookup results. */
static inline bool
check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
            const unsigned int field_plen[CLS_MAX_TRIES],
            const struct range ofs, const struct flow *flow,
            struct flow_wildcards *wc)
{
    int j;

    /* Check if we could avoid fully unwildcarding the next level of
     * fields using the prefix tries.  The trie checks are done only as
     * needed to avoid folding in additional bits to the wildcards mask. */
    for (j = 0; j < n_tries; j++) {
        /* Is the trie field relevant for this subtable? */
        if (field_plen[j]) {
            struct trie_ctx *ctx = &trie_ctx[j];
            uint8_t be32ofs = ctx->be32ofs;
            uint8_t be64ofs = be32ofs / 2;

            /* Is the trie field within the current range of fields? */
            if (be64ofs >= ofs.start && be64ofs < ofs.end) {
                /* On-demand trie lookup. */
                if (!ctx->lookup_done) {
                    memset(&ctx->match_plens, 0, sizeof ctx->match_plens);
                    ctx->maskbits = trie_lookup(ctx->trie, flow,
                                                &ctx->match_plens);
                    ctx->lookup_done = true;
                }
                /* Possible to skip the rest of the subtable if subtable's
                 * prefix on the field is not included in the lookup result. */
                if (!be_get_bit_at(&ctx->match_plens.be32, field_plen[j] - 1)) {
                    /* We want the trie lookup to never result in unwildcarding
                     * any bits that would not be unwildcarded otherwise.
                     * Since the trie is shared by the whole classifier, it is
                     * possible that the 'maskbits' contain bits that are
                     * irrelevant for the partition relevant for the current
                     * packet.  Hence the checks below. */

                    /* Check that the trie result will not unwildcard more bits
                     * than this subtable would otherwise. */
                    if (ctx->maskbits <= field_plen[j]) {
                        /* Unwildcard the bits and skip the rest. */
                        mask_set_prefix_bits(wc, be32ofs, ctx->maskbits);
                        /* Note: Prerequisite already unwildcarded, as the only
                         * prerequisite of the supported trie lookup fields is
                         * the ethertype, which is always unwildcarded. */
                        return true;
                    }
                    /* Can skip if the field is already unwildcarded. */
                    if (mask_prefix_bits_set(wc, be32ofs, ctx->maskbits)) {
                        return true;
                    }
                }
            }
        }
    }
    return false;
}
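
/* For example, suppose this subtable matches a /24 prefix of a trie field
 * (field_plen[j] == 24), and the trie lookup for the current packet found
 * matching rules only at prefix lengths /8 and /16.  Then bit 23 of
 * 'match_plens' is clear, so no rule in this subtable can match; if
 * 'maskbits' (the number of bits the trie had to examine) is at most 24,
 * those bits are unwildcarded and the subtable is skipped. */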

/* Returns true if 'target' satisfies 'flow'/'mask', that is, if 'target'
 * agrees with 'flow' on every bit that is set in 'mask'.
 *
 * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
 * target, mask), but it is faster because of the invariant that flow->map
 * and mask->masks.map are the same. */
static inline bool
miniflow_and_mask_matches_flow(const struct miniflow *flow,
                               const struct minimask *mask,
                               const struct flow *target)
{
    const uint64_t *flowp = miniflow_get_values(flow);
    const uint64_t *maskp = miniflow_get_values(&mask->masks);
    int idx;

    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
        uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & *maskp++;

        if (diff) {
            return false;
        }
    }

    return true;
}
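
/* A worked example of the check above, using 32-bit values for brevity:
 * with mask 0xffffff00, flow value 0x0a000001 and target value 0x0a000002,
 *
 *     diff = (0x0a000001 ^ 0x0a000002) & 0xffffff00
 *          = 0x00000003 & 0xffffff00 = 0,
 *
 * so the two values agree on every masked bit and the word matches. */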

static inline const struct cls_match *
find_match(const struct cls_subtable *subtable, long long version,
           const struct flow *flow, uint32_t hash)
{
    const struct cls_match *head, *rule;

    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
        if (OVS_LIKELY(miniflow_and_mask_matches_flow(&head->flow,
                                                      &subtable->mask,
                                                      flow))) {
            /* Return highest priority rule that is visible. */
            CLS_MATCH_FOR_EACH (rule, head) {
                if (OVS_LIKELY(cls_match_visible_in_version(rule, version))) {
                    return rule;
                }
            }
        }
    }

    return NULL;
}

/* Returns true if 'target' satisfies 'flow'/'mask', that is, if 'target'
 * agrees with 'flow' on every bit that is set in 'mask'.
 *
 * This function is equivalent to miniflow_and_mask_matches_flow(), but this
 * version also fills in the mask bits in 'wc'. */
static inline bool
miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
                                  const struct minimask *mask,
                                  const struct flow *target,
                                  struct flow_wildcards *wc)
{
    const uint64_t *flowp = miniflow_get_values(flow);
    const uint64_t *maskp = miniflow_get_values(&mask->masks);
    int idx;

    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
        uint64_t mask = *maskp++;
        uint64_t diff = (*flowp++ ^ flow_u64_value(target, idx)) & mask;

        if (diff) {
            /* Only unwildcard if none of the differing bits is already
             * exact-matched. */
            if (!(flow_u64_value(&wc->masks, idx) & diff)) {
                /* Keep one bit of the difference.  The selected bit may be
                 * different in big-endian vs. little-endian systems. */
                *flow_u64_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
            }
            return false;
        }
        /* Fill in the bits that were looked at. */
        *flow_u64_lvalue(&wc->masks, idx) |= mask;
    }

    return true;
}

/* Unwildcard the fields looked up so far, if any. */
static void
fill_range_wc(const struct cls_subtable *subtable, struct flow_wildcards *wc,
              uint8_t to)
{
    if (to) {
        flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, to);
    }
}

static const struct cls_match *
find_match_wc(const struct cls_subtable *subtable, long long version,
              const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES],
              unsigned int n_tries, struct flow_wildcards *wc)
{
    uint32_t basis = 0, hash;
    const struct cls_match *rule = NULL;
    int i;
    struct range ofs;

    if (OVS_UNLIKELY(!wc)) {
        return find_match(subtable, version, flow,
                          flow_hash_in_minimask(flow, &subtable->mask, 0));
    }

    ofs.start = 0;
    /* Try to finish early by checking fields in segments. */
    for (i = 0; i < subtable->n_indices; i++) {
        const struct cmap_node *inode;

        ofs.end = subtable->index_ofs[i];

        if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow,
                        wc)) {
            /* 'wc' bits for the trie field set, now unwildcard the preceding
             * bits used so far. */
            fill_range_wc(subtable, wc, ofs.start);
            return NULL;
        }
        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                           ofs.end, &basis);
        inode = cmap_find(&subtable->indices[i], hash);
        if (!inode) {
            /* No match, can stop immediately, but must fold in the bits
             * used in lookup so far. */
            fill_range_wc(subtable, wc, ofs.end);
            return NULL;
        }

        /* If we have narrowed down to a single rule already, check whether
         * that rule matches.  Either way, we're done.
         *
         * (Rare) hash collisions may cause us to miss the opportunity for this
         * optimization. */
        if (!cmap_node_next(inode)) {
            const struct cls_match *head;

            ASSIGN_CONTAINER(head, inode - i, index_nodes);
            if (miniflow_and_mask_matches_flow_wc(&head->flow, &subtable->mask,
                                                  flow, wc)) {
                /* Return highest priority rule that is visible. */
                CLS_MATCH_FOR_EACH (rule, head) {
                    if (OVS_LIKELY(cls_match_visible_in_version(rule,
                                                                version))) {
                        return rule;
                    }
                }
            }
            return NULL;
        }
        ofs.start = ofs.end;
    }
    ofs.end = FLOW_U64S;
    /* Trie check for the final range. */
    if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
        fill_range_wc(subtable, wc, ofs.start);
        return NULL;
    }
    hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                       ofs.end, &basis);
    rule = find_match(subtable, version, flow, hash);
    if (!rule && subtable->ports_mask_len) {
        /* Ports are always part of the final range, if any.
         * No match was found for the ports.  Use the ports trie to figure out
         * which ports bits to unwildcard. */
        unsigned int mbits;
        ovs_be32 value, plens, mask;

        mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);
        value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
        mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32);

        ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
            mask & be32_prefix_mask(mbits);

        /* Unwildcard all bits in the mask up to the ports, as they were used
         * to determine there is no match. */
        fill_range_wc(subtable, wc, TP_PORTS_OFS64);
        return NULL;
    }

    /* Must unwildcard all the fields, as they were looked at. */
    flow_wildcards_fold_minimask(wc, &subtable->mask);
    return rule;
}

static struct cls_match *
find_equal(const struct cls_subtable *subtable, const struct miniflow *flow,
           uint32_t hash)
{
    struct cls_match *head;

    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
        if (miniflow_equal(&head->flow, flow)) {
            return head;
        }
    }
    return NULL;
}
\f
/* A longest-prefix match tree. */

/* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'.
 * Prefixes are in the network byte order, and the offset 0 corresponds to
 * the most significant bit of the first byte.  The offset can be read as
 * "how many bits to skip from the start of the prefix starting at 'pr'". */
static uint32_t
raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
{
    uint32_t prefix;

    pr += ofs / 32; /* Where to start. */
    ofs %= 32; /* How many bits to skip at 'pr'. */

    prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */
    if (plen > 32 - ofs) { /* Need more than we have already? */
        prefix |= ntohl(*++pr) >> (32 - ofs);
    }
    /* Return with possible unwanted bits at the end. */
    return prefix;
}
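
/* A worked example: with pr[] = { htonl(0x12345678), htonl(0x9abcdef0) },
 * ofs = 28 and plen = 8, the first word contributes its 4 low bits (0x8)
 * shifted to the top and the second word its 4 high bits (0x9):
 *
 *     prefix = 0x12345678 << 28 | 0x9abcdef0 >> 4 = 0x89abcdef
 *
 * whose 8 MSBs, 0x89, are the requested bits 28..35; the trailing bits are
 * the "possible unwanted bits" mentioned above. */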

/* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit
 * offset 'ofs'.  Prefixes are in the network byte order, and the offset 0
 * corresponds to the most significant bit of the first byte.  The offset can
 * be read as "how many bits to skip from the start of the prefix starting at
 * 'pr'". */
static uint32_t
trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
{
    if (!plen) {
        return 0;
    }
    if (plen > TRIE_PREFIX_BITS) {
        plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */
    }
    /* Return with unwanted bits cleared. */
    return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen);
}

/* Return the number of equal bits in the 'n_bits' MSBs of 'prefix' and the
 * bits of 'value' starting at "MSB 0"-based offset 'ofs'. */
static unsigned int
prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[],
                  unsigned int ofs)
{
    uint64_t diff = prefix ^ raw_get_prefix(value, ofs, n_bits);
    /* Set the bit after the relevant bits to limit the result. */
    return raw_clz64(diff << 32 | UINT64_C(1) << (63 - n_bits));
}

/* Return the number of equal bits in 'node''s prefix and a 'prefix' of
 * length 'plen', starting at "MSB 0"-based offset 'ofs'. */
static unsigned int
trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[],
                       unsigned int ofs, unsigned int plen)
{
    return prefix_equal_bits(node->prefix, MIN(node->n_bits, plen - ofs),
                             prefix, ofs);
}

/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int.  'ofs' can
 * be greater than 31. */
static unsigned int
be_get_bit_at(const ovs_be32 value[], unsigned int ofs)
{
    return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u;
}

/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int.  'ofs' must
 * be between 0 and 31, inclusive. */
static unsigned int
get_bit_at(const uint32_t prefix, unsigned int ofs)
{
    return (prefix >> (31 - ofs)) & 1u;
}

/* Create a new branch. */
static struct trie_node *
trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen,
                   unsigned int n_rules)
{
    struct trie_node *node = xmalloc(sizeof *node);

    node->prefix = trie_get_prefix(prefix, ofs, plen);

    if (plen <= TRIE_PREFIX_BITS) {
        node->n_bits = plen;
        ovsrcu_set_hidden(&node->edges[0], NULL);
        ovsrcu_set_hidden(&node->edges[1], NULL);
        node->n_rules = n_rules;
    } else { /* Need intermediate nodes. */
        struct trie_node *subnode = trie_branch_create(prefix,
                                                       ofs + TRIE_PREFIX_BITS,
                                                       plen - TRIE_PREFIX_BITS,
                                                       n_rules);
        int bit = get_bit_at(subnode->prefix, 0);
        node->n_bits = TRIE_PREFIX_BITS;
        ovsrcu_set_hidden(&node->edges[bit], subnode);
        ovsrcu_set_hidden(&node->edges[!bit], NULL);
        node->n_rules = 0;
    }
    return node;
}
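
/* For example, with TRIE_PREFIX_BITS == 32, creating a branch for a 48-bit
 * prefix yields a chain of two nodes: the first holds bits 0..31 (n_bits ==
 * 32, n_rules == 0) and its single child holds bits 32..47 (n_bits == 16)
 * along with the 'n_rules' count. */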

static void
trie_node_destroy(const struct trie_node *node)
{
    ovsrcu_postpone(free, CONST_CAST(struct trie_node *, node));
}

/* Copy a trie node for modification and postpone deleting the old one. */
static struct trie_node *
trie_node_rcu_realloc(const struct trie_node *node)
{
    struct trie_node *new_node = xmalloc(sizeof *node);

    *new_node = *node;
    trie_node_destroy(node);

    return new_node;
}

static void
trie_destroy(rcu_trie_ptr *trie)
{
    struct trie_node *node = ovsrcu_get_protected(struct trie_node *, trie);

    if (node) {
        ovsrcu_set_hidden(trie, NULL);
        trie_destroy(&node->edges[0]);
        trie_destroy(&node->edges[1]);
        trie_node_destroy(node);
    }
}

static bool
trie_is_leaf(const struct trie_node *trie)
{
    /* No children? */
    return !ovsrcu_get(struct trie_node *, &trie->edges[0])
        && !ovsrcu_get(struct trie_node *, &trie->edges[1]);
}

static void
mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs,
                     unsigned int n_bits)
{
    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
    unsigned int i;

    for (i = 0; i < n_bits / 32; i++) {
        mask[i] = OVS_BE32_MAX;
    }
    if (n_bits % 32) {
        mask[i] |= htonl(~0u << (32 - n_bits % 32));
    }
}
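
/* For example, mask_set_prefix_bits(wc, be32ofs, 44) sets mask[0] to
 * OVS_BE32_MAX and then ORs htonl(0xfff00000) into mask[1], for a total of
 * 32 + 12 = 44 prefix bits. */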

static bool
mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs,
                     unsigned int n_bits)
{
    ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
    unsigned int i;
    ovs_be32 zeroes = 0;

    for (i = 0; i < n_bits / 32; i++) {
        zeroes |= ~mask[i];
    }
    if (n_bits % 32) {
        zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32));
    }

    return !zeroes; /* All 'n_bits' bits set. */
}

static rcu_trie_ptr *
trie_next_edge(struct trie_node *node, const ovs_be32 value[],
               unsigned int ofs)
{
    return node->edges + be_get_bit_at(value, ofs);
}

static const struct trie_node *
trie_next_node(const struct trie_node *node, const ovs_be32 value[],
               unsigned int ofs)
{
    return ovsrcu_get(struct trie_node *,
                      &node->edges[be_get_bit_at(value, ofs)]);
}

/* Set the bit at ("MSB 0"-based) offset 'ofs'.  'ofs' can be greater than 31.
 */
static void
be_set_bit_at(ovs_be32 value[], unsigned int ofs)
{
    ((uint8_t *)value)[ofs / 8] |= 1u << (7 - ofs % 8);
}

/* Returns the number of bits in the prefix mask necessary to determine a
 * mismatch, in case there are longer prefixes in the tree below the one that
 * matched.
 * '*plens' will have a bit set for each prefix length that may have matching
 * rules.  The caller is responsible for clearing '*plens' prior to calling
 * this.
 */
static unsigned int
trie_lookup_value(const rcu_trie_ptr *trie, const ovs_be32 value[],
                  ovs_be32 plens[], unsigned int n_bits)
{
    const struct trie_node *prev = NULL;
    const struct trie_node *node = ovsrcu_get(struct trie_node *, trie);
    unsigned int match_len = 0; /* Number of matching bits. */

    for (; node; prev = node, node = trie_next_node(node, value, match_len)) {
        unsigned int eqbits;
        /* Check if this edge can be followed. */
        eqbits = prefix_equal_bits(node->prefix, node->n_bits, value,
                                   match_len);
        match_len += eqbits;
        if (eqbits < node->n_bits) { /* Mismatch, nothing more to be found. */
            /* Bit at offset 'match_len' differed. */
            return match_len + 1; /* Includes the first mismatching bit. */
        }
        /* Full match, check if rules exist at this prefix length. */
        if (node->n_rules > 0) {
            be_set_bit_at(plens, match_len - 1);
        }
        if (match_len >= n_bits) {
            return n_bits; /* Full prefix. */
        }
    }
    /* node == NULL.  Full match so far, but we tried to follow a
     * non-existent branch.  Need to exclude the other branch if it exists
     * (it does not if we were called on an empty trie or 'prev' is a leaf
     * node). */
    return !prev || trie_is_leaf(prev) ? match_len : match_len + 1;
}

static unsigned int
trie_lookup(const struct cls_trie *trie, const struct flow *flow,
            union mf_value *plens)
{
    const struct mf_field *mf = trie->field;

    /* Check that current flow matches the prerequisites for the trie
     * field.  Some match fields are used for multiple purposes, so we
     * must check that the trie is relevant for this flow. */
    if (mf_are_prereqs_ok(mf, flow)) {
        return trie_lookup_value(&trie->root,
                                 &((ovs_be32 *)flow)[mf->flow_be32ofs],
                                 &plens->be32, mf->n_bits);
    }
    memset(plens, 0xff, sizeof *plens); /* All prefixes, no skipping. */
    return 0; /* Value not used in this case. */
}
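
/* For example, if the trie holds rules with /8 and /16 prefixes of 'mf' and
 * the packet's field value follows the /16 path in full, the lookup sets
 * bits 7 and 15 of '*plens' ("MSB 0"-based bit n - 1 stands for prefix
 * length n) and returns how many bits had to be inspected to rule out any
 * longer prefixes. */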

/* Returns the length of a prefix match mask for the field 'mf' in 'minimask',
 * or 0 if the mask is empty or not a contiguous (CIDR) prefix. */
static unsigned int
minimask_get_prefix_len(const struct minimask *minimask,
                        const struct mf_field *mf)
{
    unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
    uint8_t be32_ofs = mf->flow_be32ofs;
    uint8_t be32_end = be32_ofs + mf->n_bytes / 4;

    for (; be32_ofs < be32_end; ++be32_ofs) {
        uint32_t mask = ntohl(minimask_get_be32(minimask, be32_ofs));

        /* Validate mask, count the mask length. */
        if (mask_tz) {
            if (mask) {
                return 0; /* No bits allowed after mask ended. */
            }
        } else {
            if (~mask & (~mask + 1)) {
                return 0; /* Mask not contiguous. */
            }
            mask_tz = ctz32(mask);
            n_bits += 32 - mask_tz;
        }
    }

    return n_bits;
}
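
/* For example, a 255.255.255.0 mask on an IPv4 field yields 24, while a
 * non-contiguous mask such as 255.0.255.0 yields 0, as does a mask that
 * resumes after ending, e.g. ffff::ffff on an IPv6 field. */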

/*
 * This is called only when mask prefix is known to be CIDR and non-zero.
 * Relies on the fact that the flow and mask have the same map, and since
 * the mask is CIDR, the storage for the flow field exists even if it
 * happened to be zeros.
 */
static const ovs_be32 *
minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
{
    return (OVS_FORCE const ovs_be32 *)
        (miniflow_get_values(&match->flow)
         + count_1bits(match->flow.map &
                       ((UINT64_C(1) << mf->flow_be32ofs / 2) - 1)))
        + (mf->flow_be32ofs & 1);
}

/* Insert the rule into the prefix tree.
 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    trie_insert_prefix(&trie->root,
                       minimatch_get_prefix(&rule->match, trie->field), mlen);
}

static void
trie_insert_prefix(rcu_trie_ptr *edge, const ovs_be32 *prefix, int mlen)
{
    struct trie_node *node;
    int ofs = 0;

    /* Walk the tree. */
    for (; (node = ovsrcu_get_protected(struct trie_node *, edge));
         edge = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
        ofs += eqbits;
        if (eqbits < node->n_bits) {
            /* Mismatch, new node needs to be inserted above. */
            int old_branch = get_bit_at(node->prefix, eqbits);
            struct trie_node *new_parent;

            new_parent = trie_branch_create(prefix, ofs - eqbits, eqbits,
                                            ofs == mlen ? 1 : 0);
            /* Copy the node to modify it. */
            node = trie_node_rcu_realloc(node);
            /* Adjust the new node for its new position in the tree. */
            node->prefix <<= eqbits;
            node->n_bits -= eqbits;
            ovsrcu_set_hidden(&new_parent->edges[old_branch], node);

            /* Check if need a new branch for the new rule. */
            if (ofs < mlen) {
                ovsrcu_set_hidden(&new_parent->edges[!old_branch],
                                  trie_branch_create(prefix, ofs, mlen - ofs,
                                                     1));
            }
            ovsrcu_set(edge, new_parent); /* Publish changes. */
            return;
        }
        /* Full match so far. */

        if (ofs == mlen) {
            /* Full match at the current node, rule needs to be added here. */
            node->n_rules++;
            return;
        }
    }
    /* Must insert a new tree branch for the new rule. */
    ovsrcu_set(edge, trie_branch_create(prefix, ofs, mlen - ofs, 1));
}

/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
    trie_remove_prefix(&trie->root,
                       minimatch_get_prefix(&rule->match, trie->field), mlen);
}

/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
 * in 'rule'. */
static void
trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 *prefix, int mlen)
{
    struct trie_node *node;
    rcu_trie_ptr *edges[sizeof(union mf_value) * 8];
    int depth = 0, ofs = 0;

    /* Walk the tree. */
    for (edges[0] = root;
         (node = ovsrcu_get_protected(struct trie_node *, edges[depth]));
         edges[++depth] = trie_next_edge(node, prefix, ofs)) {
        unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);

        if (eqbits < node->n_bits) {
            /* Mismatch, nothing to be removed.  This should never happen, as
             * only rules in the classifier are ever removed. */
            break; /* Log a warning. */
        }
        /* Full match so far. */
        ofs += eqbits;

        if (ofs == mlen) {
            /* Full prefix match at the current node, remove rule here. */
            if (!node->n_rules) {
                break; /* Log a warning. */
            }
            node->n_rules--;

            /* Check if can prune the tree. */
            while (!node->n_rules) {
                struct trie_node *next,
                    *edge0 = ovsrcu_get_protected(struct trie_node *,
                                                  &node->edges[0]),
                    *edge1 = ovsrcu_get_protected(struct trie_node *,
                                                  &node->edges[1]);

                if (edge0 && edge1) {
                    break; /* A branching point, cannot prune. */
                }

                /* Else have at most one child node, remove this node. */
                next = edge0 ? edge0 : edge1;

                if (next) {
                    if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) {
                        break; /* Cannot combine. */
                    }
                    next = trie_node_rcu_realloc(next); /* Modify. */

                    /* Combine node with next. */
                    next->prefix = node->prefix | next->prefix >> node->n_bits;
                    next->n_bits += node->n_bits;
                }
                /* Update the parent's edge. */
                ovsrcu_set(edges[depth], next); /* Publish changes. */
                trie_node_destroy(node);

                if (next || !depth) {
                    /* Branch not pruned or at root, nothing more to do. */
                    break;
                }
                node = ovsrcu_get_protected(struct trie_node *,
                                            edges[--depth]);
            }
            return;
        }
    }
    /* Cannot go deeper.  This should never happen, since only rules
     * that actually exist in the classifier are ever removed. */
    VLOG_WARN("Trying to remove a non-existent rule from a prefix trie.");
}
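
/* For example, if removing a rule leaves an 8-bit node with no rules and a
 * single 8-bit child, the two are combined (8 + 8 <= TRIE_PREFIX_BITS) into
 * one 16-bit node, and the replaced nodes are freed via RCU. */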
\f

#define CLS_MATCH_POISON (struct cls_match *)(UINTPTR_MAX / 0xf * 0xb)

void
cls_match_free_cb(struct cls_match *rule)
{
    ovsrcu_set_hidden(&rule->next, CLS_MATCH_POISON);
    free(rule);
}