/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "classifier.h"
#include "classifier-private.h"
#include <errno.h>
#include <netinet/in.h>
#include "byte-order.h"
#include "dynamic-string.h"
#include "odp-util.h"
#include "ofp-util.h"
#include "packets.h"
#include "util.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(classifier);

struct trie_ctx;

/* Ports trie depends on both ports sharing the same ovs_be32. */
#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);

static struct cls_match *
cls_match_alloc(struct cls_rule *rule)
{
    int count = count_1bits(rule->match.flow.map);

    struct cls_match *cls_match
        = xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values
                  + MINIFLOW_VALUES_SIZE(count));

    rculist_init(&cls_match->list);
    *CONST_CAST(const struct cls_rule **, &cls_match->cls_rule) = rule;
    *CONST_CAST(int *, &cls_match->priority) = rule->priority;
    miniflow_clone_inline(CONST_CAST(struct miniflow *, &cls_match->flow),
                          &rule->match.flow, count);

    return cls_match;
}

static struct cls_subtable *find_subtable(const struct classifier *cls,
                                          const struct minimask *);
static struct cls_subtable *insert_subtable(struct classifier *cls,
                                            const struct minimask *)
    OVS_REQUIRES(cls->mutex);
static void destroy_subtable(struct classifier *cls, struct cls_subtable *)
    OVS_REQUIRES(cls->mutex);

static const struct cls_match *find_match_wc(const struct cls_subtable *,
                                             const struct flow *,
                                             struct trie_ctx *,
                                             unsigned int n_tries,
                                             struct flow_wildcards *);
static struct cls_match *find_equal(const struct cls_subtable *,
                                    const struct miniflow *, uint32_t hash);

static inline const struct cls_match *
next_rule_in_list__(const struct cls_match *rule)
{
    const struct cls_match *next = NULL;
    next = OBJECT_CONTAINING(rculist_next(&rule->list), next, list);
    return next;
}

static inline const struct cls_match *
next_rule_in_list(const struct cls_match *rule)
{
    const struct cls_match *next = next_rule_in_list__(rule);
    return next->priority < rule->priority ? next : NULL;
}

static inline struct cls_match *
next_rule_in_list_protected__(struct cls_match *rule)
{
    struct cls_match *next = NULL;
    next = OBJECT_CONTAINING(rculist_next_protected(&rule->list), next, list);
    return next;
}

static inline struct cls_match *
next_rule_in_list_protected(struct cls_match *rule)
{
    struct cls_match *next = next_rule_in_list_protected__(rule);
    return next->priority < rule->priority ? next : NULL;
}

/* Iterates RULE over HEAD and all of the cls_rules on HEAD->list.
 * The classifier's mutex must be held while iterating, as the list is
 * protected by it. */
#define FOR_EACH_RULE_IN_LIST(RULE, HEAD)                               \
    for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE))
#define FOR_EACH_RULE_IN_LIST_PROTECTED(RULE, HEAD)                     \
    for ((RULE) = (HEAD); (RULE) != NULL;                               \
         (RULE) = next_rule_in_list_protected(RULE))

static unsigned int minimask_get_prefix_len(const struct minimask *,
                                            const struct mf_field *);
static void trie_init(struct classifier *cls, int trie_idx,
                      const struct mf_field *)
    OVS_REQUIRES(cls->mutex);
static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
                                union mf_value *plens);
static unsigned int trie_lookup_value(const rcu_trie_ptr *,
                                      const ovs_be32 value[], ovs_be32 plens[],
                                      unsigned int value_bits);
static void trie_destroy(rcu_trie_ptr *);
static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_insert_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
                               int mlen);
static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
static void trie_remove_prefix(rcu_trie_ptr *, const ovs_be32 *prefix,
                               int mlen);
static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
                                 unsigned int n_bits);
static bool mask_prefix_bits_set(const struct flow_wildcards *,
                                 uint8_t be32ofs, unsigned int n_bits);
\f
/* cls_rule. */

/* Initializes 'rule' to match packets specified by 'match' at the given
 * 'priority'.  'match' must satisfy the invariant described in the comment at
 * the definition of struct match.
 *
 * The caller must eventually destroy 'rule' with cls_rule_destroy().
 *
 * Clients should not use priority INT_MIN.  (OpenFlow uses priorities between
 * 0 and UINT16_MAX, inclusive.) */
void
cls_rule_init(struct cls_rule *rule, const struct match *match, int priority)
{
    minimatch_init(&rule->match, match);
    rule->priority = priority;
    rule->cls_match = NULL;
}
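
/* Usage sketch (illustrative only): a rule matching all IPv4 packets at
 * priority 100, inserted into a classifier 'cls' and later removed.  The
 * match setters are assumed from match.h.
 *
 *     struct match match;
 *     struct cls_rule rule;
 *
 *     match_init_catchall(&match);
 *     match_set_dl_type(&match, htons(ETH_TYPE_IP));
 *     cls_rule_init(&rule, &match, 100);
 *     classifier_insert(&cls, &rule);
 *     ...
 *     classifier_remove(&cls, &rule);
 *     ovsrcu_synchronize();    (or defer destruction with ovsrcu_postpone())
 *     cls_rule_destroy(&rule);
 */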

/* Same as cls_rule_init() for initialization from a "struct minimatch". */
void
cls_rule_init_from_minimatch(struct cls_rule *rule,
                             const struct minimatch *match, int priority)
{
    minimatch_clone(&rule->match, match);
    rule->priority = priority;
    rule->cls_match = NULL;
}

/* Initializes 'dst' as a copy of 'src'.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src)
{
    minimatch_clone(&dst->match, &src->match);
    dst->priority = src->priority;
    dst->cls_match = NULL;
}

/* Initializes 'dst' with the data in 'src', destroying 'src'.
 *
 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
void
cls_rule_move(struct cls_rule *dst, struct cls_rule *src)
{
    minimatch_move(&dst->match, &src->match);
    dst->priority = src->priority;
    dst->cls_match = NULL;
}

/* Frees memory referenced by 'rule'.  Doesn't free 'rule' itself (it's
 * normally embedded into a larger structure).
 *
 * ('rule' must not currently be in a classifier.) */
void
cls_rule_destroy(struct cls_rule *rule)
{
    ovs_assert(!rule->cls_match);
    minimatch_destroy(&rule->match);
}

/* Returns true if 'a' and 'b' match the same packets at the same priority,
 * false if they differ in some way. */
bool
cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b)
{
    return a->priority == b->priority && minimatch_equal(&a->match, &b->match);
}

/* Returns a hash value for 'rule', folding in 'basis'. */
uint32_t
cls_rule_hash(const struct cls_rule *rule, uint32_t basis)
{
    return minimatch_hash(&rule->match, hash_int(rule->priority, basis));
}

/* Appends a string describing 'rule' to 's'. */
void
cls_rule_format(const struct cls_rule *rule, struct ds *s)
{
    minimatch_format(&rule->match, s, rule->priority);
}

/* Returns true if 'rule' matches every packet, false otherwise. */
bool
cls_rule_is_catchall(const struct cls_rule *rule)
{
    return minimask_is_catchall(&rule->match.mask);
}
\f
/* Initializes 'cls' as a classifier that initially contains no classification
 * rules. */
void
classifier_init(struct classifier *cls, const uint8_t *flow_segments)
    OVS_EXCLUDED(cls->mutex)
{
    ovs_mutex_init(&cls->mutex);
    ovs_mutex_lock(&cls->mutex);
    cls->n_rules = 0;
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);
    cmap_init(&cls->partitions);
    cls->n_flow_segments = 0;
    if (flow_segments) {
        while (cls->n_flow_segments < CLS_MAX_INDICES
               && *flow_segments < FLOW_U32S) {
            cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
        }
    }
    cls->n_tries = 0;
    for (int i = 0; i < CLS_MAX_TRIES; i++) {
        trie_init(cls, i, NULL);
    }
    ovs_mutex_unlock(&cls->mutex);
}
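
/* Usage sketch (illustrative only): initializing a classifier with the
 * staged lookup boundaries that ofproto uses for its flow tables.  The
 * FLOW_SEGMENT_*_ENDS_AT constants are assumed from flow.h; passing NULL
 * instead disables the segment indices.
 *
 *     static const uint8_t segments[] = {
 *         FLOW_SEGMENT_1_ENDS_AT / 4,
 *         FLOW_SEGMENT_2_ENDS_AT / 4,
 *         FLOW_SEGMENT_3_ENDS_AT / 4,
 *         FLOW_U32S,               (terminates the list, see the loop above)
 *     };
 *     struct classifier cls;
 *
 *     classifier_init(&cls, segments);
 */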

/* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
 * caller's responsibility.
 * May only be called after all the readers have been terminated. */
void
classifier_destroy(struct classifier *cls)
    OVS_EXCLUDED(cls->mutex)
{
    if (cls) {
        struct cls_partition *partition;
        struct cls_subtable *subtable;
        int i;

        ovs_mutex_lock(&cls->mutex);
        for (i = 0; i < cls->n_tries; i++) {
            trie_destroy(&cls->tries[i].root);
        }

        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            destroy_subtable(cls, subtable);
        }
        cmap_destroy(&cls->subtables_map);

        CMAP_FOR_EACH (partition, cmap_node, &cls->partitions) {
            ovsrcu_postpone(free, partition);
        }
        cmap_destroy(&cls->partitions);

        pvector_destroy(&cls->subtables);
        ovs_mutex_unlock(&cls->mutex);
        ovs_mutex_destroy(&cls->mutex);
    }
}

/* Set the fields for which prefix lookup should be performed. */
bool
classifier_set_prefix_fields(struct classifier *cls,
                             const enum mf_field_id *trie_fields,
                             unsigned int n_fields)
    OVS_EXCLUDED(cls->mutex)
{
    const struct mf_field *new_fields[CLS_MAX_TRIES];
    struct mf_bitmap fields = MF_BITMAP_INITIALIZER;
    int i, n_tries = 0;
    bool changed = false;

    ovs_mutex_lock(&cls->mutex);
    for (i = 0; i < n_fields && n_tries < CLS_MAX_TRIES; i++) {
        const struct mf_field *field = mf_from_id(trie_fields[i]);
        if (field->flow_be32ofs < 0 || field->n_bits % 32) {
            /* Incompatible field.  This is the only place where we
             * enforce these requirements, but the rest of the trie code
             * depends on the flow_be32ofs to be non-negative and the
             * field length to be a multiple of 32 bits. */
            continue;
        }

        if (bitmap_is_set(fields.bm, trie_fields[i])) {
            /* Duplicate field, there is no need to build more than
             * one index for any one field. */
            continue;
        }
        bitmap_set1(fields.bm, trie_fields[i]);

        new_fields[n_tries] = NULL;
        if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) {
            new_fields[n_tries] = field;
            changed = true;
        }
        n_tries++;
    }

    if (changed || n_tries < cls->n_tries) {
        struct cls_subtable *subtable;

        /* Trie configuration needs to change.  Disable trie lookups
         * for the tries that are changing and wait for all the current
         * readers with the old configuration to be done. */
        changed = false;
        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            for (i = 0; i < cls->n_tries; i++) {
                if ((i < n_tries && new_fields[i]) || i >= n_tries) {
                    if (subtable->trie_plen[i]) {
                        subtable->trie_plen[i] = 0;
                        changed = true;
                    }
                }
            }
        }
        /* Synchronize if any readers were using tries.  The readers may
         * temporarily function without the trie lookup based optimizations. */
        if (changed) {
            /* ovsrcu_synchronize() functions as a memory barrier, so it does
             * not matter that subtable->trie_plen is not atomic. */
            ovsrcu_synchronize();
        }

        /* Now set up the tries. */
        for (i = 0; i < n_tries; i++) {
            if (new_fields[i]) {
                trie_init(cls, i, new_fields[i]);
            }
        }
        /* Destroy the rest, if any. */
        for (; i < cls->n_tries; i++) {
            trie_init(cls, i, NULL);
        }

        cls->n_tries = n_tries;
        ovs_mutex_unlock(&cls->mutex);
        return true;
    }

    ovs_mutex_unlock(&cls->mutex);
    return false; /* No change. */
}
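
/* Usage sketch (illustrative only): enabling prefix tries for the IPv4
 * source and destination addresses, so lookups can skip subtables based on
 * longest-prefix matches instead of unwildcarding the full fields:
 *
 *     const enum mf_field_id fields[2] = { MFF_IPV4_SRC, MFF_IPV4_DST };
 *
 *     classifier_set_prefix_fields(&cls, fields, ARRAY_SIZE(fields));
 */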

static void
trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
    OVS_REQUIRES(cls->mutex)
{
    struct cls_trie *trie = &cls->tries[trie_idx];
    struct cls_subtable *subtable;

    if (trie_idx < cls->n_tries) {
        trie_destroy(&trie->root);
    } else {
        ovsrcu_set_hidden(&trie->root, NULL);
    }
    trie->field = field;

    /* Add existing rules to the new trie. */
    CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
        unsigned int plen;

        plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
        if (plen) {
            struct cls_match *head;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                trie_insert(trie, head->cls_rule, plen);
            }
        }
        /* Initialize subtable's prefix length on this field.  This will
         * allow readers to use the trie. */
        atomic_thread_fence(memory_order_release);
        subtable->trie_plen[trie_idx] = plen;
    }
}

/* Returns true if 'cls' contains no classification rules, false otherwise.
 * Checking the cmap requires no locking. */
bool
classifier_is_empty(const struct classifier *cls)
{
    return cmap_is_empty(&cls->subtables_map);
}

/* Returns the number of rules in 'cls'. */
int
classifier_count(const struct classifier *cls)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    /* n_rules is an int, so in the presence of concurrent writers this will
     * return either the old or a new value. */
    return cls->n_rules;
}

static uint32_t
hash_metadata(ovs_be64 metadata_)
{
    uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
    return hash_uint64(metadata);
}

static struct cls_partition *
find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash)
{
    struct cls_partition *partition;

    CMAP_FOR_EACH_WITH_HASH (partition, cmap_node, hash, &cls->partitions) {
        if (partition->metadata == metadata) {
            return partition;
        }
    }

    return NULL;
}

static struct cls_partition *
create_partition(struct classifier *cls, struct cls_subtable *subtable,
                 ovs_be64 metadata)
    OVS_REQUIRES(cls->mutex)
{
    uint32_t hash = hash_metadata(metadata);
    struct cls_partition *partition = find_partition(cls, metadata, hash);
    if (!partition) {
        partition = xmalloc(sizeof *partition);
        partition->metadata = metadata;
        partition->tags = 0;
        tag_tracker_init(&partition->tracker);
        cmap_insert(&cls->partitions, &partition->cmap_node, hash);
    }
    tag_tracker_add(&partition->tracker, &partition->tags, subtable->tag);
    return partition;
}

static inline ovs_be32 minimatch_get_ports(const struct minimatch *match)
{
    /* Could optimize to use the same map if needed for fast path. */
    return MINIFLOW_GET_BE32(&match->flow, tp_src)
        & MINIFLOW_GET_BE32(&match->mask.masks, tp_src);
}
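
/* For example, a minimatch that exact-matches tp_src == htons(80) and
 * wildcards tp_dst yields htonl(80 << 16): the wildcarded tp_dst half of the
 * ovs_be32 is zeroed, so ports trie comparisons can safely include it. */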

static void
subtable_replace_head_rule(struct classifier *cls OVS_UNUSED,
                           struct cls_subtable *subtable,
                           struct cls_match *head, struct cls_match *new,
                           uint32_t hash, uint32_t ihash[CLS_MAX_INDICES])
    OVS_REQUIRES(cls->mutex)
{
    /* Rule's data is already in the tries. */

    new->partition = head->partition; /* Steal partition, if any. */
    head->partition = NULL;

    for (int i = 0; i < subtable->n_indices; i++) {
        cmap_replace(&subtable->indices[i], &head->index_nodes[i],
                     &new->index_nodes[i], ihash[i]);
    }
    cmap_replace(&subtable->rules, &head->cmap_node, &new->cmap_node, hash);
}

/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * If 'cls' already contains an identical rule (including wildcards, values of
 * fixed fields, and priority), replaces the old rule by 'rule' and returns the
 * rule that was replaced.  The caller takes ownership of the returned rule and
 * is thus responsible for destroying it with cls_rule_destroy(), after RCU
 * grace period has passed (see ovsrcu_postpone()).
 *
 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 * inserting the new rule.  In this case, no rules are displaced by the new
 * rule, even rules that cannot have any effect because the new rule matches a
 * superset of their flows and has higher priority.
 */
const struct cls_rule *
classifier_replace(struct classifier *cls, struct cls_rule *rule)
    OVS_EXCLUDED(cls->mutex)
{
    struct cls_match *new = cls_match_alloc(rule);
    struct cls_subtable *subtable;
    uint32_t ihash[CLS_MAX_INDICES];
    uint8_t prev_be32ofs = 0;
    struct cls_match *head;
    size_t n_rules = 0;
    uint32_t basis;
    uint32_t hash;
    int i;

    ovs_mutex_lock(&cls->mutex);
    rule->cls_match = new;

    subtable = find_subtable(cls, &rule->match.mask);
    if (!subtable) {
        subtable = insert_subtable(cls, &rule->match.mask);
    }

    /* Compute hashes in segments. */
    basis = 0;
    for (i = 0; i < subtable->n_indices; i++) {
        ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs,
                                        subtable->index_ofs[i], &basis);
        prev_be32ofs = subtable->index_ofs[i];
    }
    hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis);

    head = find_equal(subtable, &rule->match.flow, hash);
    if (!head) {
        /* Add rule to tries.
         *
         * Concurrent readers might miss seeing the rule until this update,
         * which might require being fixed up by revalidation later. */
        for (i = 0; i < cls->n_tries; i++) {
            if (subtable->trie_plen[i]) {
                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
            }
        }

        /* Add rule to ports trie. */
        if (subtable->ports_mask_len) {
            /* We mask the value to be inserted to always have the wildcarded
             * bits in known (zero) state, so we can include them in comparison
             * and they will always match (== their original value does not
             * matter). */
            ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
                               subtable->ports_mask_len);
        }

        /* Add rule to partitions.
         *
         * Concurrent readers might miss seeing the rule until this update,
         * which might require being fixed up by revalidation later. */
        new->partition = NULL;
        if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
            ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);

            new->partition = create_partition(cls, subtable, metadata);
        }

        /* Make rule visible to lookups. */

        /* Add new node to segment indices.
         *
         * Readers may find the rule in the indices before the rule is visible
         * in the subtable's 'rules' map.  This may result in us losing the
         * opportunity to quit lookups earlier, resulting in sub-optimal
         * wildcarding.  This will be fixed later by revalidation (always
         * scheduled after flow table changes). */
        for (i = 0; i < subtable->n_indices; i++) {
            cmap_insert(&subtable->indices[i], &new->index_nodes[i], ihash[i]);
        }
        n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash);
    } else { /* Equal rules exist in the classifier already. */
        struct cls_match *iter;

        /* Scan the list for the insertion point that will keep the list in
         * order of decreasing priority. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, head) {
            if (rule->priority >= iter->priority) {
                break;
            }
        }

        /* 'iter' is now at the insertion point, or NULL if at the end. */
        if (iter) {
            struct cls_rule *old;

            if (rule->priority == iter->priority) {
                rculist_replace(&new->list, &iter->list);
                old = CONST_CAST(struct cls_rule *, iter->cls_rule);
            } else {
                rculist_insert(&iter->list, &new->list);
                old = NULL;
            }

            /* Replace the existing head in data structures, if rule is the new
             * head. */
            if (iter == head) {
                subtable_replace_head_rule(cls, subtable, head, new, hash,
                                           ihash);
            }

            if (old) {
                ovsrcu_postpone(free, iter);
                old->cls_match = NULL;

                /* No change in subtable's max priority or max count. */

                /* Return displaced rule.  Caller is responsible for keeping it
                 * around until all threads quiesce. */
                ovs_mutex_unlock(&cls->mutex);
                return old;
            }
        } else {
            rculist_push_back(&head->list, &new->list);
        }
    }

    /* Rule was added, not replaced.  Update 'subtable's 'max_priority' and
     * 'max_count', if necessary.
     *
     * The rule was already inserted, but concurrent readers may not see the
     * rule yet as the subtables vector is not updated yet.  This will have to
     * be fixed by revalidation later. */
    if (n_rules == 1) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_insert(&cls->subtables, subtable, rule->priority);
    } else if (rule->priority == subtable->max_priority) {
        ++subtable->max_count;
    } else if (rule->priority > subtable->max_priority) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_change_priority(&cls->subtables, subtable, rule->priority);
    }

    /* Nothing was replaced. */
    cls->n_rules++;
    ovs_mutex_unlock(&cls->mutex);
    return NULL;
}

/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * 'cls' must not contain an identical rule (including wildcards, values of
 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 * such a rule. */
void
classifier_insert(struct classifier *cls, struct cls_rule *rule)
{
    const struct cls_rule *displaced_rule = classifier_replace(cls, rule);
    ovs_assert(!displaced_rule);
}

/* Removes 'rule' from 'cls'.  It is the caller's responsibility to destroy
 * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
 * resides, etc., as necessary.
 *
 * Does nothing if 'rule' has already been removed, or was never inserted.
 *
 * Returns the removed rule, or NULL, if it was already removed.
 */
const struct cls_rule *
classifier_remove(struct classifier *cls, const struct cls_rule *rule)
    OVS_EXCLUDED(cls->mutex)
{
    struct cls_partition *partition;
    struct cls_match *cls_match;
    struct cls_subtable *subtable;
    struct cls_match *prev;
    struct cls_match *next;
    int i;
    uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
    uint8_t prev_be32ofs = 0;
    size_t n_rules;

    ovs_mutex_lock(&cls->mutex);
    cls_match = rule->cls_match;
    if (!cls_match) {
        rule = NULL;
        goto unlock; /* Already removed. */
    }
    /* Mark as removed. */
    CONST_CAST(struct cls_rule *, rule)->cls_match = NULL;

    INIT_CONTAINER(prev, rculist_back_protected(&cls_match->list), list);
    INIT_CONTAINER(next, rculist_next(&cls_match->list), list);

    /* Remove from the list of equal rules. */
    rculist_remove(&cls_match->list);

    /* Check if this is NOT a head rule. */
    if (prev->priority > rule->priority) {
        /* Not the highest priority rule, no need to check subtable's
         * 'max_priority'. */
        goto free;
    }

    subtable = find_subtable(cls, &rule->match.mask);
    ovs_assert(subtable);

    for (i = 0; i < subtable->n_indices; i++) {
        ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs,
                                        subtable->index_ofs[i], &basis);
        prev_be32ofs = subtable->index_ofs[i];
    }
    hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis);

    /* Head rule.  Check if 'next' is an identical, lower-priority rule that
     * will replace 'rule' in the data structures. */
    if (next->priority < rule->priority) {
        subtable_replace_head_rule(cls, subtable, cls_match, next, hash,
                                   ihash);
        goto check_priority;
    }

    /* 'rule' is last of the kind in the classifier, must remove from all the
     * data structures. */

    if (subtable->ports_mask_len) {
        ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

        trie_remove_prefix(&subtable->ports_trie,
                           &masked_ports, subtable->ports_mask_len);
    }
    for (i = 0; i < cls->n_tries; i++) {
        if (subtable->trie_plen[i]) {
            trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
        }
    }

    /* Remove rule node from indices. */
    for (i = 0; i < subtable->n_indices; i++) {
        cmap_remove(&subtable->indices[i], &cls_match->index_nodes[i],
                    ihash[i]);
    }
    n_rules = cmap_remove(&subtable->rules, &cls_match->cmap_node, hash);

    partition = cls_match->partition;
    if (partition) {
        tag_tracker_subtract(&partition->tracker, &partition->tags,
                             subtable->tag);
        if (!partition->tags) {
            cmap_remove(&cls->partitions, &partition->cmap_node,
                        hash_metadata(partition->metadata));
            ovsrcu_postpone(free, partition);
        }
    }

    if (n_rules == 0) {
        destroy_subtable(cls, subtable);
    } else {
check_priority:
        if (subtable->max_priority == rule->priority
            && --subtable->max_count == 0) {
            /* Find the new 'max_priority' and 'max_count'. */
            struct cls_match *head;
            int max_priority = INT_MIN;

            CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
                if (head->priority > max_priority) {
                    max_priority = head->priority;
                    subtable->max_count = 1;
                } else if (head->priority == max_priority) {
                    ++subtable->max_count;
                }
            }
            subtable->max_priority = max_priority;
            pvector_change_priority(&cls->subtables, subtable, max_priority);
        }
    }
free:
    ovsrcu_postpone(free, cls_match);
    cls->n_rules--;
unlock:
    ovs_mutex_unlock(&cls->mutex);

    return rule;
}
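
/* Usage sketch (illustrative only): because classifier_remove() returns NULL
 * for a rule that was already removed, writers can race to remove the same
 * rule and only the winner schedules its destruction ('free_rule' is again a
 * hypothetical callback):
 *
 *     if (classifier_remove(&cls, rule)) {
 *         ovsrcu_postpone(free_rule, rule);
 *     }
 */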

/* Prefix tree context.  Valid when 'lookup_done' is true.  Can skip all
 * subtables which have a prefix match on the trie field, but whose prefix
 * length is not indicated in 'match_plens'.  For example, a subtable that
 * has an 8-bit trie field prefix match can be skipped if
 * !be_get_bit_at(&match_plens, 8 - 1).  If skipped, 'maskbits' prefix bits
 * must be unwildcarded to make the datapath flow only match packets it
 * should. */
struct trie_ctx {
    const struct cls_trie *trie;
    bool lookup_done;        /* Status of the lookup. */
    uint8_t be32ofs;         /* U32 offset of the field in question. */
    unsigned int maskbits;   /* Prefix length needed to avoid false matches. */
    union mf_value match_plens;  /* Bitmask of prefix lengths with possible
                                  * matches. */
};

static void
trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
{
    ctx->trie = trie;
    ctx->be32ofs = trie->field->flow_be32ofs;
    ctx->lookup_done = false;
}

/* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
 * Returns a null pointer if no rules in 'cls' match 'flow'.  If multiple rules
 * of equal priority match 'flow', returns one arbitrarily.
 *
 * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
 * set of bits that were significant in the lookup.  At some point
 * earlier, 'wc' should have been initialized (e.g., by
 * flow_wildcards_init_catchall()). */
const struct cls_rule *
classifier_lookup(const struct classifier *cls, const struct flow *flow,
                  struct flow_wildcards *wc)
{
    const struct cls_partition *partition;
    tag_type tags;
    int best_priority = INT_MIN;
    const struct cls_match *best;
    struct trie_ctx trie_ctx[CLS_MAX_TRIES];
    struct cls_subtable *subtable;

    /* Synchronize for cls->n_tries and subtable->trie_plen.  They can change
     * when table configuration changes, which happens typically only on
     * startup. */
    atomic_thread_fence(memory_order_acquire);

    /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
     * then 'flow' cannot possibly match in 'subtable':
     *
     *     - If flow->metadata maps to a given 'partition', then we can use
     *       'tags' for 'partition->tags'.
     *
     *     - If flow->metadata has no partition, then no rule in 'cls' has an
     *       exact-match for flow->metadata.  That means that we don't need to
     *       search any subtable that includes flow->metadata in its mask.
     *
     * In either case, we always need to search any cls_subtables that do not
     * include flow->metadata in its mask.  One way to do that would be to
     * check the "cls_subtable"s explicitly for that, but that would require an
     * extra branch per subtable.  Instead, we mark such a cls_subtable's
     * 'tags' as TAG_ALL and make sure that 'tags' is never empty.  This means
     * that 'tags' always intersects such a cls_subtable's 'tags', so we don't
     * need a special case.
     */
    partition = (cmap_is_empty(&cls->partitions)
                 ? NULL
                 : find_partition(cls, flow->metadata,
                                  hash_metadata(flow->metadata)));
    tags = partition ? partition->tags : TAG_ARBITRARY;

    /* Initialize trie contexts for find_match_wc(). */
    for (int i = 0; i < cls->n_tries; i++) {
        trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
    }

    best = NULL;
    PVECTOR_FOR_EACH_PRIORITY(subtable, best_priority, 2,
                              sizeof(struct cls_subtable), &cls->subtables) {
        const struct cls_match *rule;

        if (!tag_intersects(tags, subtable->tag)) {
            continue;
        }

        rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries, wc);
        if (rule && rule->priority > best_priority) {
            best_priority = rule->priority;
            best = rule;
        }
    }

    return best ? best->cls_rule : NULL;
}

/* Finds and returns a rule in 'cls' with exactly the same priority and
 * matching criteria as 'target'.  Returns a null pointer if 'cls' doesn't
 * contain an exact match. */
const struct cls_rule *
classifier_find_rule_exactly(const struct classifier *cls,
                             const struct cls_rule *target)
{
    const struct cls_match *head, *rule;
    const struct cls_subtable *subtable;

    subtable = find_subtable(cls, &target->match.mask);
    if (!subtable) {
        return NULL;
    }

    head = find_equal(subtable, &target->match.flow,
                      miniflow_hash_in_minimask(&target->match.flow,
                                                &target->match.mask, 0));
    if (!head) {
        return NULL;
    }
    FOR_EACH_RULE_IN_LIST (rule, head) {
        if (target->priority >= rule->priority) {
            return target->priority == rule->priority ? rule->cls_rule : NULL;
        }
    }
    return NULL;
}

/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
 * same matching criteria as 'target'.  Returns a null pointer if 'cls' doesn't
 * contain an exact match. */
const struct cls_rule *
classifier_find_match_exactly(const struct classifier *cls,
                              const struct match *target, int priority)
{
    const struct cls_rule *retval;
    struct cls_rule cr;

    cls_rule_init(&cr, target, priority);
    retval = classifier_find_rule_exactly(cls, &cr);
    cls_rule_destroy(&cr);

    return retval;
}

/* Checks if 'target' would overlap any other rule in 'cls'.  Two rules are
 * considered to overlap if both rules have the same priority and a packet
 * could match both. */
bool
classifier_rule_overlaps(const struct classifier *cls,
                         const struct cls_rule *target)
    OVS_EXCLUDED(cls->mutex)
{
    struct cls_subtable *subtable;

    ovs_mutex_lock(&cls->mutex);
    /* Iterate subtables in the descending max priority order. */
    PVECTOR_FOR_EACH_PRIORITY (subtable, target->priority - 1, 2,
                               sizeof(struct cls_subtable), &cls->subtables) {
        uint32_t storage[FLOW_U32S];
        struct minimask mask;
        struct cls_match *head;

        minimask_combine(&mask, &target->match.mask, &subtable->mask, storage);
        CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
            struct cls_match *rule;

            FOR_EACH_RULE_IN_LIST_PROTECTED (rule, head) {
                if (rule->priority < target->priority) {
                    break; /* Rules in descending priority order. */
                }
                if (rule->priority == target->priority
                    && miniflow_equal_in_minimask(&target->match.flow,
                                                  &rule->flow, &mask)) {
                    ovs_mutex_unlock(&cls->mutex);
                    return true;
                }
            }
        }
    }

    ovs_mutex_unlock(&cls->mutex);
    return false;
}

/* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more
 * specific than 'criteria'.  That is, 'rule' matches 'criteria' and this
 * function returns true if, for every field:
 *
 *   - 'criteria' and 'rule' specify the same (non-wildcarded) value for the
 *     field, or
 *
 *   - 'criteria' wildcards the field,
 *
 * Conversely, 'rule' does not match 'criteria' and this function returns false
 * if, for at least one field:
 *
 *   - 'criteria' and 'rule' specify different values for the field, or
 *
 *   - 'criteria' specifies a value for the field but 'rule' wildcards it.
 *
 * Equivalently, the truth table for whether a field matches is:
 *
 *                                     rule
 *
 *                          c         wildcard    exact
 *                          r        +---------+---------+
 *                          i   wild |   yes   |   yes   |
 *                          t   card |         |         |
 *                          e        +---------+---------+
 *                          r  exact |    no   |if values|
 *                          i        |         |are equal|
 *                          a        +---------+---------+
 *
 * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD
 * commands and by OpenFlow 1.0 aggregate and flow stats.
 *
 * Ignores rule->priority. */
bool
cls_rule_is_loose_match(const struct cls_rule *rule,
                        const struct minimatch *criteria)
{
    return (!minimask_has_extra(&rule->match.mask, &criteria->mask)
            && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow,
                                          &criteria->mask));
}
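
/* For example, a rule "tcp,tp_dst=80" is a loose match for criteria "tcp"
 * (the criteria wildcard tp_dst), but a rule "tcp" is not a loose match for
 * criteria "tcp,tp_dst=80" (the rule wildcards a field that the criteria
 * specify). */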
\f
/* Iteration. */

static bool
rule_matches(const struct cls_match *rule, const struct cls_rule *target)
{
    return (!target
            || miniflow_equal_in_minimask(&rule->flow,
                                          &target->match.flow,
                                          &target->match.mask));
}

static const struct cls_match *
search_subtable(const struct cls_subtable *subtable,
                struct cls_cursor *cursor)
{
    if (!cursor->target
        || !minimask_has_extra(&subtable->mask, &cursor->target->match.mask)) {
        const struct cls_match *rule;

        CMAP_CURSOR_FOR_EACH (rule, cmap_node, &cursor->rules,
                              &subtable->rules) {
            if (rule_matches(rule, cursor->target)) {
                return rule;
            }
        }
    }
    return NULL;
}

/* Initializes 'cursor' for iterating through rules in 'cls', with the first
 * matching cls_rule available via the returned cursor's 'rule' member, which
 * is NULL if there are no matches.
 *
 *     - If 'target' is null, the cursor will visit every rule in 'cls'.
 *
 *     - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls'
 *       such that cls_rule_is_loose_match(rule, target) returns true.
 *
 * Ignores target->priority. */
struct cls_cursor cls_cursor_start(const struct classifier *cls,
                                   const struct cls_rule *target,
                                   bool safe)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct cls_cursor cursor;
    struct cls_subtable *subtable;

    cursor.safe = safe;
    cursor.cls = cls;
    cursor.target = target && !cls_rule_is_catchall(target) ? target : NULL;
    cursor.rule = NULL;

    /* Find first rule. */
    ovs_mutex_lock(&cursor.cls->mutex);
    CMAP_CURSOR_FOR_EACH (subtable, cmap_node, &cursor.subtables,
                          &cursor.cls->subtables_map) {
        const struct cls_match *rule = search_subtable(subtable, &cursor);

        if (rule) {
            cursor.subtable = subtable;
            cursor.rule = rule->cls_rule;
            break;
        }
    }

    /* Leave locked if requested and have a rule. */
    if (safe || !cursor.rule) {
        ovs_mutex_unlock(&cursor.cls->mutex);
    }
    return cursor;
}

static const struct cls_rule *
cls_cursor_next(struct cls_cursor *cursor)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    const struct cls_match *rule = cursor->rule->cls_match;
    const struct cls_subtable *subtable;
    const struct cls_match *next;

    next = next_rule_in_list__(rule);
    if (next->priority < rule->priority) {
        return next->cls_rule;
    }

    /* 'next' is the head of the list, that is, the rule that is included in
     * the subtable's map.  (This is important when the classifier contains
     * rules that differ only in priority.) */
    rule = next;
    CMAP_CURSOR_FOR_EACH_CONTINUE (rule, cmap_node, &cursor->rules) {
        if (rule_matches(rule, cursor->target)) {
            return rule->cls_rule;
        }
    }

    subtable = cursor->subtable;
    CMAP_CURSOR_FOR_EACH_CONTINUE (subtable, cmap_node, &cursor->subtables) {
        rule = search_subtable(subtable, cursor);
        if (rule) {
            cursor->subtable = subtable;
            return rule->cls_rule;
        }
    }

    return NULL;
}

/* Sets 'cursor->rule' to the next matching cls_rule in 'cursor''s iteration,
 * or to null if all matching rules have been visited. */
void
cls_cursor_advance(struct cls_cursor *cursor)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    if (cursor->safe) {
        ovs_mutex_lock(&cursor->cls->mutex);
    }
    cursor->rule = cls_cursor_next(cursor);
    if (cursor->safe || !cursor->rule) {
        ovs_mutex_unlock(&cursor->cls->mutex);
    }
}
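
/* Usage sketch (illustrative only): visiting every rule that loosely matches
 * 'target' via the CLS_FOR_EACH_TARGET() wrapper, assumed from classifier.h,
 * which expands to cls_cursor_start()/cls_cursor_advance().  'struct my_rule'
 * is a hypothetical structure embedding a 'struct cls_rule cr' member.
 *
 *     struct my_rule *r;
 *
 *     CLS_FOR_EACH_TARGET (r, cr, &cls, &target) {
 *         ...
 *     }
 */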
\f
static struct cls_subtable *
find_subtable(const struct classifier *cls, const struct minimask *mask)
{
    struct cls_subtable *subtable;

    CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0),
                             &cls->subtables_map) {
        if (minimask_equal(mask, &subtable->mask)) {
            return subtable;
        }
    }
    return NULL;
}

/* The new subtable will be visible to the readers only after this. */
static struct cls_subtable *
insert_subtable(struct classifier *cls, const struct minimask *mask)
    OVS_REQUIRES(cls->mutex)
{
    uint32_t hash = minimask_hash(mask, 0);
    struct cls_subtable *subtable;
    int i, index = 0;
    struct flow_wildcards old, new;
    uint8_t prev;
    int count = count_1bits(mask->masks.map);

    subtable = xzalloc(sizeof *subtable - sizeof mask->masks.inline_values
                       + MINIFLOW_VALUES_SIZE(count));
    cmap_init(&subtable->rules);
    miniflow_clone_inline(CONST_CAST(struct miniflow *, &subtable->mask.masks),
                          &mask->masks, count);

    /* Init indices for segmented lookup, if any. */
    flow_wildcards_init_catchall(&new);
    old = new;
    prev = 0;
    for (i = 0; i < cls->n_flow_segments; i++) {
        flow_wildcards_fold_minimask_range(&new, mask, prev,
                                           cls->flow_segments[i]);
        /* Add an index if it adds mask bits. */
        if (!flow_wildcards_equal(&new, &old)) {
            cmap_init(&subtable->indices[index]);
            *CONST_CAST(uint8_t *, &subtable->index_ofs[index])
                = cls->flow_segments[i];
            index++;
            old = new;
        }
        prev = cls->flow_segments[i];
    }
    /* Check if the rest of the subtable's mask adds any bits,
     * and remove the last index if it doesn't. */
    if (index > 0) {
        flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S);
        if (flow_wildcards_equal(&new, &old)) {
            --index;
            *CONST_CAST(uint8_t *, &subtable->index_ofs[index]) = 0;
            cmap_destroy(&subtable->indices[index]);
        }
    }
    *CONST_CAST(uint8_t *, &subtable->n_indices) = index;

    *CONST_CAST(tag_type *, &subtable->tag) =
        (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
         ? tag_create_deterministic(hash)
         : TAG_ALL);

    for (i = 0; i < cls->n_tries; i++) {
        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
                                                         cls->tries[i].field);
    }

    /* Ports trie. */
    ovsrcu_set_hidden(&subtable->ports_trie, NULL);
    *CONST_CAST(int *, &subtable->ports_mask_len)
        = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src)));

    cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash);

    return subtable;
}

/* RCU readers may still access the subtable before it is actually freed. */
static void
destroy_subtable(struct classifier *cls, struct cls_subtable *subtable)
    OVS_REQUIRES(cls->mutex)
{
    int i;

    pvector_remove(&cls->subtables, subtable);
    cmap_remove(&cls->subtables_map, &subtable->cmap_node,
                minimask_hash(&subtable->mask, 0));

    ovs_assert(ovsrcu_get_protected(struct trie_node *, &subtable->ports_trie)
               == NULL);
    ovs_assert(cmap_is_empty(&subtable->rules));

    for (i = 0; i < subtable->n_indices; i++) {
        cmap_destroy(&subtable->indices[i]);
    }
    cmap_destroy(&subtable->rules);
    ovsrcu_postpone(free, subtable);
}

struct range {
    uint8_t start;
    uint8_t end;
};

static unsigned int be_get_bit_at(const ovs_be32 value[], unsigned int ofs);

/* Return 'true' if the rest of the subtable can be skipped based on the
 * prefix trie lookup results. */
static inline bool
check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
            const unsigned int field_plen[CLS_MAX_TRIES],
            const struct range ofs, const struct flow *flow,
            struct flow_wildcards *wc)
{
    int j;

    /* Check if we could avoid fully unwildcarding the next level of
     * fields using the prefix tries.  The trie checks are done only as
     * needed to avoid folding in additional bits to the wildcards mask. */
    for (j = 0; j < n_tries; j++) {
        /* Is the trie field relevant for this subtable? */
        if (field_plen[j]) {
            struct trie_ctx *ctx = &trie_ctx[j];
            uint8_t be32ofs = ctx->be32ofs;

            /* Is the trie field within the current range of fields? */
            if (be32ofs >= ofs.start && be32ofs < ofs.end) {
                /* On-demand trie lookup. */
                if (!ctx->lookup_done) {
                    memset(&ctx->match_plens, 0, sizeof ctx->match_plens);
                    ctx->maskbits = trie_lookup(ctx->trie, flow,
                                                &ctx->match_plens);
                    ctx->lookup_done = true;
                }
                /* Possible to skip the rest of the subtable if subtable's
                 * prefix on the field is not included in the lookup result. */
                if (!be_get_bit_at(&ctx->match_plens.be32, field_plen[j] - 1)) {
                    /* We want the trie lookup to never result in unwildcarding
                     * any bits that would not be unwildcarded otherwise.
                     * Since the trie is shared by the whole classifier, it is
                     * possible that the 'maskbits' contain bits that are
                     * irrelevant for the partition relevant for the current
                     * packet.  Hence the checks below. */

                    /* Check that the trie result will not unwildcard more bits
                     * than this subtable would otherwise. */
                    if (ctx->maskbits <= field_plen[j]) {
                        /* Unwildcard the bits and skip the rest. */
                        mask_set_prefix_bits(wc, be32ofs, ctx->maskbits);
                        /* Note: Prerequisite already unwildcarded, as the only
                         * prerequisite of the supported trie lookup fields is
                         * the ethertype, which is always unwildcarded. */
                        return true;
                    }
                    /* Can skip if the field is already unwildcarded. */
                    if (mask_prefix_bits_set(wc, be32ofs, ctx->maskbits)) {
                        return true;
                    }
                }
            }
        }
    }
    return false;
}

/* Returns true if 'target' satisfies 'flow'/'mask', that is, if for each bit
 * for which 'mask' has a bit set, 'target' has the same value as 'flow'.
 *
 * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
 * target, mask) but this is faster because of the invariant that
 * flow->map and mask->masks.map are the same. */
static inline bool
miniflow_and_mask_matches_flow(const struct miniflow *flow,
                               const struct minimask *mask,
                               const struct flow *target)
{
    const uint32_t *flowp = miniflow_get_u32_values(flow);
    const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
    uint32_t idx;

    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
        uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & *maskp++;

        if (diff) {
            return false;
        }
    }

    return true;
}

static inline const struct cls_match *
find_match(const struct cls_subtable *subtable, const struct flow *flow,
           uint32_t hash)
{
    const struct cls_match *rule;

    CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) {
        if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
                                           flow)) {
            return rule;
        }
    }

    return NULL;
}

/* Returns true if 'target' satisfies 'flow'/'mask', that is, if for each bit
 * for which 'mask' has a bit set, 'target' has the same value as 'flow'.
 *
 * This function is equivalent to miniflow_and_mask_matches_flow() but this
 * version fills in the mask bits in 'wc'. */
static inline bool
miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
                                  const struct minimask *mask,
                                  const struct flow *target,
                                  struct flow_wildcards *wc)
{
    const uint32_t *flowp = miniflow_get_u32_values(flow);
    const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
    uint32_t idx;

    MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
        uint32_t mask = *maskp++;
        uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & mask;

        if (diff) {
            /* Only unwildcard if none of the differing bits is already
             * exact-matched. */
            if (!(flow_u32_value(&wc->masks, idx) & diff)) {
                /* Keep one bit of the difference. */
                *flow_u32_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
            }
            return false;
        }
        /* Fill in the bits that were looked at. */
        *flow_u32_lvalue(&wc->masks, idx) |= mask;
    }

    return true;
}

/* Unwildcard the fields looked up so far, if any. */
static void
fill_range_wc(const struct cls_subtable *subtable, struct flow_wildcards *wc,
              uint8_t to)
{
    if (to) {
        flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, to);
    }
}

static const struct cls_match *
find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
              struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
              struct flow_wildcards *wc)
{
    uint32_t basis = 0, hash;
    const struct cls_match *rule = NULL;
    int i;
    struct range ofs;

    if (OVS_UNLIKELY(!wc)) {
        return find_match(subtable, flow,
                          flow_hash_in_minimask(flow, &subtable->mask, 0));
    }

    ofs.start = 0;
    /* Try to finish early by checking fields in segments. */
    for (i = 0; i < subtable->n_indices; i++) {
        const struct cmap_node *inode;

        ofs.end = subtable->index_ofs[i];

        if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow,
                        wc)) {
            /* 'wc' bits for the trie field set, now unwildcard the preceding
             * bits used so far. */
            fill_range_wc(subtable, wc, ofs.start);
            return NULL;
        }
        hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                           ofs.end, &basis);
        inode = cmap_find(&subtable->indices[i], hash);
        if (!inode) {
            /* No match, can stop immediately, but must fold in the bits
             * used in lookup so far. */
            fill_range_wc(subtable, wc, ofs.end);
            return NULL;
        }

        /* If we have narrowed down to a single rule already, check whether
         * that rule matches.  Either way, we're done.
         *
         * (Rare) hash collisions may cause us to miss the opportunity for this
         * optimization. */
        if (!cmap_node_next(inode)) {
            ASSIGN_CONTAINER(rule, inode - i, index_nodes);
            if (miniflow_and_mask_matches_flow_wc(&rule->flow, &subtable->mask,
                                                  flow, wc)) {
                return rule;
            }
            return NULL;
        }
        ofs.start = ofs.end;
    }
    ofs.end = FLOW_U32S;
    /* Trie check for the final range. */
    if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
        fill_range_wc(subtable, wc, ofs.start);
        return NULL;
    }
    hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
                                       ofs.end, &basis);
    rule = find_match(subtable, flow, hash);
    if (!rule && subtable->ports_mask_len) {
        /* Ports are always part of the final range, if any.
         * No match was found for the ports.  Use the ports trie to figure out
         * which ports bits to unwildcard. */
        unsigned int mbits;
        ovs_be32 value, plens, mask;

        mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);
        value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
        mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32);

        ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
            mask & be32_prefix_mask(mbits);

        /* Unwildcard all bits in the mask up to the ports, as they were used
         * to determine there is no match. */
        fill_range_wc(subtable, wc, TP_PORTS_OFS32);
        return NULL;
    }

    /* Must unwildcard all the fields, as they were looked at. */
    flow_wildcards_fold_minimask(wc, &subtable->mask);
    return rule;
}

static struct cls_match *
find_equal(const struct cls_subtable *subtable, const struct miniflow *flow,
           uint32_t hash)
{
    struct cls_match *head;

    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
        if (miniflow_equal(&head->flow, flow)) {
            return head;
        }
    }
    return NULL;
}
\f
/* A longest-prefix match tree. */

1490/* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'.
1491 * Prefixes are in the network byte order, and the offset 0 corresponds to
1492 * the most significant bit of the first byte. The offset can be read as
1493 * "how many bits to skip from the start of the prefix starting at 'pr'". */
1494static uint32_t
1495raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
1496{
1497 uint32_t prefix;
1498
1499 pr += ofs / 32; /* Where to start. */
1500 ofs %= 32; /* How many bits to skip at 'pr'. */
1501
1502 prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */
1503 if (plen > 32 - ofs) { /* Need more than we have already? */
1504 prefix |= ntohl(*++pr) >> (32 - ofs);
1505 }
1506 /* Return with possible unwanted bits at the end. */
1507 return prefix;
1508}
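/* A standalone sketch (illustrative 'demo_' names, not OVS code) of the
 * bit arithmetic in raw_get_prefix() above: reading 16 bits starting at
 * bit offset 24 must combine the low 8 bits of the first big-endian word
 * with the high 8 bits of the second. */
#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

static uint32_t
demo_raw_get_prefix(const uint32_t be_words[], unsigned int ofs,
                    unsigned int plen)
{
    uint32_t prefix;

    be_words += ofs / 32;                   /* Skip whole words. */
    ofs %= 32;                              /* Bits to skip in *be_words. */
    prefix = ntohl(*be_words) << ofs;       /* First 32 - ofs bits. */
    if (plen > 32 - ofs) {                  /* Spills into the next word? */
        prefix |= ntohl(*++be_words) >> (32 - ofs);
    }
    return prefix;      /* Bits past 'plen' may be garbage, as above. */
}

int
main(void)
{
    uint32_t words[2] = { htonl(0xAABBCCDDu), htonl(0x11223344u) };

    /* 16 bits from offset 24: 0xDD from word 0, 0x11 from word 1. */
    assert(demo_raw_get_prefix(words, 24, 16) >> 16 == 0xDD11);
    return 0;
}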
1509
1510/* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit
1511 * offset 'ofs'. Prefixes are in network byte order, and offset 0
1512 * corresponds to the most significant bit of the first byte. The offset can
1513 * be read as "how many bits to skip from the start of the prefix starting at
1514 * 'pr'". */
1515static uint32_t
1516trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
1517{
1518 if (!plen) {
1519 return 0;
1520 }
1521 if (plen > TRIE_PREFIX_BITS) {
1522 plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */
1523 }
1524 /* Return with unwanted bits cleared. */
1525 return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen);
1526}
1527
c30cfa6b 1528/* Return the number of equal bits between the 'n_bits' MSBs of 'prefix' and
13751fd8
JR
1529 * 'value' starting at "MSB 0"-based offset 'ofs'. */
1530static unsigned int
c30cfa6b 1531prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[],
13751fd8
JR
1532 unsigned int ofs)
1533{
c30cfa6b 1534 uint64_t diff = prefix ^ raw_get_prefix(value, ofs, n_bits);
13751fd8 1535 /* Set the bit after the relevant bits to limit the result. */
c30cfa6b 1536 return raw_clz64(diff << 32 | UINT64_C(1) << (63 - n_bits));
13751fd8
JR
1537}
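/* A standalone sketch of the sentinel trick in prefix_equal_bits() above
 * ('demo_' names are illustrative): the XOR difference is placed in the
 * high 32 bits of a 64-bit word and a guard bit is set just below the
 * last relevant bit, so count-leading-zeros never reports more than
 * 'n_bits' equal bits and is never called on zero.  Uses the GCC/Clang
 * __builtin_clzll() builtin in place of OVS's raw_clz64(). */
#include <assert.h>
#include <stdint.h>

static unsigned int
demo_equal_msbs(uint32_t a, uint32_t b, unsigned int n_bits)
{
    uint64_t diff = a ^ b;

    /* Guard bit at position 63 - n_bits caps the result at 'n_bits'. */
    return __builtin_clzll(diff << 32 | UINT64_C(1) << (63 - n_bits));
}

int
main(void)
{
    assert(demo_equal_msbs(0xF0000000u, 0xF8000000u, 16) == 4);  /* Bit 4 differs. */
    assert(demo_equal_msbs(0xF0000000u, 0xF0000000u, 16) == 16); /* Capped at 16. */
    return 0;
}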
1538
1539/* Return the number of equal bits in 'node' prefix and a 'prefix' of length
1540 * 'plen', starting at "MSB 0"-based offset 'ofs'. */
1541static unsigned int
1542trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[],
1543 unsigned int ofs, unsigned int plen)
1544{
c30cfa6b 1545 return prefix_equal_bits(node->prefix, MIN(node->n_bits, plen - ofs),
13751fd8
JR
1546 prefix, ofs);
1547}
1548
1549/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can
1550 * be greater than 31. */
1551static unsigned int
1552be_get_bit_at(const ovs_be32 value[], unsigned int ofs)
1553{
1554 return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u;
1555}
1556
1557/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' must
1558 * be between 0 and 31, inclusive. */
1559static unsigned int
1560get_bit_at(const uint32_t prefix, unsigned int ofs)
1561{
1562 return (prefix >> (31 - ofs)) & 1u;
1563}
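/* A standalone sketch of the "MSB 0" addressing used by be_get_bit_at()
 * above ('demo_' name is illustrative): offset 0 is the most significant
 * bit of the first byte, so offset 9 is the second-highest bit of the
 * second byte, independent of host byte order. */
#include <assert.h>
#include <stdint.h>

static unsigned int
demo_get_bit_at(const uint8_t bytes[], unsigned int ofs)
{
    return (bytes[ofs / 8] >> (7 - ofs % 8)) & 1u;
}

int
main(void)
{
    uint8_t bytes[2] = { 0x80, 0x40 };      /* Bits 0 and 9 set. */

    assert(demo_get_bit_at(bytes, 0) == 1);
    assert(demo_get_bit_at(bytes, 1) == 0);
    assert(demo_get_bit_at(bytes, 9) == 1);
    return 0;
}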
1564
1565/* Create a new branch. */
1566static struct trie_node *
1567trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen,
1568 unsigned int n_rules)
1569{
1570 struct trie_node *node = xmalloc(sizeof *node);
1571
1572 node->prefix = trie_get_prefix(prefix, ofs, plen);
1573
1574 if (plen <= TRIE_PREFIX_BITS) {
c30cfa6b 1575 node->n_bits = plen;
f358a2cb
JR
1576 ovsrcu_set_hidden(&node->edges[0], NULL);
1577 ovsrcu_set_hidden(&node->edges[1], NULL);
13751fd8
JR
1578 node->n_rules = n_rules;
1579 } else { /* Need intermediate nodes. */
1580 struct trie_node *subnode = trie_branch_create(prefix,
1581 ofs + TRIE_PREFIX_BITS,
1582 plen - TRIE_PREFIX_BITS,
1583 n_rules);
1584 int bit = get_bit_at(subnode->prefix, 0);
c30cfa6b 1585 node->n_bits = TRIE_PREFIX_BITS;
f358a2cb
JR
1586 ovsrcu_set_hidden(&node->edges[bit], subnode);
1587 ovsrcu_set_hidden(&node->edges[!bit], NULL);
13751fd8
JR
1588 node->n_rules = 0;
1589 }
1590 return node;
1591}
1592
1593static void
f358a2cb 1594trie_node_destroy(const struct trie_node *node)
13751fd8 1595{
f358a2cb
JR
1596 ovsrcu_postpone(free, CONST_CAST(struct trie_node *, node));
1597}
1598
1599/* Copy a trie node for modification, postponing deletion of the old one. */
1600static struct trie_node *
1601trie_node_rcu_realloc(const struct trie_node *node)
1602{
1603 struct trie_node *new_node = xmalloc(sizeof *node);
1604
1605 *new_node = *node;
1606 trie_node_destroy(node);
1607
1608 return new_node;
13751fd8
JR
1609}
1610
e48eccd1 1611/* May only be called while holding the classifier mutex. */
13751fd8 1612static void
f358a2cb 1613trie_destroy(rcu_trie_ptr *trie)
13751fd8 1614{
f358a2cb
JR
1615 struct trie_node *node = ovsrcu_get_protected(struct trie_node *, trie);
1616
13751fd8 1617 if (node) {
f358a2cb
JR
1618 ovsrcu_set_hidden(trie, NULL);
1619 trie_destroy(&node->edges[0]);
1620 trie_destroy(&node->edges[1]);
1621 trie_node_destroy(node);
13751fd8
JR
1622 }
1623}
1624
1625static bool
1626trie_is_leaf(const struct trie_node *trie)
1627{
f358a2cb
JR
1628 /* No children? */
1629 return !ovsrcu_get(struct trie_node *, &trie->edges[0])
1630 && !ovsrcu_get(struct trie_node *, &trie->edges[1]);
13751fd8
JR
1631}
1632
1633static void
1634mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs,
c30cfa6b 1635 unsigned int n_bits)
13751fd8
JR
1636{
1637 ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
1638 unsigned int i;
1639
c30cfa6b 1640 for (i = 0; i < n_bits / 32; i++) {
13751fd8
JR
1641 mask[i] = OVS_BE32_MAX;
1642 }
c30cfa6b
JR
1643 if (n_bits % 32) {
1644 mask[i] |= htonl(~0u << (32 - n_bits % 32));
13751fd8
JR
1645 }
1646}
1647
1648static bool
1649mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs,
c30cfa6b 1650 unsigned int n_bits)
13751fd8
JR
1651{
1652 ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
1653 unsigned int i;
1654 ovs_be32 zeroes = 0;
1655
c30cfa6b 1656 for (i = 0; i < n_bits / 32; i++) {
13751fd8
JR
1657 zeroes |= ~mask[i];
1658 }
c30cfa6b
JR
1659 if (n_bits % 32) {
1660 zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32));
13751fd8
JR
1661 }
1662
c30cfa6b 1663 return !zeroes; /* All 'n_bits' bits set. */
13751fd8
JR
1664}
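/* A standalone sketch of mask_set_prefix_bits() above ('demo_' names are
 * illustrative): a 40-bit prefix mask fills one whole 32-bit word and
 * then ORs the top 8 bits of the next, preserving any mask bits already
 * set there, just as the real code does on 'wc->masks'. */
#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

static void
demo_set_prefix_bits(uint32_t be_mask[], unsigned int n_bits)
{
    unsigned int i;

    for (i = 0; i < n_bits / 32; i++) {
        be_mask[i] = htonl(0xFFFFFFFFu);    /* Whole words. */
    }
    if (n_bits % 32) {
        be_mask[i] |= htonl(~0u << (32 - n_bits % 32)); /* Partial word. */
    }
}

int
main(void)
{
    uint32_t mask[2] = { 0, 0 };

    demo_set_prefix_bits(mask, 40);
    assert(ntohl(mask[0]) == 0xFFFFFFFFu);
    assert(ntohl(mask[1]) == 0xFF000000u);
    return 0;
}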
1665
f358a2cb 1666static rcu_trie_ptr *
13751fd8
JR
1667trie_next_edge(struct trie_node *node, const ovs_be32 value[],
1668 unsigned int ofs)
1669{
1670 return node->edges + be_get_bit_at(value, ofs);
1671}
1672
1673static const struct trie_node *
1674trie_next_node(const struct trie_node *node, const ovs_be32 value[],
1675 unsigned int ofs)
1676{
f358a2cb
JR
1677 return ovsrcu_get(struct trie_node *,
1678 &node->edges[be_get_bit_at(value, ofs)]);
13751fd8
JR
1679}
1680
c0bfb650
JR
1681/* Set the bit at ("MSB 0"-based) offset 'ofs'. 'ofs' can be greater than 31.
1682 */
1683static void
1684be_set_bit_at(ovs_be32 value[], unsigned int ofs)
1685{
1686 ((uint8_t *)value)[ofs / 8] |= 1u << (7 - ofs % 8);
1687}
1688
1689/* Returns the number of prefix mask bits necessary to determine a mismatch,
1690 * in case there are longer prefixes in the tree below the one that matched.
1691 *
1692 * '*plens' will have a bit set for each prefix length that may have matching
1693 * rules. The caller is responsible for clearing '*plens' prior to
1694 * calling this.
13751fd8
JR
1695 */
1696static unsigned int
f358a2cb 1697trie_lookup_value(const rcu_trie_ptr *trie, const ovs_be32 value[],
c0bfb650 1698 ovs_be32 plens[], unsigned int n_bits)
13751fd8 1699{
13751fd8 1700 const struct trie_node *prev = NULL;
c0bfb650
JR
1701 const struct trie_node *node = ovsrcu_get(struct trie_node *, trie);
1702 unsigned int match_len = 0; /* Number of matching bits. */
13751fd8 1703
27ce650f 1704 for (; node; prev = node, node = trie_next_node(node, value, match_len)) {
13751fd8
JR
1705 unsigned int eqbits;
1706 /* Check if this edge can be followed. */
27ce650f
JR
1707 eqbits = prefix_equal_bits(node->prefix, node->n_bits, value,
1708 match_len);
1709 match_len += eqbits;
c30cfa6b 1710 if (eqbits < node->n_bits) { /* Mismatch, nothing more to be found. */
27ce650f 1711 /* Bit at offset 'match_len' differed. */
c0bfb650 1712 return match_len + 1; /* Includes the first mismatching bit. */
13751fd8
JR
1713 }
1714 /* Full match, check if rules exist at this prefix length. */
1715 if (node->n_rules > 0) {
c0bfb650 1716 be_set_bit_at(plens, match_len - 1);
13751fd8 1717 }
27ce650f 1718 if (match_len >= n_bits) {
c0bfb650 1719 return n_bits; /* Full prefix. */
f0e5aa11 1720 }
13751fd8 1721 }
c0bfb650
JR
1722 /* node == NULL. Full match so far, but we tried to follow a
1723 * non-existing branch. Need to exclude the other branch if it exists
1724 * (it does not if we were called on an empty trie or 'prev' is a leaf
1725 * node). */
1726 return !prev || trie_is_leaf(prev) ? match_len : match_len + 1;
13751fd8
JR
1727}
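/* A standalone sketch of how a caller can consume the 'plens' bitmap
 * filled by trie_lookup_value() above ('demo_' names are illustrative):
 * bit (plen - 1) is set for every prefix length that may have matching
 * rules, so scanning from the longest length down yields the best
 * candidate prefix length first. */
#include <assert.h>
#include <stdint.h>

static void
demo_set_bit(uint8_t bits[], unsigned int ofs)
{
    bits[ofs / 8] |= 1u << (7 - ofs % 8);
}

static int
demo_get_bit(const uint8_t bits[], unsigned int ofs)
{
    return (bits[ofs / 8] >> (7 - ofs % 8)) & 1;
}

int
main(void)
{
    uint8_t plens[4] = { 0, 0, 0, 0 };
    unsigned int plen;

    /* Suppose rules may match at prefix lengths 8 and 24. */
    demo_set_bit(plens, 8 - 1);
    demo_set_bit(plens, 24 - 1);

    /* Scan from the longest prefix length down. */
    for (plen = 32; plen > 0; plen--) {
        if (demo_get_bit(plens, plen - 1)) {
            break;
        }
    }
    assert(plen == 24);
    return 0;
}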
1728
1729static unsigned int
1730trie_lookup(const struct cls_trie *trie, const struct flow *flow,
c0bfb650 1731 union mf_value *plens)
13751fd8
JR
1732{
1733 const struct mf_field *mf = trie->field;
1734
1735 /* Check that the current flow matches the prerequisites for the trie
1736 * field. Some match fields are used for multiple purposes, so we
1737 * must check that the trie is relevant for this flow. */
1738 if (mf_are_prereqs_ok(mf, flow)) {
f358a2cb 1739 return trie_lookup_value(&trie->root,
13751fd8 1740 &((ovs_be32 *)flow)[mf->flow_be32ofs],
c0bfb650 1741 &plens->be32, mf->n_bits);
13751fd8 1742 }
c0bfb650
JR
1743 memset(plens, 0xff, sizeof *plens); /* All prefixes, no skipping. */
1744 return 0; /* Value not used in this case. */
13751fd8
JR
1745}
1746
1747/* Returns the length of a prefix match mask for the field 'mf' in 'minimask',
1748 * or 0 if the 'mf' mask in 'minimask' is not a valid CIDR-style (contiguous)
1749 * prefix mask. */
1750static unsigned int
1751minimask_get_prefix_len(const struct minimask *minimask,
1752 const struct mf_field *mf)
1753{
c30cfa6b 1754 unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
13751fd8
JR
1755 uint8_t u32_ofs = mf->flow_be32ofs;
1756 uint8_t u32_end = u32_ofs + mf->n_bytes / 4;
1757
1758 for (; u32_ofs < u32_end; ++u32_ofs) {
1759 uint32_t mask;
1760 mask = ntohl((OVS_FORCE ovs_be32)minimask_get(minimask, u32_ofs));
1761
1762 /* Validate mask, count the mask length. */
1763 if (mask_tz) {
1764 if (mask) {
1765 return 0; /* No bits allowed after mask ended. */
1766 }
1767 } else {
1768 if (~mask & (~mask + 1)) {
1769 return 0; /* Mask not contiguous. */
1770 }
1771 mask_tz = ctz32(mask);
c30cfa6b 1772 n_bits += 32 - mask_tz;
13751fd8
JR
1773 }
1774 }
1775
c30cfa6b 1776 return n_bits;
13751fd8
JR
1777}
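/* A standalone sketch of the contiguity test in minimask_get_prefix_len()
 * above ('demo_' name is illustrative): for a CIDR mask, ~mask has the
 * form 0...01...1, so ~mask and ~mask + 1 share no bits; any shared bit
 * reveals a hole in the mask. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
demo_mask_is_contiguous(uint32_t mask)
{
    return !(~mask & (~mask + 1));
}

int
main(void)
{
    assert(demo_mask_is_contiguous(0xFFFFFF00u));   /* /24 mask. */
    assert(demo_mask_is_contiguous(0x00000000u));   /* Empty mask. */
    assert(demo_mask_is_contiguous(0xFFFFFFFFu));   /* Full mask. */
    assert(!demo_mask_is_contiguous(0xFF00FF00u));  /* Hole: not CIDR. */
    return 0;
}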
1778
1779/*
1780 * This is called only when the mask prefix is known to be CIDR and non-zero.
1781 * Relies on the fact that the flow and mask have the same map, and since
1782 * the mask is CIDR, the storage for the flow field exists even if it
1783 * happened to be zeros.
1784 */
1785static const ovs_be32 *
1786minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
1787{
27bbe15d 1788 return miniflow_get_be32_values(&match->flow) +
13751fd8
JR
1789 count_1bits(match->flow.map & ((UINT64_C(1) << mf->flow_be32ofs) - 1));
1790}
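/* A standalone sketch of the offset computation in minimatch_get_prefix()
 * above (values are illustrative): a miniflow stores only the words whose
 * bit is set in 'map', so the storage index of word 'ofs' is the
 * population count of the map bits below it.  Uses the GCC/Clang
 * __builtin_popcountll() builtin in place of OVS's count_1bits(). */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
    /* Words 1, 4 and 9 are present; values[] holds only those three. */
    uint64_t map = UINT64_C(1) << 1 | UINT64_C(1) << 4 | UINT64_C(1) << 9;
    uint32_t values[3] = { 111, 444, 999 };
    unsigned int ofs = 9;
    unsigned int idx;

    idx = __builtin_popcountll(map & ((UINT64_C(1) << ofs) - 1));
    assert(idx == 2 && values[idx] == 999);
    return 0;
}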
1791
1792/* Insert rule into the prefix tree.
1793 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
1794 * in 'rule'. */
1795static void
1796trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
1797{
69d6040e
JR
1798 trie_insert_prefix(&trie->root,
1799 minimatch_get_prefix(&rule->match, trie->field), mlen);
1800}
1801
1802static void
f358a2cb 1803trie_insert_prefix(rcu_trie_ptr *edge, const ovs_be32 *prefix, int mlen)
69d6040e 1804{
13751fd8 1805 struct trie_node *node;
13751fd8
JR
1806 int ofs = 0;
1807
1808 /* Walk the tree. */
f358a2cb 1809 for (; (node = ovsrcu_get_protected(struct trie_node *, edge));
13751fd8
JR
1810 edge = trie_next_edge(node, prefix, ofs)) {
1811 unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
1812 ofs += eqbits;
c30cfa6b 1813 if (eqbits < node->n_bits) {
13751fd8
JR
1814 /* Mismatch, new node needs to be inserted above. */
1815 int old_branch = get_bit_at(node->prefix, eqbits);
f358a2cb 1816 struct trie_node *new_parent;
13751fd8 1817
f358a2cb
JR
1818 new_parent = trie_branch_create(prefix, ofs - eqbits, eqbits,
1819 ofs == mlen ? 1 : 0);
1820 /* Copy the node to modify it. */
1821 node = trie_node_rcu_realloc(node);
1822 /* Adjust the new node for its new position in the tree. */
13751fd8 1823 node->prefix <<= eqbits;
c30cfa6b 1824 node->n_bits -= eqbits;
f358a2cb 1825 ovsrcu_set_hidden(&new_parent->edges[old_branch], node);
13751fd8
JR
1826
1827 /* Check if we need a new branch for the new rule. */
1828 if (ofs < mlen) {
f358a2cb
JR
1829 ovsrcu_set_hidden(&new_parent->edges[!old_branch],
1830 trie_branch_create(prefix, ofs, mlen - ofs,
1831 1));
13751fd8 1832 }
f358a2cb 1833 ovsrcu_set(edge, new_parent); /* Publish changes. */
13751fd8
JR
1834 return;
1835 }
1836 /* Full match so far. */
1837
1838 if (ofs == mlen) {
1839 /* Full match at the current node, rule needs to be added here. */
1840 node->n_rules++;
1841 return;
1842 }
1843 }
1844 /* Must insert a new tree branch for the new rule. */
f358a2cb 1845 ovsrcu_set(edge, trie_branch_create(prefix, ofs, mlen - ofs, 1));
13751fd8
JR
1846}
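/* A standalone sketch of the split arithmetic in trie_insert_prefix()
 * above (values are illustrative): when a node holding 16 left-aligned
 * prefix bits shares only its first 8 bits with the new prefix, the new
 * parent keeps those 8 bits and the old node keeps the rest, shifted
 * left to stay left-aligned. */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
    uint32_t prefix = 0xABCD0000u;  /* 16-bit prefix, left-aligned. */
    unsigned int n_bits = 16;
    unsigned int eqbits = 8;        /* Bits shared with the new prefix. */
    uint32_t parent = prefix & ~0u << (32 - eqbits);    /* Keeps 0xAB. */

    prefix <<= eqbits;              /* Old node keeps 0xCD, realigned. */
    n_bits -= eqbits;

    assert(parent == 0xAB000000u);
    assert(prefix == 0xCD000000u && n_bits == 8);
    return 0;
}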
1847
1848/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
1849 * in 'rule'. */
1850static void
1851trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
1852{
69d6040e
JR
1853 trie_remove_prefix(&trie->root,
1854 minimatch_get_prefix(&rule->match, trie->field), mlen);
1855}
1856
1857/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
1858 * in 'rule'. */
1859static void
f358a2cb 1860trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 *prefix, int mlen)
69d6040e 1861{
13751fd8 1862 struct trie_node *node;
f358a2cb 1863 rcu_trie_ptr *edges[sizeof(union mf_value) * 8];
13751fd8
JR
1864 int depth = 0, ofs = 0;
1865
1866 /* Walk the tree. */
69d6040e 1867 for (edges[0] = root;
f358a2cb 1868 (node = ovsrcu_get_protected(struct trie_node *, edges[depth]));
13751fd8
JR
1869 edges[++depth] = trie_next_edge(node, prefix, ofs)) {
1870 unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
69d6040e 1871
c30cfa6b 1872 if (eqbits < node->n_bits) {
13751fd8
JR
1873 /* Mismatch, nothing to be removed. This should never happen, as
1874 * only rules in the classifier are ever removed. */
1875 break; /* Log a warning. */
1876 }
1877 /* Full match so far. */
1878 ofs += eqbits;
1879
1880 if (ofs == mlen) {
1881 /* Full prefix match at the current node, remove rule here. */
1882 if (!node->n_rules) {
1883 break; /* Log a warning. */
1884 }
1885 node->n_rules--;
1886
1887 /* Check if we can prune the tree. */
f358a2cb
JR
1888 while (!node->n_rules) {
1889 struct trie_node *next,
1890 *edge0 = ovsrcu_get_protected(struct trie_node *,
1891 &node->edges[0]),
1892 *edge1 = ovsrcu_get_protected(struct trie_node *,
1893 &node->edges[1]);
1894
1895 if (edge0 && edge1) {
1896 break; /* A branching point, cannot prune. */
1897 }
1898
1899 /* Otherwise there is at most one child node; remove this node. */
1900 next = edge0 ? edge0 : edge1;
13751fd8
JR
1901
1902 if (next) {
c30cfa6b 1903 if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) {
13751fd8
JR
1904 break; /* Cannot combine. */
1905 }
f358a2cb
JR
1906 next = trie_node_rcu_realloc(next); /* Modify. */
1907
13751fd8 1908 /* Combine node with next. */
c30cfa6b
JR
1909 next->prefix = node->prefix | next->prefix >> node->n_bits;
1910 next->n_bits += node->n_bits;
13751fd8 1911 }
13751fd8 1912 /* Update the parent's edge. */
f358a2cb
JR
1913 ovsrcu_set(edges[depth], next); /* Publish changes. */
1914 trie_node_destroy(node);
1915
13751fd8
JR
1916 if (next || !depth) {
1917 /* Branch not pruned or at root, nothing more to do. */
1918 break;
1919 }
f358a2cb
JR
1920 node = ovsrcu_get_protected(struct trie_node *,
1921 edges[--depth]);
13751fd8
JR
1922 }
1923 return;
1924 }
1925 }
1926 /* Cannot go deeper. This should never happen, since only rules
1927 * that actually exist in the classifier are ever removed. */
1928 VLOG_WARN("Trying to remove a nonexistent rule from a prefix trie.");
1929}
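/* A standalone sketch of the node-combining step in the pruning loop of
 * trie_remove_prefix() above (values are illustrative): a rule-less
 * parent holding 8 left-aligned prefix bits and its only child holding 8
 * more merge into a single 16-bit node, provided the total still fits in
 * TRIE_PREFIX_BITS. */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
    uint32_t parent_prefix = 0xAB000000u;   /* 8 bits, left-aligned. */
    uint32_t child_prefix = 0xCD000000u;    /* 8 bits, left-aligned. */
    unsigned int parent_bits = 8, child_bits = 8;
    uint32_t combined;

    combined = parent_prefix | child_prefix >> parent_bits;
    child_bits += parent_bits;

    assert(combined == 0xABCD0000u);
    assert(child_bits == 16);
    return 0;
}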