]> git.proxmox.com Git - ovs.git/blame - lib/classifier.c
lib/classifier: Stylistic change.
[ovs.git] / lib / classifier.c
CommitLineData
064af421 1/*
4aacd02d 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
064af421 3 *
a14bc59f
BP
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
064af421 7 *
a14bc59f
BP
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
064af421
BP
15 */
16
17#include <config.h>
18#include "classifier.h"
064af421
BP
19#include <errno.h>
20#include <netinet/in.h>
844dff32 21#include "byte-order.h"
68d1c8c3 22#include "dynamic-string.h"
064af421
BP
23#include "flow.h"
24#include "hash.h"
f2c21402 25#include "cmap.h"
52054c15 26#include "list.h"
07b37e8f 27#include "odp-util.h"
d8ae4d67 28#include "ofp-util.h"
0b4f2078 29#include "ovs-thread.h"
13751fd8 30#include "packets.h"
fe7cfa5c 31#include "pvector.h"
52054c15
JR
32#include "tag.h"
33#include "util.h"
13751fd8
JR
34#include "vlog.h"
35
36VLOG_DEFINE_THIS_MODULE(classifier);
064af421 37
cabd4c43 38struct trie_node;
69d6040e
JR
39struct trie_ctx;
40
41/* Ports trie depends on both ports sharing the same ovs_be32. */
42#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
43BUILD_ASSERT_DECL(TP_PORTS_OFS32 == offsetof(struct flow, tp_dst) / 4);
cabd4c43
JR
44
45/* Prefix trie for a 'field' */
46struct cls_trie {
47 const struct mf_field *field; /* Trie field, or NULL. */
48 struct trie_node *root; /* NULL if none. */
49};
50
627fb667
JR
51enum {
52 CLS_MAX_INDICES = 3 /* Maximum number of lookup indices per subtable. */
53};
54
cabd4c43 55struct cls_classifier {
5a87054c 56 int n_rules; /* Total number of rules. */
cabd4c43
JR
57 uint8_t n_flow_segments;
58 uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use
59 * for staged lookup. */
f2c21402 60 struct cmap subtables_map; /* Contains "struct cls_subtable"s. */
fe7cfa5c 61 struct pvector subtables;
f2c21402 62 struct cmap partitions; /* Contains "struct cls_partition"s. */
cabd4c43
JR
63 struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */
64 unsigned int n_tries;
65};
66
67/* A set of rules that all have the same fields wildcarded. */
68struct cls_subtable {
f2c21402
JR
69 struct cmap_node cmap_node; /* Within struct cls_classifier
70 * 'subtables_map'. */
71 struct cmap rules; /* Contains "struct cls_rule"s. */
cabd4c43
JR
72 int n_rules; /* Number of rules, including duplicates. */
73 unsigned int max_priority; /* Max priority of any rule in the subtable. */
74 unsigned int max_count; /* Count of max_priority rules. */
75 tag_type tag; /* Tag generated from mask for partitioning. */
76 uint8_t n_indices; /* How many indices to use. */
77 uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */
f2c21402 78 struct cmap indices[CLS_MAX_INDICES]; /* Staged lookup indices. */
cabd4c43 79 unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */
69d6040e
JR
80 int ports_mask_len;
81 struct trie_node *ports_trie; /* NULL if none. */
3016f3e4
JR
82 struct minimask mask; /* Wildcards for fields. */
83 /* 'mask' must be the last field. */
cabd4c43
JR
84};
85
86/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
87 * field) with tags for the "cls_subtable"s that contain rules that match that
88 * metadata value. */
89struct cls_partition {
f2c21402
JR
90 struct cmap_node cmap_node; /* In struct cls_classifier's 'partitions'
91 * map. */
cabd4c43
JR
92 ovs_be64 metadata; /* metadata value for this partition. */
93 tag_type tags; /* OR of each flow's cls_subtable tag. */
94 struct tag_tracker tracker; /* Tracks the bits in 'tags'. */
95};
96
627fb667
JR
97/* Internal representation of a rule in a "struct cls_subtable". */
98struct cls_match {
99 struct cls_rule *cls_rule;
f2c21402
JR
100 struct cmap_node index_nodes[CLS_MAX_INDICES]; /* Within subtable's
101 * 'indices'. */
102 struct cmap_node cmap_node; /* Within struct cls_subtable 'rules'. */
627fb667
JR
103 unsigned int priority; /* Larger numbers are higher priorities. */
104 struct cls_partition *partition;
105 struct list list; /* List of identical, lower-priority rules. */
3016f3e4
JR
106 struct miniflow flow; /* Matching rule. Mask is in the subtable. */
107 /* 'flow' must be the last field. */
627fb667 108};
cabd4c43 109
627fb667
JR
110static struct cls_match *
111cls_match_alloc(struct cls_rule *rule)
112{
3016f3e4
JR
113 int count = count_1bits(rule->match.flow.map);
114
115 struct cls_match *cls_match
116 = xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values
117 + MINIFLOW_VALUES_SIZE(count));
627fb667
JR
118
119 cls_match->cls_rule = rule;
3016f3e4 120 miniflow_clone_inline(&cls_match->flow, &rule->match.flow, count);
627fb667
JR
121 cls_match->priority = rule->priority;
122 rule->cls_match = cls_match;
123
124 return cls_match;
125}
cabd4c43 126
cabd4c43 127static struct cls_subtable *find_subtable(const struct cls_classifier *,
03868246 128 const struct minimask *);
cabd4c43 129static struct cls_subtable *insert_subtable(struct cls_classifier *,
03868246 130 const struct minimask *);
b5d97350 131
cabd4c43 132static void destroy_subtable(struct cls_classifier *, struct cls_subtable *);
b5d97350 133
627fb667
JR
134static struct cls_match *find_match_wc(const struct cls_subtable *,
135 const struct flow *, struct trie_ctx *,
136 unsigned int n_tries,
137 struct flow_wildcards *);
138static struct cls_match *find_equal(struct cls_subtable *,
139 const struct miniflow *, uint32_t hash);
140static struct cls_match *insert_rule(struct cls_classifier *,
141 struct cls_subtable *, struct cls_rule *);
b5d97350 142
b5d97350
BP
143/* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */
144#define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \
145 for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE))
146#define FOR_EACH_RULE_IN_LIST_SAFE(RULE, NEXT, HEAD) \
147 for ((RULE) = (HEAD); \
148 (RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true); \
149 (RULE) = (NEXT))
150
627fb667
JR
151static struct cls_match *next_rule_in_list__(struct cls_match *);
152static struct cls_match *next_rule_in_list(struct cls_match *);
13751fd8
JR
153
154static unsigned int minimask_get_prefix_len(const struct minimask *,
155 const struct mf_field *);
cabd4c43 156static void trie_init(struct cls_classifier *, int trie_idx,
13751fd8
JR
157 const struct mf_field *);
158static unsigned int trie_lookup(const struct cls_trie *, const struct flow *,
159 unsigned int *checkbits);
69d6040e
JR
160static unsigned int trie_lookup_value(const struct trie_node *,
161 const ovs_be32 value[],
f0e5aa11 162 unsigned int value_bits,
69d6040e 163 unsigned int *checkbits);
13751fd8
JR
164static void trie_destroy(struct trie_node *);
165static void trie_insert(struct cls_trie *, const struct cls_rule *, int mlen);
69d6040e
JR
166static void trie_insert_prefix(struct trie_node **, const ovs_be32 *prefix,
167 int mlen);
13751fd8 168static void trie_remove(struct cls_trie *, const struct cls_rule *, int mlen);
69d6040e
JR
169static void trie_remove_prefix(struct trie_node **, const ovs_be32 *prefix,
170 int mlen);
13751fd8 171static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs,
c30cfa6b 172 unsigned int n_bits);
13751fd8 173static bool mask_prefix_bits_set(const struct flow_wildcards *,
c30cfa6b 174 uint8_t be32ofs, unsigned int n_bits);
3d91d909
JR
175\f
176/* flow/miniflow/minimask/minimatch utilities.
177 * These are only used by the classifier, so place them here to allow
178 * for better optimization. */
179
180static inline uint64_t
181miniflow_get_map_in_range(const struct miniflow *miniflow,
182 uint8_t start, uint8_t end, unsigned int *offset)
183{
184 uint64_t map = miniflow->map;
185 *offset = 0;
186
187 if (start > 0) {
188 uint64_t msk = (UINT64_C(1) << start) - 1; /* 'start' LSBs set */
189 *offset = count_1bits(map & msk);
190 map &= ~msk;
191 }
192 if (end < FLOW_U32S) {
193 uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */
194 map &= msk;
195 }
196 return map;
197}
198
199/* Returns a hash value for the bits of 'flow' where there are 1-bits in
200 * 'mask', given 'basis'.
201 *
202 * The hash values returned by this function are the same as those returned by
203 * miniflow_hash_in_minimask(), only the form of the arguments differ. */
204static inline uint32_t
205flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
206 uint32_t basis)
207{
27bbe15d 208 const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
3d91d909 209 const uint32_t *flow_u32 = (const uint32_t *)flow;
27bbe15d 210 const uint32_t *p = mask_values;
3d91d909
JR
211 uint32_t hash;
212 uint64_t map;
213
214 hash = basis;
215 for (map = mask->masks.map; map; map = zero_rightmost_1bit(map)) {
33c6a1b9 216 hash = hash_add(hash, flow_u32[raw_ctz(map)] & *p++);
3d91d909
JR
217 }
218
33c6a1b9 219 return hash_finish(hash, (p - mask_values) * 4);
3d91d909
JR
220}
221
222/* Returns a hash value for the bits of 'flow' where there are 1-bits in
223 * 'mask', given 'basis'.
224 *
225 * The hash values returned by this function are the same as those returned by
226 * flow_hash_in_minimask(), only the form of the arguments differ. */
227static inline uint32_t
228miniflow_hash_in_minimask(const struct miniflow *flow,
229 const struct minimask *mask, uint32_t basis)
230{
27bbe15d
JR
231 const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
232 const uint32_t *p = mask_values;
3d91d909
JR
233 uint32_t hash = basis;
234 uint32_t flow_u32;
235
236 MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) {
33c6a1b9 237 hash = hash_add(hash, flow_u32 & *p++);
3d91d909
JR
238 }
239
33c6a1b9 240 return hash_finish(hash, (p - mask_values) * 4);
3d91d909
JR
241}
242
243/* Returns a hash value for the bits of range [start, end) in 'flow',
244 * where there are 1-bits in 'mask', given 'hash'.
245 *
246 * The hash values returned by this function are the same as those returned by
247 * minimatch_hash_range(), only the form of the arguments differ. */
248static inline uint32_t
249flow_hash_in_minimask_range(const struct flow *flow,
250 const struct minimask *mask,
251 uint8_t start, uint8_t end, uint32_t *basis)
252{
27bbe15d 253 const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks);
3d91d909
JR
254 const uint32_t *flow_u32 = (const uint32_t *)flow;
255 unsigned int offset;
256 uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end,
257 &offset);
27bbe15d 258 const uint32_t *p = mask_values + offset;
3d91d909
JR
259 uint32_t hash = *basis;
260
261 for (; map; map = zero_rightmost_1bit(map)) {
33c6a1b9 262 hash = hash_add(hash, flow_u32[raw_ctz(map)] & *p++);
3d91d909
JR
263 }
264
265 *basis = hash; /* Allow continuation from the unfinished value. */
33c6a1b9 266 return hash_finish(hash, (p - mask_values) * 4);
3d91d909
JR
267}
268
269/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask. */
270static inline void
271flow_wildcards_fold_minimask(struct flow_wildcards *wc,
272 const struct minimask *mask)
273{
274 flow_union_with_miniflow(&wc->masks, &mask->masks);
275}
276
277/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask
278 * in range [start, end). */
279static inline void
280flow_wildcards_fold_minimask_range(struct flow_wildcards *wc,
281 const struct minimask *mask,
282 uint8_t start, uint8_t end)
283{
284 uint32_t *dst_u32 = (uint32_t *)&wc->masks;
285 unsigned int offset;
286 uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end,
287 &offset);
27bbe15d 288 const uint32_t *p = miniflow_get_u32_values(&mask->masks) + offset;
3d91d909
JR
289
290 for (; map; map = zero_rightmost_1bit(map)) {
291 dst_u32[raw_ctz(map)] |= *p++;
292 }
293}
294
295/* Returns a hash value for 'flow', given 'basis'. */
296static inline uint32_t
297miniflow_hash(const struct miniflow *flow, uint32_t basis)
298{
27bbe15d
JR
299 const uint32_t *values = miniflow_get_u32_values(flow);
300 const uint32_t *p = values;
3d91d909
JR
301 uint32_t hash = basis;
302 uint64_t hash_map = 0;
303 uint64_t map;
304
305 for (map = flow->map; map; map = zero_rightmost_1bit(map)) {
306 if (*p) {
33c6a1b9 307 hash = hash_add(hash, *p);
3d91d909
JR
308 hash_map |= rightmost_1bit(map);
309 }
310 p++;
311 }
33c6a1b9
JR
312 hash = hash_add(hash, hash_map);
313 hash = hash_add(hash, hash_map >> 32);
3d91d909 314
33c6a1b9 315 return hash_finish(hash, p - values);
3d91d909
JR
316}
317
318/* Returns a hash value for 'mask', given 'basis'. */
319static inline uint32_t
320minimask_hash(const struct minimask *mask, uint32_t basis)
321{
322 return miniflow_hash(&mask->masks, basis);
323}
324
325/* Returns a hash value for 'match', given 'basis'. */
326static inline uint32_t
327minimatch_hash(const struct minimatch *match, uint32_t basis)
328{
329 return miniflow_hash(&match->flow, minimask_hash(&match->mask, basis));
330}
331
332/* Returns a hash value for the bits of range [start, end) in 'minimatch',
333 * given 'basis'.
334 *
335 * The hash values returned by this function are the same as those returned by
336 * flow_hash_in_minimask_range(), only the form of the arguments differ. */
337static inline uint32_t
338minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end,
339 uint32_t *basis)
340{
341 unsigned int offset;
342 const uint32_t *p, *q;
343 uint32_t hash = *basis;
344 int n, i;
345
346 n = count_1bits(miniflow_get_map_in_range(&match->mask.masks, start, end,
347 &offset));
27bbe15d
JR
348 q = miniflow_get_u32_values(&match->mask.masks) + offset;
349 p = miniflow_get_u32_values(&match->flow) + offset;
3d91d909
JR
350
351 for (i = 0; i < n; i++) {
33c6a1b9 352 hash = hash_add(hash, p[i] & q[i]);
3d91d909
JR
353 }
354 *basis = hash; /* Allow continuation from the unfinished value. */
33c6a1b9 355 return hash_finish(hash, (offset + n) * 4);
3d91d909
JR
356}
357
81a76618
BP
358\f
359/* cls_rule. */
b5d97350 360
81a76618 361/* Initializes 'rule' to match packets specified by 'match' at the given
5cb7a798
BP
362 * 'priority'. 'match' must satisfy the invariant described in the comment at
363 * the definition of struct match.
66642cb4 364 *
48d28ac1
BP
365 * The caller must eventually destroy 'rule' with cls_rule_destroy().
366 *
81a76618
BP
367 * (OpenFlow uses priorities between 0 and UINT16_MAX, inclusive, but
368 * internally Open vSwitch supports a wider range.) */
47284b1f 369void
81a76618
BP
370cls_rule_init(struct cls_rule *rule,
371 const struct match *match, unsigned int priority)
47284b1f 372{
5cb7a798
BP
373 minimatch_init(&rule->match, match);
374 rule->priority = priority;
627fb667 375 rule->cls_match = NULL;
5cb7a798
BP
376}
377
378/* Same as cls_rule_init() for initialization from a "struct minimatch". */
379void
380cls_rule_init_from_minimatch(struct cls_rule *rule,
381 const struct minimatch *match,
382 unsigned int priority)
383{
384 minimatch_clone(&rule->match, match);
81a76618 385 rule->priority = priority;
627fb667 386 rule->cls_match = NULL;
685a51a5
JP
387}
388
48d28ac1
BP
389/* Initializes 'dst' as a copy of 'src'.
390 *
b2c1f00b 391 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
48d28ac1
BP
392void
393cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src)
394{
5cb7a798
BP
395 minimatch_clone(&dst->match, &src->match);
396 dst->priority = src->priority;
627fb667 397 dst->cls_match = NULL;
48d28ac1
BP
398}
399
b2c1f00b
BP
400/* Initializes 'dst' with the data in 'src', destroying 'src'.
401 *
402 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
403void
404cls_rule_move(struct cls_rule *dst, struct cls_rule *src)
405{
406 minimatch_move(&dst->match, &src->match);
407 dst->priority = src->priority;
627fb667 408 dst->cls_match = NULL;
b2c1f00b
BP
409}
410
48d28ac1
BP
411/* Frees memory referenced by 'rule'. Doesn't free 'rule' itself (it's
412 * normally embedded into a larger structure).
413 *
414 * ('rule' must not currently be in a classifier.) */
415void
5cb7a798 416cls_rule_destroy(struct cls_rule *rule)
48d28ac1 417{
627fb667 418 ovs_assert(!rule->cls_match);
5cb7a798 419 minimatch_destroy(&rule->match);
48d28ac1
BP
420}
421
81a76618
BP
422/* Returns true if 'a' and 'b' match the same packets at the same priority,
423 * false if they differ in some way. */
193eb874
BP
424bool
425cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b)
426{
5cb7a798 427 return a->priority == b->priority && minimatch_equal(&a->match, &b->match);
193eb874
BP
428}
429
81a76618 430/* Returns a hash value for 'rule', folding in 'basis'. */
57452fdc
BP
431uint32_t
432cls_rule_hash(const struct cls_rule *rule, uint32_t basis)
433{
5cb7a798 434 return minimatch_hash(&rule->match, hash_int(rule->priority, basis));
73f33563
BP
435}
436
81a76618 437/* Appends a string describing 'rule' to 's'. */
07b37e8f
BP
438void
439cls_rule_format(const struct cls_rule *rule, struct ds *s)
440{
5cb7a798 441 minimatch_format(&rule->match, s, rule->priority);
064af421 442}
3ca1de08
BP
443
444/* Returns true if 'rule' matches every packet, false otherwise. */
445bool
446cls_rule_is_catchall(const struct cls_rule *rule)
447{
5cb7a798 448 return minimask_is_catchall(&rule->match.mask);
3ca1de08 449}
064af421
BP
450\f
451/* Initializes 'cls' as a classifier that initially contains no classification
452 * rules. */
453void
cabd4c43 454classifier_init(struct classifier *cls_, const uint8_t *flow_segments)
064af421 455{
cabd4c43
JR
456 struct cls_classifier *cls = xmalloc(sizeof *cls);
457
458 fat_rwlock_init(&cls_->rwlock);
459
460 cls_->cls = cls;
461
064af421 462 cls->n_rules = 0;
f2c21402 463 cmap_init(&cls->subtables_map);
fe7cfa5c 464 pvector_init(&cls->subtables);
f2c21402 465 cmap_init(&cls->partitions);
476f36e8
JR
466 cls->n_flow_segments = 0;
467 if (flow_segments) {
468 while (cls->n_flow_segments < CLS_MAX_INDICES
469 && *flow_segments < FLOW_U32S) {
470 cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
471 }
472 }
13751fd8 473 cls->n_tries = 0;
064af421
BP
474}
475
476/* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the
477 * caller's responsibility. */
478void
cabd4c43 479classifier_destroy(struct classifier *cls_)
064af421 480{
cabd4c43
JR
481 if (cls_) {
482 struct cls_classifier *cls = cls_->cls;
42181540 483 struct cls_partition *partition, *next_partition;
03868246 484 struct cls_subtable *subtable, *next_subtable;
13751fd8
JR
485 int i;
486
cabd4c43
JR
487 fat_rwlock_destroy(&cls_->rwlock);
488 if (!cls) {
489 return;
490 }
491
13751fd8
JR
492 for (i = 0; i < cls->n_tries; i++) {
493 trie_destroy(cls->tries[i].root);
494 }
064af421 495
f2c21402 496 CMAP_FOR_EACH_SAFE (subtable, next_subtable, cmap_node,
5a87054c 497 &cls->subtables_map) {
03868246 498 destroy_subtable(cls, subtable);
064af421 499 }
f2c21402 500 cmap_destroy(&cls->subtables_map);
c906cedf 501
f2c21402 502 CMAP_FOR_EACH_SAFE (partition, next_partition, cmap_node,
c906cedf 503 &cls->partitions) {
f2c21402 504 ovsrcu_postpone(free, partition);
c906cedf 505 }
f2c21402 506 cmap_destroy(&cls->partitions);
cabd4c43 507
fe7cfa5c 508 pvector_destroy(&cls->subtables);
cabd4c43 509 free(cls);
064af421
BP
510 }
511}
512
13751fd8
JR
513/* We use uint64_t as a set for the fields below. */
514BUILD_ASSERT_DECL(MFF_N_IDS <= 64);
515
516/* Set the fields for which prefix lookup should be performed. */
517void
cabd4c43 518classifier_set_prefix_fields(struct classifier *cls_,
13751fd8
JR
519 const enum mf_field_id *trie_fields,
520 unsigned int n_fields)
521{
cabd4c43 522 struct cls_classifier *cls = cls_->cls;
13751fd8
JR
523 uint64_t fields = 0;
524 int i, trie;
525
526 for (i = 0, trie = 0; i < n_fields && trie < CLS_MAX_TRIES; i++) {
527 const struct mf_field *field = mf_from_id(trie_fields[i]);
528 if (field->flow_be32ofs < 0 || field->n_bits % 32) {
529 /* Incompatible field. This is the only place where we
530 * enforce these requirements, but the rest of the trie code
531 * depends on the flow_be32ofs to be non-negative and the
532 * field length to be a multiple of 32 bits. */
533 continue;
534 }
535
536 if (fields & (UINT64_C(1) << trie_fields[i])) {
537 /* Duplicate field, there is no need to build more than
538 * one index for any one field. */
539 continue;
540 }
541 fields |= UINT64_C(1) << trie_fields[i];
542
543 if (trie >= cls->n_tries || field != cls->tries[trie].field) {
544 trie_init(cls, trie, field);
545 }
546 trie++;
547 }
548
549 /* Destroy the rest. */
550 for (i = trie; i < cls->n_tries; i++) {
551 trie_init(cls, i, NULL);
552 }
553 cls->n_tries = trie;
554}
555
556static void
cabd4c43 557trie_init(struct cls_classifier *cls, int trie_idx,
13751fd8
JR
558 const struct mf_field *field)
559{
560 struct cls_trie *trie = &cls->tries[trie_idx];
561 struct cls_subtable *subtable;
562
563 if (trie_idx < cls->n_tries) {
564 trie_destroy(trie->root);
565 }
566 trie->root = NULL;
567 trie->field = field;
568
569 /* Add existing rules to the trie. */
f2c21402 570 CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
13751fd8
JR
571 unsigned int plen;
572
573 plen = field ? minimask_get_prefix_len(&subtable->mask, field) : 0;
574 /* Initialize subtable's prefix length on this field. */
575 subtable->trie_plen[trie_idx] = plen;
576
577 if (plen) {
627fb667 578 struct cls_match *head;
13751fd8 579
f2c21402 580 CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
627fb667 581 struct cls_match *match;
13751fd8 582
627fb667
JR
583 FOR_EACH_RULE_IN_LIST (match, head) {
584 trie_insert(trie, match->cls_rule, plen);
13751fd8
JR
585 }
586 }
587 }
588 }
589}
590
5f0476ce
JR
591/* Returns true if 'cls' contains no classification rules, false otherwise.
592 * Checking the cmap requires no locking. */
064af421
BP
593bool
594classifier_is_empty(const struct classifier *cls)
595{
5f0476ce 596 return cmap_is_empty(&cls->cls->subtables_map);
064af421
BP
597}
598
dbda2960 599/* Returns the number of rules in 'cls'. */
064af421
BP
600int
601classifier_count(const struct classifier *cls)
602{
cabd4c43 603 return cls->cls->n_rules;
064af421
BP
604}
605
c906cedf
BP
606static uint32_t
607hash_metadata(ovs_be64 metadata_)
608{
609 uint64_t metadata = (OVS_FORCE uint64_t) metadata_;
965607c8 610 return hash_uint64(metadata);
c906cedf
BP
611}
612
613static struct cls_partition *
cabd4c43
JR
614find_partition(const struct cls_classifier *cls, ovs_be64 metadata,
615 uint32_t hash)
c906cedf
BP
616{
617 struct cls_partition *partition;
618
f2c21402 619 CMAP_FOR_EACH_WITH_HASH (partition, cmap_node, hash, &cls->partitions) {
c906cedf
BP
620 if (partition->metadata == metadata) {
621 return partition;
622 }
623 }
624
625 return NULL;
626}
627
628static struct cls_partition *
cabd4c43 629create_partition(struct cls_classifier *cls, struct cls_subtable *subtable,
c906cedf
BP
630 ovs_be64 metadata)
631{
632 uint32_t hash = hash_metadata(metadata);
633 struct cls_partition *partition = find_partition(cls, metadata, hash);
634 if (!partition) {
635 partition = xmalloc(sizeof *partition);
636 partition->metadata = metadata;
637 partition->tags = 0;
183126a1 638 tag_tracker_init(&partition->tracker);
f2c21402 639 cmap_insert(&cls->partitions, &partition->cmap_node, hash);
c906cedf 640 }
03868246 641 tag_tracker_add(&partition->tracker, &partition->tags, subtable->tag);
c906cedf
BP
642 return partition;
643}
644
69d6040e
JR
645static inline ovs_be32 minimatch_get_ports(const struct minimatch *match)
646{
647 /* Could optimize to use the same map if needed for fast path. */
648 return MINIFLOW_GET_BE32(&match->flow, tp_src)
649 & MINIFLOW_GET_BE32(&match->mask.masks, tp_src);
650}
651
b5d97350
BP
652/* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
653 * must not modify or free it.
064af421
BP
654 *
655 * If 'cls' already contains an identical rule (including wildcards, values of
656 * fixed fields, and priority), replaces the old rule by 'rule' and returns the
657 * rule that was replaced. The caller takes ownership of the returned rule and
48d28ac1
BP
658 * is thus responsible for destroying it with cls_rule_destroy(), freeing the
659 * memory block in which it resides, etc., as necessary.
064af421
BP
660 *
661 * Returns NULL if 'cls' does not contain a rule with an identical key, after
662 * inserting the new rule. In this case, no rules are displaced by the new
663 * rule, even rules that cannot have any effect because the new rule matches a
664 * superset of their flows and has higher priority. */
665struct cls_rule *
cabd4c43 666classifier_replace(struct classifier *cls_, struct cls_rule *rule)
064af421 667{
cabd4c43 668 struct cls_classifier *cls = cls_->cls;
627fb667 669 struct cls_match *old_rule;
03868246 670 struct cls_subtable *subtable;
b5d97350 671
03868246
JR
672 subtable = find_subtable(cls, &rule->match.mask);
673 if (!subtable) {
674 subtable = insert_subtable(cls, &rule->match.mask);
b5d97350
BP
675 }
676
03868246 677 old_rule = insert_rule(cls, subtable, rule);
b5d97350 678 if (!old_rule) {
13751fd8
JR
679 int i;
680
627fb667 681 rule->cls_match->partition = NULL;
c906cedf
BP
682 if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) {
683 ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow);
627fb667
JR
684 rule->cls_match->partition = create_partition(cls, subtable,
685 metadata);
c906cedf
BP
686 }
687
064af421 688 cls->n_rules++;
13751fd8
JR
689
690 for (i = 0; i < cls->n_tries; i++) {
691 if (subtable->trie_plen[i]) {
692 trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);
693 }
694 }
69d6040e
JR
695
696 /* Ports trie. */
697 if (subtable->ports_mask_len) {
698 /* We mask the value to be inserted to always have the wildcarded
699 * bits in known (zero) state, so we can include them in comparison
700 * and they will always match (== their original value does not
701 * matter). */
702 ovs_be32 masked_ports = minimatch_get_ports(&rule->match);
703
704 trie_insert_prefix(&subtable->ports_trie, &masked_ports,
705 subtable->ports_mask_len);
706 }
707
627fb667 708 return NULL;
c906cedf 709 } else {
627fb667
JR
710 struct cls_rule *old_cls_rule = old_rule->cls_rule;
711
712 rule->cls_match->partition = old_rule->partition;
713 old_cls_rule->cls_match = NULL;
f2c21402
JR
714
715 /* 'old_rule' contains a cmap_node, which may not be freed
716 * immediately. */
717 ovsrcu_postpone(free, old_rule);
718
627fb667 719 return old_cls_rule;
064af421 720 }
064af421
BP
721}
722
08944c1d
BP
723/* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
724 * must not modify or free it.
725 *
726 * 'cls' must not contain an identical rule (including wildcards, values of
727 * fixed fields, and priority). Use classifier_find_rule_exactly() to find
728 * such a rule. */
729void
730classifier_insert(struct classifier *cls, struct cls_rule *rule)
731{
732 struct cls_rule *displaced_rule = classifier_replace(cls, rule);
cb22974d 733 ovs_assert(!displaced_rule);
08944c1d
BP
734}
735
48d28ac1
BP
736/* Removes 'rule' from 'cls'. It is the caller's responsibility to destroy
737 * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
738 * resides, etc., as necessary. */
064af421 739void
cabd4c43 740classifier_remove(struct classifier *cls_, struct cls_rule *rule)
064af421 741{
cabd4c43 742 struct cls_classifier *cls = cls_->cls;
c906cedf 743 struct cls_partition *partition;
627fb667
JR
744 struct cls_match *cls_match = rule->cls_match;
745 struct cls_match *head;
03868246 746 struct cls_subtable *subtable;
476f36e8 747 int i;
f2c21402
JR
748 uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
749 uint8_t prev_be32ofs = 0;
064af421 750
627fb667
JR
751 ovs_assert(cls_match);
752
03868246 753 subtable = find_subtable(cls, &rule->match.mask);
627fb667
JR
754 ovs_assert(subtable);
755
69d6040e
JR
756 if (subtable->ports_mask_len) {
757 ovs_be32 masked_ports = minimatch_get_ports(&rule->match);
758
759 trie_remove_prefix(&subtable->ports_trie,
760 &masked_ports, subtable->ports_mask_len);
761 }
13751fd8
JR
762 for (i = 0; i < cls->n_tries; i++) {
763 if (subtable->trie_plen[i]) {
764 trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]);
765 }
766 }
767
476f36e8
JR
768 /* Remove rule node from indices. */
769 for (i = 0; i < subtable->n_indices; i++) {
f2c21402
JR
770 ihash[i] = minimatch_hash_range(&rule->match, prev_be32ofs,
771 subtable->index_ofs[i], &basis);
772 cmap_remove(&subtable->indices[i], &cls_match->index_nodes[i],
773 ihash[i]);
774 prev_be32ofs = subtable->index_ofs[i];
476f36e8 775 }
f2c21402 776 hash = minimatch_hash_range(&rule->match, prev_be32ofs, FLOW_U32S, &basis);
476f36e8 777
f2c21402 778 head = find_equal(subtable, &rule->match.flow, hash);
627fb667
JR
779 if (head != cls_match) {
780 list_remove(&cls_match->list);
781 } else if (list_is_empty(&cls_match->list)) {
f2c21402 782 cmap_remove(&subtable->rules, &cls_match->cmap_node, hash);
b5d97350 783 } else {
627fb667
JR
784 struct cls_match *next = CONTAINER_OF(cls_match->list.next,
785 struct cls_match, list);
064af421 786
627fb667 787 list_remove(&cls_match->list);
f2c21402
JR
788 cmap_replace(&subtable->rules, &cls_match->cmap_node,
789 &next->cmap_node, hash);
b5d97350 790 }
064af421 791
627fb667 792 partition = cls_match->partition;
183126a1
BP
793 if (partition) {
794 tag_tracker_subtract(&partition->tracker, &partition->tags,
03868246 795 subtable->tag);
183126a1 796 if (!partition->tags) {
f2c21402
JR
797 cmap_remove(&cls->partitions, &partition->cmap_node,
798 hash_metadata(partition->metadata));
799 ovsrcu_postpone(free, partition);
183126a1 800 }
c906cedf
BP
801 }
802
03868246
JR
803 if (--subtable->n_rules == 0) {
804 destroy_subtable(cls, subtable);
fe7cfa5c
JR
805 } else if (subtable->max_priority == cls_match->priority
806 && --subtable->max_count == 0) {
807 /* Find the new 'max_priority' and 'max_count'. */
808 struct cls_match *head;
809 unsigned int max_priority = 0;
810
f2c21402 811 CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
fe7cfa5c
JR
812 if (head->priority > max_priority) {
813 max_priority = head->priority;
814 subtable->max_count = 1;
815 } else if (head->priority == max_priority) {
816 ++subtable->max_count;
817 }
818 }
819 subtable->max_priority = max_priority;
820 pvector_change_priority(&cls->subtables, subtable, max_priority);
4d935a6b 821 }
13751fd8 822
b5d97350 823 cls->n_rules--;
627fb667
JR
824
825 rule->cls_match = NULL;
f2c21402 826 ovsrcu_postpone(free, cls_match);
064af421
BP
827}
828
13751fd8
JR
829/* Prefix tree context. Valid when 'lookup_done' is true. Can skip all
830 * subtables which have more than 'match_plen' bits in their corresponding
831 * field at offset 'be32ofs'. If skipped, 'maskbits' prefix bits should be
832 * unwildcarded to quarantee datapath flow matches only packets it should. */
833struct trie_ctx {
834 const struct cls_trie *trie;
835 bool lookup_done; /* Status of the lookup. */
836 uint8_t be32ofs; /* U32 offset of the field in question. */
837 unsigned int match_plen; /* Longest prefix than could possibly match. */
838 unsigned int maskbits; /* Prefix length needed to avoid false matches. */
839};
840
841static void
842trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
843{
844 ctx->trie = trie;
845 ctx->be32ofs = trie->field->flow_be32ofs;
846 ctx->lookup_done = false;
847}
848
48c3de13
BP
849/* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
850 * Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules
74f74083
EJ
851 * of equal priority match 'flow', returns one arbitrarily.
852 *
853 * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
854 * set of bits that were significant in the lookup. At some point
855 * earlier, 'wc' should have been initialized (e.g., by
856 * flow_wildcards_init_catchall()). */
48c3de13 857struct cls_rule *
cabd4c43 858classifier_lookup(const struct classifier *cls_, const struct flow *flow,
74f74083 859 struct flow_wildcards *wc)
48c3de13 860{
cabd4c43 861 struct cls_classifier *cls = cls_->cls;
c906cedf 862 const struct cls_partition *partition;
c906cedf 863 tag_type tags;
ec988646 864 int64_t best_priority = -1;
fe7cfa5c
JR
865 const struct cls_match *best;
866 struct trie_ctx trie_ctx[CLS_MAX_TRIES];
867 struct cls_subtable *subtable;
c906cedf 868
03868246
JR
869 /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
870 * then 'flow' cannot possibly match in 'subtable':
c906cedf
BP
871 *
872 * - If flow->metadata maps to a given 'partition', then we can use
873 * 'tags' for 'partition->tags'.
874 *
875 * - If flow->metadata has no partition, then no rule in 'cls' has an
876 * exact-match for flow->metadata. That means that we don't need to
03868246 877 * search any subtable that includes flow->metadata in its mask.
c906cedf 878 *
03868246 879 * In either case, we always need to search any cls_subtables that do not
c906cedf 880 * include flow->metadata in its mask. One way to do that would be to
03868246
JR
881 * check the "cls_subtable"s explicitly for that, but that would require an
882 * extra branch per subtable. Instead, we mark such a cls_subtable's
883 * 'tags' as TAG_ALL and make sure that 'tags' is never empty. This means
884 * that 'tags' always intersects such a cls_subtable's 'tags', so we don't
885 * need a special case.
c906cedf 886 */
f2c21402 887 partition = (cmap_is_empty(&cls->partitions)
c906cedf
BP
888 ? NULL
889 : find_partition(cls, flow->metadata,
890 hash_metadata(flow->metadata)));
891 tags = partition ? partition->tags : TAG_ARBITRARY;
48c3de13 892
13751fd8 893 /* Initialize trie contexts for match_find_wc(). */
fe7cfa5c 894 for (int i = 0; i < cls->n_tries; i++) {
13751fd8
JR
895 trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
896 }
ec988646 897
b5d97350 898 best = NULL;
fe7cfa5c
JR
899 PVECTOR_FOR_EACH_PRIORITY(subtable, best_priority, 2,
900 sizeof(struct cls_subtable), &cls->subtables) {
627fb667 901 struct cls_match *rule;
c906cedf 902
fe7cfa5c 903 if (!tag_intersects(tags, subtable->tag)) {
c906cedf
BP
904 continue;
905 }
74f74083 906
fe7cfa5c 907 rule = find_match_wc(subtable, flow, trie_ctx, cls->n_tries, wc);
ec988646
JR
908 if (rule && (int64_t)rule->priority > best_priority) {
909 best_priority = (int64_t)rule->priority;
1f3c5efc 910 best = rule;
b5d97350 911 }
48c3de13 912 }
13751fd8 913
627fb667 914 return best ? best->cls_rule : NULL;
48c3de13
BP
915}
916
2abf78ff 917/* Returns true if 'target' satisifies 'match', that is, if each bit for which
3016f3e4
JR
918 * 'match' specifies a particular value has the correct value in 'target'.
919 *
920 * 'flow' and 'mask' have the same mask! */
2abf78ff 921static bool
3016f3e4
JR
922miniflow_and_mask_matches_miniflow(const struct miniflow *flow,
923 const struct minimask *mask,
924 const struct miniflow *target)
2abf78ff 925{
3016f3e4
JR
926 const uint32_t *flowp = miniflow_get_u32_values(flow);
927 const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
2abf78ff
JR
928 uint32_t target_u32;
929
3016f3e4 930 MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) {
2abf78ff
JR
931 if ((*flowp++ ^ target_u32) & *maskp++) {
932 return false;
933 }
934 }
935
936 return true;
937}
938
627fb667 939static inline struct cls_match *
2abf78ff
JR
940find_match_miniflow(const struct cls_subtable *subtable,
941 const struct miniflow *flow,
942 uint32_t hash)
943{
627fb667 944 struct cls_match *rule;
2abf78ff 945
f2c21402 946 CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) {
3016f3e4
JR
947 if (miniflow_and_mask_matches_miniflow(&rule->flow, &subtable->mask,
948 flow)) {
2abf78ff
JR
949 return rule;
950 }
951 }
952
953 return NULL;
954}
955
b7648634
EJ
956/* For each miniflow in 'flows' performs a classifier lookup writing the result
957 * into the corresponding slot in 'rules'. If a particular entry in 'flows' is
958 * NULL it is skipped.
2abf78ff 959 *
b7648634
EJ
960 * This function is optimized for use in the userspace datapath and therefore
961 * does not implement a lot of features available in the standard
962 * classifier_lookup() function. Specifically, it does not implement
963 * priorities, instead returning any rule which matches the flow. */
964void
965classifier_lookup_miniflow_batch(const struct classifier *cls_,
966 const struct miniflow **flows,
967 struct cls_rule **rules, size_t len)
2abf78ff 968{
cabd4c43 969 struct cls_classifier *cls = cls_->cls;
2abf78ff 970 struct cls_subtable *subtable;
b7648634 971 size_t i, begin = 0;
2abf78ff 972
b7648634 973 memset(rules, 0, len * sizeof *rules);
fe7cfa5c 974 PVECTOR_FOR_EACH (subtable, &cls->subtables) {
b7648634
EJ
975 for (i = begin; i < len; i++) {
976 struct cls_match *match;
977 uint32_t hash;
2abf78ff 978
b7648634
EJ
979 if (OVS_UNLIKELY(rules[i] || !flows[i])) {
980 continue;
981 }
982
983 hash = miniflow_hash_in_minimask(flows[i], &subtable->mask, 0);
984 match = find_match_miniflow(subtable, flows[i], hash);
985 if (OVS_UNLIKELY(match)) {
986 rules[i] = match->cls_rule;
987 }
2abf78ff 988 }
2abf78ff 989
b7648634
EJ
990 while (begin < len && (rules[begin] || !flows[begin])) {
991 begin++;
992 }
993 if (begin >= len) {
994 break;
995 }
996 }
2abf78ff
JR
997}
998
b5d97350
BP
999/* Finds and returns a rule in 'cls' with exactly the same priority and
1000 * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't
c084ce1d 1001 * contain an exact match. */
064af421 1002struct cls_rule *
cabd4c43 1003classifier_find_rule_exactly(const struct classifier *cls_,
76ecc721 1004 const struct cls_rule *target)
064af421 1005{
cabd4c43 1006 struct cls_classifier *cls = cls_->cls;
627fb667 1007 struct cls_match *head, *rule;
03868246 1008 struct cls_subtable *subtable;
064af421 1009
03868246
JR
1010 subtable = find_subtable(cls, &target->match.mask);
1011 if (!subtable) {
b5d97350 1012 return NULL;
064af421
BP
1013 }
1014
4d935a6b 1015 /* Skip if there is no hope. */
03868246 1016 if (target->priority > subtable->max_priority) {
4d935a6b
JR
1017 return NULL;
1018 }
1019
03868246 1020 head = find_equal(subtable, &target->match.flow,
5cb7a798
BP
1021 miniflow_hash_in_minimask(&target->match.flow,
1022 &target->match.mask, 0));
b5d97350
BP
1023 FOR_EACH_RULE_IN_LIST (rule, head) {
1024 if (target->priority >= rule->priority) {
627fb667 1025 return target->priority == rule->priority ? rule->cls_rule : NULL;
064af421
BP
1026 }
1027 }
1028 return NULL;
1029}
1030
81a76618
BP
1031/* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
1032 * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't
1033 * contain an exact match. */
1034struct cls_rule *
1035classifier_find_match_exactly(const struct classifier *cls,
1036 const struct match *target,
1037 unsigned int priority)
1038{
1039 struct cls_rule *retval;
1040 struct cls_rule cr;
1041
1042 cls_rule_init(&cr, target, priority);
1043 retval = classifier_find_rule_exactly(cls, &cr);
48d28ac1 1044 cls_rule_destroy(&cr);
81a76618
BP
1045
1046 return retval;
1047}
1048
faa50f40
BP
1049/* Checks if 'target' would overlap any other rule in 'cls'. Two rules are
1050 * considered to overlap if both rules have the same priority and a packet
1051 * could match both. */
49bdc010 1052bool
cabd4c43 1053classifier_rule_overlaps(const struct classifier *cls_,
faa50f40 1054 const struct cls_rule *target)
49bdc010 1055{
cabd4c43 1056 struct cls_classifier *cls = cls_->cls;
03868246 1057 struct cls_subtable *subtable;
fe7cfa5c 1058 int64_t stop_at_priority = (int64_t)target->priority - 1;
49bdc010 1059
03868246 1060 /* Iterate subtables in the descending max priority order. */
fe7cfa5c
JR
1061 PVECTOR_FOR_EACH_PRIORITY (subtable, stop_at_priority, 2,
1062 sizeof(struct cls_subtable), &cls->subtables) {
5cb7a798
BP
1063 uint32_t storage[FLOW_U32S];
1064 struct minimask mask;
627fb667 1065 struct cls_match *head;
49bdc010 1066
03868246 1067 minimask_combine(&mask, &target->match.mask, &subtable->mask, storage);
f2c21402 1068 CMAP_FOR_EACH (head, cmap_node, &subtable->rules) {
627fb667 1069 struct cls_match *rule;
49bdc010 1070
b5d97350 1071 FOR_EACH_RULE_IN_LIST (rule, head) {
4d935a6b
JR
1072 if (rule->priority < target->priority) {
1073 break; /* Rules in descending priority order. */
1074 }
faa50f40 1075 if (rule->priority == target->priority
5cb7a798 1076 && miniflow_equal_in_minimask(&target->match.flow,
3016f3e4 1077 &rule->flow, &mask)) {
49bdc010
JP
1078 return true;
1079 }
1080 }
1081 }
1082 }
1083
1084 return false;
1085}
6ceeaa92
BP
1086
1087/* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more
1088 * specific than 'criteria'. That is, 'rule' matches 'criteria' and this
1089 * function returns true if, for every field:
1090 *
1091 * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the
1092 * field, or
1093 *
1094 * - 'criteria' wildcards the field,
1095 *
1096 * Conversely, 'rule' does not match 'criteria' and this function returns false
1097 * if, for at least one field:
1098 *
1099 * - 'criteria' and 'rule' specify different values for the field, or
1100 *
1101 * - 'criteria' specifies a value for the field but 'rule' wildcards it.
1102 *
1103 * Equivalently, the truth table for whether a field matches is:
1104 *
1105 * rule
1106 *
1107 * c wildcard exact
1108 * r +---------+---------+
1109 * i wild | yes | yes |
1110 * t card | | |
1111 * e +---------+---------+
1112 * r exact | no |if values|
1113 * i | |are equal|
1114 * a +---------+---------+
1115 *
1116 * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD
1117 * commands and by OpenFlow 1.0 aggregate and flow stats.
1118 *
81a76618 1119 * Ignores rule->priority. */
6ceeaa92
BP
1120bool
1121cls_rule_is_loose_match(const struct cls_rule *rule,
5cb7a798 1122 const struct minimatch *criteria)
6ceeaa92 1123{
5cb7a798
BP
1124 return (!minimask_has_extra(&rule->match.mask, &criteria->mask)
1125 && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow,
1126 &criteria->mask));
6ceeaa92 1127}
b5d97350 1128\f
5ecc9d81
BP
1129/* Iteration. */
1130
1131static bool
627fb667 1132rule_matches(const struct cls_match *rule, const struct cls_rule *target)
5ecc9d81
BP
1133{
1134 return (!target
3016f3e4 1135 || miniflow_equal_in_minimask(&rule->flow,
5cb7a798
BP
1136 &target->match.flow,
1137 &target->match.mask));
5ecc9d81
BP
1138}
1139
627fb667 1140static struct cls_match *
03868246 1141search_subtable(const struct cls_subtable *subtable,
f2c21402 1142 struct cls_cursor *cursor)
5ecc9d81 1143{
f2c21402
JR
1144 if (!cursor->target
1145 || !minimask_has_extra(&subtable->mask, &cursor->target->match.mask)) {
627fb667 1146 struct cls_match *rule;
5ecc9d81 1147
f2c21402
JR
1148 CMAP_CURSOR_FOR_EACH (rule, cmap_node, &cursor->rules,
1149 &subtable->rules) {
1150 if (rule_matches(rule, cursor->target)) {
5ecc9d81
BP
1151 return rule;
1152 }
1153 }
1154 }
1155 return NULL;
1156}
1157
5f0476ce
JR
1158/* Initializes 'cursor' for iterating through rules in 'cls', and returns the
1159 * first matching cls_rule via '*pnode', or NULL if there are no matches.
5ecc9d81 1160 *
6ceeaa92 1161 * - If 'target' is null, the cursor will visit every rule in 'cls'.
5ecc9d81 1162 *
6ceeaa92
BP
1163 * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls'
1164 * such that cls_rule_is_loose_match(rule, target) returns true.
5ecc9d81 1165 *
6ceeaa92 1166 * Ignores target->priority. */
5f0476ce
JR
1167struct cls_cursor cls_cursor_init(const struct classifier *cls,
1168 const struct cls_rule *target,
1169 void **pnode, const void *offset, bool safe)
1170 OVS_NO_THREAD_SAFETY_ANALYSIS
5ecc9d81 1171{
5f0476ce 1172 struct cls_cursor cursor;
03868246 1173 struct cls_subtable *subtable;
5f0476ce 1174 struct cls_rule *cls_rule = NULL;
5ecc9d81 1175
5f0476ce
JR
1176 cursor.safe = safe;
1177 cursor.cls = cls;
1178 cursor.target = target && !cls_rule_is_catchall(target) ? target : NULL;
1179
1180 /* Find first rule. */
1181 fat_rwlock_rdlock(&cursor.cls->rwlock);
1182 CMAP_CURSOR_FOR_EACH (subtable, cmap_node, &cursor.subtables,
1183 &cursor.cls->cls->subtables_map) {
1184 struct cls_match *rule = search_subtable(subtable, &cursor);
f2c21402 1185
5ecc9d81 1186 if (rule) {
5f0476ce
JR
1187 cursor.subtable = subtable;
1188 cls_rule = rule->cls_rule;
1189 break;
5ecc9d81
BP
1190 }
1191 }
5f0476ce 1192 *pnode = (char *)cls_rule + (ptrdiff_t)offset;
5ecc9d81 1193
5f0476ce
JR
1194 /* Leave locked if requested and have a rule. */
1195 if (safe || !cls_rule) {
1196 fat_rwlock_unlock(&cls->rwlock);
1197 }
1198 return cursor;
1199}
1200
1201static void
1202cls_cursor_next_unlock(struct cls_cursor *cursor, struct cls_rule *rule)
1203 OVS_NO_THREAD_SAFETY_ANALYSIS
1204{
1205 /* Release the lock if no rule, or 'safe' mode. */
1206 if (!rule || cursor->safe) {
1207 fat_rwlock_unlock(&cursor->cls->rwlock);
1208 }
5ecc9d81
BP
1209}
1210
1211/* Returns the next matching cls_rule in 'cursor''s iteration, or a null
1212 * pointer if there are no more matches. */
1213struct cls_rule *
9850cd0f 1214cls_cursor_next(struct cls_cursor *cursor, const struct cls_rule *rule_)
5f0476ce 1215 OVS_NO_THREAD_SAFETY_ANALYSIS
5ecc9d81 1216{
627fb667 1217 struct cls_match *rule = CONST_CAST(struct cls_match *, rule_->cls_match);
03868246 1218 const struct cls_subtable *subtable;
627fb667 1219 struct cls_match *next;
5ecc9d81 1220
5f0476ce
JR
1221 /* Lock if not locked already. */
1222 if (cursor->safe) {
1223 fat_rwlock_rdlock(&cursor->cls->rwlock);
1224 }
1225
955f579d
BP
1226 next = next_rule_in_list__(rule);
1227 if (next->priority < rule->priority) {
5f0476ce 1228 cls_cursor_next_unlock(cursor, next->cls_rule);
627fb667 1229 return next->cls_rule;
5ecc9d81
BP
1230 }
1231
955f579d 1232 /* 'next' is the head of the list, that is, the rule that is included in
f2c21402 1233 * the subtable's map. (This is important when the classifier contains
03868246 1234 * rules that differ only in priority.) */
955f579d 1235 rule = next;
f2c21402 1236 CMAP_CURSOR_FOR_EACH_CONTINUE (rule, cmap_node, &cursor->rules) {
5ecc9d81 1237 if (rule_matches(rule, cursor->target)) {
5f0476ce 1238 cls_cursor_next_unlock(cursor, rule->cls_rule);
627fb667 1239 return rule->cls_rule;
5ecc9d81
BP
1240 }
1241 }
1242
03868246 1243 subtable = cursor->subtable;
f2c21402
JR
1244 CMAP_CURSOR_FOR_EACH_CONTINUE (subtable, cmap_node, &cursor->subtables) {
1245 rule = search_subtable(subtable, cursor);
5ecc9d81 1246 if (rule) {
03868246 1247 cursor->subtable = subtable;
5f0476ce 1248 cls_cursor_next_unlock(cursor, rule->cls_rule);
627fb667 1249 return rule->cls_rule;
5ecc9d81
BP
1250 }
1251 }
1252
5f0476ce 1253 fat_rwlock_unlock(&cursor->cls->rwlock);
5ecc9d81
BP
1254 return NULL;
1255}
1256\f
03868246 1257static struct cls_subtable *
cabd4c43 1258find_subtable(const struct cls_classifier *cls, const struct minimask *mask)
b5d97350 1259{
03868246 1260 struct cls_subtable *subtable;
064af421 1261
f2c21402 1262 CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, minimask_hash(mask, 0),
5a87054c 1263 &cls->subtables_map) {
03868246
JR
1264 if (minimask_equal(mask, &subtable->mask)) {
1265 return subtable;
064af421
BP
1266 }
1267 }
b5d97350 1268 return NULL;
064af421 1269}
064af421 1270
03868246 1271static struct cls_subtable *
cabd4c43 1272insert_subtable(struct cls_classifier *cls, const struct minimask *mask)
b5d97350 1273{
c906cedf 1274 uint32_t hash = minimask_hash(mask, 0);
03868246 1275 struct cls_subtable *subtable;
476f36e8
JR
1276 int i, index = 0;
1277 struct flow_wildcards old, new;
1278 uint8_t prev;
3016f3e4 1279 int count = count_1bits(mask->masks.map);
064af421 1280
3016f3e4
JR
1281 subtable = xzalloc(sizeof *subtable - sizeof mask->masks.inline_values
1282 + MINIFLOW_VALUES_SIZE(count));
f2c21402 1283 cmap_init(&subtable->rules);
3016f3e4 1284 miniflow_clone_inline(&subtable->mask.masks, &mask->masks, count);
476f36e8
JR
1285
1286 /* Init indices for segmented lookup, if any. */
1287 flow_wildcards_init_catchall(&new);
1288 old = new;
1289 prev = 0;
1290 for (i = 0; i < cls->n_flow_segments; i++) {
1291 flow_wildcards_fold_minimask_range(&new, mask, prev,
1292 cls->flow_segments[i]);
1293 /* Add an index if it adds mask bits. */
1294 if (!flow_wildcards_equal(&new, &old)) {
f2c21402 1295 cmap_init(&subtable->indices[index]);
476f36e8
JR
1296 subtable->index_ofs[index] = cls->flow_segments[i];
1297 index++;
1298 old = new;
1299 }
1300 prev = cls->flow_segments[i];
1301 }
1302 /* Check if the rest of the subtable's mask adds any bits,
1303 * and remove the last index if it doesn't. */
1304 if (index > 0) {
1305 flow_wildcards_fold_minimask_range(&new, mask, prev, FLOW_U32S);
1306 if (flow_wildcards_equal(&new, &old)) {
1307 --index;
1308 subtable->index_ofs[index] = 0;
f2c21402 1309 cmap_destroy(&subtable->indices[index]);
476f36e8
JR
1310 }
1311 }
1312 subtable->n_indices = index;
1313
03868246
JR
1314 subtable->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX
1315 ? tag_create_deterministic(hash)
1316 : TAG_ALL);
064af421 1317
13751fd8
JR
1318 for (i = 0; i < cls->n_tries; i++) {
1319 subtable->trie_plen[i] = minimask_get_prefix_len(mask,
1320 cls->tries[i].field);
1321 }
1322
69d6040e
JR
1323 /* Ports trie. */
1324 subtable->ports_trie = NULL;
1325 subtable->ports_mask_len
1326 = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src)));
1327
f2c21402 1328 cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash);
ec988646 1329
03868246 1330 return subtable;
064af421
BP
1331}
1332
b5d97350 1333static void
cabd4c43 1334destroy_subtable(struct cls_classifier *cls, struct cls_subtable *subtable)
b5d97350 1335{
476f36e8
JR
1336 int i;
1337
fe7cfa5c 1338 pvector_remove(&cls->subtables, subtable);
69d6040e
JR
1339 trie_destroy(subtable->ports_trie);
1340
476f36e8 1341 for (i = 0; i < subtable->n_indices; i++) {
f2c21402 1342 cmap_destroy(&subtable->indices[i]);
476f36e8 1343 }
f2c21402
JR
1344 cmap_remove(&cls->subtables_map, &subtable->cmap_node,
1345 minimask_hash(&subtable->mask, 0));
fe7cfa5c 1346 minimask_destroy(&subtable->mask);
f2c21402 1347 cmap_destroy(&subtable->rules);
fe7cfa5c 1348 ovsrcu_postpone(free, subtable);
4aacd02d
BP
1349}
1350
13751fd8
JR
1351struct range {
1352 uint8_t start;
1353 uint8_t end;
1354};
1355
1356/* Return 'true' if can skip rest of the subtable based on the prefix trie
1357 * lookup results. */
1358static inline bool
1359check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
1360 const unsigned int field_plen[CLS_MAX_TRIES],
1361 const struct range ofs, const struct flow *flow,
1362 struct flow_wildcards *wc)
1363{
1364 int j;
1365
1366 /* Check if we could avoid fully unwildcarding the next level of
1367 * fields using the prefix tries. The trie checks are done only as
1368 * needed to avoid folding in additional bits to the wildcards mask. */
1369 for (j = 0; j < n_tries; j++) {
1370 /* Is the trie field relevant for this subtable? */
1371 if (field_plen[j]) {
1372 struct trie_ctx *ctx = &trie_ctx[j];
1373 uint8_t be32ofs = ctx->be32ofs;
1374
1375 /* Is the trie field within the current range of fields? */
1376 if (be32ofs >= ofs.start && be32ofs < ofs.end) {
1377 /* On-demand trie lookup. */
1378 if (!ctx->lookup_done) {
1379 ctx->match_plen = trie_lookup(ctx->trie, flow,
1380 &ctx->maskbits);
1381 ctx->lookup_done = true;
1382 }
1383 /* Possible to skip the rest of the subtable if subtable's
1384 * prefix on the field is longer than what is known to match
1385 * based on the trie lookup. */
1386 if (field_plen[j] > ctx->match_plen) {
1387 /* RFC: We want the trie lookup to never result in
1388 * unwildcarding any bits that would not be unwildcarded
1389 * otherwise. Since the trie is shared by the whole
1390 * classifier, it is possible that the 'maskbits' contain
1391 * bits that are irrelevant for the partition of the
1392 * classifier relevant for the current flow. */
1393
1394 /* Can skip if the field is already unwildcarded. */
1395 if (mask_prefix_bits_set(wc, be32ofs, ctx->maskbits)) {
1396 return true;
1397 }
1398 /* Check that the trie result will not unwildcard more bits
1399 * than this stage will. */
1400 if (ctx->maskbits <= field_plen[j]) {
1401 /* Unwildcard the bits and skip the rest. */
1402 mask_set_prefix_bits(wc, be32ofs, ctx->maskbits);
1403 /* Note: Prerequisite already unwildcarded, as the only
1404 * prerequisite of the supported trie lookup fields is
1405 * the ethertype, which is currently always
1406 * unwildcarded.
1407 */
1408 return true;
1409 }
1410 }
1411 }
1412 }
1413 }
1414 return false;
1415}
1416
3016f3e4
JR
1417/* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
1418 * for which 'flow', for which 'mask' has a bit set, specifies a particular
1419 * value has the correct value in 'target'.
1420 *
1421 * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
a64759f0
JR
1422 * target, mask) but this is faster because of the invariant that
1423 * flow->map and mask->masks.map are the same, and that this version
1424 * takes the 'wc'. */
3016f3e4
JR
1425static inline bool
1426miniflow_and_mask_matches_flow(const struct miniflow *flow,
1427 const struct minimask *mask,
e9319757 1428 const struct flow *target)
3016f3e4
JR
1429{
1430 const uint32_t *flowp = miniflow_get_u32_values(flow);
1431 const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
a64759f0 1432 uint32_t idx;
3016f3e4 1433
a64759f0
JR
1434 MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
1435 uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & *maskp++;
1436
1437 if (diff) {
3016f3e4
JR
1438 return false;
1439 }
1440 }
1441
1442 return true;
1443}
1444
627fb667 1445static inline struct cls_match *
476f36e8
JR
1446find_match(const struct cls_subtable *subtable, const struct flow *flow,
1447 uint32_t hash)
b5d97350 1448{
627fb667 1449 struct cls_match *rule;
b5d97350 1450
f2c21402 1451 CMAP_FOR_EACH_WITH_HASH (rule, cmap_node, hash, &subtable->rules) {
3016f3e4 1452 if (miniflow_and_mask_matches_flow(&rule->flow, &subtable->mask,
e9319757 1453 flow)) {
b5d97350 1454 return rule;
064af421
BP
1455 }
1456 }
c23740be 1457
064af421
BP
1458 return NULL;
1459}
1460
e9319757
JR
1461/* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
1462 * for which 'flow', for which 'mask' has a bit set, specifies a particular
1463 * value has the correct value in 'target'.
1464 *
1465 * This function is equivalent to miniflow_and_mask_matches_flow() but this
1466 * version fills in the mask bits in 'wc'. */
1467static inline bool
1468miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
1469 const struct minimask *mask,
1470 const struct flow *target,
1471 struct flow_wildcards *wc)
1472{
1473 const uint32_t *flowp = miniflow_get_u32_values(flow);
1474 const uint32_t *maskp = miniflow_get_u32_values(&mask->masks);
1475 uint32_t idx;
1476
1477 MAP_FOR_EACH_INDEX(idx, mask->masks.map) {
1478 uint32_t mask = *maskp++;
1479 uint32_t diff = (*flowp++ ^ flow_u32_value(target, idx)) & mask;
1480
1481 if (diff) {
1482 /* Only unwildcard if none of the differing bits is already
1483 * exact-matched. */
1484 if (!(flow_u32_value(&wc->masks, idx) & diff)) {
1485 /* Keep one bit of the difference. */
1486 *flow_u32_lvalue(&wc->masks, idx) |= rightmost_1bit(diff);
1487 }
1488 return false;
1489 }
1490 /* Fill in the bits that were looked at. */
1491 *flow_u32_lvalue(&wc->masks, idx) |= mask;
1492 }
1493
1494 return true;
1495}
1496
386cb9f7
JR
1497/* Unwildcard the fields looked up so far, if any. */
1498static void
1499fill_range_wc(const struct cls_subtable *subtable, struct flow_wildcards *wc,
1500 uint8_t to)
1501{
1502 if (to) {
1503 flow_wildcards_fold_minimask_range(wc, &subtable->mask, 0, to);
1504 }
1505}
1506
627fb667 1507static struct cls_match *
476f36e8 1508find_match_wc(const struct cls_subtable *subtable, const struct flow *flow,
13751fd8
JR
1509 struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
1510 struct flow_wildcards *wc)
476f36e8
JR
1511{
1512 uint32_t basis = 0, hash;
a64759f0 1513 struct cls_match *rule;
476f36e8 1514 int i;
13751fd8 1515 struct range ofs;
476f36e8 1516
ec988646 1517 if (OVS_UNLIKELY(!wc)) {
476f36e8
JR
1518 return find_match(subtable, flow,
1519 flow_hash_in_minimask(flow, &subtable->mask, 0));
1520 }
1521
13751fd8 1522 ofs.start = 0;
476f36e8
JR
1523 /* Try to finish early by checking fields in segments. */
1524 for (i = 0; i < subtable->n_indices; i++) {
f2c21402
JR
1525 struct cmap_node *inode;
1526
13751fd8 1527 ofs.end = subtable->index_ofs[i];
476f36e8 1528
13751fd8
JR
1529 if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow,
1530 wc)) {
386cb9f7
JR
1531 /* 'wc' bits for the trie field set, now unwildcard the preceding
1532 * bits used so far. */
1533 fill_range_wc(subtable, wc, ofs.start);
1534 return NULL;
13751fd8
JR
1535 }
1536 hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
1537 ofs.end, &basis);
f2c21402 1538 inode = cmap_find(&subtable->indices[i], hash);
476f36e8 1539 if (!inode) {
386cb9f7
JR
1540 /* No match, can stop immediately, but must fold in the bits
1541 * used in lookup so far. */
1542 fill_range_wc(subtable, wc, ofs.end);
1543 return NULL;
476f36e8
JR
1544 }
1545
1546 /* If we have narrowed down to a single rule already, check whether
a64759f0 1547 * that rule matches. Either way, we're done.
476f36e8
JR
1548 *
1549 * (Rare) hash collisions may cause us to miss the opportunity for this
1550 * optimization. */
f2c21402 1551 if (!cmap_node_next(inode)) {
476f36e8 1552 ASSIGN_CONTAINER(rule, inode - i, index_nodes);
e9319757
JR
1553 if (miniflow_and_mask_matches_flow_wc(&rule->flow, &subtable->mask,
1554 flow, wc)) {
1555 return rule;
476f36e8 1556 }
e9319757 1557 return NULL;
476f36e8 1558 }
386cb9f7 1559 ofs.start = ofs.end;
476f36e8 1560 }
13751fd8
JR
1561 ofs.end = FLOW_U32S;
1562 /* Trie check for the final range. */
1563 if (check_tries(trie_ctx, n_tries, subtable->trie_plen, ofs, flow, wc)) {
386cb9f7
JR
1564 fill_range_wc(subtable, wc, ofs.start);
1565 return NULL;
13751fd8 1566 }
a64759f0
JR
1567 hash = flow_hash_in_minimask_range(flow, &subtable->mask, ofs.start,
1568 ofs.end, &basis);
1569 rule = find_match(subtable, flow, hash);
69d6040e
JR
1570 if (!rule && subtable->ports_mask_len) {
1571 /* Ports are always part of the final range, if any.
1572 * No match was found for the ports. Use the ports trie to figure out
1573 * which ports bits to unwildcard. */
1574 unsigned int mbits;
1575 ovs_be32 value, mask;
1576
1577 mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);
1578 value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;
f0e5aa11 1579 trie_lookup_value(subtable->ports_trie, &value, 32, &mbits);
69d6040e
JR
1580
1581 ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
1582 mask & htonl(~0 << (32 - mbits));
1583
386cb9f7
JR
1584 /* Unwildcard all bits in the mask upto the ports, as they were used
1585 * to determine there is no match. */
1586 fill_range_wc(subtable, wc, TP_PORTS_OFS32);
1587 return NULL;
69d6040e 1588 }
e9319757 1589
13751fd8 1590 /* Must unwildcard all the fields, as they were looked at. */
476f36e8
JR
1591 flow_wildcards_fold_minimask(wc, &subtable->mask);
1592 return rule;
1593}
1594
627fb667 1595static struct cls_match *
03868246
JR
1596find_equal(struct cls_subtable *subtable, const struct miniflow *flow,
1597 uint32_t hash)
064af421 1598{
627fb667 1599 struct cls_match *head;
064af421 1600
f2c21402 1601 CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {
3016f3e4 1602 if (miniflow_equal(&head->flow, flow)) {
b5d97350 1603 return head;
064af421
BP
1604 }
1605 }
1606 return NULL;
1607}
1608
627fb667 1609static struct cls_match *
cabd4c43 1610insert_rule(struct cls_classifier *cls, struct cls_subtable *subtable,
f2c21402 1611 struct cls_rule *new_rule)
064af421 1612{
627fb667 1613 struct cls_match *old = NULL;
f2c21402
JR
1614 struct cls_match *new = cls_match_alloc(new_rule);
1615 struct cls_match *head;
476f36e8 1616 int i;
f2c21402 1617 uint32_t basis = 0, hash, ihash[CLS_MAX_INDICES];
13751fd8 1618 uint8_t prev_be32ofs = 0;
476f36e8
JR
1619
1620 /* Add new node to segment indices. */
1621 for (i = 0; i < subtable->n_indices; i++) {
f2c21402
JR
1622 ihash[i] = minimatch_hash_range(&new_rule->match, prev_be32ofs,
1623 subtable->index_ofs[i], &basis);
1624 cmap_insert(&subtable->indices[i], &new->index_nodes[i], ihash[i]);
13751fd8 1625 prev_be32ofs = subtable->index_ofs[i];
476f36e8 1626 }
f2c21402
JR
1627 hash = minimatch_hash_range(&new_rule->match, prev_be32ofs, FLOW_U32S,
1628 &basis);
1629 head = find_equal(subtable, &new_rule->match.flow, hash);
b5d97350 1630 if (!head) {
f2c21402
JR
1631 cmap_insert(&subtable->rules, &new->cmap_node, hash);
1632 list_init(&new->list);
4aacd02d 1633 goto out;
b5d97350
BP
1634 } else {
1635 /* Scan the list for the insertion point that will keep the list in
1636 * order of decreasing priority. */
627fb667 1637 struct cls_match *rule;
476f36e8 1638
b5d97350 1639 FOR_EACH_RULE_IN_LIST (rule, head) {
f2c21402 1640 if (new->priority >= rule->priority) {
b5d97350 1641 if (rule == head) {
f2c21402
JR
1642 /* 'new' is the new highest-priority flow in the list. */
1643 cmap_replace(&subtable->rules, &rule->cmap_node,
1644 &new->cmap_node, hash);
b5d97350 1645 }
064af421 1646
f2c21402
JR
1647 if (new->priority == rule->priority) {
1648 list_replace(&new->list, &rule->list);
4aacd02d 1649 old = rule;
b5d97350 1650 } else {
f2c21402 1651 list_insert(&rule->list, &new->list);
b5d97350 1652 }
fe7cfa5c 1653 goto out;
b5d97350
BP
1654 }
1655 }
064af421 1656
b5d97350 1657 /* Insert 'new' at the end of the list. */
f2c21402 1658 list_push_back(&head->list, &new->list);
064af421 1659 }
4aacd02d
BP
1660
1661 out:
1662 if (!old) {
fe7cfa5c
JR
1663 subtable->n_rules++;
1664
1665 /* Rule was added, not replaced. Update 'subtable's 'max_priority'
1666 * and 'max_count', if necessary. */
1667 if (subtable->n_rules == 1) {
f2c21402 1668 subtable->max_priority = new->priority;
fe7cfa5c 1669 subtable->max_count = 1;
f2c21402
JR
1670 pvector_insert(&cls->subtables, subtable, new->priority);
1671 } else if (subtable->max_priority == new->priority) {
fe7cfa5c 1672 ++subtable->max_count;
f2c21402
JR
1673 } else if (new->priority > subtable->max_priority) {
1674 subtable->max_priority = new->priority;
fe7cfa5c 1675 subtable->max_count = 1;
f2c21402 1676 pvector_change_priority(&cls->subtables, subtable, new->priority);
fe7cfa5c 1677 }
476f36e8
JR
1678 } else {
1679 /* Remove old node from indices. */
1680 for (i = 0; i < subtable->n_indices; i++) {
f2c21402 1681 cmap_remove(&subtable->indices[i], &old->index_nodes[i], ihash[i]);
476f36e8 1682 }
4aacd02d
BP
1683 }
1684 return old;
064af421
BP
1685}
1686
627fb667
JR
1687static struct cls_match *
1688next_rule_in_list__(struct cls_match *rule)
064af421 1689{
627fb667 1690 struct cls_match *next = OBJECT_CONTAINING(rule->list.next, next, list);
955f579d
BP
1691 return next;
1692}
1693
627fb667
JR
1694static struct cls_match *
1695next_rule_in_list(struct cls_match *rule)
955f579d 1696{
627fb667 1697 struct cls_match *next = next_rule_in_list__(rule);
b5d97350 1698 return next->priority < rule->priority ? next : NULL;
064af421 1699}
13751fd8
JR
1700\f
1701/* A longest-prefix match tree. */
1702struct trie_node {
1703 uint32_t prefix; /* Prefix bits for this node, MSB first. */
c30cfa6b 1704 uint8_t n_bits; /* Never zero, except for the root node. */
13751fd8
JR
1705 unsigned int n_rules; /* Number of rules that have this prefix. */
1706 struct trie_node *edges[2]; /* Both NULL if leaf. */
1707};
1708
1709/* Max bits per node. Must fit in struct trie_node's 'prefix'.
1710 * Also tested with 16, 8, and 5 to stress the implementation. */
1711#define TRIE_PREFIX_BITS 32
1712
1713/* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'.
1714 * Prefixes are in the network byte order, and the offset 0 corresponds to
1715 * the most significant bit of the first byte. The offset can be read as
1716 * "how many bits to skip from the start of the prefix starting at 'pr'". */
1717static uint32_t
1718raw_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
1719{
1720 uint32_t prefix;
1721
1722 pr += ofs / 32; /* Where to start. */
1723 ofs %= 32; /* How many bits to skip at 'pr'. */
1724
1725 prefix = ntohl(*pr) << ofs; /* Get the first 32 - ofs bits. */
1726 if (plen > 32 - ofs) { /* Need more than we have already? */
1727 prefix |= ntohl(*++pr) >> (32 - ofs);
1728 }
1729 /* Return with possible unwanted bits at the end. */
1730 return prefix;
1731}
1732
1733/* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit
1734 * offset 'ofs'. Prefixes are in the network byte order, and the offset 0
1735 * corresponds to the most significant bit of the first byte. The offset can
1736 * be read as "how many bits to skip from the start of the prefix starting at
1737 * 'pr'". */
1738static uint32_t
1739trie_get_prefix(const ovs_be32 pr[], unsigned int ofs, unsigned int plen)
1740{
1741 if (!plen) {
1742 return 0;
1743 }
1744 if (plen > TRIE_PREFIX_BITS) {
1745 plen = TRIE_PREFIX_BITS; /* Get at most TRIE_PREFIX_BITS. */
1746 }
1747 /* Return with unwanted bits cleared. */
1748 return raw_get_prefix(pr, ofs, plen) & ~0u << (32 - plen);
1749}
1750
c30cfa6b 1751/* Return the number of equal bits in 'n_bits' of 'prefix's MSBs and a 'value'
13751fd8
JR
1752 * starting at "MSB 0"-based offset 'ofs'. */
1753static unsigned int
c30cfa6b 1754prefix_equal_bits(uint32_t prefix, unsigned int n_bits, const ovs_be32 value[],
13751fd8
JR
1755 unsigned int ofs)
1756{
c30cfa6b 1757 uint64_t diff = prefix ^ raw_get_prefix(value, ofs, n_bits);
13751fd8 1758 /* Set the bit after the relevant bits to limit the result. */
c30cfa6b 1759 return raw_clz64(diff << 32 | UINT64_C(1) << (63 - n_bits));
13751fd8
JR
1760}
1761
1762/* Return the number of equal bits in 'node' prefix and a 'prefix' of length
1763 * 'plen', starting at "MSB 0"-based offset 'ofs'. */
1764static unsigned int
1765trie_prefix_equal_bits(const struct trie_node *node, const ovs_be32 prefix[],
1766 unsigned int ofs, unsigned int plen)
1767{
c30cfa6b 1768 return prefix_equal_bits(node->prefix, MIN(node->n_bits, plen - ofs),
13751fd8
JR
1769 prefix, ofs);
1770}
1771
1772/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can
1773 * be greater than 31. */
1774static unsigned int
1775be_get_bit_at(const ovs_be32 value[], unsigned int ofs)
1776{
1777 return (((const uint8_t *)value)[ofs / 8] >> (7 - ofs % 8)) & 1u;
1778}
1779
1780/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' must
1781 * be between 0 and 31, inclusive. */
1782static unsigned int
1783get_bit_at(const uint32_t prefix, unsigned int ofs)
1784{
1785 return (prefix >> (31 - ofs)) & 1u;
1786}
1787
1788/* Create new branch. */
1789static struct trie_node *
1790trie_branch_create(const ovs_be32 *prefix, unsigned int ofs, unsigned int plen,
1791 unsigned int n_rules)
1792{
1793 struct trie_node *node = xmalloc(sizeof *node);
1794
1795 node->prefix = trie_get_prefix(prefix, ofs, plen);
1796
1797 if (plen <= TRIE_PREFIX_BITS) {
c30cfa6b 1798 node->n_bits = plen;
13751fd8
JR
1799 node->edges[0] = NULL;
1800 node->edges[1] = NULL;
1801 node->n_rules = n_rules;
1802 } else { /* Need intermediate nodes. */
1803 struct trie_node *subnode = trie_branch_create(prefix,
1804 ofs + TRIE_PREFIX_BITS,
1805 plen - TRIE_PREFIX_BITS,
1806 n_rules);
1807 int bit = get_bit_at(subnode->prefix, 0);
c30cfa6b 1808 node->n_bits = TRIE_PREFIX_BITS;
13751fd8
JR
1809 node->edges[bit] = subnode;
1810 node->edges[!bit] = NULL;
1811 node->n_rules = 0;
1812 }
1813 return node;
1814}
1815
1816static void
1817trie_node_destroy(struct trie_node *node)
1818{
1819 free(node);
1820}
1821
1822static void
1823trie_destroy(struct trie_node *node)
1824{
1825 if (node) {
1826 trie_destroy(node->edges[0]);
1827 trie_destroy(node->edges[1]);
1828 free(node);
1829 }
1830}
1831
1832static bool
1833trie_is_leaf(const struct trie_node *trie)
1834{
1835 return !trie->edges[0] && !trie->edges[1]; /* No children. */
1836}
1837
1838static void
1839mask_set_prefix_bits(struct flow_wildcards *wc, uint8_t be32ofs,
c30cfa6b 1840 unsigned int n_bits)
13751fd8
JR
1841{
1842 ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
1843 unsigned int i;
1844
c30cfa6b 1845 for (i = 0; i < n_bits / 32; i++) {
13751fd8
JR
1846 mask[i] = OVS_BE32_MAX;
1847 }
c30cfa6b
JR
1848 if (n_bits % 32) {
1849 mask[i] |= htonl(~0u << (32 - n_bits % 32));
13751fd8
JR
1850 }
1851}
1852
1853static bool
1854mask_prefix_bits_set(const struct flow_wildcards *wc, uint8_t be32ofs,
c30cfa6b 1855 unsigned int n_bits)
13751fd8
JR
1856{
1857 ovs_be32 *mask = &((ovs_be32 *)&wc->masks)[be32ofs];
1858 unsigned int i;
1859 ovs_be32 zeroes = 0;
1860
c30cfa6b 1861 for (i = 0; i < n_bits / 32; i++) {
13751fd8
JR
1862 zeroes |= ~mask[i];
1863 }
c30cfa6b
JR
1864 if (n_bits % 32) {
1865 zeroes |= ~mask[i] & htonl(~0u << (32 - n_bits % 32));
13751fd8
JR
1866 }
1867
c30cfa6b 1868 return !zeroes; /* All 'n_bits' bits set. */
13751fd8
JR
1869}
1870
1871static struct trie_node **
1872trie_next_edge(struct trie_node *node, const ovs_be32 value[],
1873 unsigned int ofs)
1874{
1875 return node->edges + be_get_bit_at(value, ofs);
1876}
1877
1878static const struct trie_node *
1879trie_next_node(const struct trie_node *node, const ovs_be32 value[],
1880 unsigned int ofs)
1881{
1882 return node->edges[be_get_bit_at(value, ofs)];
1883}
1884
1885/* Return the prefix mask length necessary to find the longest-prefix match for
1886 * the '*value' in the prefix tree 'node'.
1887 * '*checkbits' is set to the number of bits in the prefix mask necessary to
1888 * determine a mismatch, in case there are longer prefixes in the tree below
1889 * the one that matched.
1890 */
1891static unsigned int
1892trie_lookup_value(const struct trie_node *node, const ovs_be32 value[],
f0e5aa11 1893 unsigned int n_bits, unsigned int *checkbits)
13751fd8 1894{
197573ac 1895 unsigned int ofs = 0, match_len = 0;
13751fd8
JR
1896 const struct trie_node *prev = NULL;
1897
197573ac 1898 for (; node; prev = node, node = trie_next_node(node, value, ofs)) {
13751fd8
JR
1899 unsigned int eqbits;
1900 /* Check if this edge can be followed. */
c30cfa6b 1901 eqbits = prefix_equal_bits(node->prefix, node->n_bits, value, ofs);
197573ac 1902 ofs += eqbits;
c30cfa6b 1903 if (eqbits < node->n_bits) { /* Mismatch, nothing more to be found. */
197573ac
JR
1904 /* Bit at offset 'ofs' differed. */
1905 *checkbits = ofs + 1; /* Includes the first mismatching bit. */
13751fd8
JR
1906 return match_len;
1907 }
1908 /* Full match, check if rules exist at this prefix length. */
1909 if (node->n_rules > 0) {
197573ac 1910 match_len = ofs;
13751fd8 1911 }
f0e5aa11
JR
1912 if (ofs >= n_bits) {
1913 *checkbits = n_bits; /* Full prefix. */
1914 return match_len;
1915 }
13751fd8 1916 }
f0e5aa11
JR
1917 /* node == NULL. Full match so far, but we came to a dead end.
1918 * need to exclude the other branch if it exists. */
197573ac 1919 *checkbits = !prev || trie_is_leaf(prev) ? ofs : ofs + 1;
13751fd8
JR
1920 return match_len;
1921}
1922
1923static unsigned int
1924trie_lookup(const struct cls_trie *trie, const struct flow *flow,
1925 unsigned int *checkbits)
1926{
1927 const struct mf_field *mf = trie->field;
1928
1929 /* Check that current flow matches the prerequisites for the trie
1930 * field. Some match fields are used for multiple purposes, so we
1931 * must check that the trie is relevant for this flow. */
1932 if (mf_are_prereqs_ok(mf, flow)) {
1933 return trie_lookup_value(trie->root,
1934 &((ovs_be32 *)flow)[mf->flow_be32ofs],
f0e5aa11 1935 mf->n_bits, checkbits);
13751fd8
JR
1936 }
1937 *checkbits = 0; /* Value not used in this case. */
1938 return UINT_MAX;
1939}
1940
1941/* Returns the length of a prefix match mask for the field 'mf' in 'minimask'.
1942 * Returns the u32 offset to the miniflow data in '*miniflow_index', if
1943 * 'miniflow_index' is not NULL. */
1944static unsigned int
1945minimask_get_prefix_len(const struct minimask *minimask,
1946 const struct mf_field *mf)
1947{
c30cfa6b 1948 unsigned int n_bits = 0, mask_tz = 0; /* Non-zero when end of mask seen. */
13751fd8
JR
1949 uint8_t u32_ofs = mf->flow_be32ofs;
1950 uint8_t u32_end = u32_ofs + mf->n_bytes / 4;
1951
1952 for (; u32_ofs < u32_end; ++u32_ofs) {
1953 uint32_t mask;
1954 mask = ntohl((OVS_FORCE ovs_be32)minimask_get(minimask, u32_ofs));
1955
1956 /* Validate mask, count the mask length. */
1957 if (mask_tz) {
1958 if (mask) {
1959 return 0; /* No bits allowed after mask ended. */
1960 }
1961 } else {
1962 if (~mask & (~mask + 1)) {
1963 return 0; /* Mask not contiguous. */
1964 }
1965 mask_tz = ctz32(mask);
c30cfa6b 1966 n_bits += 32 - mask_tz;
13751fd8
JR
1967 }
1968 }
1969
c30cfa6b 1970 return n_bits;
13751fd8
JR
1971}
1972
1973/*
1974 * This is called only when mask prefix is known to be CIDR and non-zero.
1975 * Relies on the fact that the flow and mask have the same map, and since
1976 * the mask is CIDR, the storage for the flow field exists even if it
1977 * happened to be zeros.
1978 */
1979static const ovs_be32 *
1980minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
1981{
27bbe15d 1982 return miniflow_get_be32_values(&match->flow) +
13751fd8
JR
1983 count_1bits(match->flow.map & ((UINT64_C(1) << mf->flow_be32ofs) - 1));
1984}
1985
1986/* Insert rule in to the prefix tree.
1987 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
1988 * in 'rule'. */
1989static void
1990trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
1991{
69d6040e
JR
1992 trie_insert_prefix(&trie->root,
1993 minimatch_get_prefix(&rule->match, trie->field), mlen);
1994}
1995
1996static void
1997trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int mlen)
1998{
13751fd8 1999 struct trie_node *node;
13751fd8
JR
2000 int ofs = 0;
2001
2002 /* Walk the tree. */
69d6040e 2003 for (; (node = *edge) != NULL;
13751fd8
JR
2004 edge = trie_next_edge(node, prefix, ofs)) {
2005 unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
2006 ofs += eqbits;
c30cfa6b 2007 if (eqbits < node->n_bits) {
13751fd8
JR
2008 /* Mismatch, new node needs to be inserted above. */
2009 int old_branch = get_bit_at(node->prefix, eqbits);
2010
2011 /* New parent node. */
2012 *edge = trie_branch_create(prefix, ofs - eqbits, eqbits,
2013 ofs == mlen ? 1 : 0);
2014
2015 /* Adjust old node for its new position in the tree. */
2016 node->prefix <<= eqbits;
c30cfa6b 2017 node->n_bits -= eqbits;
13751fd8
JR
2018 (*edge)->edges[old_branch] = node;
2019
2020 /* Check if need a new branch for the new rule. */
2021 if (ofs < mlen) {
2022 (*edge)->edges[!old_branch]
2023 = trie_branch_create(prefix, ofs, mlen - ofs, 1);
2024 }
2025 return;
2026 }
2027 /* Full match so far. */
2028
2029 if (ofs == mlen) {
2030 /* Full match at the current node, rule needs to be added here. */
2031 node->n_rules++;
2032 return;
2033 }
2034 }
2035 /* Must insert a new tree branch for the new rule. */
2036 *edge = trie_branch_create(prefix, ofs, mlen - ofs, 1);
2037}
2038
2039/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
2040 * in 'rule'. */
2041static void
2042trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
2043{
69d6040e
JR
2044 trie_remove_prefix(&trie->root,
2045 minimatch_get_prefix(&rule->match, trie->field), mlen);
2046}
2047
2048/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
2049 * in 'rule'. */
2050static void
2051trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int mlen)
2052{
13751fd8
JR
2053 struct trie_node *node;
2054 struct trie_node **edges[sizeof(union mf_value) * 8];
2055 int depth = 0, ofs = 0;
2056
2057 /* Walk the tree. */
69d6040e 2058 for (edges[0] = root;
13751fd8
JR
2059 (node = *edges[depth]) != NULL;
2060 edges[++depth] = trie_next_edge(node, prefix, ofs)) {
2061 unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, mlen);
69d6040e 2062
c30cfa6b 2063 if (eqbits < node->n_bits) {
13751fd8
JR
2064 /* Mismatch, nothing to be removed. This should never happen, as
2065 * only rules in the classifier are ever removed. */
2066 break; /* Log a warning. */
2067 }
2068 /* Full match so far. */
2069 ofs += eqbits;
2070
2071 if (ofs == mlen) {
2072 /* Full prefix match at the current node, remove rule here. */
2073 if (!node->n_rules) {
2074 break; /* Log a warning. */
2075 }
2076 node->n_rules--;
2077
2078 /* Check if can prune the tree. */
2079 while (!node->n_rules && !(node->edges[0] && node->edges[1])) {
2080 /* No rules and at most one child node, remove this node. */
2081 struct trie_node *next;
2082 next = node->edges[0] ? node->edges[0] : node->edges[1];
2083
2084 if (next) {
c30cfa6b 2085 if (node->n_bits + next->n_bits > TRIE_PREFIX_BITS) {
13751fd8
JR
2086 break; /* Cannot combine. */
2087 }
2088 /* Combine node with next. */
c30cfa6b
JR
2089 next->prefix = node->prefix | next->prefix >> node->n_bits;
2090 next->n_bits += node->n_bits;
13751fd8
JR
2091 }
2092 trie_node_destroy(node);
2093 /* Update the parent's edge. */
2094 *edges[depth] = next;
2095 if (next || !depth) {
2096 /* Branch not pruned or at root, nothing more to do. */
2097 break;
2098 }
2099 node = *edges[--depth];
2100 }
2101 return;
2102 }
2103 }
2104 /* Cannot go deeper. This should never happen, since only rules
2105 * that actually exist in the classifier are ever removed. */
2106 VLOG_WARN("Trying to remove non-existing rule from a prefix trie.");
2107}