2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "classifier.h"
20 #include <netinet/in.h>
21 #include "byte-order.h"
22 #include "dynamic-string.h"
34 VLOG_DEFINE_THIS_MODULE(classifier
);
38 /* Ports trie depends on both ports sharing the same ovs_be32. */
39 #define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
40 BUILD_ASSERT_DECL(TP_PORTS_OFS32
== offsetof(struct flow
, tp_dst
) / 4);
42 /* A set of rules that all have the same fields wildcarded. */
44 /* The fields are only used by writers and iterators. */
45 struct cmap_node cmap_node
; /* Within struct classifier 'subtables_map'. */
47 /* The fields are only used by writers. */
48 int n_rules OVS_GUARDED
; /* Number of rules, including
50 unsigned int max_priority OVS_GUARDED
; /* Max priority of any rule in
52 unsigned int max_count OVS_GUARDED
; /* Count of max_priority rules. */
54 /* These fields are accessed by readers who care about wildcarding. */
55 tag_type tag
; /* Tag generated from mask for partitioning (const). */
56 uint8_t n_indices
; /* How many indices to use (const). */
57 uint8_t index_ofs
[CLS_MAX_INDICES
]; /* u32 segment boundaries (const). */
58 unsigned int trie_plen
[CLS_MAX_TRIES
]; /* Trie prefix length in 'mask'
59 * (runtime configurable). */
60 int ports_mask_len
; /* (const) */
61 struct cmap indices
[CLS_MAX_INDICES
]; /* Staged lookup indices. */
62 rcu_trie_ptr ports_trie
; /* NULL if none. */
64 /* These fields are accessed by all readers. */
65 struct cmap rules
; /* Contains "struct cls_rule"s. */
66 struct minimask mask
; /* Wildcards for fields (const). */
67 /* 'mask' must be the last field. */
70 /* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
71 * field) with tags for the "cls_subtable"s that contain rules that match that
73 struct cls_partition
{
74 struct cmap_node cmap_node
; /* In struct classifier's 'partitions' map. */
75 ovs_be64 metadata
; /* metadata value for this partition. */
76 tag_type tags
; /* OR of each flow's cls_subtable tag. */
77 struct tag_tracker tracker OVS_GUARDED
; /* Tracks the bits in 'tags'. */
80 /* Internal representation of a rule in a "struct cls_subtable". */
82 /* Accessed only by writers and iterators. */
83 struct list list OVS_GUARDED
; /* List of identical, lower-priority rules. */
85 /* Accessed only by writers. */
86 struct cls_partition
*partition OVS_GUARDED
;
88 /* Accessed by readers interested in wildcarding. */
89 unsigned int priority
; /* Larger numbers are higher priorities. */
90 struct cmap_node index_nodes
[CLS_MAX_INDICES
]; /* Within subtable's
92 /* Accessed by all readers. */
93 struct cmap_node cmap_node
; /* Within struct cls_subtable 'rules'. */
94 struct cls_rule
*cls_rule
;
95 struct miniflow flow
; /* Matching rule. Mask is in the subtable. */
96 /* 'flow' must be the last field. */
99 static struct cls_match
*
100 cls_match_alloc(struct cls_rule
*rule
)
102 int count
= count_1bits(rule
->match
.flow
.map
);
104 struct cls_match
*cls_match
105 = xmalloc(sizeof *cls_match
- sizeof cls_match
->flow
.inline_values
106 + MINIFLOW_VALUES_SIZE(count
));
108 cls_match
->cls_rule
= rule
;
109 miniflow_clone_inline(&cls_match
->flow
, &rule
->match
.flow
, count
);
110 cls_match
->priority
= rule
->priority
;
111 rule
->cls_match
= cls_match
;
116 static struct cls_subtable
*find_subtable(const struct classifier
*cls
,
117 const struct minimask
*)
118 OVS_REQUIRES(cls
->mutex
);
119 static struct cls_subtable
*insert_subtable(struct classifier
*cls
,
120 const struct minimask
*)
121 OVS_REQUIRES(cls
->mutex
);
122 static void destroy_subtable(struct classifier
*cls
, struct cls_subtable
*)
123 OVS_REQUIRES(cls
->mutex
);
124 static struct cls_match
*insert_rule(struct classifier
*cls
,
125 struct cls_subtable
*, struct cls_rule
*)
126 OVS_REQUIRES(cls
->mutex
);
128 static struct cls_match
*find_match_wc(const struct cls_subtable
*,
129 const struct flow
*, struct trie_ctx
*,
130 unsigned int n_tries
,
131 struct flow_wildcards
*);
132 static struct cls_match
*find_equal(struct cls_subtable
*,
133 const struct miniflow
*, uint32_t hash
);
135 /* Iterates RULE over HEAD and all of the cls_rules on HEAD->list.
136 * Classifier's mutex must be held while iterating, as the list is
137 * protoceted by it. */
138 #define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \
139 for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE))
140 #define FOR_EACH_RULE_IN_LIST_SAFE(RULE, NEXT, HEAD) \
141 for ((RULE) = (HEAD); \
142 (RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true); \
145 static struct cls_match
*next_rule_in_list__(struct cls_match
*);
146 static struct cls_match
*next_rule_in_list(struct cls_match
*);
148 static unsigned int minimask_get_prefix_len(const struct minimask
*,
149 const struct mf_field
*);
150 static void trie_init(struct classifier
*cls
, int trie_idx
,
151 const struct mf_field
*)
152 OVS_REQUIRES(cls
->mutex
);
153 static unsigned int trie_lookup(const struct cls_trie
*, const struct flow
*,
154 union mf_value
*plens
);
155 static unsigned int trie_lookup_value(const rcu_trie_ptr
*,
156 const ovs_be32 value
[], ovs_be32 plens
[],
157 unsigned int value_bits
);
158 static void trie_destroy(rcu_trie_ptr
*);
159 static void trie_insert(struct cls_trie
*, const struct cls_rule
*, int mlen
);
160 static void trie_insert_prefix(rcu_trie_ptr
*, const ovs_be32
*prefix
,
162 static void trie_remove(struct cls_trie
*, const struct cls_rule
*, int mlen
);
163 static void trie_remove_prefix(rcu_trie_ptr
*, const ovs_be32
*prefix
,
165 static void mask_set_prefix_bits(struct flow_wildcards
*, uint8_t be32ofs
,
166 unsigned int n_bits
);
167 static bool mask_prefix_bits_set(const struct flow_wildcards
*,
168 uint8_t be32ofs
, unsigned int n_bits
);
170 /* flow/miniflow/minimask/minimatch utilities.
171 * These are only used by the classifier, so place them here to allow
172 * for better optimization. */
174 static inline uint64_t
175 miniflow_get_map_in_range(const struct miniflow
*miniflow
,
176 uint8_t start
, uint8_t end
, unsigned int *offset
)
178 uint64_t map
= miniflow
->map
;
182 uint64_t msk
= (UINT64_C(1) << start
) - 1; /* 'start' LSBs set */
183 *offset
= count_1bits(map
& msk
);
186 if (end
< FLOW_U32S
) {
187 uint64_t msk
= (UINT64_C(1) << end
) - 1; /* 'end' LSBs set */
193 /* Returns a hash value for the bits of 'flow' where there are 1-bits in
194 * 'mask', given 'basis'.
196 * The hash values returned by this function are the same as those returned by
197 * miniflow_hash_in_minimask(), only the form of the arguments differ. */
198 static inline uint32_t
199 flow_hash_in_minimask(const struct flow
*flow
, const struct minimask
*mask
,
202 const uint32_t *mask_values
= miniflow_get_u32_values(&mask
->masks
);
203 const uint32_t *flow_u32
= (const uint32_t *)flow
;
204 const uint32_t *p
= mask_values
;
209 for (map
= mask
->masks
.map
; map
; map
= zero_rightmost_1bit(map
)) {
210 hash
= hash_add(hash
, flow_u32
[raw_ctz(map
)] & *p
++);
213 return hash_finish(hash
, (p
- mask_values
) * 4);
216 /* Returns a hash value for the bits of 'flow' where there are 1-bits in
217 * 'mask', given 'basis'.
219 * The hash values returned by this function are the same as those returned by
220 * flow_hash_in_minimask(), only the form of the arguments differ. */
221 static inline uint32_t
222 miniflow_hash_in_minimask(const struct miniflow
*flow
,
223 const struct minimask
*mask
, uint32_t basis
)
225 const uint32_t *mask_values
= miniflow_get_u32_values(&mask
->masks
);
226 const uint32_t *p
= mask_values
;
227 uint32_t hash
= basis
;
230 MINIFLOW_FOR_EACH_IN_MAP(flow_u32
, flow
, mask
->masks
.map
) {
231 hash
= hash_add(hash
, flow_u32
& *p
++);
234 return hash_finish(hash
, (p
- mask_values
) * 4);
237 /* Returns a hash value for the bits of range [start, end) in 'flow',
238 * where there are 1-bits in 'mask', given 'hash'.
240 * The hash values returned by this function are the same as those returned by
241 * minimatch_hash_range(), only the form of the arguments differ. */
242 static inline uint32_t
243 flow_hash_in_minimask_range(const struct flow
*flow
,
244 const struct minimask
*mask
,
245 uint8_t start
, uint8_t end
, uint32_t *basis
)
247 const uint32_t *mask_values
= miniflow_get_u32_values(&mask
->masks
);
248 const uint32_t *flow_u32
= (const uint32_t *)flow
;
250 uint64_t map
= miniflow_get_map_in_range(&mask
->masks
, start
, end
,
252 const uint32_t *p
= mask_values
+ offset
;
253 uint32_t hash
= *basis
;
255 for (; map
; map
= zero_rightmost_1bit(map
)) {
256 hash
= hash_add(hash
, flow_u32
[raw_ctz(map
)] & *p
++);
259 *basis
= hash
; /* Allow continuation from the unfinished value. */
260 return hash_finish(hash
, (p
- mask_values
) * 4);
263 /* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask. */
265 flow_wildcards_fold_minimask(struct flow_wildcards
*wc
,
266 const struct minimask
*mask
)
268 flow_union_with_miniflow(&wc
->masks
, &mask
->masks
);
271 /* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask
272 * in range [start, end). */
274 flow_wildcards_fold_minimask_range(struct flow_wildcards
*wc
,
275 const struct minimask
*mask
,
276 uint8_t start
, uint8_t end
)
278 uint32_t *dst_u32
= (uint32_t *)&wc
->masks
;
280 uint64_t map
= miniflow_get_map_in_range(&mask
->masks
, start
, end
,
282 const uint32_t *p
= miniflow_get_u32_values(&mask
->masks
) + offset
;
284 for (; map
; map
= zero_rightmost_1bit(map
)) {
285 dst_u32
[raw_ctz(map
)] |= *p
++;
289 /* Returns a hash value for 'flow', given 'basis'. */
290 static inline uint32_t
291 miniflow_hash(const struct miniflow
*flow
, uint32_t basis
)
293 const uint32_t *values
= miniflow_get_u32_values(flow
);
294 const uint32_t *p
= values
;
295 uint32_t hash
= basis
;
296 uint64_t hash_map
= 0;
299 for (map
= flow
->map
; map
; map
= zero_rightmost_1bit(map
)) {
301 hash
= hash_add(hash
, *p
);
302 hash_map
|= rightmost_1bit(map
);
306 hash
= hash_add(hash
, hash_map
);
307 hash
= hash_add(hash
, hash_map
>> 32);
309 return hash_finish(hash
, p
- values
);
312 /* Returns a hash value for 'mask', given 'basis'. */
313 static inline uint32_t
314 minimask_hash(const struct minimask
*mask
, uint32_t basis
)
316 return miniflow_hash(&mask
->masks
, basis
);
319 /* Returns a hash value for 'match', given 'basis'. */
320 static inline uint32_t
321 minimatch_hash(const struct minimatch
*match
, uint32_t basis
)
323 return miniflow_hash(&match
->flow
, minimask_hash(&match
->mask
, basis
));
326 /* Returns a hash value for the bits of range [start, end) in 'minimatch',
329 * The hash values returned by this function are the same as those returned by
330 * flow_hash_in_minimask_range(), only the form of the arguments differ. */
331 static inline uint32_t
332 minimatch_hash_range(const struct minimatch
*match
, uint8_t start
, uint8_t end
,
336 const uint32_t *p
, *q
;
337 uint32_t hash
= *basis
;
340 n
= count_1bits(miniflow_get_map_in_range(&match
->mask
.masks
, start
, end
,
342 q
= miniflow_get_u32_values(&match
->mask
.masks
) + offset
;
343 p
= miniflow_get_u32_values(&match
->flow
) + offset
;
345 for (i
= 0; i
< n
; i
++) {
346 hash
= hash_add(hash
, p
[i
] & q
[i
]);
348 *basis
= hash
; /* Allow continuation from the unfinished value. */
349 return hash_finish(hash
, (offset
+ n
) * 4);
355 /* Initializes 'rule' to match packets specified by 'match' at the given
356 * 'priority'. 'match' must satisfy the invariant described in the comment at
357 * the definition of struct match.
359 * The caller must eventually destroy 'rule' with cls_rule_destroy().
361 * (OpenFlow uses priorities between 0 and UINT16_MAX, inclusive, but
362 * internally Open vSwitch supports a wider range.) */
364 cls_rule_init(struct cls_rule
*rule
,
365 const struct match
*match
, unsigned int priority
)
367 minimatch_init(&rule
->match
, match
);
368 rule
->priority
= priority
;
369 rule
->cls_match
= NULL
;
372 /* Same as cls_rule_init() for initialization from a "struct minimatch". */
374 cls_rule_init_from_minimatch(struct cls_rule
*rule
,
375 const struct minimatch
*match
,
376 unsigned int priority
)
378 minimatch_clone(&rule
->match
, match
);
379 rule
->priority
= priority
;
380 rule
->cls_match
= NULL
;
383 /* Initializes 'dst' as a copy of 'src'.
385 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
387 cls_rule_clone(struct cls_rule
*dst
, const struct cls_rule
*src
)
389 minimatch_clone(&dst
->match
, &src
->match
);
390 dst
->priority
= src
->priority
;
391 dst
->cls_match
= NULL
;
394 /* Initializes 'dst' with the data in 'src', destroying 'src'.
396 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
398 cls_rule_move(struct cls_rule
*dst
, struct cls_rule
*src
)
400 minimatch_move(&dst
->match
, &src
->match
);
401 dst
->priority
= src
->priority
;
402 dst
->cls_match
= NULL
;
405 /* Frees memory referenced by 'rule'. Doesn't free 'rule' itself (it's
406 * normally embedded into a larger structure).
408 * ('rule' must not currently be in a classifier.) */
410 cls_rule_destroy(struct cls_rule
*rule
)
412 ovs_assert(!rule
->cls_match
);
413 minimatch_destroy(&rule
->match
);
416 /* Returns true if 'a' and 'b' match the same packets at the same priority,
417 * false if they differ in some way. */
419 cls_rule_equal(const struct cls_rule
*a
, const struct cls_rule
*b
)
421 return a
->priority
== b
->priority
&& minimatch_equal(&a
->match
, &b
->match
);
424 /* Returns a hash value for 'rule', folding in 'basis'. */
426 cls_rule_hash(const struct cls_rule
*rule
, uint32_t basis
)
428 return minimatch_hash(&rule
->match
, hash_int(rule
->priority
, basis
));
431 /* Appends a string describing 'rule' to 's'. */
433 cls_rule_format(const struct cls_rule
*rule
, struct ds
*s
)
435 minimatch_format(&rule
->match
, s
, rule
->priority
);
438 /* Returns true if 'rule' matches every packet, false otherwise. */
440 cls_rule_is_catchall(const struct cls_rule
*rule
)
442 return minimask_is_catchall(&rule
->match
.mask
);
445 /* Initializes 'cls' as a classifier that initially contains no classification
448 classifier_init(struct classifier
*cls
, const uint8_t *flow_segments
)
449 OVS_EXCLUDED(cls
->mutex
)
451 ovs_mutex_init(&cls
->mutex
);
452 ovs_mutex_lock(&cls
->mutex
);
454 cmap_init(&cls
->subtables_map
);
455 pvector_init(&cls
->subtables
);
456 cmap_init(&cls
->partitions
);
457 cls
->n_flow_segments
= 0;
459 while (cls
->n_flow_segments
< CLS_MAX_INDICES
460 && *flow_segments
< FLOW_U32S
) {
461 cls
->flow_segments
[cls
->n_flow_segments
++] = *flow_segments
++;
465 for (int i
= 0; i
< CLS_MAX_TRIES
; i
++) {
466 trie_init(cls
, i
, NULL
);
468 ovs_mutex_unlock(&cls
->mutex
);
471 /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the
472 * caller's responsibility.
473 * May only be called after all the readers have been terminated. */
475 classifier_destroy(struct classifier
*cls
)
476 OVS_EXCLUDED(cls
->mutex
)
479 struct cls_partition
*partition
;
480 struct cls_subtable
*subtable
;
483 ovs_mutex_lock(&cls
->mutex
);
484 for (i
= 0; i
< cls
->n_tries
; i
++) {
485 trie_destroy(&cls
->tries
[i
].root
);
488 CMAP_FOR_EACH_SAFE (subtable
, cmap_node
, &cls
->subtables_map
) {
489 destroy_subtable(cls
, subtable
);
491 cmap_destroy(&cls
->subtables_map
);
493 CMAP_FOR_EACH_SAFE (partition
, cmap_node
, &cls
->partitions
) {
494 ovsrcu_postpone(free
, partition
);
496 cmap_destroy(&cls
->partitions
);
498 pvector_destroy(&cls
->subtables
);
499 ovs_mutex_unlock(&cls
->mutex
);
500 ovs_mutex_destroy(&cls
->mutex
);
504 /* Set the fields for which prefix lookup should be performed. */
506 classifier_set_prefix_fields(struct classifier
*cls
,
507 const enum mf_field_id
*trie_fields
,
508 unsigned int n_fields
)
509 OVS_EXCLUDED(cls
->mutex
)
511 const struct mf_field
* new_fields
[CLS_MAX_TRIES
];
512 struct mf_bitmap fields
= MF_BITMAP_INITIALIZER
;
514 bool changed
= false;
516 ovs_mutex_lock(&cls
->mutex
);
517 for (i
= 0; i
< n_fields
&& n_tries
< CLS_MAX_TRIES
; i
++) {
518 const struct mf_field
*field
= mf_from_id(trie_fields
[i
]);
519 if (field
->flow_be32ofs
< 0 || field
->n_bits
% 32) {
520 /* Incompatible field. This is the only place where we
521 * enforce these requirements, but the rest of the trie code
522 * depends on the flow_be32ofs to be non-negative and the
523 * field length to be a multiple of 32 bits. */
527 if (bitmap_is_set(fields
.bm
, trie_fields
[i
])) {
528 /* Duplicate field, there is no need to build more than
529 * one index for any one field. */
532 bitmap_set1(fields
.bm
, trie_fields
[i
]);
534 new_fields
[n_tries
] = NULL
;
535 if (n_tries
>= cls
->n_tries
|| field
!= cls
->tries
[n_tries
].field
) {
536 new_fields
[n_tries
] = field
;
542 if (changed
|| n_tries
< cls
->n_tries
) {
543 struct cls_subtable
*subtable
;
545 /* Trie configuration needs to change. Disable trie lookups
546 * for the tries that are changing and wait all the current readers
547 * with the old configuration to be done. */
549 CMAP_FOR_EACH (subtable
, cmap_node
, &cls
->subtables_map
) {
550 for (i
= 0; i
< cls
->n_tries
; i
++) {
551 if ((i
< n_tries
&& new_fields
[i
]) || i
>= n_tries
) {
552 if (subtable
->trie_plen
[i
]) {
553 subtable
->trie_plen
[i
] = 0;
559 /* Synchronize if any readers were using tries. The readers may
560 * temporarily function without the trie lookup based optimizations. */
562 /* ovsrcu_synchronize() functions as a memory barrier, so it does
563 * not matter that subtable->trie_plen is not atomic. */
564 ovsrcu_synchronize();
567 /* Now set up the tries. */
568 for (i
= 0; i
< n_tries
; i
++) {
570 trie_init(cls
, i
, new_fields
[i
]);
573 /* Destroy the rest, if any. */
574 for (; i
< cls
->n_tries
; i
++) {
575 trie_init(cls
, i
, NULL
);
578 cls
->n_tries
= n_tries
;
579 ovs_mutex_unlock(&cls
->mutex
);
583 ovs_mutex_unlock(&cls
->mutex
);
584 return false; /* No change. */
588 trie_init(struct classifier
*cls
, int trie_idx
, const struct mf_field
*field
)
589 OVS_REQUIRES(cls
->mutex
)
591 struct cls_trie
*trie
= &cls
->tries
[trie_idx
];
592 struct cls_subtable
*subtable
;
594 if (trie_idx
< cls
->n_tries
) {
595 trie_destroy(&trie
->root
);
597 ovsrcu_set_hidden(&trie
->root
, NULL
);
601 /* Add existing rules to the new trie. */
602 CMAP_FOR_EACH (subtable
, cmap_node
, &cls
->subtables_map
) {
605 plen
= field
? minimask_get_prefix_len(&subtable
->mask
, field
) : 0;
607 struct cls_match
*head
;
609 CMAP_FOR_EACH (head
, cmap_node
, &subtable
->rules
) {
610 struct cls_match
*match
;
612 FOR_EACH_RULE_IN_LIST (match
, head
) {
613 trie_insert(trie
, match
->cls_rule
, plen
);
617 /* Initialize subtable's prefix length on this field. This will
618 * allow readers to use the trie. */
619 atomic_thread_fence(memory_order_release
);
620 subtable
->trie_plen
[trie_idx
] = plen
;
624 /* Returns true if 'cls' contains no classification rules, false otherwise.
625 * Checking the cmap requires no locking. */
627 classifier_is_empty(const struct classifier
*cls
)
629 return cmap_is_empty(&cls
->subtables_map
);
632 /* Returns the number of rules in 'cls'. */
634 classifier_count(const struct classifier
*cls
)
635 OVS_NO_THREAD_SAFETY_ANALYSIS
637 /* n_rules is an int, so in the presence of concurrent writers this will
638 * return either the old or a new value. */
643 hash_metadata(ovs_be64 metadata_
)
645 uint64_t metadata
= (OVS_FORCE
uint64_t) metadata_
;
646 return hash_uint64(metadata
);
649 static struct cls_partition
*
650 find_partition(const struct classifier
*cls
, ovs_be64 metadata
, uint32_t hash
)
652 struct cls_partition
*partition
;
654 CMAP_FOR_EACH_WITH_HASH (partition
, cmap_node
, hash
, &cls
->partitions
) {
655 if (partition
->metadata
== metadata
) {
663 static struct cls_partition
*
664 create_partition(struct classifier
*cls
, struct cls_subtable
*subtable
,
666 OVS_REQUIRES(cls
->mutex
)
668 uint32_t hash
= hash_metadata(metadata
);
669 struct cls_partition
*partition
= find_partition(cls
, metadata
, hash
);
671 partition
= xmalloc(sizeof *partition
);
672 partition
->metadata
= metadata
;
674 tag_tracker_init(&partition
->tracker
);
675 cmap_insert(&cls
->partitions
, &partition
->cmap_node
, hash
);
677 tag_tracker_add(&partition
->tracker
, &partition
->tags
, subtable
->tag
);
681 static inline ovs_be32
minimatch_get_ports(const struct minimatch
*match
)
683 /* Could optimize to use the same map if needed for fast path. */
684 return MINIFLOW_GET_BE32(&match
->flow
, tp_src
)
685 & MINIFLOW_GET_BE32(&match
->mask
.masks
, tp_src
);
688 /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
689 * must not modify or free it.
691 * If 'cls' already contains an identical rule (including wildcards, values of
692 * fixed fields, and priority), replaces the old rule by 'rule' and returns the
693 * rule that was replaced. The caller takes ownership of the returned rule and
694 * is thus responsible for destroying it with cls_rule_destroy(), freeing the
695 * memory block in which it resides, etc., as necessary.
697 * Returns NULL if 'cls' does not contain a rule with an identical key, after
698 * inserting the new rule. In this case, no rules are displaced by the new
699 * rule, even rules that cannot have any effect because the new rule matches a
700 * superset of their flows and has higher priority. */
702 classifier_replace(struct classifier
*cls
, struct cls_rule
*rule
)
703 OVS_EXCLUDED(cls
->mutex
)
705 struct cls_match
*old_rule
;
706 struct cls_subtable
*subtable
;
707 struct cls_rule
*old_cls_rule
= NULL
;
709 ovs_mutex_lock(&cls
->mutex
);
710 subtable
= find_subtable(cls
, &rule
->match
.mask
);
712 subtable
= insert_subtable(cls
, &rule
->match
.mask
);
715 old_rule
= insert_rule(cls
, subtable
, rule
);
719 rule
->cls_match
->partition
= NULL
;
720 if (minimask_get_metadata_mask(&rule
->match
.mask
) == OVS_BE64_MAX
) {
721 ovs_be64 metadata
= miniflow_get_metadata(&rule
->match
.flow
);
722 rule
->cls_match
->partition
= create_partition(cls
, subtable
,
728 for (int i
= 0; i
< cls
->n_tries
; i
++) {
729 if (subtable
->trie_plen
[i
]) {
730 trie_insert(&cls
->tries
[i
], rule
, subtable
->trie_plen
[i
]);
735 if (subtable
->ports_mask_len
) {
736 /* We mask the value to be inserted to always have the wildcarded
737 * bits in known (zero) state, so we can include them in comparison
738 * and they will always match (== their original value does not
740 ovs_be32 masked_ports
= minimatch_get_ports(&rule
->match
);
742 trie_insert_prefix(&subtable
->ports_trie
, &masked_ports
,
743 subtable
->ports_mask_len
);
746 old_cls_rule
= old_rule
->cls_rule
;
747 rule
->cls_match
->partition
= old_rule
->partition
;
748 old_cls_rule
->cls_match
= NULL
;
750 /* 'old_rule' contains a cmap_node, which may not be freed
752 ovsrcu_postpone(free
, old_rule
);
754 ovs_mutex_unlock(&cls
->mutex
);
758 /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
759 * must not modify or free it.
761 * 'cls' must not contain an identical rule (including wildcards, values of
762 * fixed fields, and priority). Use classifier_find_rule_exactly() to find
765 classifier_insert(struct classifier
*cls
, struct cls_rule
*rule
)
767 struct cls_rule
*displaced_rule
= classifier_replace(cls
, rule
);
768 ovs_assert(!displaced_rule
);
771 /* Removes 'rule' from 'cls'. It is the caller's responsibility to destroy
772 * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
773 * resides, etc., as necessary. */
775 classifier_remove(struct classifier
*cls
, struct cls_rule
*rule
)
776 OVS_EXCLUDED(cls
->mutex
)
778 struct cls_partition
*partition
;
779 struct cls_match
*cls_match
= rule
->cls_match
;
780 struct cls_match
*head
;
781 struct cls_subtable
*subtable
;
783 uint32_t basis
= 0, hash
, ihash
[CLS_MAX_INDICES
];
784 uint8_t prev_be32ofs
= 0;
786 ovs_assert(cls_match
);
788 ovs_mutex_lock(&cls
->mutex
);
789 subtable
= find_subtable(cls
, &rule
->match
.mask
);
790 ovs_assert(subtable
);
792 if (subtable
->ports_mask_len
) {
793 ovs_be32 masked_ports
= minimatch_get_ports(&rule
->match
);
795 trie_remove_prefix(&subtable
->ports_trie
,
796 &masked_ports
, subtable
->ports_mask_len
);
798 for (i
= 0; i
< cls
->n_tries
; i
++) {
799 if (subtable
->trie_plen
[i
]) {
800 trie_remove(&cls
->tries
[i
], rule
, subtable
->trie_plen
[i
]);
804 /* Remove rule node from indices. */
805 for (i
= 0; i
< subtable
->n_indices
; i
++) {
806 ihash
[i
] = minimatch_hash_range(&rule
->match
, prev_be32ofs
,
807 subtable
->index_ofs
[i
], &basis
);
808 cmap_remove(&subtable
->indices
[i
], &cls_match
->index_nodes
[i
],
810 prev_be32ofs
= subtable
->index_ofs
[i
];
812 hash
= minimatch_hash_range(&rule
->match
, prev_be32ofs
, FLOW_U32S
, &basis
);
814 head
= find_equal(subtable
, &rule
->match
.flow
, hash
);
815 if (head
!= cls_match
) {
816 list_remove(&cls_match
->list
);
817 } else if (list_is_empty(&cls_match
->list
)) {
818 cmap_remove(&subtable
->rules
, &cls_match
->cmap_node
, hash
);
820 struct cls_match
*next
= CONTAINER_OF(cls_match
->list
.next
,
821 struct cls_match
, list
);
823 list_remove(&cls_match
->list
);
824 cmap_replace(&subtable
->rules
, &cls_match
->cmap_node
,
825 &next
->cmap_node
, hash
);
828 partition
= cls_match
->partition
;
830 tag_tracker_subtract(&partition
->tracker
, &partition
->tags
,
832 if (!partition
->tags
) {
833 cmap_remove(&cls
->partitions
, &partition
->cmap_node
,
834 hash_metadata(partition
->metadata
));
835 ovsrcu_postpone(free
, partition
);
839 if (--subtable
->n_rules
== 0) {
840 destroy_subtable(cls
, subtable
);
841 } else if (subtable
->max_priority
== cls_match
->priority
842 && --subtable
->max_count
== 0) {
843 /* Find the new 'max_priority' and 'max_count'. */
844 struct cls_match
*head
;
845 unsigned int max_priority
= 0;
847 CMAP_FOR_EACH (head
, cmap_node
, &subtable
->rules
) {
848 if (head
->priority
> max_priority
) {
849 max_priority
= head
->priority
;
850 subtable
->max_count
= 1;
851 } else if (head
->priority
== max_priority
) {
852 ++subtable
->max_count
;
855 subtable
->max_priority
= max_priority
;
856 pvector_change_priority(&cls
->subtables
, subtable
, max_priority
);
861 rule
->cls_match
= NULL
;
862 ovsrcu_postpone(free
, cls_match
);
863 ovs_mutex_unlock(&cls
->mutex
);
866 /* Prefix tree context. Valid when 'lookup_done' is true. Can skip all
867 * subtables which have a prefix match on the trie field, but whose prefix
868 * length is not indicated in 'match_plens'. For example, a subtable that
869 * has a 8-bit trie field prefix match can be skipped if
870 * !be_get_bit_at(&match_plens, 8 - 1). If skipped, 'maskbits' prefix bits
871 * must be unwildcarded to make datapath flow only match packets it should. */
873 const struct cls_trie
*trie
;
874 bool lookup_done
; /* Status of the lookup. */
875 uint8_t be32ofs
; /* U32 offset of the field in question. */
876 unsigned int maskbits
; /* Prefix length needed to avoid false matches. */
877 union mf_value match_plens
; /* Bitmask of prefix lengths with possible
882 trie_ctx_init(struct trie_ctx
*ctx
, const struct cls_trie
*trie
)
885 ctx
->be32ofs
= trie
->field
->flow_be32ofs
;
886 ctx
->lookup_done
= false;
889 /* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
890 * Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules
891 * of equal priority match 'flow', returns one arbitrarily.
893 * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
894 * set of bits that were significant in the lookup. At some point
895 * earlier, 'wc' should have been initialized (e.g., by
896 * flow_wildcards_init_catchall()). */
898 classifier_lookup(const struct classifier
*cls
, const struct flow
*flow
,
899 struct flow_wildcards
*wc
)
901 const struct cls_partition
*partition
;
903 int64_t best_priority
= -1;
904 const struct cls_match
*best
;
905 struct trie_ctx trie_ctx
[CLS_MAX_TRIES
];
906 struct cls_subtable
*subtable
;
908 /* Synchronize for cls->n_tries and subtable->trie_plen. They can change
909 * when table configuration changes, which happens typically only on
911 atomic_thread_fence(memory_order_acquire
);
913 /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
914 * then 'flow' cannot possibly match in 'subtable':
916 * - If flow->metadata maps to a given 'partition', then we can use
917 * 'tags' for 'partition->tags'.
919 * - If flow->metadata has no partition, then no rule in 'cls' has an
920 * exact-match for flow->metadata. That means that we don't need to
921 * search any subtable that includes flow->metadata in its mask.
923 * In either case, we always need to search any cls_subtables that do not
924 * include flow->metadata in its mask. One way to do that would be to
925 * check the "cls_subtable"s explicitly for that, but that would require an
926 * extra branch per subtable. Instead, we mark such a cls_subtable's
927 * 'tags' as TAG_ALL and make sure that 'tags' is never empty. This means
928 * that 'tags' always intersects such a cls_subtable's 'tags', so we don't
929 * need a special case.
931 partition
= (cmap_is_empty(&cls
->partitions
)
933 : find_partition(cls
, flow
->metadata
,
934 hash_metadata(flow
->metadata
)));
935 tags
= partition
? partition
->tags
: TAG_ARBITRARY
;
937 /* Initialize trie contexts for match_find_wc(). */
938 for (int i
= 0; i
< cls
->n_tries
; i
++) {
939 trie_ctx_init(&trie_ctx
[i
], &cls
->tries
[i
]);
943 PVECTOR_FOR_EACH_PRIORITY(subtable
, best_priority
, 2,
944 sizeof(struct cls_subtable
), &cls
->subtables
) {
945 struct cls_match
*rule
;
947 if (!tag_intersects(tags
, subtable
->tag
)) {
951 rule
= find_match_wc(subtable
, flow
, trie_ctx
, cls
->n_tries
, wc
);
952 if (rule
&& (int64_t)rule
->priority
> best_priority
) {
953 best_priority
= (int64_t)rule
->priority
;
958 return best
? best
->cls_rule
: NULL
;
961 /* Returns true if 'target' satisifies 'match', that is, if each bit for which
962 * 'match' specifies a particular value has the correct value in 'target'.
964 * 'flow' and 'mask' have the same mask! */
966 miniflow_and_mask_matches_miniflow(const struct miniflow
*flow
,
967 const struct minimask
*mask
,
968 const struct miniflow
*target
)
970 const uint32_t *flowp
= miniflow_get_u32_values(flow
);
971 const uint32_t *maskp
= miniflow_get_u32_values(&mask
->masks
);
974 MINIFLOW_FOR_EACH_IN_MAP(target_u32
, target
, mask
->masks
.map
) {
975 if ((*flowp
++ ^ target_u32
) & *maskp
++) {
983 static inline struct cls_match
*
984 find_match_miniflow(const struct cls_subtable
*subtable
,
985 const struct miniflow
*flow
,
988 struct cls_match
*rule
;
990 CMAP_FOR_EACH_WITH_HASH (rule
, cmap_node
, hash
, &subtable
->rules
) {
991 if (miniflow_and_mask_matches_miniflow(&rule
->flow
, &subtable
->mask
,
1000 /* For each miniflow in 'flows' performs a classifier lookup writing the result
1001 * into the corresponding slot in 'rules'. If a particular entry in 'flows' is
1002 * NULL it is skipped.
1004 * This function is optimized for use in the userspace datapath and therefore
1005 * does not implement a lot of features available in the standard
1006 * classifier_lookup() function. Specifically, it does not implement
1007 * priorities, instead returning any rule which matches the flow. */
1009 classifier_lookup_miniflow_batch(const struct classifier
*cls
,
1010 const struct miniflow
**flows
,
1011 struct cls_rule
**rules
, size_t len
)
1013 struct cls_subtable
*subtable
;
1014 size_t i
, begin
= 0;
1016 memset(rules
, 0, len
* sizeof *rules
);
1017 PVECTOR_FOR_EACH (subtable
, &cls
->subtables
) {
1018 for (i
= begin
; i
< len
; i
++) {
1019 struct cls_match
*match
;
1022 if (OVS_UNLIKELY(rules
[i
] || !flows
[i
])) {
1026 hash
= miniflow_hash_in_minimask(flows
[i
], &subtable
->mask
, 0);
1027 match
= find_match_miniflow(subtable
, flows
[i
], hash
);
1028 if (OVS_UNLIKELY(match
)) {
1029 rules
[i
] = match
->cls_rule
;
1033 while (begin
< len
&& (rules
[begin
] || !flows
[begin
])) {
1042 /* Finds and returns a rule in 'cls' with exactly the same priority and
1043 * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't
1044 * contain an exact match. */
1046 classifier_find_rule_exactly(const struct classifier
*cls
,
1047 const struct cls_rule
*target
)
1048 OVS_EXCLUDED(cls
->mutex
)
1050 struct cls_match
*head
, *rule
;
1051 struct cls_subtable
*subtable
;
1053 ovs_mutex_lock(&cls
->mutex
);
1054 subtable
= find_subtable(cls
, &target
->match
.mask
);
1059 /* Skip if there is no hope. */
1060 if (target
->priority
> subtable
->max_priority
) {
1064 head
= find_equal(subtable
, &target
->match
.flow
,
1065 miniflow_hash_in_minimask(&target
->match
.flow
,
1066 &target
->match
.mask
, 0));
1067 FOR_EACH_RULE_IN_LIST (rule
, head
) {
1068 if (target
->priority
>= rule
->priority
) {
1069 ovs_mutex_unlock(&cls
->mutex
);
1070 return target
->priority
== rule
->priority
? rule
->cls_rule
: NULL
;
1074 ovs_mutex_unlock(&cls
->mutex
);
1078 /* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
1079 * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't
1080 * contain an exact match. */
1082 classifier_find_match_exactly(const struct classifier
*cls
,
1083 const struct match
*target
,
1084 unsigned int priority
)
1086 struct cls_rule
*retval
;
1089 cls_rule_init(&cr
, target
, priority
);
1090 retval
= classifier_find_rule_exactly(cls
, &cr
);
1091 cls_rule_destroy(&cr
);
1096 /* Checks if 'target' would overlap any other rule in 'cls'. Two rules are
1097 * considered to overlap if both rules have the same priority and a packet
1098 * could match both. */
1100 classifier_rule_overlaps(const struct classifier
*cls
,
1101 const struct cls_rule
*target
)
1102 OVS_EXCLUDED(cls
->mutex
)
1104 struct cls_subtable
*subtable
;
1105 int64_t stop_at_priority
= (int64_t)target
->priority
- 1;
1107 ovs_mutex_lock(&cls
->mutex
);
1108 /* Iterate subtables in the descending max priority order. */
1109 PVECTOR_FOR_EACH_PRIORITY (subtable
, stop_at_priority
, 2,
1110 sizeof(struct cls_subtable
), &cls
->subtables
) {
1111 uint32_t storage
[FLOW_U32S
];
1112 struct minimask mask
;
1113 struct cls_match
*head
;
1115 minimask_combine(&mask
, &target
->match
.mask
, &subtable
->mask
, storage
);
1116 CMAP_FOR_EACH (head
, cmap_node
, &subtable
->rules
) {
1117 struct cls_match
*rule
;
1119 FOR_EACH_RULE_IN_LIST (rule
, head
) {
1120 if (rule
->priority
< target
->priority
) {
1121 break; /* Rules in descending priority order. */
1123 if (rule
->priority
== target
->priority
1124 && miniflow_equal_in_minimask(&target
->match
.flow
,
1125 &rule
->flow
, &mask
)) {
1126 ovs_mutex_unlock(&cls
->mutex
);
1133 ovs_mutex_unlock(&cls
->mutex
);
1137 /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more
1138 * specific than 'criteria'. That is, 'rule' matches 'criteria' and this
1139 * function returns true if, for every field:
1141 * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the
1144 * - 'criteria' wildcards the field,
1146 * Conversely, 'rule' does not match 'criteria' and this function returns false
1147 * if, for at least one field:
1149 * - 'criteria' and 'rule' specify different values for the field, or
1151 * - 'criteria' specifies a value for the field but 'rule' wildcards it.
1153 * Equivalently, the truth table for whether a field matches is:
1158 * r +---------+---------+
1159 * i wild | yes | yes |
1161 * e +---------+---------+
1162 * r exact | no |if values|
1164 * a +---------+---------+
1166 * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD
1167 * commands and by OpenFlow 1.0 aggregate and flow stats.
1169 * Ignores rule->priority. */
1171 cls_rule_is_loose_match(const struct cls_rule
*rule
,
1172 const struct minimatch
*criteria
)
1174 return (!minimask_has_extra(&rule
->match
.mask
, &criteria
->mask
)
1175 && miniflow_equal_in_minimask(&rule
->match
.flow
, &criteria
->flow
,
1182 rule_matches(const struct cls_match
*rule
, const struct cls_rule
*target
)
1185 || miniflow_equal_in_minimask(&rule
->flow
,
1186 &target
->match
.flow
,
1187 &target
->match
.mask
));
1190 static struct cls_match
*
1191 search_subtable(const struct cls_subtable
*subtable
,
1192 struct cls_cursor
*cursor
)
1195 || !minimask_has_extra(&subtable
->mask
, &cursor
->target
->match
.mask
)) {
1196 struct cls_match
*rule
;
1198 CMAP_CURSOR_FOR_EACH (rule
, cmap_node
, &cursor
->rules
,
1200 if (rule_matches(rule
, cursor
->target
)) {
1208 /* Initializes 'cursor' for iterating through rules in 'cls', and returns the
1209 * first matching cls_rule via '*pnode', or NULL if there are no matches.
1211 * - If 'target' is null, the cursor will visit every rule in 'cls'.
1213 * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls'
1214 * such that cls_rule_is_loose_match(rule, target) returns true.
1216 * Ignores target->priority. */
1217 struct cls_cursor
cls_cursor_start(const struct classifier
*cls
,
1218 const struct cls_rule
*target
,
1220 OVS_NO_THREAD_SAFETY_ANALYSIS
1222 struct cls_cursor cursor
;
1223 struct cls_subtable
*subtable
;
1227 cursor
.target
= target
&& !cls_rule_is_catchall(target
) ? target
: NULL
;
1230 /* Find first rule. */
1231 ovs_mutex_lock(&cursor
.cls
->mutex
);
1232 CMAP_CURSOR_FOR_EACH (subtable
, cmap_node
, &cursor
.subtables
,
1233 &cursor
.cls
->subtables_map
) {
1234 struct cls_match
*rule
= search_subtable(subtable
, &cursor
);
1237 cursor
.subtable
= subtable
;
1238 cursor
.rule
= rule
->cls_rule
;
1243 /* Leave locked if requested and have a rule. */
1244 if (safe
|| !cursor
.rule
) {
1245 ovs_mutex_unlock(&cursor
.cls
->mutex
);
1250 static struct cls_rule
*
1251 cls_cursor_next(struct cls_cursor
*cursor
)
1252 OVS_NO_THREAD_SAFETY_ANALYSIS
1254 struct cls_match
*rule
= cursor
->rule
->cls_match
;
1255 const struct cls_subtable
*subtable
;
1256 struct cls_match
*next
;
1258 next
= next_rule_in_list__(rule
);
1259 if (next
->priority
< rule
->priority
) {
1260 return next
->cls_rule
;
1263 /* 'next' is the head of the list, that is, the rule that is included in
1264 * the subtable's map. (This is important when the classifier contains
1265 * rules that differ only in priority.) */
1267 CMAP_CURSOR_FOR_EACH_CONTINUE (rule
, cmap_node
, &cursor
->rules
) {
1268 if (rule_matches(rule
, cursor
->target
)) {
1269 return rule
->cls_rule
;
1273 subtable
= cursor
->subtable
;
1274 CMAP_CURSOR_FOR_EACH_CONTINUE (subtable
, cmap_node
, &cursor
->subtables
) {
1275 rule
= search_subtable(subtable
, cursor
);
1277 cursor
->subtable
= subtable
;
1278 return rule
->cls_rule
;
1285 /* Sets 'cursor->rule' to the next matching cls_rule in 'cursor''s iteration,
1286 * or to null if all matching rules have been visited. */
1288 cls_cursor_advance(struct cls_cursor
*cursor
)
1289 OVS_NO_THREAD_SAFETY_ANALYSIS
1292 ovs_mutex_lock(&cursor
->cls
->mutex
);
1294 cursor
->rule
= cls_cursor_next(cursor
);
1295 if (cursor
->safe
|| !cursor
->rule
) {
1296 ovs_mutex_unlock(&cursor
->cls
->mutex
);
1300 static struct cls_subtable
*
1301 find_subtable(const struct classifier
*cls
, const struct minimask
*mask
)
1302 OVS_REQUIRES(cls
->mutex
)
1304 struct cls_subtable
*subtable
;
1306 CMAP_FOR_EACH_WITH_HASH (subtable
, cmap_node
, minimask_hash(mask
, 0),
1307 &cls
->subtables_map
) {
1308 if (minimask_equal(mask
, &subtable
->mask
)) {
1315 /* The new subtable will be visible to the readers only after this. */
1316 static struct cls_subtable
*
1317 insert_subtable(struct classifier
*cls
, const struct minimask
*mask
)
1318 OVS_REQUIRES(cls
->mutex
)
1320 uint32_t hash
= minimask_hash(mask
, 0);
1321 struct cls_subtable
*subtable
;
1323 struct flow_wildcards old
, new;
1325 int count
= count_1bits(mask
->masks
.map
);
1327 subtable
= xzalloc(sizeof *subtable
- sizeof mask
->masks
.inline_values
1328 + MINIFLOW_VALUES_SIZE(count
));
1329 cmap_init(&subtable
->rules
);
1330 miniflow_clone_inline(&subtable
->mask
.masks
, &mask
->masks
, count
);
1332 /* Init indices for segmented lookup, if any. */
1333 flow_wildcards_init_catchall(&new);
1336 for (i
= 0; i
< cls
->n_flow_segments
; i
++) {
1337 flow_wildcards_fold_minimask_range(&new, mask
, prev
,
1338 cls
->flow_segments
[i
]);
1339 /* Add an index if it adds mask bits. */
1340 if (!flow_wildcards_equal(&new, &old
)) {
1341 cmap_init(&subtable
->indices
[index
]);
1342 subtable
->index_ofs
[index
] = cls
->flow_segments
[i
];
1346 prev
= cls
->flow_segments
[i
];
1348 /* Check if the rest of the subtable's mask adds any bits,
1349 * and remove the last index if it doesn't. */
1351 flow_wildcards_fold_minimask_range(&new, mask
, prev
, FLOW_U32S
);
1352 if (flow_wildcards_equal(&new, &old
)) {
1354 subtable
->index_ofs
[index
] = 0;
1355 cmap_destroy(&subtable
->indices
[index
]);
1358 subtable
->n_indices
= index
;
1360 subtable
->tag
= (minimask_get_metadata_mask(mask
) == OVS_BE64_MAX
1361 ? tag_create_deterministic(hash
)
1364 for (i
= 0; i
< cls
->n_tries
; i
++) {
1365 subtable
->trie_plen
[i
] = minimask_get_prefix_len(mask
,
1366 cls
->tries
[i
].field
);
1370 ovsrcu_set_hidden(&subtable
->ports_trie
, NULL
);
1371 subtable
->ports_mask_len
1372 = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask
->masks
, tp_src
)));
1374 cmap_insert(&cls
->subtables_map
, &subtable
->cmap_node
, hash
);
1380 destroy_subtable(struct classifier
*cls
, struct cls_subtable
*subtable
)
1381 OVS_REQUIRES(cls
->mutex
)
1385 pvector_remove(&cls
->subtables
, subtable
);
1386 trie_destroy(&subtable
->ports_trie
);
1388 for (i
= 0; i
< subtable
->n_indices
; i
++) {
1389 cmap_destroy(&subtable
->indices
[i
]);
1391 cmap_remove(&cls
->subtables_map
, &subtable
->cmap_node
,
1392 minimask_hash(&subtable
->mask
, 0));
1393 minimask_destroy(&subtable
->mask
);
1394 cmap_destroy(&subtable
->rules
);
1395 ovsrcu_postpone(free
, subtable
);
1403 static unsigned int be_get_bit_at(const ovs_be32 value
[], unsigned int ofs
);
1405 /* Return 'true' if can skip rest of the subtable based on the prefix trie
1406 * lookup results. */
1408 check_tries(struct trie_ctx trie_ctx
[CLS_MAX_TRIES
], unsigned int n_tries
,
1409 const unsigned int field_plen
[CLS_MAX_TRIES
],
1410 const struct range ofs
, const struct flow
*flow
,
1411 struct flow_wildcards
*wc
)
1415 /* Check if we could avoid fully unwildcarding the next level of
1416 * fields using the prefix tries. The trie checks are done only as
1417 * needed to avoid folding in additional bits to the wildcards mask. */
1418 for (j
= 0; j
< n_tries
; j
++) {
1419 /* Is the trie field relevant for this subtable? */
1420 if (field_plen
[j
]) {
1421 struct trie_ctx
*ctx
= &trie_ctx
[j
];
1422 uint8_t be32ofs
= ctx
->be32ofs
;
1424 /* Is the trie field within the current range of fields? */
1425 if (be32ofs
>= ofs
.start
&& be32ofs
< ofs
.end
) {
1426 /* On-demand trie lookup. */
1427 if (!ctx
->lookup_done
) {
1428 memset(&ctx
->match_plens
, 0, sizeof ctx
->match_plens
);
1429 ctx
->maskbits
= trie_lookup(ctx
->trie
, flow
,
1431 ctx
->lookup_done
= true;
1433 /* Possible to skip the rest of the subtable if subtable's
1434 * prefix on the field is not included in the lookup result. */
1435 if (!be_get_bit_at(&ctx
->match_plens
.be32
, field_plen
[j
] - 1)) {
1436 /* We want the trie lookup to never result in unwildcarding
1437 * any bits that would not be unwildcarded otherwise.
1438 * Since the trie is shared by the whole classifier, it is
1439 * possible that the 'maskbits' contain bits that are
1440 * irrelevant for the partition relevant for the current
1441 * packet. Hence the checks below. */
1443 /* Check that the trie result will not unwildcard more bits
1444 * than this subtable would otherwise. */
1445 if (ctx
->maskbits
<= field_plen
[j
]) {
1446 /* Unwildcard the bits and skip the rest. */
1447 mask_set_prefix_bits(wc
, be32ofs
, ctx
->maskbits
);
1448 /* Note: Prerequisite already unwildcarded, as the only
1449 * prerequisite of the supported trie lookup fields is
1450 * the ethertype, which is always unwildcarded. */
1453 /* Can skip if the field is already unwildcarded. */
1454 if (mask_prefix_bits_set(wc
, be32ofs
, ctx
->maskbits
)) {
1464 /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
1465 * for which 'flow', for which 'mask' has a bit set, specifies a particular
1466 * value has the correct value in 'target'.
1468 * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
1469 * target, mask) but this is faster because of the invariant that
1470 * flow->map and mask->masks.map are the same, and that this version
1471 * takes the 'wc'. */
1473 miniflow_and_mask_matches_flow(const struct miniflow
*flow
,
1474 const struct minimask
*mask
,
1475 const struct flow
*target
)
1477 const uint32_t *flowp
= miniflow_get_u32_values(flow
);
1478 const uint32_t *maskp
= miniflow_get_u32_values(&mask
->masks
);
1481 MAP_FOR_EACH_INDEX(idx
, mask
->masks
.map
) {
1482 uint32_t diff
= (*flowp
++ ^ flow_u32_value(target
, idx
)) & *maskp
++;
1492 static inline struct cls_match
*
1493 find_match(const struct cls_subtable
*subtable
, const struct flow
*flow
,
1496 struct cls_match
*rule
;
1498 CMAP_FOR_EACH_WITH_HASH (rule
, cmap_node
, hash
, &subtable
->rules
) {
1499 if (miniflow_and_mask_matches_flow(&rule
->flow
, &subtable
->mask
,
1508 /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
1509 * for which 'flow', for which 'mask' has a bit set, specifies a particular
1510 * value has the correct value in 'target'.
1512 * This function is equivalent to miniflow_and_mask_matches_flow() but this
1513 * version fills in the mask bits in 'wc'. */
1515 miniflow_and_mask_matches_flow_wc(const struct miniflow
*flow
,
1516 const struct minimask
*mask
,
1517 const struct flow
*target
,
1518 struct flow_wildcards
*wc
)
1520 const uint32_t *flowp
= miniflow_get_u32_values(flow
);
1521 const uint32_t *maskp
= miniflow_get_u32_values(&mask
->masks
);
1524 MAP_FOR_EACH_INDEX(idx
, mask
->masks
.map
) {
1525 uint32_t mask
= *maskp
++;
1526 uint32_t diff
= (*flowp
++ ^ flow_u32_value(target
, idx
)) & mask
;
1529 /* Only unwildcard if none of the differing bits is already
1531 if (!(flow_u32_value(&wc
->masks
, idx
) & diff
)) {
1532 /* Keep one bit of the difference. */
1533 *flow_u32_lvalue(&wc
->masks
, idx
) |= rightmost_1bit(diff
);
1537 /* Fill in the bits that were looked at. */
1538 *flow_u32_lvalue(&wc
->masks
, idx
) |= mask
;
1544 /* Unwildcard the fields looked up so far, if any. */
1546 fill_range_wc(const struct cls_subtable
*subtable
, struct flow_wildcards
*wc
,
1550 flow_wildcards_fold_minimask_range(wc
, &subtable
->mask
, 0, to
);
1554 static struct cls_match
*
1555 find_match_wc(const struct cls_subtable
*subtable
, const struct flow
*flow
,
1556 struct trie_ctx trie_ctx
[CLS_MAX_TRIES
], unsigned int n_tries
,
1557 struct flow_wildcards
*wc
)
1559 uint32_t basis
= 0, hash
;
1560 struct cls_match
*rule
;
1564 if (OVS_UNLIKELY(!wc
)) {
1565 return find_match(subtable
, flow
,
1566 flow_hash_in_minimask(flow
, &subtable
->mask
, 0));
1570 /* Try to finish early by checking fields in segments. */
1571 for (i
= 0; i
< subtable
->n_indices
; i
++) {
1572 struct cmap_node
*inode
;
1574 ofs
.end
= subtable
->index_ofs
[i
];
1576 if (check_tries(trie_ctx
, n_tries
, subtable
->trie_plen
, ofs
, flow
,
1578 /* 'wc' bits for the trie field set, now unwildcard the preceding
1579 * bits used so far. */
1580 fill_range_wc(subtable
, wc
, ofs
.start
);
1583 hash
= flow_hash_in_minimask_range(flow
, &subtable
->mask
, ofs
.start
,
1585 inode
= cmap_find(&subtable
->indices
[i
], hash
);
1587 /* No match, can stop immediately, but must fold in the bits
1588 * used in lookup so far. */
1589 fill_range_wc(subtable
, wc
, ofs
.end
);
1593 /* If we have narrowed down to a single rule already, check whether
1594 * that rule matches. Either way, we're done.
1596 * (Rare) hash collisions may cause us to miss the opportunity for this
1598 if (!cmap_node_next(inode
)) {
1599 ASSIGN_CONTAINER(rule
, inode
- i
, index_nodes
);
1600 if (miniflow_and_mask_matches_flow_wc(&rule
->flow
, &subtable
->mask
,
1606 ofs
.start
= ofs
.end
;
1608 ofs
.end
= FLOW_U32S
;
1609 /* Trie check for the final range. */
1610 if (check_tries(trie_ctx
, n_tries
, subtable
->trie_plen
, ofs
, flow
, wc
)) {
1611 fill_range_wc(subtable
, wc
, ofs
.start
);
1614 hash
= flow_hash_in_minimask_range(flow
, &subtable
->mask
, ofs
.start
,
1616 rule
= find_match(subtable
, flow
, hash
);
1617 if (!rule
&& subtable
->ports_mask_len
) {
1618 /* Ports are always part of the final range, if any.
1619 * No match was found for the ports. Use the ports trie to figure out
1620 * which ports bits to unwildcard. */
1622 ovs_be32 value
, plens
, mask
;
1624 mask
= MINIFLOW_GET_BE32(&subtable
->mask
.masks
, tp_src
);
1625 value
= ((OVS_FORCE ovs_be32
*)flow
)[TP_PORTS_OFS32
] & mask
;
1626 mbits
= trie_lookup_value(&subtable
->ports_trie
, &value
, &plens
, 32);
1628 ((OVS_FORCE ovs_be32
*)&wc
->masks
)[TP_PORTS_OFS32
] |=
1629 mask
& htonl(~0 << (32 - mbits
));
1631 /* Unwildcard all bits in the mask upto the ports, as they were used
1632 * to determine there is no match. */
1633 fill_range_wc(subtable
, wc
, TP_PORTS_OFS32
);
1637 /* Must unwildcard all the fields, as they were looked at. */
1638 flow_wildcards_fold_minimask(wc
, &subtable
->mask
);
1642 static struct cls_match
*
1643 find_equal(struct cls_subtable
*subtable
, const struct miniflow
*flow
,
1646 struct cls_match
*head
;
1648 CMAP_FOR_EACH_WITH_HASH (head
, cmap_node
, hash
, &subtable
->rules
) {
1649 if (miniflow_equal(&head
->flow
, flow
)) {
1657 * As the readers are operating concurrently with the modifications, a
1658 * concurrent reader may or may not see the new rule, depending on how
1659 * the concurrent events overlap with each other. This is no
1660 * different from the former locked behavior, but there the visibility
1661 * of the new rule only depended on the timing of the locking
1664 * The new rule is first added to the segment indices, so the readers
1665 * may find the rule in the indices before the rule is visible in the
1666 * subtables 'rules' map. This may result in us losing the
1667 * opportunity to quit lookups earlier, resulting in sub-optimal
1668 * wildcarding. This will be fixed by forthcoming revalidation always
1669 * scheduled after flow table changes.
1671 * Similar behavior may happen due to us removing the overlapping rule
1672 * (if any) from the indices only after the new rule has been added.
1674 * The subtable's max priority is updated only after the rule is
1675 * inserted, so the concurrent readers may not see the rule, as the
1676 * updated priority ordered subtable list will only be visible after
1677 * the subtable's max priority is updated.
1679 * Similarly, the classifier's partitions for new rules are updated by
1680 * the caller after this function, so the readers may keep skipping
1681 * the subtable until they see the updated partitions.
1683 static struct cls_match
*
1684 insert_rule(struct classifier
*cls
, struct cls_subtable
*subtable
,
1685 struct cls_rule
*new_rule
)
1686 OVS_REQUIRES(cls
->mutex
)
1688 struct cls_match
*old
= NULL
;
1689 struct cls_match
*new = cls_match_alloc(new_rule
);
1690 struct cls_match
*head
;
1692 uint32_t basis
= 0, hash
, ihash
[CLS_MAX_INDICES
];
1693 uint8_t prev_be32ofs
= 0;
1695 /* Add new node to segment indices. */
1696 for (i
= 0; i
< subtable
->n_indices
; i
++) {
1697 ihash
[i
] = minimatch_hash_range(&new_rule
->match
, prev_be32ofs
,
1698 subtable
->index_ofs
[i
], &basis
);
1699 cmap_insert(&subtable
->indices
[i
], &new->index_nodes
[i
], ihash
[i
]);
1700 prev_be32ofs
= subtable
->index_ofs
[i
];
1702 hash
= minimatch_hash_range(&new_rule
->match
, prev_be32ofs
, FLOW_U32S
,
1704 head
= find_equal(subtable
, &new_rule
->match
.flow
, hash
);
1706 cmap_insert(&subtable
->rules
, &new->cmap_node
, hash
);
1707 list_init(&new->list
);
1710 /* Scan the list for the insertion point that will keep the list in
1711 * order of decreasing priority. */
1712 struct cls_match
*rule
;
1714 FOR_EACH_RULE_IN_LIST (rule
, head
) {
1715 if (new->priority
>= rule
->priority
) {
1717 /* 'new' is the new highest-priority flow in the list. */
1718 cmap_replace(&subtable
->rules
, &rule
->cmap_node
,
1719 &new->cmap_node
, hash
);
1722 if (new->priority
== rule
->priority
) {
1723 list_replace(&new->list
, &rule
->list
);
1726 list_insert(&rule
->list
, &new->list
);
1732 /* Insert 'new' at the end of the list. */
1733 list_push_back(&head
->list
, &new->list
);
1738 subtable
->n_rules
++;
1740 /* Rule was added, not replaced. Update 'subtable's 'max_priority'
1741 * and 'max_count', if necessary. */
1742 if (subtable
->n_rules
== 1) {
1743 subtable
->max_priority
= new->priority
;
1744 subtable
->max_count
= 1;
1745 pvector_insert(&cls
->subtables
, subtable
, new->priority
);
1746 } else if (subtable
->max_priority
== new->priority
) {
1747 ++subtable
->max_count
;
1748 } else if (new->priority
> subtable
->max_priority
) {
1749 subtable
->max_priority
= new->priority
;
1750 subtable
->max_count
= 1;
1751 pvector_change_priority(&cls
->subtables
, subtable
, new->priority
);
1754 /* Remove old node from indices. */
1755 for (i
= 0; i
< subtable
->n_indices
; i
++) {
1756 cmap_remove(&subtable
->indices
[i
], &old
->index_nodes
[i
], ihash
[i
]);
1762 static struct cls_match
*
1763 next_rule_in_list__(struct cls_match
*rule
)
1764 OVS_NO_THREAD_SAFETY_ANALYSIS
1766 struct cls_match
*next
= OBJECT_CONTAINING(rule
->list
.next
, next
, list
);
1770 static struct cls_match
*
1771 next_rule_in_list(struct cls_match
*rule
)
1773 struct cls_match
*next
= next_rule_in_list__(rule
);
1774 return next
->priority
< rule
->priority
? next
: NULL
;
1777 /* A longest-prefix match tree. */
1779 uint32_t prefix
; /* Prefix bits for this node, MSB first. */
1780 uint8_t n_bits
; /* Never zero, except for the root node. */
1781 unsigned int n_rules
; /* Number of rules that have this prefix. */
1782 rcu_trie_ptr edges
[2]; /* Both NULL if leaf. */
1785 /* Max bits per node. Must fit in struct trie_node's 'prefix'.
1786 * Also tested with 16, 8, and 5 to stress the implementation. */
1787 #define TRIE_PREFIX_BITS 32
1789 /* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'.
1790 * Prefixes are in the network byte order, and the offset 0 corresponds to
1791 * the most significant bit of the first byte. The offset can be read as
1792 * "how many bits to skip from the start of the prefix starting at 'pr'". */
1794 raw_get_prefix(const ovs_be32 pr
[], unsigned int ofs
, unsigned int plen
)
1798 pr
+= ofs
/ 32; /* Where to start. */
1799 ofs
%= 32; /* How many bits to skip at 'pr'. */
1801 prefix
= ntohl(*pr
) << ofs
; /* Get the first 32 - ofs bits. */
1802 if (plen
> 32 - ofs
) { /* Need more than we have already? */
1803 prefix
|= ntohl(*++pr
) >> (32 - ofs
);
1805 /* Return with possible unwanted bits at the end. */
1809 /* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit
1810 * offset 'ofs'. Prefixes are in the network byte order, and the offset 0
1811 * corresponds to the most significant bit of the first byte. The offset can
1812 * be read as "how many bits to skip from the start of the prefix starting at
1815 trie_get_prefix(const ovs_be32 pr
[], unsigned int ofs
, unsigned int plen
)
1820 if (plen
> TRIE_PREFIX_BITS
) {
1821 plen
= TRIE_PREFIX_BITS
; /* Get at most TRIE_PREFIX_BITS. */
1823 /* Return with unwanted bits cleared. */
1824 return raw_get_prefix(pr
, ofs
, plen
) & ~0u << (32 - plen
);
1827 /* Return the number of equal bits in 'n_bits' of 'prefix's MSBs and a 'value'
1828 * starting at "MSB 0"-based offset 'ofs'. */
1830 prefix_equal_bits(uint32_t prefix
, unsigned int n_bits
, const ovs_be32 value
[],
1833 uint64_t diff
= prefix
^ raw_get_prefix(value
, ofs
, n_bits
);
1834 /* Set the bit after the relevant bits to limit the result. */
1835 return raw_clz64(diff
<< 32 | UINT64_C(1) << (63 - n_bits
));
1838 /* Return the number of equal bits in 'node' prefix and a 'prefix' of length
1839 * 'plen', starting at "MSB 0"-based offset 'ofs'. */
1841 trie_prefix_equal_bits(const struct trie_node
*node
, const ovs_be32 prefix
[],
1842 unsigned int ofs
, unsigned int plen
)
1844 return prefix_equal_bits(node
->prefix
, MIN(node
->n_bits
, plen
- ofs
),
1848 /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can
1849 * be greater than 31. */
1851 be_get_bit_at(const ovs_be32 value
[], unsigned int ofs
)
1853 return (((const uint8_t *)value
)[ofs
/ 8] >> (7 - ofs
% 8)) & 1u;
1856 /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' must
1857 * be between 0 and 31, inclusive. */
1859 get_bit_at(const uint32_t prefix
, unsigned int ofs
)
1861 return (prefix
>> (31 - ofs
)) & 1u;
1864 /* Create new branch. */
1865 static struct trie_node
*
1866 trie_branch_create(const ovs_be32
*prefix
, unsigned int ofs
, unsigned int plen
,
1867 unsigned int n_rules
)
1869 struct trie_node
*node
= xmalloc(sizeof *node
);
1871 node
->prefix
= trie_get_prefix(prefix
, ofs
, plen
);
1873 if (plen
<= TRIE_PREFIX_BITS
) {
1874 node
->n_bits
= plen
;
1875 ovsrcu_set_hidden(&node
->edges
[0], NULL
);
1876 ovsrcu_set_hidden(&node
->edges
[1], NULL
);
1877 node
->n_rules
= n_rules
;
1878 } else { /* Need intermediate nodes. */
1879 struct trie_node
*subnode
= trie_branch_create(prefix
,
1880 ofs
+ TRIE_PREFIX_BITS
,
1881 plen
- TRIE_PREFIX_BITS
,
1883 int bit
= get_bit_at(subnode
->prefix
, 0);
1884 node
->n_bits
= TRIE_PREFIX_BITS
;
1885 ovsrcu_set_hidden(&node
->edges
[bit
], subnode
);
1886 ovsrcu_set_hidden(&node
->edges
[!bit
], NULL
);
1893 trie_node_destroy(const struct trie_node
*node
)
1895 ovsrcu_postpone(free
, CONST_CAST(struct trie_node
*, node
));
1898 /* Copy a trie node for modification and postpone delete the old one. */
1899 static struct trie_node
*
1900 trie_node_rcu_realloc(const struct trie_node
*node
)
1902 struct trie_node
*new_node
= xmalloc(sizeof *node
);
1905 trie_node_destroy(node
);
1910 /* May only be called while holding the classifier mutex. */
1912 trie_destroy(rcu_trie_ptr
*trie
)
1914 struct trie_node
*node
= ovsrcu_get_protected(struct trie_node
*, trie
);
1917 ovsrcu_set_hidden(trie
, NULL
);
1918 trie_destroy(&node
->edges
[0]);
1919 trie_destroy(&node
->edges
[1]);
1920 trie_node_destroy(node
);
1925 trie_is_leaf(const struct trie_node
*trie
)
1928 return !ovsrcu_get(struct trie_node
*, &trie
->edges
[0])
1929 && !ovsrcu_get(struct trie_node
*, &trie
->edges
[1]);
1933 mask_set_prefix_bits(struct flow_wildcards
*wc
, uint8_t be32ofs
,
1934 unsigned int n_bits
)
1936 ovs_be32
*mask
= &((ovs_be32
*)&wc
->masks
)[be32ofs
];
1939 for (i
= 0; i
< n_bits
/ 32; i
++) {
1940 mask
[i
] = OVS_BE32_MAX
;
1943 mask
[i
] |= htonl(~0u << (32 - n_bits
% 32));
1948 mask_prefix_bits_set(const struct flow_wildcards
*wc
, uint8_t be32ofs
,
1949 unsigned int n_bits
)
1951 ovs_be32
*mask
= &((ovs_be32
*)&wc
->masks
)[be32ofs
];
1953 ovs_be32 zeroes
= 0;
1955 for (i
= 0; i
< n_bits
/ 32; i
++) {
1959 zeroes
|= ~mask
[i
] & htonl(~0u << (32 - n_bits
% 32));
1962 return !zeroes
; /* All 'n_bits' bits set. */
1965 static rcu_trie_ptr
*
1966 trie_next_edge(struct trie_node
*node
, const ovs_be32 value
[],
1969 return node
->edges
+ be_get_bit_at(value
, ofs
);
1972 static const struct trie_node
*
1973 trie_next_node(const struct trie_node
*node
, const ovs_be32 value
[],
1976 return ovsrcu_get(struct trie_node
*,
1977 &node
->edges
[be_get_bit_at(value
, ofs
)]);
1980 /* Set the bit at ("MSB 0"-based) offset 'ofs'. 'ofs' can be greater than 31.
1983 be_set_bit_at(ovs_be32 value
[], unsigned int ofs
)
1985 ((uint8_t *)value
)[ofs
/ 8] |= 1u << (7 - ofs
% 8);
1988 /* Returns the number of bits in the prefix mask necessary to determine a
1989 * mismatch, in case there are longer prefixes in the tree below the one that
1991 * '*plens' will have a bit set for each prefix length that may have matching
1992 * rules. The caller is responsible for clearing the '*plens' prior to
1996 trie_lookup_value(const rcu_trie_ptr
*trie
, const ovs_be32 value
[],
1997 ovs_be32 plens
[], unsigned int n_bits
)
1999 const struct trie_node
*prev
= NULL
;
2000 const struct trie_node
*node
= ovsrcu_get(struct trie_node
*, trie
);
2001 unsigned int match_len
= 0; /* Number of matching bits. */
2003 for (; node
; prev
= node
, node
= trie_next_node(node
, value
, match_len
)) {
2004 unsigned int eqbits
;
2005 /* Check if this edge can be followed. */
2006 eqbits
= prefix_equal_bits(node
->prefix
, node
->n_bits
, value
,
2008 match_len
+= eqbits
;
2009 if (eqbits
< node
->n_bits
) { /* Mismatch, nothing more to be found. */
2010 /* Bit at offset 'match_len' differed. */
2011 return match_len
+ 1; /* Includes the first mismatching bit. */
2013 /* Full match, check if rules exist at this prefix length. */
2014 if (node
->n_rules
> 0) {
2015 be_set_bit_at(plens
, match_len
- 1);
2017 if (match_len
>= n_bits
) {
2018 return n_bits
; /* Full prefix. */
2021 /* node == NULL. Full match so far, but we tried to follow an
2022 * non-existing branch. Need to exclude the other branch if it exists
2023 * (it does not if we were called on an empty trie or 'prev' is a leaf
2025 return !prev
|| trie_is_leaf(prev
) ? match_len
: match_len
+ 1;
2029 trie_lookup(const struct cls_trie
*trie
, const struct flow
*flow
,
2030 union mf_value
*plens
)
2032 const struct mf_field
*mf
= trie
->field
;
2034 /* Check that current flow matches the prerequisites for the trie
2035 * field. Some match fields are used for multiple purposes, so we
2036 * must check that the trie is relevant for this flow. */
2037 if (mf_are_prereqs_ok(mf
, flow
)) {
2038 return trie_lookup_value(&trie
->root
,
2039 &((ovs_be32
*)flow
)[mf
->flow_be32ofs
],
2040 &plens
->be32
, mf
->n_bits
);
2042 memset(plens
, 0xff, sizeof *plens
); /* All prefixes, no skipping. */
2043 return 0; /* Value not used in this case. */
2046 /* Returns the length of a prefix match mask for the field 'mf' in 'minimask'.
2047 * Returns the u32 offset to the miniflow data in '*miniflow_index', if
2048 * 'miniflow_index' is not NULL. */
2050 minimask_get_prefix_len(const struct minimask
*minimask
,
2051 const struct mf_field
*mf
)
2053 unsigned int n_bits
= 0, mask_tz
= 0; /* Non-zero when end of mask seen. */
2054 uint8_t u32_ofs
= mf
->flow_be32ofs
;
2055 uint8_t u32_end
= u32_ofs
+ mf
->n_bytes
/ 4;
2057 for (; u32_ofs
< u32_end
; ++u32_ofs
) {
2059 mask
= ntohl((OVS_FORCE ovs_be32
)minimask_get(minimask
, u32_ofs
));
2061 /* Validate mask, count the mask length. */
2064 return 0; /* No bits allowed after mask ended. */
2067 if (~mask
& (~mask
+ 1)) {
2068 return 0; /* Mask not contiguous. */
2070 mask_tz
= ctz32(mask
);
2071 n_bits
+= 32 - mask_tz
;
2079 * This is called only when mask prefix is known to be CIDR and non-zero.
2080 * Relies on the fact that the flow and mask have the same map, and since
2081 * the mask is CIDR, the storage for the flow field exists even if it
2082 * happened to be zeros.
2084 static const ovs_be32
*
2085 minimatch_get_prefix(const struct minimatch
*match
, const struct mf_field
*mf
)
2087 return miniflow_get_be32_values(&match
->flow
) +
2088 count_1bits(match
->flow
.map
& ((UINT64_C(1) << mf
->flow_be32ofs
) - 1));
2091 /* Insert rule in to the prefix tree.
2092 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
2095 trie_insert(struct cls_trie
*trie
, const struct cls_rule
*rule
, int mlen
)
2097 trie_insert_prefix(&trie
->root
,
2098 minimatch_get_prefix(&rule
->match
, trie
->field
), mlen
);
2102 trie_insert_prefix(rcu_trie_ptr
*edge
, const ovs_be32
*prefix
, int mlen
)
2104 struct trie_node
*node
;
2107 /* Walk the tree. */
2108 for (; (node
= ovsrcu_get_protected(struct trie_node
*, edge
));
2109 edge
= trie_next_edge(node
, prefix
, ofs
)) {
2110 unsigned int eqbits
= trie_prefix_equal_bits(node
, prefix
, ofs
, mlen
);
2112 if (eqbits
< node
->n_bits
) {
2113 /* Mismatch, new node needs to be inserted above. */
2114 int old_branch
= get_bit_at(node
->prefix
, eqbits
);
2115 struct trie_node
*new_parent
;
2117 new_parent
= trie_branch_create(prefix
, ofs
- eqbits
, eqbits
,
2118 ofs
== mlen
? 1 : 0);
2119 /* Copy the node to modify it. */
2120 node
= trie_node_rcu_realloc(node
);
2121 /* Adjust the new node for its new position in the tree. */
2122 node
->prefix
<<= eqbits
;
2123 node
->n_bits
-= eqbits
;
2124 ovsrcu_set_hidden(&new_parent
->edges
[old_branch
], node
);
2126 /* Check if need a new branch for the new rule. */
2128 ovsrcu_set_hidden(&new_parent
->edges
[!old_branch
],
2129 trie_branch_create(prefix
, ofs
, mlen
- ofs
,
2132 ovsrcu_set(edge
, new_parent
); /* Publish changes. */
2135 /* Full match so far. */
2138 /* Full match at the current node, rule needs to be added here. */
2143 /* Must insert a new tree branch for the new rule. */
2144 ovsrcu_set(edge
, trie_branch_create(prefix
, ofs
, mlen
- ofs
, 1));
2147 /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
2150 trie_remove(struct cls_trie
*trie
, const struct cls_rule
*rule
, int mlen
)
2152 trie_remove_prefix(&trie
->root
,
2153 minimatch_get_prefix(&rule
->match
, trie
->field
), mlen
);
2156 /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
2159 trie_remove_prefix(rcu_trie_ptr
*root
, const ovs_be32
*prefix
, int mlen
)
2161 struct trie_node
*node
;
2162 rcu_trie_ptr
*edges
[sizeof(union mf_value
) * 8];
2163 int depth
= 0, ofs
= 0;
2165 /* Walk the tree. */
2166 for (edges
[0] = root
;
2167 (node
= ovsrcu_get_protected(struct trie_node
*, edges
[depth
]));
2168 edges
[++depth
] = trie_next_edge(node
, prefix
, ofs
)) {
2169 unsigned int eqbits
= trie_prefix_equal_bits(node
, prefix
, ofs
, mlen
);
2171 if (eqbits
< node
->n_bits
) {
2172 /* Mismatch, nothing to be removed. This should never happen, as
2173 * only rules in the classifier are ever removed. */
2174 break; /* Log a warning. */
2176 /* Full match so far. */
2180 /* Full prefix match at the current node, remove rule here. */
2181 if (!node
->n_rules
) {
2182 break; /* Log a warning. */
2186 /* Check if can prune the tree. */
2187 while (!node
->n_rules
) {
2188 struct trie_node
*next
,
2189 *edge0
= ovsrcu_get_protected(struct trie_node
*,
2191 *edge1
= ovsrcu_get_protected(struct trie_node
*,
2194 if (edge0
&& edge1
) {
2195 break; /* A branching point, cannot prune. */
2198 /* Else have at most one child node, remove this node. */
2199 next
= edge0
? edge0
: edge1
;
2202 if (node
->n_bits
+ next
->n_bits
> TRIE_PREFIX_BITS
) {
2203 break; /* Cannot combine. */
2205 next
= trie_node_rcu_realloc(next
); /* Modify. */
2207 /* Combine node with next. */
2208 next
->prefix
= node
->prefix
| next
->prefix
>> node
->n_bits
;
2209 next
->n_bits
+= node
->n_bits
;
2211 /* Update the parent's edge. */
2212 ovsrcu_set(edges
[depth
], next
); /* Publish changes. */
2213 trie_node_destroy(node
);
2215 if (next
|| !depth
) {
2216 /* Branch not pruned or at root, nothing more to do. */
2219 node
= ovsrcu_get_protected(struct trie_node
*,
2225 /* Cannot go deeper. This should never happen, since only rules
2226 * that actually exist in the classifier are ever removed. */
2227 VLOG_WARN("Trying to remove non-existing rule from a prefix trie.");