/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 #include "classifier.h"
20 #include <netinet/in.h>
21 #include "byte-order.h"
22 #include "dynamic-string.h"
34 VLOG_DEFINE_THIS_MODULE(classifier
);
38 /* Ports trie depends on both ports sharing the same ovs_be32. */
39 #define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4)
40 BUILD_ASSERT_DECL(TP_PORTS_OFS32
== offsetof(struct flow
, tp_dst
) / 4);
42 /* A set of rules that all have the same fields wildcarded. */
44 /* The fields are only used by writers and iterators. */
45 struct cmap_node cmap_node
; /* Within struct classifier 'subtables_map'. */
47 /* The fields are only used by writers. */
48 int n_rules OVS_GUARDED
; /* Number of rules, including
50 unsigned int max_priority OVS_GUARDED
; /* Max priority of any rule in
52 unsigned int max_count OVS_GUARDED
; /* Count of max_priority rules. */
54 /* These fields are accessed by readers who care about wildcarding. */
55 tag_type tag
; /* Tag generated from mask for partitioning (const). */
56 uint8_t n_indices
; /* How many indices to use (const). */
57 uint8_t index_ofs
[CLS_MAX_INDICES
]; /* u32 segment boundaries (const). */
58 unsigned int trie_plen
[CLS_MAX_TRIES
]; /* Trie prefix length in 'mask'
59 * (runtime configurable). */
60 int ports_mask_len
; /* (const) */
61 struct cmap indices
[CLS_MAX_INDICES
]; /* Staged lookup indices. */
62 rcu_trie_ptr ports_trie
; /* NULL if none. */
64 /* These fields are accessed by all readers. */
65 struct cmap rules
; /* Contains "struct cls_rule"s. */
66 struct minimask mask
; /* Wildcards for fields (const). */
67 /* 'mask' must be the last field. */
70 /* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata
71 * field) with tags for the "cls_subtable"s that contain rules that match that
73 struct cls_partition
{
74 struct cmap_node cmap_node
; /* In struct classifier's 'partitions' map. */
75 ovs_be64 metadata
; /* metadata value for this partition. */
76 tag_type tags
; /* OR of each flow's cls_subtable tag. */
77 struct tag_tracker tracker OVS_GUARDED
; /* Tracks the bits in 'tags'. */
80 /* Internal representation of a rule in a "struct cls_subtable". */
82 /* Accessed only by writers and iterators. */
83 struct list list OVS_GUARDED
; /* List of identical, lower-priority rules. */
85 /* Accessed only by writers. */
86 struct cls_partition
*partition OVS_GUARDED
;
88 /* Accessed by readers interested in wildcarding. */
89 unsigned int priority
; /* Larger numbers are higher priorities. */
90 struct cmap_node index_nodes
[CLS_MAX_INDICES
]; /* Within subtable's
92 /* Accessed by all readers. */
93 struct cmap_node cmap_node
; /* Within struct cls_subtable 'rules'. */
94 struct cls_rule
*cls_rule
;
95 struct miniflow flow
; /* Matching rule. Mask is in the subtable. */
96 /* 'flow' must be the last field. */
99 static struct cls_match
*
100 cls_match_alloc(struct cls_rule
*rule
)
102 int count
= count_1bits(rule
->match
.flow
.map
);
104 struct cls_match
*cls_match
105 = xmalloc(sizeof *cls_match
- sizeof cls_match
->flow
.inline_values
106 + MINIFLOW_VALUES_SIZE(count
));
108 cls_match
->cls_rule
= rule
;
109 miniflow_clone_inline(&cls_match
->flow
, &rule
->match
.flow
, count
);
110 cls_match
->priority
= rule
->priority
;
111 rule
->cls_match
= cls_match
;
116 static struct cls_subtable
*find_subtable(const struct classifier
*cls
,
117 const struct minimask
*)
118 OVS_REQUIRES(cls
->mutex
);
119 static struct cls_subtable
*insert_subtable(struct classifier
*cls
,
120 const struct minimask
*)
121 OVS_REQUIRES(cls
->mutex
);
122 static void destroy_subtable(struct classifier
*cls
, struct cls_subtable
*)
123 OVS_REQUIRES(cls
->mutex
);
124 static struct cls_match
*insert_rule(struct classifier
*cls
,
125 struct cls_subtable
*, struct cls_rule
*)
126 OVS_REQUIRES(cls
->mutex
);
128 static struct cls_match
*find_match_wc(const struct cls_subtable
*,
129 const struct flow
*, struct trie_ctx
*,
130 unsigned int n_tries
,
131 struct flow_wildcards
*);
132 static struct cls_match
*find_equal(struct cls_subtable
*,
133 const struct miniflow
*, uint32_t hash
);
/* Iterates RULE over HEAD and all of the cls_rules on HEAD->list.
 * Classifier's mutex must be held while iterating, as the list is
 * protected by it. */
#define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \
    for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE))
#define FOR_EACH_RULE_IN_LIST_SAFE(RULE, NEXT, HEAD) \
    for ((RULE) = (HEAD); \
         (RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true); \
         (RULE) = (NEXT))
145 static struct cls_match
*next_rule_in_list__(struct cls_match
*);
146 static struct cls_match
*next_rule_in_list(struct cls_match
*);
148 static unsigned int minimask_get_prefix_len(const struct minimask
*,
149 const struct mf_field
*);
150 static void trie_init(struct classifier
*cls
, int trie_idx
,
151 const struct mf_field
*)
152 OVS_REQUIRES(cls
->mutex
);
153 static unsigned int trie_lookup(const struct cls_trie
*, const struct flow
*,
154 union mf_value
*plens
);
155 static unsigned int trie_lookup_value(const rcu_trie_ptr
*,
156 const ovs_be32 value
[], ovs_be32 plens
[],
157 unsigned int value_bits
);
158 static void trie_destroy(rcu_trie_ptr
*);
159 static void trie_insert(struct cls_trie
*, const struct cls_rule
*, int mlen
);
160 static void trie_insert_prefix(rcu_trie_ptr
*, const ovs_be32
*prefix
,
162 static void trie_remove(struct cls_trie
*, const struct cls_rule
*, int mlen
);
163 static void trie_remove_prefix(rcu_trie_ptr
*, const ovs_be32
*prefix
,
165 static void mask_set_prefix_bits(struct flow_wildcards
*, uint8_t be32ofs
,
166 unsigned int n_bits
);
167 static bool mask_prefix_bits_set(const struct flow_wildcards
*,
168 uint8_t be32ofs
, unsigned int n_bits
);
170 /* flow/miniflow/minimask/minimatch utilities.
171 * These are only used by the classifier, so place them here to allow
172 * for better optimization. */
174 static inline uint64_t
175 miniflow_get_map_in_range(const struct miniflow
*miniflow
,
176 uint8_t start
, uint8_t end
, unsigned int *offset
)
178 uint64_t map
= miniflow
->map
;
182 uint64_t msk
= (UINT64_C(1) << start
) - 1; /* 'start' LSBs set */
183 *offset
= count_1bits(map
& msk
);
186 if (end
< FLOW_U32S
) {
187 uint64_t msk
= (UINT64_C(1) << end
) - 1; /* 'end' LSBs set */
193 /* Returns a hash value for the bits of 'flow' where there are 1-bits in
194 * 'mask', given 'basis'.
196 * The hash values returned by this function are the same as those returned by
197 * miniflow_hash_in_minimask(), only the form of the arguments differ. */
198 static inline uint32_t
199 flow_hash_in_minimask(const struct flow
*flow
, const struct minimask
*mask
,
202 const uint32_t *mask_values
= miniflow_get_u32_values(&mask
->masks
);
203 const uint32_t *flow_u32
= (const uint32_t *)flow
;
204 const uint32_t *p
= mask_values
;
209 for (map
= mask
->masks
.map
; map
; map
= zero_rightmost_1bit(map
)) {
210 hash
= hash_add(hash
, flow_u32
[raw_ctz(map
)] & *p
++);
213 return hash_finish(hash
, (p
- mask_values
) * 4);
216 /* Returns a hash value for the bits of 'flow' where there are 1-bits in
217 * 'mask', given 'basis'.
219 * The hash values returned by this function are the same as those returned by
220 * flow_hash_in_minimask(), only the form of the arguments differ. */
221 static inline uint32_t
222 miniflow_hash_in_minimask(const struct miniflow
*flow
,
223 const struct minimask
*mask
, uint32_t basis
)
225 const uint32_t *mask_values
= miniflow_get_u32_values(&mask
->masks
);
226 const uint32_t *p
= mask_values
;
227 uint32_t hash
= basis
;
230 MINIFLOW_FOR_EACH_IN_MAP(flow_u32
, flow
, mask
->masks
.map
) {
231 hash
= hash_add(hash
, flow_u32
& *p
++);
234 return hash_finish(hash
, (p
- mask_values
) * 4);
237 /* Returns a hash value for the bits of range [start, end) in 'flow',
238 * where there are 1-bits in 'mask', given 'hash'.
240 * The hash values returned by this function are the same as those returned by
241 * minimatch_hash_range(), only the form of the arguments differ. */
242 static inline uint32_t
243 flow_hash_in_minimask_range(const struct flow
*flow
,
244 const struct minimask
*mask
,
245 uint8_t start
, uint8_t end
, uint32_t *basis
)
247 const uint32_t *mask_values
= miniflow_get_u32_values(&mask
->masks
);
248 const uint32_t *flow_u32
= (const uint32_t *)flow
;
250 uint64_t map
= miniflow_get_map_in_range(&mask
->masks
, start
, end
,
252 const uint32_t *p
= mask_values
+ offset
;
253 uint32_t hash
= *basis
;
255 for (; map
; map
= zero_rightmost_1bit(map
)) {
256 hash
= hash_add(hash
, flow_u32
[raw_ctz(map
)] & *p
++);
259 *basis
= hash
; /* Allow continuation from the unfinished value. */
260 return hash_finish(hash
, (p
- mask_values
) * 4);
263 /* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask. */
265 flow_wildcards_fold_minimask(struct flow_wildcards
*wc
,
266 const struct minimask
*mask
)
268 flow_union_with_miniflow(&wc
->masks
, &mask
->masks
);
271 /* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask
272 * in range [start, end). */
274 flow_wildcards_fold_minimask_range(struct flow_wildcards
*wc
,
275 const struct minimask
*mask
,
276 uint8_t start
, uint8_t end
)
278 uint32_t *dst_u32
= (uint32_t *)&wc
->masks
;
280 uint64_t map
= miniflow_get_map_in_range(&mask
->masks
, start
, end
,
282 const uint32_t *p
= miniflow_get_u32_values(&mask
->masks
) + offset
;
284 for (; map
; map
= zero_rightmost_1bit(map
)) {
285 dst_u32
[raw_ctz(map
)] |= *p
++;
289 /* Returns a hash value for 'flow', given 'basis'. */
290 static inline uint32_t
291 miniflow_hash(const struct miniflow
*flow
, uint32_t basis
)
293 const uint32_t *values
= miniflow_get_u32_values(flow
);
294 const uint32_t *p
= values
;
295 uint32_t hash
= basis
;
296 uint64_t hash_map
= 0;
299 for (map
= flow
->map
; map
; map
= zero_rightmost_1bit(map
)) {
301 hash
= hash_add(hash
, *p
);
302 hash_map
|= rightmost_1bit(map
);
306 hash
= hash_add(hash
, hash_map
);
307 hash
= hash_add(hash
, hash_map
>> 32);
309 return hash_finish(hash
, p
- values
);
312 /* Returns a hash value for 'mask', given 'basis'. */
313 static inline uint32_t
314 minimask_hash(const struct minimask
*mask
, uint32_t basis
)
316 return miniflow_hash(&mask
->masks
, basis
);
319 /* Returns a hash value for 'match', given 'basis'. */
320 static inline uint32_t
321 minimatch_hash(const struct minimatch
*match
, uint32_t basis
)
323 return miniflow_hash(&match
->flow
, minimask_hash(&match
->mask
, basis
));
326 /* Returns a hash value for the bits of range [start, end) in 'minimatch',
329 * The hash values returned by this function are the same as those returned by
330 * flow_hash_in_minimask_range(), only the form of the arguments differ. */
331 static inline uint32_t
332 minimatch_hash_range(const struct minimatch
*match
, uint8_t start
, uint8_t end
,
336 const uint32_t *p
, *q
;
337 uint32_t hash
= *basis
;
340 n
= count_1bits(miniflow_get_map_in_range(&match
->mask
.masks
, start
, end
,
342 q
= miniflow_get_u32_values(&match
->mask
.masks
) + offset
;
343 p
= miniflow_get_u32_values(&match
->flow
) + offset
;
345 for (i
= 0; i
< n
; i
++) {
346 hash
= hash_add(hash
, p
[i
] & q
[i
]);
348 *basis
= hash
; /* Allow continuation from the unfinished value. */
349 return hash_finish(hash
, (offset
+ n
) * 4);
355 /* Initializes 'rule' to match packets specified by 'match' at the given
356 * 'priority'. 'match' must satisfy the invariant described in the comment at
357 * the definition of struct match.
359 * The caller must eventually destroy 'rule' with cls_rule_destroy().
361 * (OpenFlow uses priorities between 0 and UINT16_MAX, inclusive, but
362 * internally Open vSwitch supports a wider range.) */
364 cls_rule_init(struct cls_rule
*rule
,
365 const struct match
*match
, unsigned int priority
)
367 minimatch_init(&rule
->match
, match
);
368 rule
->priority
= priority
;
369 rule
->cls_match
= NULL
;
372 /* Same as cls_rule_init() for initialization from a "struct minimatch". */
374 cls_rule_init_from_minimatch(struct cls_rule
*rule
,
375 const struct minimatch
*match
,
376 unsigned int priority
)
378 minimatch_clone(&rule
->match
, match
);
379 rule
->priority
= priority
;
380 rule
->cls_match
= NULL
;
383 /* Initializes 'dst' as a copy of 'src'.
385 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
387 cls_rule_clone(struct cls_rule
*dst
, const struct cls_rule
*src
)
389 minimatch_clone(&dst
->match
, &src
->match
);
390 dst
->priority
= src
->priority
;
391 dst
->cls_match
= NULL
;
394 /* Initializes 'dst' with the data in 'src', destroying 'src'.
396 * The caller must eventually destroy 'dst' with cls_rule_destroy(). */
398 cls_rule_move(struct cls_rule
*dst
, struct cls_rule
*src
)
400 minimatch_move(&dst
->match
, &src
->match
);
401 dst
->priority
= src
->priority
;
402 dst
->cls_match
= NULL
;
405 /* Frees memory referenced by 'rule'. Doesn't free 'rule' itself (it's
406 * normally embedded into a larger structure).
408 * ('rule' must not currently be in a classifier.) */
410 cls_rule_destroy(struct cls_rule
*rule
)
412 ovs_assert(!rule
->cls_match
);
413 minimatch_destroy(&rule
->match
);
416 /* Returns true if 'a' and 'b' match the same packets at the same priority,
417 * false if they differ in some way. */
419 cls_rule_equal(const struct cls_rule
*a
, const struct cls_rule
*b
)
421 return a
->priority
== b
->priority
&& minimatch_equal(&a
->match
, &b
->match
);
424 /* Returns a hash value for 'rule', folding in 'basis'. */
426 cls_rule_hash(const struct cls_rule
*rule
, uint32_t basis
)
428 return minimatch_hash(&rule
->match
, hash_int(rule
->priority
, basis
));
431 /* Appends a string describing 'rule' to 's'. */
433 cls_rule_format(const struct cls_rule
*rule
, struct ds
*s
)
435 minimatch_format(&rule
->match
, s
, rule
->priority
);
438 /* Returns true if 'rule' matches every packet, false otherwise. */
440 cls_rule_is_catchall(const struct cls_rule
*rule
)
442 return minimask_is_catchall(&rule
->match
.mask
);
445 /* Initializes 'cls' as a classifier that initially contains no classification
448 classifier_init(struct classifier
*cls
, const uint8_t *flow_segments
)
449 OVS_EXCLUDED(cls
->mutex
)
451 ovs_mutex_init(&cls
->mutex
);
452 ovs_mutex_lock(&cls
->mutex
);
454 cmap_init(&cls
->subtables_map
);
455 pvector_init(&cls
->subtables
);
456 cmap_init(&cls
->partitions
);
457 cls
->n_flow_segments
= 0;
459 while (cls
->n_flow_segments
< CLS_MAX_INDICES
460 && *flow_segments
< FLOW_U32S
) {
461 cls
->flow_segments
[cls
->n_flow_segments
++] = *flow_segments
++;
465 for (int i
= 0; i
< CLS_MAX_TRIES
; i
++) {
466 trie_init(cls
, i
, NULL
);
468 ovs_mutex_unlock(&cls
->mutex
);
471 /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the
472 * caller's responsibility.
473 * May only be called after all the readers have been terminated. */
475 classifier_destroy(struct classifier
*cls
)
476 OVS_EXCLUDED(cls
->mutex
)
479 struct cls_partition
*partition
;
480 struct cls_subtable
*subtable
;
483 ovs_mutex_lock(&cls
->mutex
);
484 for (i
= 0; i
< cls
->n_tries
; i
++) {
485 trie_destroy(&cls
->tries
[i
].root
);
488 CMAP_FOR_EACH (subtable
, cmap_node
, &cls
->subtables_map
) {
489 destroy_subtable(cls
, subtable
);
491 cmap_destroy(&cls
->subtables_map
);
493 CMAP_FOR_EACH (partition
, cmap_node
, &cls
->partitions
) {
494 ovsrcu_postpone(free
, partition
);
496 cmap_destroy(&cls
->partitions
);
498 pvector_destroy(&cls
->subtables
);
499 ovs_mutex_unlock(&cls
->mutex
);
500 ovs_mutex_destroy(&cls
->mutex
);
504 /* Set the fields for which prefix lookup should be performed. */
506 classifier_set_prefix_fields(struct classifier
*cls
,
507 const enum mf_field_id
*trie_fields
,
508 unsigned int n_fields
)
509 OVS_EXCLUDED(cls
->mutex
)
511 const struct mf_field
* new_fields
[CLS_MAX_TRIES
];
512 struct mf_bitmap fields
= MF_BITMAP_INITIALIZER
;
514 bool changed
= false;
516 ovs_mutex_lock(&cls
->mutex
);
517 for (i
= 0; i
< n_fields
&& n_tries
< CLS_MAX_TRIES
; i
++) {
518 const struct mf_field
*field
= mf_from_id(trie_fields
[i
]);
519 if (field
->flow_be32ofs
< 0 || field
->n_bits
% 32) {
520 /* Incompatible field. This is the only place where we
521 * enforce these requirements, but the rest of the trie code
522 * depends on the flow_be32ofs to be non-negative and the
523 * field length to be a multiple of 32 bits. */
527 if (bitmap_is_set(fields
.bm
, trie_fields
[i
])) {
528 /* Duplicate field, there is no need to build more than
529 * one index for any one field. */
532 bitmap_set1(fields
.bm
, trie_fields
[i
]);
534 new_fields
[n_tries
] = NULL
;
535 if (n_tries
>= cls
->n_tries
|| field
!= cls
->tries
[n_tries
].field
) {
536 new_fields
[n_tries
] = field
;
542 if (changed
|| n_tries
< cls
->n_tries
) {
543 struct cls_subtable
*subtable
;
545 /* Trie configuration needs to change. Disable trie lookups
546 * for the tries that are changing and wait all the current readers
547 * with the old configuration to be done. */
549 CMAP_FOR_EACH (subtable
, cmap_node
, &cls
->subtables_map
) {
550 for (i
= 0; i
< cls
->n_tries
; i
++) {
551 if ((i
< n_tries
&& new_fields
[i
]) || i
>= n_tries
) {
552 if (subtable
->trie_plen
[i
]) {
553 subtable
->trie_plen
[i
] = 0;
559 /* Synchronize if any readers were using tries. The readers may
560 * temporarily function without the trie lookup based optimizations. */
562 /* ovsrcu_synchronize() functions as a memory barrier, so it does
563 * not matter that subtable->trie_plen is not atomic. */
564 ovsrcu_synchronize();
567 /* Now set up the tries. */
568 for (i
= 0; i
< n_tries
; i
++) {
570 trie_init(cls
, i
, new_fields
[i
]);
573 /* Destroy the rest, if any. */
574 for (; i
< cls
->n_tries
; i
++) {
575 trie_init(cls
, i
, NULL
);
578 cls
->n_tries
= n_tries
;
579 ovs_mutex_unlock(&cls
->mutex
);
583 ovs_mutex_unlock(&cls
->mutex
);
584 return false; /* No change. */
588 trie_init(struct classifier
*cls
, int trie_idx
, const struct mf_field
*field
)
589 OVS_REQUIRES(cls
->mutex
)
591 struct cls_trie
*trie
= &cls
->tries
[trie_idx
];
592 struct cls_subtable
*subtable
;
594 if (trie_idx
< cls
->n_tries
) {
595 trie_destroy(&trie
->root
);
597 ovsrcu_set_hidden(&trie
->root
, NULL
);
601 /* Add existing rules to the new trie. */
602 CMAP_FOR_EACH (subtable
, cmap_node
, &cls
->subtables_map
) {
605 plen
= field
? minimask_get_prefix_len(&subtable
->mask
, field
) : 0;
607 struct cls_match
*head
;
609 CMAP_FOR_EACH (head
, cmap_node
, &subtable
->rules
) {
610 struct cls_match
*match
;
612 FOR_EACH_RULE_IN_LIST (match
, head
) {
613 trie_insert(trie
, match
->cls_rule
, plen
);
617 /* Initialize subtable's prefix length on this field. This will
618 * allow readers to use the trie. */
619 atomic_thread_fence(memory_order_release
);
620 subtable
->trie_plen
[trie_idx
] = plen
;
624 /* Returns true if 'cls' contains no classification rules, false otherwise.
625 * Checking the cmap requires no locking. */
627 classifier_is_empty(const struct classifier
*cls
)
629 return cmap_is_empty(&cls
->subtables_map
);
632 /* Returns the number of rules in 'cls'. */
634 classifier_count(const struct classifier
*cls
)
635 OVS_NO_THREAD_SAFETY_ANALYSIS
637 /* n_rules is an int, so in the presence of concurrent writers this will
638 * return either the old or a new value. */
643 hash_metadata(ovs_be64 metadata_
)
645 uint64_t metadata
= (OVS_FORCE
uint64_t) metadata_
;
646 return hash_uint64(metadata
);
649 static struct cls_partition
*
650 find_partition(const struct classifier
*cls
, ovs_be64 metadata
, uint32_t hash
)
652 struct cls_partition
*partition
;
654 CMAP_FOR_EACH_WITH_HASH (partition
, cmap_node
, hash
, &cls
->partitions
) {
655 if (partition
->metadata
== metadata
) {
663 static struct cls_partition
*
664 create_partition(struct classifier
*cls
, struct cls_subtable
*subtable
,
666 OVS_REQUIRES(cls
->mutex
)
668 uint32_t hash
= hash_metadata(metadata
);
669 struct cls_partition
*partition
= find_partition(cls
, metadata
, hash
);
671 partition
= xmalloc(sizeof *partition
);
672 partition
->metadata
= metadata
;
674 tag_tracker_init(&partition
->tracker
);
675 cmap_insert(&cls
->partitions
, &partition
->cmap_node
, hash
);
677 tag_tracker_add(&partition
->tracker
, &partition
->tags
, subtable
->tag
);
681 static inline ovs_be32
minimatch_get_ports(const struct minimatch
*match
)
683 /* Could optimize to use the same map if needed for fast path. */
684 return MINIFLOW_GET_BE32(&match
->flow
, tp_src
)
685 & MINIFLOW_GET_BE32(&match
->mask
.masks
, tp_src
);
688 /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller
689 * must not modify or free it.
691 * If 'cls' already contains an identical rule (including wildcards, values of
692 * fixed fields, and priority), replaces the old rule by 'rule' and returns the
693 * rule that was replaced. The caller takes ownership of the returned rule and
694 * is thus responsible for destroying it with cls_rule_destroy(), freeing the
695 * memory block in which it resides, etc., as necessary.
697 * Returns NULL if 'cls' does not contain a rule with an identical key, after
698 * inserting the new rule. In this case, no rules are displaced by the new
699 * rule, even rules that cannot have any effect because the new rule matches a
700 * superset of their flows and has higher priority. */
702 classifier_replace(struct classifier
*cls
, struct cls_rule
*rule
)
703 OVS_EXCLUDED(cls
->mutex
)
705 struct cls_match
*old_rule
;
706 struct cls_subtable
*subtable
;
707 struct cls_rule
*old_cls_rule
= NULL
;
709 ovs_mutex_lock(&cls
->mutex
);
710 subtable
= find_subtable(cls
, &rule
->match
.mask
);
712 subtable
= insert_subtable(cls
, &rule
->match
.mask
);
715 old_rule
= insert_rule(cls
, subtable
, rule
);
719 rule
->cls_match
->partition
= NULL
;
720 if (minimask_get_metadata_mask(&rule
->match
.mask
) == OVS_BE64_MAX
) {
721 ovs_be64 metadata
= miniflow_get_metadata(&rule
->match
.flow
);
722 rule
->cls_match
->partition
= create_partition(cls
, subtable
,
728 for (int i
= 0; i
< cls
->n_tries
; i
++) {
729 if (subtable
->trie_plen
[i
]) {
730 trie_insert(&cls
->tries
[i
], rule
, subtable
->trie_plen
[i
]);
735 if (subtable
->ports_mask_len
) {
736 /* We mask the value to be inserted to always have the wildcarded
737 * bits in known (zero) state, so we can include them in comparison
738 * and they will always match (== their original value does not
740 ovs_be32 masked_ports
= minimatch_get_ports(&rule
->match
);
742 trie_insert_prefix(&subtable
->ports_trie
, &masked_ports
,
743 subtable
->ports_mask_len
);
746 old_cls_rule
= old_rule
->cls_rule
;
747 rule
->cls_match
->partition
= old_rule
->partition
;
748 old_cls_rule
->cls_match
= NULL
;
750 /* 'old_rule' contains a cmap_node, which may not be freed
752 ovsrcu_postpone(free
, old_rule
);
754 ovs_mutex_unlock(&cls
->mutex
);
/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * 'cls' must not contain an identical rule (including wildcards, values of
 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 * any duplicate first. */
void
classifier_insert(struct classifier *cls, struct cls_rule *rule)
{
    struct cls_rule *displaced_rule = classifier_replace(cls, rule);
    ovs_assert(!displaced_rule);
}
771 /* Removes 'rule' from 'cls'. It is the caller's responsibility to destroy
772 * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule'
773 * resides, etc., as necessary. */
775 classifier_remove(struct classifier
*cls
, struct cls_rule
*rule
)
776 OVS_EXCLUDED(cls
->mutex
)
778 struct cls_partition
*partition
;
779 struct cls_match
*cls_match
= rule
->cls_match
;
780 struct cls_match
*head
;
781 struct cls_subtable
*subtable
;
783 uint32_t basis
= 0, hash
, ihash
[CLS_MAX_INDICES
];
784 uint8_t prev_be32ofs
= 0;
786 ovs_assert(cls_match
);
788 ovs_mutex_lock(&cls
->mutex
);
789 subtable
= find_subtable(cls
, &rule
->match
.mask
);
790 ovs_assert(subtable
);
792 if (subtable
->ports_mask_len
) {
793 ovs_be32 masked_ports
= minimatch_get_ports(&rule
->match
);
795 trie_remove_prefix(&subtable
->ports_trie
,
796 &masked_ports
, subtable
->ports_mask_len
);
798 for (i
= 0; i
< cls
->n_tries
; i
++) {
799 if (subtable
->trie_plen
[i
]) {
800 trie_remove(&cls
->tries
[i
], rule
, subtable
->trie_plen
[i
]);
804 /* Remove rule node from indices. */
805 for (i
= 0; i
< subtable
->n_indices
; i
++) {
806 ihash
[i
] = minimatch_hash_range(&rule
->match
, prev_be32ofs
,
807 subtable
->index_ofs
[i
], &basis
);
808 cmap_remove(&subtable
->indices
[i
], &cls_match
->index_nodes
[i
],
810 prev_be32ofs
= subtable
->index_ofs
[i
];
812 hash
= minimatch_hash_range(&rule
->match
, prev_be32ofs
, FLOW_U32S
, &basis
);
814 head
= find_equal(subtable
, &rule
->match
.flow
, hash
);
815 if (head
!= cls_match
) {
816 list_remove(&cls_match
->list
);
817 } else if (list_is_empty(&cls_match
->list
)) {
818 cmap_remove(&subtable
->rules
, &cls_match
->cmap_node
, hash
);
820 struct cls_match
*next
= CONTAINER_OF(cls_match
->list
.next
,
821 struct cls_match
, list
);
823 list_remove(&cls_match
->list
);
824 cmap_replace(&subtable
->rules
, &cls_match
->cmap_node
,
825 &next
->cmap_node
, hash
);
828 partition
= cls_match
->partition
;
830 tag_tracker_subtract(&partition
->tracker
, &partition
->tags
,
832 if (!partition
->tags
) {
833 cmap_remove(&cls
->partitions
, &partition
->cmap_node
,
834 hash_metadata(partition
->metadata
));
835 ovsrcu_postpone(free
, partition
);
839 if (--subtable
->n_rules
== 0) {
840 destroy_subtable(cls
, subtable
);
841 } else if (subtable
->max_priority
== cls_match
->priority
842 && --subtable
->max_count
== 0) {
843 /* Find the new 'max_priority' and 'max_count'. */
844 struct cls_match
*head
;
845 unsigned int max_priority
= 0;
847 CMAP_FOR_EACH (head
, cmap_node
, &subtable
->rules
) {
848 if (head
->priority
> max_priority
) {
849 max_priority
= head
->priority
;
850 subtable
->max_count
= 1;
851 } else if (head
->priority
== max_priority
) {
852 ++subtable
->max_count
;
855 subtable
->max_priority
= max_priority
;
856 pvector_change_priority(&cls
->subtables
, subtable
, max_priority
);
861 rule
->cls_match
= NULL
;
862 ovsrcu_postpone(free
, cls_match
);
863 ovs_mutex_unlock(&cls
->mutex
);
866 /* Prefix tree context. Valid when 'lookup_done' is true. Can skip all
867 * subtables which have a prefix match on the trie field, but whose prefix
868 * length is not indicated in 'match_plens'. For example, a subtable that
869 * has a 8-bit trie field prefix match can be skipped if
870 * !be_get_bit_at(&match_plens, 8 - 1). If skipped, 'maskbits' prefix bits
871 * must be unwildcarded to make datapath flow only match packets it should. */
873 const struct cls_trie
*trie
;
874 bool lookup_done
; /* Status of the lookup. */
875 uint8_t be32ofs
; /* U32 offset of the field in question. */
876 unsigned int maskbits
; /* Prefix length needed to avoid false matches. */
877 union mf_value match_plens
; /* Bitmask of prefix lengths with possible
882 trie_ctx_init(struct trie_ctx
*ctx
, const struct cls_trie
*trie
)
885 ctx
->be32ofs
= trie
->field
->flow_be32ofs
;
886 ctx
->lookup_done
= false;
889 /* Finds and returns the highest-priority rule in 'cls' that matches 'flow'.
890 * Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules
891 * of equal priority match 'flow', returns one arbitrarily.
893 * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the
894 * set of bits that were significant in the lookup. At some point
895 * earlier, 'wc' should have been initialized (e.g., by
896 * flow_wildcards_init_catchall()). */
898 classifier_lookup(const struct classifier
*cls
, const struct flow
*flow
,
899 struct flow_wildcards
*wc
)
901 const struct cls_partition
*partition
;
903 int64_t best_priority
= -1;
904 const struct cls_match
*best
;
905 struct trie_ctx trie_ctx
[CLS_MAX_TRIES
];
906 struct cls_subtable
*subtable
;
908 /* Synchronize for cls->n_tries and subtable->trie_plen. They can change
909 * when table configuration changes, which happens typically only on
911 atomic_thread_fence(memory_order_acquire
);
913 /* Determine 'tags' such that, if 'subtable->tag' doesn't intersect them,
914 * then 'flow' cannot possibly match in 'subtable':
916 * - If flow->metadata maps to a given 'partition', then we can use
917 * 'tags' for 'partition->tags'.
919 * - If flow->metadata has no partition, then no rule in 'cls' has an
920 * exact-match for flow->metadata. That means that we don't need to
921 * search any subtable that includes flow->metadata in its mask.
923 * In either case, we always need to search any cls_subtables that do not
924 * include flow->metadata in its mask. One way to do that would be to
925 * check the "cls_subtable"s explicitly for that, but that would require an
926 * extra branch per subtable. Instead, we mark such a cls_subtable's
927 * 'tags' as TAG_ALL and make sure that 'tags' is never empty. This means
928 * that 'tags' always intersects such a cls_subtable's 'tags', so we don't
929 * need a special case.
931 partition
= (cmap_is_empty(&cls
->partitions
)
933 : find_partition(cls
, flow
->metadata
,
934 hash_metadata(flow
->metadata
)));
935 tags
= partition
? partition
->tags
: TAG_ARBITRARY
;
937 /* Initialize trie contexts for match_find_wc(). */
938 for (int i
= 0; i
< cls
->n_tries
; i
++) {
939 trie_ctx_init(&trie_ctx
[i
], &cls
->tries
[i
]);
943 PVECTOR_FOR_EACH_PRIORITY(subtable
, best_priority
, 2,
944 sizeof(struct cls_subtable
), &cls
->subtables
) {
945 struct cls_match
*rule
;
947 if (!tag_intersects(tags
, subtable
->tag
)) {
951 rule
= find_match_wc(subtable
, flow
, trie_ctx
, cls
->n_tries
, wc
);
952 if (rule
&& (int64_t)rule
->priority
> best_priority
) {
953 best_priority
= (int64_t)rule
->priority
;
958 return best
? best
->cls_rule
: NULL
;
961 /* Returns true if 'target' satisifies 'match', that is, if each bit for which
962 * 'match' specifies a particular value has the correct value in 'target'.
964 * 'flow' and 'mask' have the same mask! */
966 miniflow_and_mask_matches_miniflow(const struct miniflow
*flow
,
967 const struct minimask
*mask
,
968 const struct miniflow
*target
)
970 const uint32_t *flowp
= miniflow_get_u32_values(flow
);
971 const uint32_t *maskp
= miniflow_get_u32_values(&mask
->masks
);
974 MINIFLOW_FOR_EACH_IN_MAP(target_u32
, target
, mask
->masks
.map
) {
975 if ((*flowp
++ ^ target_u32
) & *maskp
++) {
983 static inline struct cls_match
*
984 find_match_miniflow(const struct cls_subtable
*subtable
,
985 const struct miniflow
*flow
,
988 struct cls_match
*rule
;
990 CMAP_FOR_EACH_WITH_HASH (rule
, cmap_node
, hash
, &subtable
->rules
) {
991 if (miniflow_and_mask_matches_miniflow(&rule
->flow
, &subtable
->mask
,
1000 /* For each miniflow in 'flows' performs a classifier lookup writing the result
1001 * into the corresponding slot in 'rules'. If a particular entry in 'flows' is
1002 * NULL it is skipped.
1004 * This function is optimized for use in the userspace datapath and therefore
1005 * does not implement a lot of features available in the standard
1006 * classifier_lookup() function. Specifically, it does not implement
1007 * priorities, instead returning any rule which matches the flow.
1009 * Returns true if all flows found a corresponding rule. */
1011 classifier_lookup_miniflow_batch(const struct classifier
*cls
,
1012 const struct miniflow
**flows
,
1013 struct cls_rule
**rules
, size_t len
)
1015 struct cls_subtable
*subtable
;
1016 size_t i
, begin
= 0;
1018 memset(rules
, 0, len
* sizeof *rules
);
1019 PVECTOR_FOR_EACH (subtable
, &cls
->subtables
) {
1020 for (i
= begin
; i
< len
; i
++) {
1021 struct cls_match
*match
;
1024 if (OVS_UNLIKELY(rules
[i
] || !flows
[i
])) {
1028 hash
= miniflow_hash_in_minimask(flows
[i
], &subtable
->mask
, 0);
1029 match
= find_match_miniflow(subtable
, flows
[i
], hash
);
1030 if (OVS_UNLIKELY(match
)) {
1031 rules
[i
] = match
->cls_rule
;
1035 while (begin
< len
&& (rules
[begin
] || !flows
[begin
])) {
1046 /* Finds and returns a rule in 'cls' with exactly the same priority and
1047 * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't
1048 * contain an exact match. */
1050 classifier_find_rule_exactly(const struct classifier
*cls
,
1051 const struct cls_rule
*target
)
1052 OVS_EXCLUDED(cls
->mutex
)
1054 struct cls_match
*head
, *rule
;
1055 struct cls_subtable
*subtable
;
1057 ovs_mutex_lock(&cls
->mutex
);
1058 subtable
= find_subtable(cls
, &target
->match
.mask
);
1063 /* Skip if there is no hope. */
1064 if (target
->priority
> subtable
->max_priority
) {
1068 head
= find_equal(subtable
, &target
->match
.flow
,
1069 miniflow_hash_in_minimask(&target
->match
.flow
,
1070 &target
->match
.mask
, 0));
1071 FOR_EACH_RULE_IN_LIST (rule
, head
) {
1072 if (target
->priority
>= rule
->priority
) {
1073 ovs_mutex_unlock(&cls
->mutex
);
1074 return target
->priority
== rule
->priority
? rule
->cls_rule
: NULL
;
1078 ovs_mutex_unlock(&cls
->mutex
);
1082 /* Finds and returns a rule in 'cls' with priority 'priority' and exactly the
1083 * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't
1084 * contain an exact match. */
1086 classifier_find_match_exactly(const struct classifier
*cls
,
1087 const struct match
*target
,
1088 unsigned int priority
)
1090 struct cls_rule
*retval
;
1093 cls_rule_init(&cr
, target
, priority
);
1094 retval
= classifier_find_rule_exactly(cls
, &cr
);
1095 cls_rule_destroy(&cr
);
1100 /* Checks if 'target' would overlap any other rule in 'cls'. Two rules are
1101 * considered to overlap if both rules have the same priority and a packet
1102 * could match both. */
1104 classifier_rule_overlaps(const struct classifier
*cls
,
1105 const struct cls_rule
*target
)
1106 OVS_EXCLUDED(cls
->mutex
)
1108 struct cls_subtable
*subtable
;
1109 int64_t stop_at_priority
= (int64_t)target
->priority
- 1;
1111 ovs_mutex_lock(&cls
->mutex
);
1112 /* Iterate subtables in the descending max priority order. */
1113 PVECTOR_FOR_EACH_PRIORITY (subtable
, stop_at_priority
, 2,
1114 sizeof(struct cls_subtable
), &cls
->subtables
) {
1115 uint32_t storage
[FLOW_U32S
];
1116 struct minimask mask
;
1117 struct cls_match
*head
;
1119 minimask_combine(&mask
, &target
->match
.mask
, &subtable
->mask
, storage
);
1120 CMAP_FOR_EACH (head
, cmap_node
, &subtable
->rules
) {
1121 struct cls_match
*rule
;
1123 FOR_EACH_RULE_IN_LIST (rule
, head
) {
1124 if (rule
->priority
< target
->priority
) {
1125 break; /* Rules in descending priority order. */
1127 if (rule
->priority
== target
->priority
1128 && miniflow_equal_in_minimask(&target
->match
.flow
,
1129 &rule
->flow
, &mask
)) {
1130 ovs_mutex_unlock(&cls
->mutex
);
1137 ovs_mutex_unlock(&cls
->mutex
);
1141 /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more
1142 * specific than 'criteria'. That is, 'rule' matches 'criteria' and this
1143 * function returns true if, for every field:
1145 * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the
1148 * - 'criteria' wildcards the field,
1150 * Conversely, 'rule' does not match 'criteria' and this function returns false
1151 * if, for at least one field:
1153 * - 'criteria' and 'rule' specify different values for the field, or
1155 * - 'criteria' specifies a value for the field but 'rule' wildcards it.
1157 * Equivalently, the truth table for whether a field matches is:
1162 * r +---------+---------+
1163 * i wild | yes | yes |
1165 * e +---------+---------+
1166 * r exact | no |if values|
1168 * a +---------+---------+
1170 * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD
1171 * commands and by OpenFlow 1.0 aggregate and flow stats.
1173 * Ignores rule->priority. */
1175 cls_rule_is_loose_match(const struct cls_rule
*rule
,
1176 const struct minimatch
*criteria
)
1178 return (!minimask_has_extra(&rule
->match
.mask
, &criteria
->mask
)
1179 && miniflow_equal_in_minimask(&rule
->match
.flow
, &criteria
->flow
,
1186 rule_matches(const struct cls_match
*rule
, const struct cls_rule
*target
)
1189 || miniflow_equal_in_minimask(&rule
->flow
,
1190 &target
->match
.flow
,
1191 &target
->match
.mask
));
1194 static struct cls_match
*
1195 search_subtable(const struct cls_subtable
*subtable
,
1196 struct cls_cursor
*cursor
)
1199 || !minimask_has_extra(&subtable
->mask
, &cursor
->target
->match
.mask
)) {
1200 struct cls_match
*rule
;
1202 CMAP_CURSOR_FOR_EACH (rule
, cmap_node
, &cursor
->rules
,
1204 if (rule_matches(rule
, cursor
->target
)) {
1212 /* Initializes 'cursor' for iterating through rules in 'cls', and returns the
1213 * first matching cls_rule via '*pnode', or NULL if there are no matches.
1215 * - If 'target' is null, the cursor will visit every rule in 'cls'.
1217 * - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls'
1218 * such that cls_rule_is_loose_match(rule, target) returns true.
1220 * Ignores target->priority. */
1221 struct cls_cursor
cls_cursor_start(const struct classifier
*cls
,
1222 const struct cls_rule
*target
,
1224 OVS_NO_THREAD_SAFETY_ANALYSIS
1226 struct cls_cursor cursor
;
1227 struct cls_subtable
*subtable
;
1231 cursor
.target
= target
&& !cls_rule_is_catchall(target
) ? target
: NULL
;
1234 /* Find first rule. */
1235 ovs_mutex_lock(&cursor
.cls
->mutex
);
1236 CMAP_CURSOR_FOR_EACH (subtable
, cmap_node
, &cursor
.subtables
,
1237 &cursor
.cls
->subtables_map
) {
1238 struct cls_match
*rule
= search_subtable(subtable
, &cursor
);
1241 cursor
.subtable
= subtable
;
1242 cursor
.rule
= rule
->cls_rule
;
1247 /* Leave locked if requested and have a rule. */
1248 if (safe
|| !cursor
.rule
) {
1249 ovs_mutex_unlock(&cursor
.cls
->mutex
);
1254 static struct cls_rule
*
1255 cls_cursor_next(struct cls_cursor
*cursor
)
1256 OVS_NO_THREAD_SAFETY_ANALYSIS
1258 struct cls_match
*rule
= cursor
->rule
->cls_match
;
1259 const struct cls_subtable
*subtable
;
1260 struct cls_match
*next
;
1262 next
= next_rule_in_list__(rule
);
1263 if (next
->priority
< rule
->priority
) {
1264 return next
->cls_rule
;
1267 /* 'next' is the head of the list, that is, the rule that is included in
1268 * the subtable's map. (This is important when the classifier contains
1269 * rules that differ only in priority.) */
1271 CMAP_CURSOR_FOR_EACH_CONTINUE (rule
, cmap_node
, &cursor
->rules
) {
1272 if (rule_matches(rule
, cursor
->target
)) {
1273 return rule
->cls_rule
;
1277 subtable
= cursor
->subtable
;
1278 CMAP_CURSOR_FOR_EACH_CONTINUE (subtable
, cmap_node
, &cursor
->subtables
) {
1279 rule
= search_subtable(subtable
, cursor
);
1281 cursor
->subtable
= subtable
;
1282 return rule
->cls_rule
;
1289 /* Sets 'cursor->rule' to the next matching cls_rule in 'cursor''s iteration,
1290 * or to null if all matching rules have been visited. */
1292 cls_cursor_advance(struct cls_cursor
*cursor
)
1293 OVS_NO_THREAD_SAFETY_ANALYSIS
1296 ovs_mutex_lock(&cursor
->cls
->mutex
);
1298 cursor
->rule
= cls_cursor_next(cursor
);
1299 if (cursor
->safe
|| !cursor
->rule
) {
1300 ovs_mutex_unlock(&cursor
->cls
->mutex
);
1304 static struct cls_subtable
*
1305 find_subtable(const struct classifier
*cls
, const struct minimask
*mask
)
1306 OVS_REQUIRES(cls
->mutex
)
1308 struct cls_subtable
*subtable
;
1310 CMAP_FOR_EACH_WITH_HASH (subtable
, cmap_node
, minimask_hash(mask
, 0),
1311 &cls
->subtables_map
) {
1312 if (minimask_equal(mask
, &subtable
->mask
)) {
1319 /* The new subtable will be visible to the readers only after this. */
1320 static struct cls_subtable
*
1321 insert_subtable(struct classifier
*cls
, const struct minimask
*mask
)
1322 OVS_REQUIRES(cls
->mutex
)
1324 uint32_t hash
= minimask_hash(mask
, 0);
1325 struct cls_subtable
*subtable
;
1327 struct flow_wildcards old
, new;
1329 int count
= count_1bits(mask
->masks
.map
);
1331 subtable
= xzalloc(sizeof *subtable
- sizeof mask
->masks
.inline_values
1332 + MINIFLOW_VALUES_SIZE(count
));
1333 cmap_init(&subtable
->rules
);
1334 miniflow_clone_inline(&subtable
->mask
.masks
, &mask
->masks
, count
);
1336 /* Init indices for segmented lookup, if any. */
1337 flow_wildcards_init_catchall(&new);
1340 for (i
= 0; i
< cls
->n_flow_segments
; i
++) {
1341 flow_wildcards_fold_minimask_range(&new, mask
, prev
,
1342 cls
->flow_segments
[i
]);
1343 /* Add an index if it adds mask bits. */
1344 if (!flow_wildcards_equal(&new, &old
)) {
1345 cmap_init(&subtable
->indices
[index
]);
1346 subtable
->index_ofs
[index
] = cls
->flow_segments
[i
];
1350 prev
= cls
->flow_segments
[i
];
1352 /* Check if the rest of the subtable's mask adds any bits,
1353 * and remove the last index if it doesn't. */
1355 flow_wildcards_fold_minimask_range(&new, mask
, prev
, FLOW_U32S
);
1356 if (flow_wildcards_equal(&new, &old
)) {
1358 subtable
->index_ofs
[index
] = 0;
1359 cmap_destroy(&subtable
->indices
[index
]);
1362 subtable
->n_indices
= index
;
1364 subtable
->tag
= (minimask_get_metadata_mask(mask
) == OVS_BE64_MAX
1365 ? tag_create_deterministic(hash
)
1368 for (i
= 0; i
< cls
->n_tries
; i
++) {
1369 subtable
->trie_plen
[i
] = minimask_get_prefix_len(mask
,
1370 cls
->tries
[i
].field
);
1374 ovsrcu_set_hidden(&subtable
->ports_trie
, NULL
);
1375 subtable
->ports_mask_len
1376 = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask
->masks
, tp_src
)));
1378 cmap_insert(&cls
->subtables_map
, &subtable
->cmap_node
, hash
);
1384 destroy_subtable(struct classifier
*cls
, struct cls_subtable
*subtable
)
1385 OVS_REQUIRES(cls
->mutex
)
1389 pvector_remove(&cls
->subtables
, subtable
);
1390 trie_destroy(&subtable
->ports_trie
);
1392 for (i
= 0; i
< subtable
->n_indices
; i
++) {
1393 cmap_destroy(&subtable
->indices
[i
]);
1395 cmap_remove(&cls
->subtables_map
, &subtable
->cmap_node
,
1396 minimask_hash(&subtable
->mask
, 0));
1397 minimask_destroy(&subtable
->mask
);
1398 cmap_destroy(&subtable
->rules
);
1399 ovsrcu_postpone(free
, subtable
);
1407 static unsigned int be_get_bit_at(const ovs_be32 value
[], unsigned int ofs
);
1409 /* Return 'true' if can skip rest of the subtable based on the prefix trie
1410 * lookup results. */
1412 check_tries(struct trie_ctx trie_ctx
[CLS_MAX_TRIES
], unsigned int n_tries
,
1413 const unsigned int field_plen
[CLS_MAX_TRIES
],
1414 const struct range ofs
, const struct flow
*flow
,
1415 struct flow_wildcards
*wc
)
1419 /* Check if we could avoid fully unwildcarding the next level of
1420 * fields using the prefix tries. The trie checks are done only as
1421 * needed to avoid folding in additional bits to the wildcards mask. */
1422 for (j
= 0; j
< n_tries
; j
++) {
1423 /* Is the trie field relevant for this subtable? */
1424 if (field_plen
[j
]) {
1425 struct trie_ctx
*ctx
= &trie_ctx
[j
];
1426 uint8_t be32ofs
= ctx
->be32ofs
;
1428 /* Is the trie field within the current range of fields? */
1429 if (be32ofs
>= ofs
.start
&& be32ofs
< ofs
.end
) {
1430 /* On-demand trie lookup. */
1431 if (!ctx
->lookup_done
) {
1432 memset(&ctx
->match_plens
, 0, sizeof ctx
->match_plens
);
1433 ctx
->maskbits
= trie_lookup(ctx
->trie
, flow
,
1435 ctx
->lookup_done
= true;
1437 /* Possible to skip the rest of the subtable if subtable's
1438 * prefix on the field is not included in the lookup result. */
1439 if (!be_get_bit_at(&ctx
->match_plens
.be32
, field_plen
[j
] - 1)) {
1440 /* We want the trie lookup to never result in unwildcarding
1441 * any bits that would not be unwildcarded otherwise.
1442 * Since the trie is shared by the whole classifier, it is
1443 * possible that the 'maskbits' contain bits that are
1444 * irrelevant for the partition relevant for the current
1445 * packet. Hence the checks below. */
1447 /* Check that the trie result will not unwildcard more bits
1448 * than this subtable would otherwise. */
1449 if (ctx
->maskbits
<= field_plen
[j
]) {
1450 /* Unwildcard the bits and skip the rest. */
1451 mask_set_prefix_bits(wc
, be32ofs
, ctx
->maskbits
);
1452 /* Note: Prerequisite already unwildcarded, as the only
1453 * prerequisite of the supported trie lookup fields is
1454 * the ethertype, which is always unwildcarded. */
1457 /* Can skip if the field is already unwildcarded. */
1458 if (mask_prefix_bits_set(wc
, be32ofs
, ctx
->maskbits
)) {
1468 /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
1469 * for which 'flow', for which 'mask' has a bit set, specifies a particular
1470 * value has the correct value in 'target'.
1472 * This function is equivalent to miniflow_equal_flow_in_minimask(flow,
1473 * target, mask) but this is faster because of the invariant that
1474 * flow->map and mask->masks.map are the same, and that this version
1475 * takes the 'wc'. */
1477 miniflow_and_mask_matches_flow(const struct miniflow
*flow
,
1478 const struct minimask
*mask
,
1479 const struct flow
*target
)
1481 const uint32_t *flowp
= miniflow_get_u32_values(flow
);
1482 const uint32_t *maskp
= miniflow_get_u32_values(&mask
->masks
);
1485 MAP_FOR_EACH_INDEX(idx
, mask
->masks
.map
) {
1486 uint32_t diff
= (*flowp
++ ^ flow_u32_value(target
, idx
)) & *maskp
++;
1496 static inline struct cls_match
*
1497 find_match(const struct cls_subtable
*subtable
, const struct flow
*flow
,
1500 struct cls_match
*rule
;
1502 CMAP_FOR_EACH_WITH_HASH (rule
, cmap_node
, hash
, &subtable
->rules
) {
1503 if (miniflow_and_mask_matches_flow(&rule
->flow
, &subtable
->mask
,
1512 /* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
1513 * for which 'flow', for which 'mask' has a bit set, specifies a particular
1514 * value has the correct value in 'target'.
1516 * This function is equivalent to miniflow_and_mask_matches_flow() but this
1517 * version fills in the mask bits in 'wc'. */
1519 miniflow_and_mask_matches_flow_wc(const struct miniflow
*flow
,
1520 const struct minimask
*mask
,
1521 const struct flow
*target
,
1522 struct flow_wildcards
*wc
)
1524 const uint32_t *flowp
= miniflow_get_u32_values(flow
);
1525 const uint32_t *maskp
= miniflow_get_u32_values(&mask
->masks
);
1528 MAP_FOR_EACH_INDEX(idx
, mask
->masks
.map
) {
1529 uint32_t mask
= *maskp
++;
1530 uint32_t diff
= (*flowp
++ ^ flow_u32_value(target
, idx
)) & mask
;
1533 /* Only unwildcard if none of the differing bits is already
1535 if (!(flow_u32_value(&wc
->masks
, idx
) & diff
)) {
1536 /* Keep one bit of the difference. */
1537 *flow_u32_lvalue(&wc
->masks
, idx
) |= rightmost_1bit(diff
);
1541 /* Fill in the bits that were looked at. */
1542 *flow_u32_lvalue(&wc
->masks
, idx
) |= mask
;
1548 /* Unwildcard the fields looked up so far, if any. */
1550 fill_range_wc(const struct cls_subtable
*subtable
, struct flow_wildcards
*wc
,
1554 flow_wildcards_fold_minimask_range(wc
, &subtable
->mask
, 0, to
);
1558 static struct cls_match
*
1559 find_match_wc(const struct cls_subtable
*subtable
, const struct flow
*flow
,
1560 struct trie_ctx trie_ctx
[CLS_MAX_TRIES
], unsigned int n_tries
,
1561 struct flow_wildcards
*wc
)
1563 uint32_t basis
= 0, hash
;
1564 struct cls_match
*rule
= NULL
;
1568 if (OVS_UNLIKELY(!wc
)) {
1569 return find_match(subtable
, flow
,
1570 flow_hash_in_minimask(flow
, &subtable
->mask
, 0));
1574 /* Try to finish early by checking fields in segments. */
1575 for (i
= 0; i
< subtable
->n_indices
; i
++) {
1576 const struct cmap_node
*inode
;
1578 ofs
.end
= subtable
->index_ofs
[i
];
1580 if (check_tries(trie_ctx
, n_tries
, subtable
->trie_plen
, ofs
, flow
,
1582 /* 'wc' bits for the trie field set, now unwildcard the preceding
1583 * bits used so far. */
1584 fill_range_wc(subtable
, wc
, ofs
.start
);
1587 hash
= flow_hash_in_minimask_range(flow
, &subtable
->mask
, ofs
.start
,
1589 inode
= cmap_find(&subtable
->indices
[i
], hash
);
1591 /* No match, can stop immediately, but must fold in the bits
1592 * used in lookup so far. */
1593 fill_range_wc(subtable
, wc
, ofs
.end
);
1597 /* If we have narrowed down to a single rule already, check whether
1598 * that rule matches. Either way, we're done.
1600 * (Rare) hash collisions may cause us to miss the opportunity for this
1602 if (!cmap_node_next(inode
)) {
1603 ASSIGN_CONTAINER(rule
, inode
- i
, index_nodes
);
1604 if (miniflow_and_mask_matches_flow_wc(&rule
->flow
, &subtable
->mask
,
1610 ofs
.start
= ofs
.end
;
1612 ofs
.end
= FLOW_U32S
;
1613 /* Trie check for the final range. */
1614 if (check_tries(trie_ctx
, n_tries
, subtable
->trie_plen
, ofs
, flow
, wc
)) {
1615 fill_range_wc(subtable
, wc
, ofs
.start
);
1618 hash
= flow_hash_in_minimask_range(flow
, &subtable
->mask
, ofs
.start
,
1620 rule
= find_match(subtable
, flow
, hash
);
1621 if (!rule
&& subtable
->ports_mask_len
) {
1622 /* Ports are always part of the final range, if any.
1623 * No match was found for the ports. Use the ports trie to figure out
1624 * which ports bits to unwildcard. */
1626 ovs_be32 value
, plens
, mask
;
1628 mask
= MINIFLOW_GET_BE32(&subtable
->mask
.masks
, tp_src
);
1629 value
= ((OVS_FORCE ovs_be32
*)flow
)[TP_PORTS_OFS32
] & mask
;
1630 mbits
= trie_lookup_value(&subtable
->ports_trie
, &value
, &plens
, 32);
1632 ((OVS_FORCE ovs_be32
*)&wc
->masks
)[TP_PORTS_OFS32
] |=
1633 mask
& htonl(~0 << (32 - mbits
));
1635 /* Unwildcard all bits in the mask upto the ports, as they were used
1636 * to determine there is no match. */
1637 fill_range_wc(subtable
, wc
, TP_PORTS_OFS32
);
1641 /* Must unwildcard all the fields, as they were looked at. */
1642 flow_wildcards_fold_minimask(wc
, &subtable
->mask
);
1646 static struct cls_match
*
1647 find_equal(struct cls_subtable
*subtable
, const struct miniflow
*flow
,
1650 struct cls_match
*head
;
1652 CMAP_FOR_EACH_WITH_HASH (head
, cmap_node
, hash
, &subtable
->rules
) {
1653 if (miniflow_equal(&head
->flow
, flow
)) {
1661 * As the readers are operating concurrently with the modifications, a
1662 * concurrent reader may or may not see the new rule, depending on how
1663 * the concurrent events overlap with each other. This is no
1664 * different from the former locked behavior, but there the visibility
1665 * of the new rule only depended on the timing of the locking
1668 * The new rule is first added to the segment indices, so the readers
1669 * may find the rule in the indices before the rule is visible in the
1670 * subtables 'rules' map. This may result in us losing the
1671 * opportunity to quit lookups earlier, resulting in sub-optimal
1672 * wildcarding. This will be fixed by forthcoming revalidation always
1673 * scheduled after flow table changes.
1675 * Similar behavior may happen due to us removing the overlapping rule
1676 * (if any) from the indices only after the new rule has been added.
1678 * The subtable's max priority is updated only after the rule is
1679 * inserted, so the concurrent readers may not see the rule, as the
1680 * updated priority ordered subtable list will only be visible after
1681 * the subtable's max priority is updated.
1683 * Similarly, the classifier's partitions for new rules are updated by
1684 * the caller after this function, so the readers may keep skipping
1685 * the subtable until they see the updated partitions.
1687 static struct cls_match
*
1688 insert_rule(struct classifier
*cls
, struct cls_subtable
*subtable
,
1689 struct cls_rule
*new_rule
)
1690 OVS_REQUIRES(cls
->mutex
)
1692 struct cls_match
*old
= NULL
;
1693 struct cls_match
*new = cls_match_alloc(new_rule
);
1694 struct cls_match
*head
;
1696 uint32_t basis
= 0, hash
, ihash
[CLS_MAX_INDICES
];
1697 uint8_t prev_be32ofs
= 0;
1699 /* Add new node to segment indices. */
1700 for (i
= 0; i
< subtable
->n_indices
; i
++) {
1701 ihash
[i
] = minimatch_hash_range(&new_rule
->match
, prev_be32ofs
,
1702 subtable
->index_ofs
[i
], &basis
);
1703 cmap_insert(&subtable
->indices
[i
], &new->index_nodes
[i
], ihash
[i
]);
1704 prev_be32ofs
= subtable
->index_ofs
[i
];
1706 hash
= minimatch_hash_range(&new_rule
->match
, prev_be32ofs
, FLOW_U32S
,
1708 head
= find_equal(subtable
, &new_rule
->match
.flow
, hash
);
1710 cmap_insert(&subtable
->rules
, &new->cmap_node
, hash
);
1711 list_init(&new->list
);
1714 /* Scan the list for the insertion point that will keep the list in
1715 * order of decreasing priority. */
1716 struct cls_match
*rule
;
1718 FOR_EACH_RULE_IN_LIST (rule
, head
) {
1719 if (new->priority
>= rule
->priority
) {
1721 /* 'new' is the new highest-priority flow in the list. */
1722 cmap_replace(&subtable
->rules
, &rule
->cmap_node
,
1723 &new->cmap_node
, hash
);
1726 if (new->priority
== rule
->priority
) {
1727 list_replace(&new->list
, &rule
->list
);
1730 list_insert(&rule
->list
, &new->list
);
1736 /* Insert 'new' at the end of the list. */
1737 list_push_back(&head
->list
, &new->list
);
1742 subtable
->n_rules
++;
1744 /* Rule was added, not replaced. Update 'subtable's 'max_priority'
1745 * and 'max_count', if necessary. */
1746 if (subtable
->n_rules
== 1) {
1747 subtable
->max_priority
= new->priority
;
1748 subtable
->max_count
= 1;
1749 pvector_insert(&cls
->subtables
, subtable
, new->priority
);
1750 } else if (subtable
->max_priority
== new->priority
) {
1751 ++subtable
->max_count
;
1752 } else if (new->priority
> subtable
->max_priority
) {
1753 subtable
->max_priority
= new->priority
;
1754 subtable
->max_count
= 1;
1755 pvector_change_priority(&cls
->subtables
, subtable
, new->priority
);
1758 /* Remove old node from indices. */
1759 for (i
= 0; i
< subtable
->n_indices
; i
++) {
1760 cmap_remove(&subtable
->indices
[i
], &old
->index_nodes
[i
], ihash
[i
]);
1766 static struct cls_match
*
1767 next_rule_in_list__(struct cls_match
*rule
)
1768 OVS_NO_THREAD_SAFETY_ANALYSIS
1770 struct cls_match
*next
= NULL
;
1771 next
= OBJECT_CONTAINING(rule
->list
.next
, next
, list
);
1775 static struct cls_match
*
1776 next_rule_in_list(struct cls_match
*rule
)
1778 struct cls_match
*next
= next_rule_in_list__(rule
);
1779 return next
->priority
< rule
->priority
? next
: NULL
;
1782 /* A longest-prefix match tree. */
1784 uint32_t prefix
; /* Prefix bits for this node, MSB first. */
1785 uint8_t n_bits
; /* Never zero, except for the root node. */
1786 unsigned int n_rules
; /* Number of rules that have this prefix. */
1787 rcu_trie_ptr edges
[2]; /* Both NULL if leaf. */
1790 /* Max bits per node. Must fit in struct trie_node's 'prefix'.
1791 * Also tested with 16, 8, and 5 to stress the implementation. */
1792 #define TRIE_PREFIX_BITS 32
1794 /* Return at least 'plen' bits of the 'prefix', starting at bit offset 'ofs'.
1795 * Prefixes are in the network byte order, and the offset 0 corresponds to
1796 * the most significant bit of the first byte. The offset can be read as
1797 * "how many bits to skip from the start of the prefix starting at 'pr'". */
1799 raw_get_prefix(const ovs_be32 pr
[], unsigned int ofs
, unsigned int plen
)
1803 pr
+= ofs
/ 32; /* Where to start. */
1804 ofs
%= 32; /* How many bits to skip at 'pr'. */
1806 prefix
= ntohl(*pr
) << ofs
; /* Get the first 32 - ofs bits. */
1807 if (plen
> 32 - ofs
) { /* Need more than we have already? */
1808 prefix
|= ntohl(*++pr
) >> (32 - ofs
);
1810 /* Return with possible unwanted bits at the end. */
1814 /* Return min(TRIE_PREFIX_BITS, plen) bits of the 'prefix', starting at bit
1815 * offset 'ofs'. Prefixes are in the network byte order, and the offset 0
1816 * corresponds to the most significant bit of the first byte. The offset can
1817 * be read as "how many bits to skip from the start of the prefix starting at
1820 trie_get_prefix(const ovs_be32 pr
[], unsigned int ofs
, unsigned int plen
)
1825 if (plen
> TRIE_PREFIX_BITS
) {
1826 plen
= TRIE_PREFIX_BITS
; /* Get at most TRIE_PREFIX_BITS. */
1828 /* Return with unwanted bits cleared. */
1829 return raw_get_prefix(pr
, ofs
, plen
) & ~0u << (32 - plen
);
1832 /* Return the number of equal bits in 'n_bits' of 'prefix's MSBs and a 'value'
1833 * starting at "MSB 0"-based offset 'ofs'. */
1835 prefix_equal_bits(uint32_t prefix
, unsigned int n_bits
, const ovs_be32 value
[],
1838 uint64_t diff
= prefix
^ raw_get_prefix(value
, ofs
, n_bits
);
1839 /* Set the bit after the relevant bits to limit the result. */
1840 return raw_clz64(diff
<< 32 | UINT64_C(1) << (63 - n_bits
));
1843 /* Return the number of equal bits in 'node' prefix and a 'prefix' of length
1844 * 'plen', starting at "MSB 0"-based offset 'ofs'. */
1846 trie_prefix_equal_bits(const struct trie_node
*node
, const ovs_be32 prefix
[],
1847 unsigned int ofs
, unsigned int plen
)
1849 return prefix_equal_bits(node
->prefix
, MIN(node
->n_bits
, plen
- ofs
),
1853 /* Return the bit at ("MSB 0"-based) offset 'ofs' as an int. 'ofs' can
1854 * be greater than 31. */
1856 be_get_bit_at(const ovs_be32 value
[], unsigned int ofs
)
1858 return (((const uint8_t *)value
)[ofs
/ 8] >> (7 - ofs
% 8)) & 1u;
/* Return the bit at ("MSB 0"-based) offset 'ofs' as an int.  'ofs' must
 * be between 0 and 31, inclusive. */
static unsigned int
get_bit_at(const uint32_t prefix, unsigned int ofs)
{
    return (prefix >> (31 - ofs)) & 1u;
}
1869 /* Create new branch. */
1870 static struct trie_node
*
1871 trie_branch_create(const ovs_be32
*prefix
, unsigned int ofs
, unsigned int plen
,
1872 unsigned int n_rules
)
1874 struct trie_node
*node
= xmalloc(sizeof *node
);
1876 node
->prefix
= trie_get_prefix(prefix
, ofs
, plen
);
1878 if (plen
<= TRIE_PREFIX_BITS
) {
1879 node
->n_bits
= plen
;
1880 ovsrcu_set_hidden(&node
->edges
[0], NULL
);
1881 ovsrcu_set_hidden(&node
->edges
[1], NULL
);
1882 node
->n_rules
= n_rules
;
1883 } else { /* Need intermediate nodes. */
1884 struct trie_node
*subnode
= trie_branch_create(prefix
,
1885 ofs
+ TRIE_PREFIX_BITS
,
1886 plen
- TRIE_PREFIX_BITS
,
1888 int bit
= get_bit_at(subnode
->prefix
, 0);
1889 node
->n_bits
= TRIE_PREFIX_BITS
;
1890 ovsrcu_set_hidden(&node
->edges
[bit
], subnode
);
1891 ovsrcu_set_hidden(&node
->edges
[!bit
], NULL
);
1898 trie_node_destroy(const struct trie_node
*node
)
1900 ovsrcu_postpone(free
, CONST_CAST(struct trie_node
*, node
));
1903 /* Copy a trie node for modification and postpone delete the old one. */
1904 static struct trie_node
*
1905 trie_node_rcu_realloc(const struct trie_node
*node
)
1907 struct trie_node
*new_node
= xmalloc(sizeof *node
);
1910 trie_node_destroy(node
);
1915 /* May only be called while holding the classifier mutex. */
1917 trie_destroy(rcu_trie_ptr
*trie
)
1919 struct trie_node
*node
= ovsrcu_get_protected(struct trie_node
*, trie
);
1922 ovsrcu_set_hidden(trie
, NULL
);
1923 trie_destroy(&node
->edges
[0]);
1924 trie_destroy(&node
->edges
[1]);
1925 trie_node_destroy(node
);
1930 trie_is_leaf(const struct trie_node
*trie
)
1933 return !ovsrcu_get(struct trie_node
*, &trie
->edges
[0])
1934 && !ovsrcu_get(struct trie_node
*, &trie
->edges
[1]);
1938 mask_set_prefix_bits(struct flow_wildcards
*wc
, uint8_t be32ofs
,
1939 unsigned int n_bits
)
1941 ovs_be32
*mask
= &((ovs_be32
*)&wc
->masks
)[be32ofs
];
1944 for (i
= 0; i
< n_bits
/ 32; i
++) {
1945 mask
[i
] = OVS_BE32_MAX
;
1948 mask
[i
] |= htonl(~0u << (32 - n_bits
% 32));
1953 mask_prefix_bits_set(const struct flow_wildcards
*wc
, uint8_t be32ofs
,
1954 unsigned int n_bits
)
1956 ovs_be32
*mask
= &((ovs_be32
*)&wc
->masks
)[be32ofs
];
1958 ovs_be32 zeroes
= 0;
1960 for (i
= 0; i
< n_bits
/ 32; i
++) {
1964 zeroes
|= ~mask
[i
] & htonl(~0u << (32 - n_bits
% 32));
1967 return !zeroes
; /* All 'n_bits' bits set. */
1970 static rcu_trie_ptr
*
1971 trie_next_edge(struct trie_node
*node
, const ovs_be32 value
[],
1974 return node
->edges
+ be_get_bit_at(value
, ofs
);
1977 static const struct trie_node
*
1978 trie_next_node(const struct trie_node
*node
, const ovs_be32 value
[],
1981 return ovsrcu_get(struct trie_node
*,
1982 &node
->edges
[be_get_bit_at(value
, ofs
)]);
1985 /* Set the bit at ("MSB 0"-based) offset 'ofs'. 'ofs' can be greater than 31.
1988 be_set_bit_at(ovs_be32 value
[], unsigned int ofs
)
1990 ((uint8_t *)value
)[ofs
/ 8] |= 1u << (7 - ofs
% 8);
1993 /* Returns the number of bits in the prefix mask necessary to determine a
1994 * mismatch, in case there are longer prefixes in the tree below the one that
1996 * '*plens' will have a bit set for each prefix length that may have matching
1997 * rules. The caller is responsible for clearing the '*plens' prior to
2001 trie_lookup_value(const rcu_trie_ptr
*trie
, const ovs_be32 value
[],
2002 ovs_be32 plens
[], unsigned int n_bits
)
2004 const struct trie_node
*prev
= NULL
;
2005 const struct trie_node
*node
= ovsrcu_get(struct trie_node
*, trie
);
2006 unsigned int match_len
= 0; /* Number of matching bits. */
2008 for (; node
; prev
= node
, node
= trie_next_node(node
, value
, match_len
)) {
2009 unsigned int eqbits
;
2010 /* Check if this edge can be followed. */
2011 eqbits
= prefix_equal_bits(node
->prefix
, node
->n_bits
, value
,
2013 match_len
+= eqbits
;
2014 if (eqbits
< node
->n_bits
) { /* Mismatch, nothing more to be found. */
2015 /* Bit at offset 'match_len' differed. */
2016 return match_len
+ 1; /* Includes the first mismatching bit. */
2018 /* Full match, check if rules exist at this prefix length. */
2019 if (node
->n_rules
> 0) {
2020 be_set_bit_at(plens
, match_len
- 1);
2022 if (match_len
>= n_bits
) {
2023 return n_bits
; /* Full prefix. */
2026 /* node == NULL. Full match so far, but we tried to follow an
2027 * non-existing branch. Need to exclude the other branch if it exists
2028 * (it does not if we were called on an empty trie or 'prev' is a leaf
2030 return !prev
|| trie_is_leaf(prev
) ? match_len
: match_len
+ 1;
2034 trie_lookup(const struct cls_trie
*trie
, const struct flow
*flow
,
2035 union mf_value
*plens
)
2037 const struct mf_field
*mf
= trie
->field
;
2039 /* Check that current flow matches the prerequisites for the trie
2040 * field. Some match fields are used for multiple purposes, so we
2041 * must check that the trie is relevant for this flow. */
2042 if (mf_are_prereqs_ok(mf
, flow
)) {
2043 return trie_lookup_value(&trie
->root
,
2044 &((ovs_be32
*)flow
)[mf
->flow_be32ofs
],
2045 &plens
->be32
, mf
->n_bits
);
2047 memset(plens
, 0xff, sizeof *plens
); /* All prefixes, no skipping. */
2048 return 0; /* Value not used in this case. */
2051 /* Returns the length of a prefix match mask for the field 'mf' in 'minimask'.
2052 * Returns the u32 offset to the miniflow data in '*miniflow_index', if
2053 * 'miniflow_index' is not NULL. */
2055 minimask_get_prefix_len(const struct minimask
*minimask
,
2056 const struct mf_field
*mf
)
2058 unsigned int n_bits
= 0, mask_tz
= 0; /* Non-zero when end of mask seen. */
2059 uint8_t u32_ofs
= mf
->flow_be32ofs
;
2060 uint8_t u32_end
= u32_ofs
+ mf
->n_bytes
/ 4;
2062 for (; u32_ofs
< u32_end
; ++u32_ofs
) {
2064 mask
= ntohl((OVS_FORCE ovs_be32
)minimask_get(minimask
, u32_ofs
));
2066 /* Validate mask, count the mask length. */
2069 return 0; /* No bits allowed after mask ended. */
2072 if (~mask
& (~mask
+ 1)) {
2073 return 0; /* Mask not contiguous. */
2075 mask_tz
= ctz32(mask
);
2076 n_bits
+= 32 - mask_tz
;
2084 * This is called only when mask prefix is known to be CIDR and non-zero.
2085 * Relies on the fact that the flow and mask have the same map, and since
2086 * the mask is CIDR, the storage for the flow field exists even if it
2087 * happened to be zeros.
2089 static const ovs_be32
*
2090 minimatch_get_prefix(const struct minimatch
*match
, const struct mf_field
*mf
)
2092 return miniflow_get_be32_values(&match
->flow
) +
2093 count_1bits(match
->flow
.map
& ((UINT64_C(1) << mf
->flow_be32ofs
) - 1));
2096 /* Insert rule in to the prefix tree.
2097 * 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
2100 trie_insert(struct cls_trie
*trie
, const struct cls_rule
*rule
, int mlen
)
2102 trie_insert_prefix(&trie
->root
,
2103 minimatch_get_prefix(&rule
->match
, trie
->field
), mlen
);
2107 trie_insert_prefix(rcu_trie_ptr
*edge
, const ovs_be32
*prefix
, int mlen
)
2109 struct trie_node
*node
;
2112 /* Walk the tree. */
2113 for (; (node
= ovsrcu_get_protected(struct trie_node
*, edge
));
2114 edge
= trie_next_edge(node
, prefix
, ofs
)) {
2115 unsigned int eqbits
= trie_prefix_equal_bits(node
, prefix
, ofs
, mlen
);
2117 if (eqbits
< node
->n_bits
) {
2118 /* Mismatch, new node needs to be inserted above. */
2119 int old_branch
= get_bit_at(node
->prefix
, eqbits
);
2120 struct trie_node
*new_parent
;
2122 new_parent
= trie_branch_create(prefix
, ofs
- eqbits
, eqbits
,
2123 ofs
== mlen
? 1 : 0);
2124 /* Copy the node to modify it. */
2125 node
= trie_node_rcu_realloc(node
);
2126 /* Adjust the new node for its new position in the tree. */
2127 node
->prefix
<<= eqbits
;
2128 node
->n_bits
-= eqbits
;
2129 ovsrcu_set_hidden(&new_parent
->edges
[old_branch
], node
);
2131 /* Check if need a new branch for the new rule. */
2133 ovsrcu_set_hidden(&new_parent
->edges
[!old_branch
],
2134 trie_branch_create(prefix
, ofs
, mlen
- ofs
,
2137 ovsrcu_set(edge
, new_parent
); /* Publish changes. */
2140 /* Full match so far. */
2143 /* Full match at the current node, rule needs to be added here. */
2148 /* Must insert a new tree branch for the new rule. */
2149 ovsrcu_set(edge
, trie_branch_create(prefix
, ofs
, mlen
- ofs
, 1));
2152 /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
2155 trie_remove(struct cls_trie
*trie
, const struct cls_rule
*rule
, int mlen
)
2157 trie_remove_prefix(&trie
->root
,
2158 minimatch_get_prefix(&rule
->match
, trie
->field
), mlen
);
2161 /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
2164 trie_remove_prefix(rcu_trie_ptr
*root
, const ovs_be32
*prefix
, int mlen
)
2166 struct trie_node
*node
;
2167 rcu_trie_ptr
*edges
[sizeof(union mf_value
) * 8];
2168 int depth
= 0, ofs
= 0;
2170 /* Walk the tree. */
2171 for (edges
[0] = root
;
2172 (node
= ovsrcu_get_protected(struct trie_node
*, edges
[depth
]));
2173 edges
[++depth
] = trie_next_edge(node
, prefix
, ofs
)) {
2174 unsigned int eqbits
= trie_prefix_equal_bits(node
, prefix
, ofs
, mlen
);
2176 if (eqbits
< node
->n_bits
) {
2177 /* Mismatch, nothing to be removed. This should never happen, as
2178 * only rules in the classifier are ever removed. */
2179 break; /* Log a warning. */
2181 /* Full match so far. */
2185 /* Full prefix match at the current node, remove rule here. */
2186 if (!node
->n_rules
) {
2187 break; /* Log a warning. */
2191 /* Check if can prune the tree. */
2192 while (!node
->n_rules
) {
2193 struct trie_node
*next
,
2194 *edge0
= ovsrcu_get_protected(struct trie_node
*,
2196 *edge1
= ovsrcu_get_protected(struct trie_node
*,
2199 if (edge0
&& edge1
) {
2200 break; /* A branching point, cannot prune. */
2203 /* Else have at most one child node, remove this node. */
2204 next
= edge0
? edge0
: edge1
;
2207 if (node
->n_bits
+ next
->n_bits
> TRIE_PREFIX_BITS
) {
2208 break; /* Cannot combine. */
2210 next
= trie_node_rcu_realloc(next
); /* Modify. */
2212 /* Combine node with next. */
2213 next
->prefix
= node
->prefix
| next
->prefix
>> node
->n_bits
;
2214 next
->n_bits
+= node
->n_bits
;
2216 /* Update the parent's edge. */
2217 ovsrcu_set(edges
[depth
], next
); /* Publish changes. */
2218 trie_node_destroy(node
);
2220 if (next
|| !depth
) {
2221 /* Branch not pruned or at root, nothing more to do. */
2224 node
= ovsrcu_get_protected(struct trie_node
*,
2230 /* Cannot go deeper. This should never happen, since only rules
2231 * that actually exist in the classifier are ever removed. */
2232 VLOG_WARN("Trying to remove non-existing rule from a prefix trie.");