]>
Commit | Line | Data |
---|---|---|
92c7c870 HH |
1 | /* |
2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2016, 2017 Nicira, Inc. | |
e90e115a | 3 | * Copyright (c) 2019, 2020 Intel Corporation. |
92c7c870 HH |
4 | * |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | * you may not use this file except in compliance with the License. | |
7 | * You may obtain a copy of the License at: | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | #include <config.h> | |
19 | #include "dpif-netdev.h" | |
20 | #include "dpif-netdev-private.h" | |
e90e115a | 21 | #include "dpif-netdev-lookup.h" |
92c7c870 HH |
22 | |
23 | #include "bitmap.h" | |
24 | #include "cmap.h" | |
25 | ||
26 | #include "dp-packet.h" | |
27 | #include "dpif.h" | |
28 | #include "dpif-netdev-perf.h" | |
29 | #include "dpif-provider.h" | |
30 | #include "flow.h" | |
a0b36b39 | 31 | #include "ovs-thread.h" |
92c7c870 HH |
32 | #include "packets.h" |
33 | #include "pvector.h" | |
34 | ||
a0b36b39 HH |
35 | VLOG_DEFINE_THIS_MODULE(dpif_lookup_generic); |
36 | ||
37 | /* Lookup functions below depends on the internal structure of flowmap. */ | |
38 | BUILD_ASSERT_DECL(FLOWMAP_UNITS == 2); | |
39 | ||
40 | struct block_array { | |
41 | uint32_t count; /* Number of items allocated in 'blocks' */ | |
42 | uint64_t blocks[]; | |
43 | }; | |
44 | ||
45 | DEFINE_PER_THREAD_MALLOCED_DATA(struct block_array *, block_array); | |
46 | ||
47 | static inline uint64_t * | |
48 | get_blocks_scratch(uint32_t required_count) | |
92c7c870 | 49 | { |
a0b36b39 | 50 | struct block_array *array = block_array_get(); |
92c7c870 | 51 | |
a0b36b39 HH |
52 | /* Check if this thread already has a large enough array allocated. |
53 | * This is a predictable and unlikely branch, as it occurs only once at | |
54 | * startup, or if a subtable with higher block count is added. | |
55 | */ | |
56 | if (OVS_UNLIKELY(!array || array->count < required_count)) { | |
57 | array = xrealloc(array, sizeof *array + | |
58 | (required_count * sizeof array->blocks[0])); | |
59 | array->count = required_count; | |
60 | block_array_set_unsafe(array); | |
61 | VLOG_DBG("Block array resized to %"PRIu32, required_count); | |
92c7c870 HH |
62 | } |
63 | ||
a0b36b39 | 64 | return &array->blocks[0]; |
92c7c870 HH |
65 | } |
66 | ||
a0b36b39 HH |
67 | static inline void |
68 | netdev_flow_key_flatten_unit(const uint64_t *pkt_blocks, | |
69 | const uint64_t *tbl_blocks, | |
70 | const uint64_t *mf_masks, | |
71 | uint64_t *blocks_scratch, | |
72 | const uint64_t pkt_mf_bits, | |
73 | const uint32_t count) | |
92c7c870 | 74 | { |
a0b36b39 HH |
75 | uint32_t i; |
76 | ||
77 | for (i = 0; i < count; i++) { | |
78 | uint64_t mf_mask = mf_masks[i]; | |
79 | /* Calculate the block index for the packet metadata. */ | |
80 | uint64_t idx_bits = mf_mask & pkt_mf_bits; | |
81 | const uint32_t pkt_idx = count_1bits(idx_bits); | |
82 | ||
83 | /* Check if the packet has the subtable miniflow bit set. If yes, the | |
84 | * block at the above pkt_idx will be stored, otherwise it is masked | |
85 | * out to be zero. | |
86 | */ | |
87 | uint64_t pkt_has_mf_bit = (mf_mask + 1) & pkt_mf_bits; | |
88 | uint64_t no_bit = ((!pkt_has_mf_bit) > 0) - 1; | |
89 | ||
90 | /* Mask packet block by table block, and mask to zero if packet | |
91 | * doesn't actually contain this block of metadata. | |
92 | */ | |
93 | blocks_scratch[i] = pkt_blocks[pkt_idx] & tbl_blocks[i] & no_bit; | |
94 | } | |
95 | } | |
96 | ||
97 | /* This function takes a packet, and subtable and writes an array of uint64_t | |
98 | * blocks. The blocks contain the metadata that the subtable matches on, in | |
99 | * the same order as the subtable, allowing linear iteration over the blocks. | |
100 | * | |
101 | * To calculate the blocks contents, the netdev_flow_key_flatten_unit function | |
102 | * is called twice, once for each "unit" of the miniflow. This call can be | |
103 | * inlined by the compiler for performance. | |
104 | * | |
105 | * Note that the u0_count and u1_count variables can be compile-time constants, | |
106 | * allowing the loop in the inlined flatten_unit() function to be compile-time | |
107 | * unrolled, or possibly removed totally by unrolling by the loop iterations. | |
108 | * The compile time optimizations enabled by this design improves performance. | |
109 | */ | |
110 | static inline void | |
111 | netdev_flow_key_flatten(const struct netdev_flow_key *key, | |
112 | const struct netdev_flow_key *mask, | |
113 | const uint64_t *mf_masks, | |
114 | uint64_t *blocks_scratch, | |
115 | const uint32_t u0_count, | |
116 | const uint32_t u1_count) | |
117 | { | |
118 | /* Load mask from subtable, mask with packet mf, popcount to get idx. */ | |
119 | const uint64_t *pkt_blocks = miniflow_get_values(&key->mf); | |
120 | const uint64_t *tbl_blocks = miniflow_get_values(&mask->mf); | |
121 | ||
122 | /* Packet miniflow bits to be masked by pre-calculated mf_masks. */ | |
123 | const uint64_t pkt_bits_u0 = key->mf.map.bits[0]; | |
124 | const uint32_t pkt_bits_u0_pop = count_1bits(pkt_bits_u0); | |
125 | const uint64_t pkt_bits_u1 = key->mf.map.bits[1]; | |
126 | ||
127 | /* Unit 0 flattening */ | |
128 | netdev_flow_key_flatten_unit(&pkt_blocks[0], | |
129 | &tbl_blocks[0], | |
130 | &mf_masks[0], | |
131 | &blocks_scratch[0], | |
132 | pkt_bits_u0, | |
133 | u0_count); | |
134 | ||
135 | /* Unit 1 flattening: | |
136 | * Move the pointers forward in the arrays based on u0 offsets, NOTE: | |
137 | * 1) pkt blocks indexed by actual popcount of u0, which is NOT always | |
138 | * the same as the amount of bits set in the subtable. | |
139 | * 2) mf_masks, tbl_block and blocks_scratch are all "flat" arrays, so | |
140 | * the index is always u0_count. | |
141 | */ | |
142 | netdev_flow_key_flatten_unit(&pkt_blocks[pkt_bits_u0_pop], | |
143 | &tbl_blocks[u0_count], | |
144 | &mf_masks[u0_count], | |
145 | &blocks_scratch[u0_count], | |
146 | pkt_bits_u1, | |
147 | u1_count); | |
148 | } | |
149 | ||
150 | /* Compares a rule and the blocks representing a key, returns 1 on a match. */ | |
151 | static inline uint64_t | |
152 | netdev_rule_matches_key(const struct dpcls_rule *rule, | |
153 | const uint32_t mf_bits_total, | |
154 | const uint64_t *blocks_scratch) | |
155 | { | |
156 | const uint64_t *keyp = miniflow_get_values(&rule->flow.mf); | |
157 | const uint64_t *maskp = miniflow_get_values(&rule->mask->mf); | |
158 | uint64_t not_match = 0; | |
159 | ||
160 | for (int i = 0; i < mf_bits_total; i++) { | |
161 | not_match |= (blocks_scratch[i] & maskp[i]) != keyp[i]; | |
162 | } | |
92c7c870 | 163 | |
a0b36b39 HH |
164 | /* Invert result to show match as 1. */ |
165 | return !not_match; | |
166 | } | |
167 | ||
168 | /* Const prop version of the function: note that mf bits total and u0 are | |
169 | * explicitly passed in here, while they're also available at runtime from the | |
170 | * subtable pointer. By making them compile time, we enable the compiler to | |
171 | * unroll loops and flatten out code-sequences based on the knowledge of the | |
172 | * mf_bits_* compile time values. This results in improved performance. | |
173 | * | |
174 | * Note: this function is marked with ALWAYS_INLINE to ensure the compiler | |
175 | * inlines the below code, and then uses the compile time constants to make | |
176 | * specialized versions of the runtime code. Without ALWAYS_INLINE, the | |
177 | * compiler might decide to not inline, and performance will suffer. | |
178 | */ | |
179 | static inline uint32_t ALWAYS_INLINE | |
180 | lookup_generic_impl(struct dpcls_subtable *subtable, | |
181 | uint32_t keys_map, | |
182 | const struct netdev_flow_key *keys[], | |
183 | struct dpcls_rule **rules, | |
184 | const uint32_t bit_count_u0, | |
185 | const uint32_t bit_count_u1) | |
186 | { | |
187 | const uint32_t n_pkts = count_1bits(keys_map); | |
188 | ovs_assert(NETDEV_MAX_BURST >= n_pkts); | |
92c7c870 | 189 | uint32_t hashes[NETDEV_MAX_BURST]; |
a0b36b39 HH |
190 | |
191 | const uint32_t bit_count_total = bit_count_u0 + bit_count_u1; | |
192 | const uint32_t block_count_required = bit_count_total * NETDEV_MAX_BURST; | |
193 | uint64_t *mf_masks = subtable->mf_masks; | |
194 | int i; | |
195 | ||
196 | /* Blocks scratch is an optimization to re-use the same packet miniflow | |
197 | * block data when doing rule-verify. This reduces work done during lookup | |
198 | * and hence improves performance. The blocks_scratch array is stored as a | |
199 | * thread local variable, as each thread requires its own blocks memory. | |
200 | */ | |
201 | uint64_t *blocks_scratch = get_blocks_scratch(block_count_required); | |
202 | ||
203 | /* Flatten the packet metadata into the blocks_scratch[] using subtable. */ | |
204 | ULLONG_FOR_EACH_1 (i, keys_map) { | |
205 | netdev_flow_key_flatten(keys[i], | |
206 | &subtable->mask, | |
207 | mf_masks, | |
208 | &blocks_scratch[i * bit_count_total], | |
209 | bit_count_u0, | |
210 | bit_count_u1); | |
211 | } | |
212 | ||
213 | /* Hash the now linearized blocks of packet metadata. */ | |
92c7c870 | 214 | ULLONG_FOR_EACH_1 (i, keys_map) { |
a0b36b39 HH |
215 | uint64_t *block_ptr = &blocks_scratch[i * bit_count_total]; |
216 | uint32_t hash = hash_add_words64(0, block_ptr, bit_count_total); | |
217 | hashes[i] = hash_finish(hash, bit_count_total * 8); | |
92c7c870 HH |
218 | } |
219 | ||
a0b36b39 HH |
220 | /* Lookup: this returns a bitmask of packets where the hash table had |
221 | * an entry for the given hash key. Presence of a hash key does not | |
222 | * guarantee matching the key, as there can be hash collisions. | |
223 | */ | |
224 | uint32_t found_map; | |
92c7c870 | 225 | const struct cmap_node *nodes[NETDEV_MAX_BURST]; |
a0b36b39 | 226 | |
92c7c870 HH |
227 | found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, nodes); |
228 | ||
a0b36b39 HH |
229 | /* Verify that packet actually matched rule. If not found, a hash |
230 | * collision has taken place, so continue searching with the next node. | |
231 | */ | |
92c7c870 HH |
232 | ULLONG_FOR_EACH_1 (i, found_map) { |
233 | struct dpcls_rule *rule; | |
234 | ||
235 | CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { | |
a0b36b39 HH |
236 | const uint32_t cidx = i * bit_count_total; |
237 | uint32_t match = netdev_rule_matches_key(rule, bit_count_total, | |
238 | &blocks_scratch[cidx]); | |
239 | ||
240 | if (OVS_LIKELY(match)) { | |
92c7c870 | 241 | rules[i] = rule; |
92c7c870 HH |
242 | subtable->hit_cnt++; |
243 | goto next; | |
244 | } | |
245 | } | |
246 | ||
247 | /* None of the found rules was a match. Reset the i-th bit to | |
248 | * keep searching this key in the next subtable. */ | |
249 | ULLONG_SET0(found_map, i); /* Did not match. */ | |
250 | next: | |
251 | ; /* Keep Sparse happy. */ | |
252 | } | |
253 | ||
254 | return found_map; | |
255 | } | |
a0b36b39 HH |
256 | |
257 | /* Generic lookup function that uses runtime provided mf bits for iterating. */ | |
e90e115a | 258 | static uint32_t |
a0b36b39 HH |
259 | dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable, |
260 | uint32_t keys_map, | |
261 | const struct netdev_flow_key *keys[], | |
262 | struct dpcls_rule **rules) | |
263 | { | |
f54d8f00 HH |
264 | /* Here the runtime subtable->mf_bits counts are used, which forces the |
265 | * compiler to iterate normal for() loops. Due to this limitation in the | |
266 | * compilers available optimizations, this function has lower performance | |
267 | * than the below specialized functions. | |
268 | */ | |
a0b36b39 HH |
269 | return lookup_generic_impl(subtable, keys_map, keys, rules, |
270 | subtable->mf_bits_set_unit0, | |
271 | subtable->mf_bits_set_unit1); | |
272 | } | |
f54d8f00 HH |
273 | |
274 | /* Expand out specialized functions with U0 and U1 bit attributes. */ | |
275 | #define DECLARE_OPTIMIZED_LOOKUP_FUNCTION(U0, U1) \ | |
276 | static uint32_t \ | |
277 | dpcls_subtable_lookup_mf_u0w##U0##_u1w##U1( \ | |
278 | struct dpcls_subtable *subtable, \ | |
279 | uint32_t keys_map, \ | |
280 | const struct netdev_flow_key *keys[],\ | |
281 | struct dpcls_rule **rules) \ | |
282 | { \ | |
283 | return lookup_generic_impl(subtable, keys_map, keys, rules, U0, U1); \ | |
284 | } \ | |
285 | ||
286 | DECLARE_OPTIMIZED_LOOKUP_FUNCTION(5, 1) | |
287 | DECLARE_OPTIMIZED_LOOKUP_FUNCTION(4, 1) | |
288 | DECLARE_OPTIMIZED_LOOKUP_FUNCTION(4, 0) | |
289 | ||
290 | /* Check if a specialized function is valid for the required subtable. */ | |
291 | #define CHECK_LOOKUP_FUNCTION(U0, U1) \ | |
292 | if (!f && u0_bits == U0 && u1_bits == U1) { \ | |
293 | f = dpcls_subtable_lookup_mf_u0w##U0##_u1w##U1; \ | |
294 | } | |
295 | ||
296 | /* Probe function to lookup an available specialized function. | |
297 | * If capable to run the requested miniflow fingerprint, this function returns | |
298 | * the most optimal implementation for that miniflow fingerprint. | |
299 | * @retval Non-NULL A valid function to handle the miniflow bit pattern | |
300 | * @retval NULL The requested miniflow is not supported by this implementation. | |
301 | */ | |
302 | dpcls_subtable_lookup_func | |
303 | dpcls_subtable_generic_probe(uint32_t u0_bits, uint32_t u1_bits) | |
304 | { | |
305 | dpcls_subtable_lookup_func f = NULL; | |
306 | ||
307 | CHECK_LOOKUP_FUNCTION(5, 1); | |
308 | CHECK_LOOKUP_FUNCTION(4, 1); | |
309 | CHECK_LOOKUP_FUNCTION(4, 0); | |
310 | ||
311 | if (f) { | |
312 | VLOG_DBG("Subtable using Generic Optimized for u0 %d, u1 %d\n", | |
313 | u0_bits, u1_bits); | |
e90e115a HH |
314 | } else { |
315 | /* Always return the generic function. */ | |
316 | f = dpcls_subtable_lookup_generic; | |
f54d8f00 | 317 | } |
e90e115a | 318 | |
f54d8f00 HH |
319 | return f; |
320 | } |