]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2014 Intel Corporation | |
7c673cae FG |
3 | */ |
4 | ||
5 | #include <rte_acl.h> | |
6 | #include "acl.h" | |
7 | ||
/* lowest value of the signed byte range (0x80); QRANGE nodes are laid out
 * from 0x80-0xff then 0x00-0x7f to suit SSE signed compares (see
 * acl_add_ptrs()). */
#define QRANGE_MIN ((uint8_t)INT8_MIN)

/* internal consistency check: panic with file line number if exp is false */
#define RTE_ACL_VERIFY(exp) do { \
	if (!(exp)) \
		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
} while (0)
14 | ||
/*
 * Per-build statistics collected while classifying trie nodes:
 * how many nodes of each runtime type will be generated.
 */
struct acl_node_counters {
	int32_t match;        /* number of match nodes */
	int32_t match_used;   /* match entries actually referenced */
	int32_t single;       /* nodes with a single outgoing range */
	int32_t quad;         /* quad-range nodes */
	int32_t quad_vectors; /* total transition vectors over all quads */
	int32_t dfa;          /* full DFA nodes */
	int32_t dfa_gr64;     /* total deduped 64-entry groups over all DFAs */
};
24 | ||
/*
 * Running allocation cursors into the runtime transition array.
 * Layout order (see acl_calc_counts_indices()):
 * DFA area, then QRANGE area, then SINGLE area, then match results.
 */
struct rte_acl_indices {
	int32_t dfa_index;    /* next free slot in the DFA area */
	int32_t quad_index;   /* next free slot in the QRANGE area */
	int32_t single_index; /* next free slot in the SINGLE area */
	int32_t match_index;  /* next free match-result entry (0 = NOMATCH) */
	int32_t match_start;  /* offset (in uint64_t) of the match area */
};
32 | ||
/*
 * Emit a DEBUG summary of the generated runtime structure: node counts
 * and byte usage per node type, total footprint and the configured limit.
 */
static void
acl_gen_log_stats(const struct rte_acl_ctx *ctx,
	const struct acl_node_counters *counts,
	const struct rte_acl_indices *indices,
	size_t max_size)
{
	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
		"runtime memory footprint on socket %d:\n"
		"single nodes/bytes used: %d/%zu\n"
		"quad nodes/vectors/bytes used: %d/%d/%zu\n"
		"DFA nodes/group64/bytes used: %d/%d/%zu\n"
		"match nodes/bytes used: %d/%zu\n"
		"total: %zu bytes\n"
		"max limit: %zu bytes\n",
		ctx->name, ctx->socket_id,
		counts->single, counts->single * sizeof(uint64_t),
		counts->quad, counts->quad_vectors,
		/* quad area spans [dfa_index, quad_index) after generation */
		(indices->quad_index - indices->dfa_index) * sizeof(uint64_t),
		counts->dfa, counts->dfa_gr64,
		indices->dfa_index * sizeof(uint64_t),
		counts->match,
		counts->match * sizeof(struct rte_acl_match_results),
		ctx->mem_sz,
		max_size);
}
58 | ||
59 | static uint64_t | |
60 | acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index) | |
61 | { | |
62 | uint64_t idx; | |
63 | uint32_t i; | |
64 | ||
65 | idx = 0; | |
66 | for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) { | |
67 | RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM); | |
68 | RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout); | |
69 | idx |= (i - node->dfa_gr64[i]) << | |
70 | (6 + RTE_ACL_DFA_GR64_BIT * i); | |
71 | } | |
72 | ||
73 | return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type; | |
74 | } | |
75 | ||
76 | static void | |
77 | acl_dfa_fill_gr64(const struct rte_acl_node *node, | |
78 | const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE]) | |
79 | { | |
80 | uint32_t i; | |
81 | ||
82 | for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) { | |
83 | memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE, | |
84 | src + i * RTE_ACL_DFA_GR64_SIZE, | |
85 | RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0])); | |
86 | } | |
87 | } | |
88 | ||
89 | static uint32_t | |
90 | acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE], | |
91 | uint8_t gr64[RTE_ACL_DFA_GR64_NUM]) | |
92 | { | |
93 | uint32_t i, j, k; | |
94 | ||
95 | k = 0; | |
96 | for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) { | |
97 | gr64[i] = i; | |
98 | for (j = 0; j != i; j++) { | |
99 | if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE, | |
100 | array_ptr + j * RTE_ACL_DFA_GR64_SIZE, | |
101 | RTE_ACL_DFA_GR64_SIZE * | |
102 | sizeof(array_ptr[0])) == 0) | |
103 | break; | |
104 | } | |
105 | gr64[i] = (j != i) ? gr64[j] : k++; | |
106 | } | |
107 | ||
108 | return k; | |
109 | } | |
110 | ||
111 | static uint32_t | |
112 | acl_node_fill_dfa(const struct rte_acl_node *node, | |
113 | uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved) | |
114 | { | |
115 | uint32_t n, x; | |
116 | uint32_t ranges, last_bit; | |
117 | struct rte_acl_node *child; | |
118 | struct rte_acl_bitset *bits; | |
119 | ||
120 | ranges = 0; | |
121 | last_bit = 0; | |
122 | ||
123 | for (n = 0; n < RTE_ACL_DFA_SIZE; n++) | |
124 | dfa[n] = no_match; | |
125 | ||
126 | for (x = 0; x < node->num_ptrs; x++) { | |
127 | ||
128 | child = node->ptrs[x].ptr; | |
129 | if (child == NULL) | |
130 | continue; | |
131 | ||
132 | bits = &node->ptrs[x].values; | |
133 | for (n = 0; n < RTE_ACL_DFA_SIZE; n++) { | |
134 | ||
135 | if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] & | |
136 | (1 << (n % (sizeof(bits_t) * CHAR_BIT)))) { | |
137 | ||
138 | dfa[n] = resolved ? child->node_index : x; | |
139 | ranges += (last_bit == 0); | |
140 | last_bit = 1; | |
141 | } else { | |
142 | last_bit = 0; | |
143 | } | |
144 | } | |
145 | } | |
146 | ||
147 | return ranges; | |
148 | } | |
149 | ||
150 | /* | |
151 | * Counts the number of groups of sequential bits that are | |
152 | * either 0 or 1, as specified by the zero_one parameter. This is used to | |
153 | * calculate the number of ranges in a node to see if it fits in a quad range | |
154 | * node. | |
155 | */ | |
156 | static int | |
157 | acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one) | |
158 | { | |
159 | int n, ranges, last_bit; | |
160 | ||
161 | ranges = 0; | |
162 | last_bit = zero_one ^ 1; | |
163 | ||
164 | for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) { | |
165 | if (bits->bits[n / (sizeof(bits_t) * 8)] & | |
9f95a23c | 166 | (1U << (n % (sizeof(bits_t) * 8)))) { |
7c673cae FG |
167 | if (zero_one == 1 && last_bit != 1) |
168 | ranges++; | |
169 | last_bit = 1; | |
170 | } else { | |
171 | if (zero_one == 0 && last_bit != 0) | |
172 | ranges++; | |
173 | last_bit = 0; | |
174 | } | |
175 | } | |
176 | for (n = 0; n < QRANGE_MIN; n++) { | |
177 | if (bits->bits[n / (sizeof(bits_t) * 8)] & | |
178 | (1 << (n % (sizeof(bits_t) * 8)))) { | |
179 | if (zero_one == 1 && last_bit != 1) | |
180 | ranges++; | |
181 | last_bit = 1; | |
182 | } else { | |
183 | if (zero_one == 0 && last_bit != 0) | |
184 | ranges++; | |
185 | last_bit = 0; | |
186 | } | |
187 | } | |
188 | ||
189 | return ranges; | |
190 | } | |
191 | ||
192 | /* | |
193 | * Count number of ranges spanned by the node's pointers | |
194 | */ | |
195 | static int | |
196 | acl_count_fanout(struct rte_acl_node *node) | |
197 | { | |
198 | uint32_t n; | |
199 | int ranges; | |
200 | ||
201 | if (node->fanout != 0) | |
202 | return node->fanout; | |
203 | ||
204 | ranges = acl_count_sequential_groups(&node->values, 0); | |
205 | ||
206 | for (n = 0; n < node->num_ptrs; n++) { | |
207 | if (node->ptrs[n].ptr != NULL) | |
208 | ranges += acl_count_sequential_groups( | |
209 | &node->ptrs[n].values, 1); | |
210 | } | |
211 | ||
212 | node->fanout = ranges; | |
213 | return node->fanout; | |
214 | } | |
215 | ||
/*
 * Determine the type of nodes and count each type.
 * Classifies @node as MATCH / SINGLE / QRANGE / DFA from its range
 * fanout, updates @counts, then recurses into all children.  For DFA
 * nodes it also computes the deduplicated 64-entry group map
 * (node->dfa_gr64) and stores the resulting group count in fanout.
 */
static void
acl_count_trie_types(struct acl_node_counters *counts,
	struct rte_acl_node *node, uint64_t no_match, int force_dfa)
{
	uint32_t n;
	int num_ptrs;
	uint64_t dfa[RTE_ACL_DFA_SIZE];

	/* skip if this node has been counted */
	if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
		return;

	/* nodes carrying a match (or with no children) become match nodes */
	if (node->match_flag != 0 || node->num_ptrs == 0) {
		counts->match++;
		node->node_type = RTE_ACL_NODE_MATCH;
		return;
	}

	num_ptrs = acl_count_fanout(node);

	/* Force type to dfa */
	if (force_dfa)
		num_ptrs = RTE_ACL_DFA_SIZE;

	/* determine node type based on number of ranges */
	if (num_ptrs == 1) {
		counts->single++;
		node->node_type = RTE_ACL_NODE_SINGLE;
	} else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
		counts->quad++;
		counts->quad_vectors += node->fanout;
		node->node_type = RTE_ACL_NODE_QRANGE;
	} else {
		counts->dfa++;
		node->node_type = RTE_ACL_NODE_DFA;
		if (force_dfa != 0) {
			/* always expand to a max number of nodes. */
			for (n = 0; n != RTE_DIM(node->dfa_gr64); n++)
				node->dfa_gr64[n] = n;
			node->fanout = n;
		} else {
			/* dedup identical 64-entry groups to shrink the node */
			acl_node_fill_dfa(node, dfa, no_match, 0);
			node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64);
		}
		counts->dfa_gr64 += node->fanout;
	}

	/*
	 * recursively count the types of all children
	 */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_count_trie_types(counts, node->ptrs[n].ptr,
				no_match, 0);
	}
}
275 | ||
/*
 * Write out the transition entries for @node into @node_array.
 * For a QRANGE node: emit the compressed vector of range targets plus
 * the per-range upper-boundary bytes into node->transitions.
 * For a DFA node (resolved pass only): copy the deduplicated 64-entry
 * groups into place.
 */
static void
acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
	int resolved)
{
	uint32_t x;
	int32_t m;
	uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];

	acl_node_fill_dfa(node, dfa, no_match, resolved);

	/*
	 * Rather than going from 0 to 256, the range count and
	 * the layout are from 80-ff then 0-7f due to signed compare
	 * for SSE (cmpgt).
	 */
	if (node->node_type == RTE_ACL_NODE_QRANGE) {

		m = 0;
		node_a = node_array;
		index = dfa[QRANGE_MIN];
		*node_a++ = index;

		/* upper half of the byte range: 0x81..0xff */
		for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				/* record upper boundary of finished range */
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		/* lower half: 0x00..0x7f; at x == 0, (uint8_t)(x - 1)
		 * wraps to 0xff, closing the upper-half range */
		for (x = 0; x < INT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		/* fill unused locations with max value - nothing is greater */
		for (; m < RTE_ACL_QUAD_SIZE; m++)
			node->transitions[m] = INT8_MAX;

		RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);

	} else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
		acl_dfa_fill_gr64(node, dfa, node_array);
	}
}
324 | ||
325 | /* | |
326 | * Routine that allocates space for this node and recursively calls | |
327 | * to allocate space for each child. Once all the children are allocated, | |
328 | * then resolve all transitions for this node. | |
329 | */ | |
330 | static void | |
331 | acl_gen_node(struct rte_acl_node *node, uint64_t *node_array, | |
332 | uint64_t no_match, struct rte_acl_indices *index, int num_categories) | |
333 | { | |
334 | uint32_t n, sz, *qtrp; | |
335 | uint64_t *array_ptr; | |
336 | struct rte_acl_match_results *match; | |
337 | ||
338 | if (node->node_index != RTE_ACL_NODE_UNDEFINED) | |
339 | return; | |
340 | ||
341 | array_ptr = NULL; | |
342 | ||
343 | switch (node->node_type) { | |
344 | case RTE_ACL_NODE_DFA: | |
345 | array_ptr = &node_array[index->dfa_index]; | |
346 | node->node_index = acl_dfa_gen_idx(node, index->dfa_index); | |
347 | sz = node->fanout * RTE_ACL_DFA_GR64_SIZE; | |
348 | index->dfa_index += sz; | |
349 | for (n = 0; n < sz; n++) | |
350 | array_ptr[n] = no_match; | |
351 | break; | |
352 | case RTE_ACL_NODE_SINGLE: | |
353 | node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index | | |
354 | node->node_type; | |
355 | array_ptr = &node_array[index->single_index]; | |
356 | index->single_index += 1; | |
357 | array_ptr[0] = no_match; | |
358 | break; | |
359 | case RTE_ACL_NODE_QRANGE: | |
360 | array_ptr = &node_array[index->quad_index]; | |
361 | acl_add_ptrs(node, array_ptr, no_match, 0); | |
362 | qtrp = (uint32_t *)node->transitions; | |
363 | node->node_index = qtrp[0]; | |
364 | node->node_index <<= sizeof(index->quad_index) * CHAR_BIT; | |
365 | node->node_index |= index->quad_index | node->node_type; | |
366 | index->quad_index += node->fanout; | |
367 | break; | |
368 | case RTE_ACL_NODE_MATCH: | |
369 | match = ((struct rte_acl_match_results *) | |
370 | (node_array + index->match_start)); | |
371 | for (n = 0; n != RTE_DIM(match->results); n++) | |
372 | RTE_ACL_VERIFY(match->results[0] == 0); | |
373 | memcpy(match + index->match_index, node->mrt, | |
374 | sizeof(*node->mrt)); | |
375 | node->node_index = index->match_index | node->node_type; | |
376 | index->match_index += 1; | |
377 | break; | |
378 | case RTE_ACL_NODE_UNDEFINED: | |
379 | RTE_ACL_VERIFY(node->node_type != | |
380 | (uint32_t)RTE_ACL_NODE_UNDEFINED); | |
381 | break; | |
382 | } | |
383 | ||
384 | /* recursively allocate space for all children */ | |
385 | for (n = 0; n < node->num_ptrs; n++) { | |
386 | if (node->ptrs[n].ptr != NULL) | |
387 | acl_gen_node(node->ptrs[n].ptr, | |
388 | node_array, | |
389 | no_match, | |
390 | index, | |
391 | num_categories); | |
392 | } | |
393 | ||
394 | /* All children are resolved, resolve this node's pointers */ | |
395 | switch (node->node_type) { | |
396 | case RTE_ACL_NODE_DFA: | |
397 | acl_add_ptrs(node, array_ptr, no_match, 1); | |
398 | break; | |
399 | case RTE_ACL_NODE_SINGLE: | |
400 | for (n = 0; n < node->num_ptrs; n++) { | |
401 | if (node->ptrs[n].ptr != NULL) | |
402 | array_ptr[0] = node->ptrs[n].ptr->node_index; | |
403 | } | |
404 | break; | |
405 | case RTE_ACL_NODE_QRANGE: | |
406 | acl_add_ptrs(node, array_ptr, no_match, 1); | |
407 | break; | |
408 | case RTE_ACL_NODE_MATCH: | |
409 | break; | |
410 | case RTE_ACL_NODE_UNDEFINED: | |
411 | RTE_ACL_VERIFY(node->node_type != | |
412 | (uint32_t)RTE_ACL_NODE_UNDEFINED); | |
413 | break; | |
414 | } | |
415 | } | |
416 | ||
417 | static void | |
418 | acl_calc_counts_indices(struct acl_node_counters *counts, | |
419 | struct rte_acl_indices *indices, | |
420 | struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries, | |
421 | uint64_t no_match) | |
422 | { | |
423 | uint32_t n; | |
424 | ||
425 | memset(indices, 0, sizeof(*indices)); | |
426 | memset(counts, 0, sizeof(*counts)); | |
427 | ||
428 | /* Get stats on nodes */ | |
429 | for (n = 0; n < num_tries; n++) { | |
430 | acl_count_trie_types(counts, node_bld_trie[n].trie, | |
431 | no_match, 1); | |
432 | } | |
433 | ||
434 | indices->dfa_index = RTE_ACL_DFA_SIZE + 1; | |
435 | indices->quad_index = indices->dfa_index + | |
436 | counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE; | |
437 | indices->single_index = indices->quad_index + counts->quad_vectors; | |
438 | indices->match_start = indices->single_index + counts->single + 1; | |
439 | indices->match_start = RTE_ALIGN(indices->match_start, | |
440 | (XMM_SIZE / sizeof(uint64_t))); | |
441 | indices->match_index = 1; | |
442 | } | |
443 | ||
/*
 * Generate the runtime structure using build structure.
 * Counts node types, sizes and allocates the runtime blob (data indexes
 * + transition array + match results) on @ctx's socket, lays out the
 * shared NOMATCH DFA and idle node, generates every trie, and wires the
 * result into @ctx.
 * Returns 0 on success, -ERANGE if the result would exceed @max_size,
 * -ENOMEM on allocation failure.
 */
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
{
	void *mem;
	size_t total_size;
	uint64_t *node_array, no_match;
	uint32_t n, match_index;
	struct rte_acl_match_results *match;
	struct acl_node_counters counts;
	struct rte_acl_indices indices;

	/* transition value meaning "no match": match-type bits, index 0 */
	no_match = RTE_ACL_NODE_MATCH;

	/* Fill counts and indices arrays from the nodes. */
	acl_calc_counts_indices(&counts, &indices,
		node_bld_trie, num_tries, no_match);

	/* Allocate runtime memory (align to cache boundary) */
	total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
		indices.match_start * sizeof(uint64_t) +
		(counts.match + 1) * sizeof(struct rte_acl_match_results) +
		XMM_SIZE;

	if (total_size > max_size) {
		RTE_LOG(DEBUG, ACL,
			"Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
			"bytes required: %zu, allowed: %zu\n",
			ctx->name, total_size, max_size);
		return -ERANGE;
	}

	/* zeroed allocation: match area must start out all-zero */
	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
			ctx->socket_id);
	if (mem == NULL) {
		RTE_LOG(ERR, ACL,
			"allocation of %zu bytes on socket %d for %s failed\n",
			total_size, ctx->socket_id, ctx->name);
		return -ENOMEM;
	}

	/* Fill the runtime structure */
	match_index = indices.match_start;
	node_array = (uint64_t *)((uintptr_t)mem +
		RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));

	/*
	 * Setup the NOMATCH node (a SINGLE at the
	 * highest index, that points to itself)
	 */

	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_DFA_SIZE | RTE_ACL_NODE_SINGLE;

	/* shared 256-entry DFA where every input byte yields no_match */
	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		node_array[n] = no_match;

	/* NOMATCH result at index 0 */
	match = ((struct rte_acl_match_results *)(node_array + match_index));
	memset(match, 0, sizeof(*match));

	for (n = 0; n < num_tries; n++) {

		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
			&indices, num_categories);

		/* a root that matches nothing points at the no-match DFA */
		if (node_bld_trie[n].trie->node_index == no_match)
			trie[n].root_index = 0;
		else
			trie[n].root_index = node_bld_trie[n].trie->node_index;
	}

	/* publish the generated runtime structure into the context */
	ctx->mem = mem;
	ctx->mem_sz = total_size;
	ctx->data_indexes = mem;
	ctx->num_tries = num_tries;
	ctx->num_categories = num_categories;
	ctx->match_index = match_index;
	ctx->no_match = no_match;
	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
	ctx->trans_table = node_array;
	memcpy(ctx->trie, trie, sizeof(ctx->trie));

	acl_gen_log_stats(ctx, &counts, &indices, max_size);
	return 0;
}