]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2014 Intel Corporation | |
7c673cae FG |
3 | */ |
4 | ||
5 | #ifndef _RTE_ACL_VECT_H_ | |
6 | #define _RTE_ACL_VECT_H_ | |
7 | ||
8 | /** | |
9 | * @file | |
10 | * | |
11 | * RTE ACL SSE/AVX related header. | |
12 | */ | |
13 | ||
14 | #ifdef __cplusplus | |
15 | extern "C" { | |
16 | #endif | |
17 | ||
18 | ||
19 | /* | |
9f95a23c | 20 | * Takes 2 SIMD registers containing N transitions each (tr0, tr1). |
7c673cae FG |
21 | * Shuffles it into different representation: |
22 | * lo - contains low 32 bits of given N transitions. | |
23 | * hi - contains high 32 bits of given N transitions. | |
24 | */ | |
25 | #define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi) do { \ | |
26 | lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88); \ | |
27 | hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd); \ | |
28 | } while (0) | |
29 | ||
30 | ||
/*
 * Calculate the address of the next transition for
 * all types of nodes. Note that only DFA nodes and range
 * nodes actually transition to another node. Match
 * nodes are not supposed to be encountered here.
 * For quad range nodes:
 * Calculate number of range boundaries that are less than the
 * input value. Range boundaries for each node are in signed 8 bit,
 * ordered from -128 to 127.
 * This is effectively a popcnt of the boundary bytes that are less
 * than the input byte.
 * Single nodes are processed in the same way as quad range nodes.
 *
 * Parameters:
 * P, S          - intrinsic family prefix and register-width suffix; the
 *                 macro calls _<P>_<op>_epi<N> and _<P>_<op>_si<S>.
 * addr          - output: node address taken from tr_lo plus the computed
 *                 offset. Its type is also used for all temporaries.
 * index_mask    - mask selecting the address bits of tr_lo; the remaining
 *                 bits are treated as the node type.
 * next_input    - input bytes.
 * shuffle_input - byte-shuffle control applied to next_input.
 * ones_16       - multiplier for the final horizontal add (expected to
 *                 hold 1 in every 16-bit element).
 * range_base    - per-byte base added to the DFA range index before it is
 *                 used as a byte-shuffle control on tr_hi.
 * tr_lo, tr_hi  - low and high 32 bits of the current transitions.
 *
 * Arguments are expanded more than once, so pass expressions without
 * side effects. Temporaries carry an acl_ prefix and a trailing
 * underscore so that they cannot shadow caller variables passed as
 * arguments; __typeof__ keeps the header usable in strict ISO C/C++
 * modes, where the plain typeof keyword is disabled.
 */
#define ACL_TR_CALC_ADDR(P, S, \
	addr, index_mask, next_input, shuffle_input, \
	ones_16, range_base, tr_lo, tr_hi) do { \
	\
	__typeof__(addr) acl_in_, acl_type_, acl_r_, acl_t_; \
	__typeof__(addr) acl_dfa_msk_, acl_dfa_ofs_, acl_quad_ofs_; \
	\
	/* all-zero register, used below to detect type 0 nodes */ \
	acl_t_ = _##P##_xor_si##S(index_mask, index_mask); \
	acl_in_ = _##P##_shuffle_epi8(next_input, shuffle_input); \
	\
	/* Calc node type and node addr */ \
	acl_type_ = _##P##_andnot_si##S(index_mask, tr_lo); \
	addr = _##P##_and_si##S(index_mask, tr_lo); \
	\
	/* mask for DFA type(0) nodes */ \
	acl_dfa_msk_ = _##P##_cmpeq_epi32(acl_type_, acl_t_); \
	\
	/* DFA calculations: the top 2 bits of each 32-bit input lane, */ \
	/* offset by range_base, select a byte of tr_hi; that byte is */ \
	/* subtracted from the top 8 bits of the lane. */ \
	acl_r_ = _##P##_srli_epi32(acl_in_, 30); \
	acl_r_ = _##P##_add_epi8(acl_r_, range_base); \
	acl_t_ = _##P##_srli_epi32(acl_in_, 24); \
	acl_r_ = _##P##_shuffle_epi8(tr_hi, acl_r_); \
	\
	acl_dfa_ofs_ = _##P##_sub_epi32(acl_t_, acl_r_); \
	\
	/* QUAD/SINGLE calculations: 0xff per byte where input > boundary, */ \
	/* turned into 1 by sign_epi8, then summed horizontally per lane. */ \
	acl_t_ = _##P##_cmpgt_epi8(acl_in_, tr_hi); \
	acl_t_ = _##P##_sign_epi8(acl_t_, acl_t_); \
	acl_t_ = _##P##_maddubs_epi16(acl_t_, acl_t_); \
	acl_quad_ofs_ = _##P##_madd_epi16(acl_t_, ones_16); \
	\
	/* blend DFA and QUAD/SINGLE. */ \
	acl_t_ = _##P##_blendv_epi8(acl_quad_ofs_, acl_dfa_ofs_, \
		acl_dfa_msk_); \
	\
	/* calculate address for next transitions. */ \
	addr = _##P##_add_epi32(addr, acl_t_); \
} while (0)
81 | ||
82 | ||
83 | #ifdef __cplusplus | |
84 | } | |
85 | #endif | |
86 | ||
87 | #endif /* _RTE_ACL_VECT_H_ */ |