]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(C) 2020 Marvell International Ltd. | |
3 | */ | |
4 | ||
5 | #ifndef __INCLUDE_IP4_LOOKUP_SSE_H__ | |
6 | #define __INCLUDE_IP4_LOOKUP_SSE_H__ | |
7 | ||
8 | /* X86 SSE */ | |
9 | static uint16_t | |
10 | ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node, | |
11 | void **objs, uint16_t nb_objs) | |
12 | { | |
13 | struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; | |
14 | rte_edge_t next0, next1, next2, next3, next_index; | |
15 | struct rte_ipv4_hdr *ipv4_hdr; | |
16 | uint32_t ip0, ip1, ip2, ip3; | |
17 | void **to_next, **from; | |
18 | uint16_t last_spec = 0; | |
19 | uint16_t n_left_from; | |
20 | struct rte_lpm *lpm; | |
21 | uint16_t held = 0; | |
22 | uint32_t drop_nh; | |
23 | rte_xmm_t dst; | |
24 | __m128i dip; /* SSE register */ | |
25 | int rc, i; | |
26 | ||
27 | /* Speculative next */ | |
28 | next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE; | |
29 | /* Drop node */ | |
30 | drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16; | |
31 | ||
32 | /* Get socket specific LPM from ctx */ | |
33 | lpm = *((struct rte_lpm **)node->ctx); | |
34 | ||
35 | pkts = (struct rte_mbuf **)objs; | |
36 | from = objs; | |
37 | n_left_from = nb_objs; | |
38 | ||
39 | if (n_left_from >= 4) { | |
40 | for (i = 0; i < 4; i++) | |
41 | rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *, | |
42 | sizeof(struct rte_ether_hdr))); | |
43 | } | |
44 | ||
45 | /* Get stream for the speculated next node */ | |
46 | to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs); | |
47 | while (n_left_from >= 4) { | |
48 | /* Prefetch next-next mbufs */ | |
49 | if (likely(n_left_from > 11)) { | |
50 | rte_prefetch0(pkts[8]); | |
51 | rte_prefetch0(pkts[9]); | |
52 | rte_prefetch0(pkts[10]); | |
53 | rte_prefetch0(pkts[11]); | |
54 | } | |
55 | ||
56 | /* Prefetch next mbuf data */ | |
57 | if (likely(n_left_from > 7)) { | |
58 | rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *, | |
59 | sizeof(struct rte_ether_hdr))); | |
60 | rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *, | |
61 | sizeof(struct rte_ether_hdr))); | |
62 | rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *, | |
63 | sizeof(struct rte_ether_hdr))); | |
64 | rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *, | |
65 | sizeof(struct rte_ether_hdr))); | |
66 | } | |
67 | ||
68 | mbuf0 = pkts[0]; | |
69 | mbuf1 = pkts[1]; | |
70 | mbuf2 = pkts[2]; | |
71 | mbuf3 = pkts[3]; | |
72 | ||
73 | pkts += 4; | |
74 | n_left_from -= 4; | |
75 | ||
76 | /* Extract DIP of mbuf0 */ | |
77 | ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *, | |
78 | sizeof(struct rte_ether_hdr)); | |
79 | ip0 = ipv4_hdr->dst_addr; | |
80 | /* Extract cksum, ttl as ipv4 hdr is in cache */ | |
81 | node_mbuf_priv1(mbuf0)->cksum = ipv4_hdr->hdr_checksum; | |
82 | node_mbuf_priv1(mbuf0)->ttl = ipv4_hdr->time_to_live; | |
83 | ||
84 | /* Extract DIP of mbuf1 */ | |
85 | ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv4_hdr *, | |
86 | sizeof(struct rte_ether_hdr)); | |
87 | ip1 = ipv4_hdr->dst_addr; | |
88 | /* Extract cksum, ttl as ipv4 hdr is in cache */ | |
89 | node_mbuf_priv1(mbuf1)->cksum = ipv4_hdr->hdr_checksum; | |
90 | node_mbuf_priv1(mbuf1)->ttl = ipv4_hdr->time_to_live; | |
91 | ||
92 | /* Extract DIP of mbuf2 */ | |
93 | ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv4_hdr *, | |
94 | sizeof(struct rte_ether_hdr)); | |
95 | ip2 = ipv4_hdr->dst_addr; | |
96 | /* Extract cksum, ttl as ipv4 hdr is in cache */ | |
97 | node_mbuf_priv1(mbuf2)->cksum = ipv4_hdr->hdr_checksum; | |
98 | node_mbuf_priv1(mbuf2)->ttl = ipv4_hdr->time_to_live; | |
99 | ||
100 | /* Extract DIP of mbuf3 */ | |
101 | ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv4_hdr *, | |
102 | sizeof(struct rte_ether_hdr)); | |
103 | ip3 = ipv4_hdr->dst_addr; | |
104 | ||
105 | /* Prepare for lookup x4 */ | |
106 | dip = _mm_set_epi32(ip3, ip2, ip1, ip0); | |
107 | ||
108 | /* Byte swap 4 IPV4 addresses. */ | |
109 | const __m128i bswap_mask = _mm_set_epi8( | |
110 | 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); | |
111 | dip = _mm_shuffle_epi8(dip, bswap_mask); | |
112 | ||
113 | /* Extract cksum, ttl as ipv4 hdr is in cache */ | |
114 | node_mbuf_priv1(mbuf3)->cksum = ipv4_hdr->hdr_checksum; | |
115 | node_mbuf_priv1(mbuf3)->ttl = ipv4_hdr->time_to_live; | |
116 | ||
117 | /* Perform LPM lookup to get NH and next node */ | |
118 | rte_lpm_lookupx4(lpm, dip, dst.u32, drop_nh); | |
119 | ||
120 | /* Extract next node id and NH */ | |
121 | node_mbuf_priv1(mbuf0)->nh = dst.u32[0] & 0xFFFF; | |
122 | next0 = (dst.u32[0] >> 16); | |
123 | ||
124 | node_mbuf_priv1(mbuf1)->nh = dst.u32[1] & 0xFFFF; | |
125 | next1 = (dst.u32[1] >> 16); | |
126 | ||
127 | node_mbuf_priv1(mbuf2)->nh = dst.u32[2] & 0xFFFF; | |
128 | next2 = (dst.u32[2] >> 16); | |
129 | ||
130 | node_mbuf_priv1(mbuf3)->nh = dst.u32[3] & 0xFFFF; | |
131 | next3 = (dst.u32[3] >> 16); | |
132 | ||
133 | /* Enqueue four to next node */ | |
134 | rte_edge_t fix_spec = | |
135 | (next_index ^ next0) | (next_index ^ next1) | | |
136 | (next_index ^ next2) | (next_index ^ next3); | |
137 | ||
138 | if (unlikely(fix_spec)) { | |
139 | /* Copy things successfully speculated till now */ | |
140 | rte_memcpy(to_next, from, last_spec * sizeof(from[0])); | |
141 | from += last_spec; | |
142 | to_next += last_spec; | |
143 | held += last_spec; | |
144 | last_spec = 0; | |
145 | ||
146 | /* Next0 */ | |
147 | if (next_index == next0) { | |
148 | to_next[0] = from[0]; | |
149 | to_next++; | |
150 | held++; | |
151 | } else { | |
152 | rte_node_enqueue_x1(graph, node, next0, | |
153 | from[0]); | |
154 | } | |
155 | ||
156 | /* Next1 */ | |
157 | if (next_index == next1) { | |
158 | to_next[0] = from[1]; | |
159 | to_next++; | |
160 | held++; | |
161 | } else { | |
162 | rte_node_enqueue_x1(graph, node, next1, | |
163 | from[1]); | |
164 | } | |
165 | ||
166 | /* Next2 */ | |
167 | if (next_index == next2) { | |
168 | to_next[0] = from[2]; | |
169 | to_next++; | |
170 | held++; | |
171 | } else { | |
172 | rte_node_enqueue_x1(graph, node, next2, | |
173 | from[2]); | |
174 | } | |
175 | ||
176 | /* Next3 */ | |
177 | if (next_index == next3) { | |
178 | to_next[0] = from[3]; | |
179 | to_next++; | |
180 | held++; | |
181 | } else { | |
182 | rte_node_enqueue_x1(graph, node, next3, | |
183 | from[3]); | |
184 | } | |
185 | ||
186 | from += 4; | |
187 | ||
188 | } else { | |
189 | last_spec += 4; | |
190 | } | |
191 | } | |
192 | ||
193 | while (n_left_from > 0) { | |
194 | uint32_t next_hop; | |
195 | ||
196 | mbuf0 = pkts[0]; | |
197 | ||
198 | pkts += 1; | |
199 | n_left_from -= 1; | |
200 | ||
201 | /* Extract DIP of mbuf0 */ | |
202 | ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *, | |
203 | sizeof(struct rte_ether_hdr)); | |
204 | /* Extract cksum, ttl as ipv4 hdr is in cache */ | |
205 | node_mbuf_priv1(mbuf0)->cksum = ipv4_hdr->hdr_checksum; | |
206 | node_mbuf_priv1(mbuf0)->ttl = ipv4_hdr->time_to_live; | |
207 | ||
208 | rc = rte_lpm_lookup(lpm, rte_be_to_cpu_32(ipv4_hdr->dst_addr), | |
209 | &next_hop); | |
210 | next_hop = (rc == 0) ? next_hop : drop_nh; | |
211 | ||
212 | node_mbuf_priv1(mbuf0)->nh = next_hop & 0xFFFF; | |
213 | next0 = (next_hop >> 16); | |
214 | ||
215 | if (unlikely(next_index ^ next0)) { | |
216 | /* Copy things successfully speculated till now */ | |
217 | rte_memcpy(to_next, from, last_spec * sizeof(from[0])); | |
218 | from += last_spec; | |
219 | to_next += last_spec; | |
220 | held += last_spec; | |
221 | last_spec = 0; | |
222 | ||
223 | rte_node_enqueue_x1(graph, node, next0, from[0]); | |
224 | from += 1; | |
225 | } else { | |
226 | last_spec += 1; | |
227 | } | |
228 | } | |
229 | ||
230 | /* !!! Home run !!! */ | |
231 | if (likely(last_spec == nb_objs)) { | |
232 | rte_node_next_stream_move(graph, node, next_index); | |
233 | return nb_objs; | |
234 | } | |
235 | ||
236 | held += last_spec; | |
237 | /* Copy things successfully speculated till now */ | |
238 | rte_memcpy(to_next, from, last_spec * sizeof(from[0])); | |
239 | rte_node_next_stream_put(graph, node, next_index, held); | |
240 | ||
241 | return nb_objs; | |
242 | } | |
243 | ||
244 | #endif /* __INCLUDE_IP4_LOOKUP_SSE_H__ */ |