]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright(c) 2016 Intel Corporation. All rights reserved. | |
5 | * All rights reserved. | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * * Neither the name of Intel Corporation nor the names of its | |
18 | * contributors may be used to endorse or promote products derived | |
19 | * from this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | ||
35 | #ifndef _L3FWD_COMMON_H_ | |
36 | #define _L3FWD_COMMON_H_ | |
37 | ||
38 | #include "l3fwd.h" | |
39 | ||
40 | #ifdef DO_RFC_1812_CHECKS | |
41 | ||
/*
 * Bounds of the IPv4 version_ihl byte: version 4 combined with the
 * smallest (5 words) and the largest (15 words) header length.
 */
#define IPV4_MIN_VER_IHL	0x45
#define IPV4_MAX_VER_IHL	0x4f

/* Width of the legal version_ihl range; lets one unsigned compare test it. */
#define IPV4_MAX_VER_IHL_DIFF	(IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL)

/*
 * Minimum value of IPV4 total length (20B) in network byte order.
 * NOTE(review): the "<< 8" form matches network byte order only when the
 * field is read on a little-endian host - confirm target platforms.
 */
#define IPV4_MIN_LEN_BE	(sizeof(struct ipv4_hdr) << 8)
48 | ||
49 | /* | |
50 | * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2: | |
51 | * - The IP version number must be 4. | |
52 | * - The IP header length field must be large enough to hold the | |
53 | * minimum length legal IP datagram (20 bytes = 5 words). | |
54 | * - The IP total length field must be large enough to hold the IP | |
55 | * datagram header, whose length is specified in the IP header length | |
56 | * field. | |
57 | * If we encounter invalid IPV4 packet, then set destination port for it | |
58 | * to BAD_PORT value. | |
59 | */ | |
60 | static inline __attribute__((always_inline)) void | |
61 | rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) | |
62 | { | |
63 | uint8_t ihl; | |
64 | ||
65 | if (RTE_ETH_IS_IPV4_HDR(ptype)) { | |
66 | ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL; | |
67 | ||
68 | ipv4_hdr->time_to_live--; | |
69 | ipv4_hdr->hdr_checksum++; | |
70 | ||
71 | if (ihl > IPV4_MAX_VER_IHL_DIFF || | |
72 | ((uint8_t)ipv4_hdr->total_length == 0 && | |
73 | ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) | |
74 | dp[0] = BAD_PORT; | |
75 | ||
76 | } | |
77 | } | |
78 | ||
79 | #else | |
/*
 * RFC 1812 checks are compiled out: the call expands to an empty statement
 * and none of its arguments are evaluated.
 */
#define rfc1812_process(mb, dp, ptype)	do { } while (0)
81 | #endif /* DO_RFC_1812_CHECKS */ | |
82 | ||
83 | /* | |
84 | * Update source and destination MAC addresses in the ethernet header. | |
85 | * Perform RFC1812 checks and updates for IPV4 packets. | |
86 | */ | |
87 | static inline void | |
88 | processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) | |
89 | { | |
90 | __m128i te[FWDSTEP]; | |
91 | __m128i ve[FWDSTEP]; | |
92 | __m128i *p[FWDSTEP]; | |
93 | ||
94 | p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *); | |
95 | p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *); | |
96 | p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *); | |
97 | p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *); | |
98 | ||
99 | ve[0] = val_eth[dst_port[0]]; | |
100 | te[0] = _mm_loadu_si128(p[0]); | |
101 | ||
102 | ve[1] = val_eth[dst_port[1]]; | |
103 | te[1] = _mm_loadu_si128(p[1]); | |
104 | ||
105 | ve[2] = val_eth[dst_port[2]]; | |
106 | te[2] = _mm_loadu_si128(p[2]); | |
107 | ||
108 | ve[3] = val_eth[dst_port[3]]; | |
109 | te[3] = _mm_loadu_si128(p[3]); | |
110 | ||
111 | /* Update first 12 bytes, keep rest bytes intact. */ | |
112 | te[0] = _mm_blend_epi16(te[0], ve[0], MASK_ETH); | |
113 | te[1] = _mm_blend_epi16(te[1], ve[1], MASK_ETH); | |
114 | te[2] = _mm_blend_epi16(te[2], ve[2], MASK_ETH); | |
115 | te[3] = _mm_blend_epi16(te[3], ve[3], MASK_ETH); | |
116 | ||
117 | _mm_storeu_si128(p[0], te[0]); | |
118 | _mm_storeu_si128(p[1], te[1]); | |
119 | _mm_storeu_si128(p[2], te[2]); | |
120 | _mm_storeu_si128(p[3], te[3]); | |
121 | ||
122 | rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1), | |
123 | &dst_port[0], pkt[0]->packet_type); | |
124 | rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1), | |
125 | &dst_port[1], pkt[1]->packet_type); | |
126 | rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1), | |
127 | &dst_port[2], pkt[2]->packet_type); | |
128 | rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1), | |
129 | &dst_port[3], pkt[3]->packet_type); | |
130 | } | |
131 | ||
132 | /* | |
133 | * We group consecutive packets with the same destination port into one burst. | |
134 | * To avoid extra latency this is done together with some other packet | |
135 | * processing, but after we made a final decision about packet's destination. | |
136 | * To do this we maintain: | |
137 | * pnum - array of number of consecutive packets with the same dest port for | |
138 | * each packet in the input burst. | |
139 | * lp - pointer to the last updated element in the pnum. | |
140 | * dlp - dest port value lp corresponds to. | |
141 | */ | |
142 | ||
/* Number of possible results of FWDSTEP pairwise port comparisons. */
#define GRPSZ	(1 << FWDSTEP)
/* Comparison result with every bit set: all compared ports are equal. */
#define GRPMSK	(GRPSZ - 1)
145 | ||
/*
 * Scalar grouping step for the packet at index idx.
 *   dlp - destination port of the run currently being counted (updated)
 *   dcp - array of destination ports
 *   lp  - pointer to the counter of the current run (updated)
 *   pn  - array of run counters, one slot per packet
 * If the packet's port differs from dlp a new run of length 1 is started
 * at pn[idx]; otherwise the current run counter is incremented.
 * Arguments are evaluated more than once - pass side-effect free expressions.
 */
#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do {	\
	if (unlikely((dlp) != (dcp)[(idx)])) {		\
		(dlp) = (dcp)[(idx)];			\
		(lp) = (pn) + (idx);			\
		(lp)[0] = 1;				\
	} else {					\
		(lp)[0]++;				\
	}						\
} while (0)
155 | ||
156 | /* | |
157 | * Group consecutive packets with the same destination port in bursts of 4. | |
158 | * Suppose we have array of destionation ports: | |
159 | * dst_port[] = {a, b, c, d,, e, ... } | |
160 | * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. | |
161 | * We doing 4 comparisions at once and the result is 4 bit mask. | |
162 | * This mask is used as an index into prebuild array of pnum values. | |
163 | */ | |
164 | static inline uint16_t * | |
165 | port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) | |
166 | { | |
167 | static const struct { | |
168 | uint64_t pnum; /* prebuild 4 values for pnum[]. */ | |
169 | int32_t idx; /* index for new last updated elemnet. */ | |
170 | uint16_t lpv; /* add value to the last updated element. */ | |
171 | } gptbl[GRPSZ] = { | |
172 | { | |
173 | /* 0: a != b, b != c, c != d, d != e */ | |
174 | .pnum = UINT64_C(0x0001000100010001), | |
175 | .idx = 4, | |
176 | .lpv = 0, | |
177 | }, | |
178 | { | |
179 | /* 1: a == b, b != c, c != d, d != e */ | |
180 | .pnum = UINT64_C(0x0001000100010002), | |
181 | .idx = 4, | |
182 | .lpv = 1, | |
183 | }, | |
184 | { | |
185 | /* 2: a != b, b == c, c != d, d != e */ | |
186 | .pnum = UINT64_C(0x0001000100020001), | |
187 | .idx = 4, | |
188 | .lpv = 0, | |
189 | }, | |
190 | { | |
191 | /* 3: a == b, b == c, c != d, d != e */ | |
192 | .pnum = UINT64_C(0x0001000100020003), | |
193 | .idx = 4, | |
194 | .lpv = 2, | |
195 | }, | |
196 | { | |
197 | /* 4: a != b, b != c, c == d, d != e */ | |
198 | .pnum = UINT64_C(0x0001000200010001), | |
199 | .idx = 4, | |
200 | .lpv = 0, | |
201 | }, | |
202 | { | |
203 | /* 5: a == b, b != c, c == d, d != e */ | |
204 | .pnum = UINT64_C(0x0001000200010002), | |
205 | .idx = 4, | |
206 | .lpv = 1, | |
207 | }, | |
208 | { | |
209 | /* 6: a != b, b == c, c == d, d != e */ | |
210 | .pnum = UINT64_C(0x0001000200030001), | |
211 | .idx = 4, | |
212 | .lpv = 0, | |
213 | }, | |
214 | { | |
215 | /* 7: a == b, b == c, c == d, d != e */ | |
216 | .pnum = UINT64_C(0x0001000200030004), | |
217 | .idx = 4, | |
218 | .lpv = 3, | |
219 | }, | |
220 | { | |
221 | /* 8: a != b, b != c, c != d, d == e */ | |
222 | .pnum = UINT64_C(0x0002000100010001), | |
223 | .idx = 3, | |
224 | .lpv = 0, | |
225 | }, | |
226 | { | |
227 | /* 9: a == b, b != c, c != d, d == e */ | |
228 | .pnum = UINT64_C(0x0002000100010002), | |
229 | .idx = 3, | |
230 | .lpv = 1, | |
231 | }, | |
232 | { | |
233 | /* 0xa: a != b, b == c, c != d, d == e */ | |
234 | .pnum = UINT64_C(0x0002000100020001), | |
235 | .idx = 3, | |
236 | .lpv = 0, | |
237 | }, | |
238 | { | |
239 | /* 0xb: a == b, b == c, c != d, d == e */ | |
240 | .pnum = UINT64_C(0x0002000100020003), | |
241 | .idx = 3, | |
242 | .lpv = 2, | |
243 | }, | |
244 | { | |
245 | /* 0xc: a != b, b != c, c == d, d == e */ | |
246 | .pnum = UINT64_C(0x0002000300010001), | |
247 | .idx = 2, | |
248 | .lpv = 0, | |
249 | }, | |
250 | { | |
251 | /* 0xd: a == b, b != c, c == d, d == e */ | |
252 | .pnum = UINT64_C(0x0002000300010002), | |
253 | .idx = 2, | |
254 | .lpv = 1, | |
255 | }, | |
256 | { | |
257 | /* 0xe: a != b, b == c, c == d, d == e */ | |
258 | .pnum = UINT64_C(0x0002000300040001), | |
259 | .idx = 1, | |
260 | .lpv = 0, | |
261 | }, | |
262 | { | |
263 | /* 0xf: a == b, b == c, c == d, d == e */ | |
264 | .pnum = UINT64_C(0x0002000300040005), | |
265 | .idx = 0, | |
266 | .lpv = 4, | |
267 | }, | |
268 | }; | |
269 | ||
270 | union { | |
271 | uint16_t u16[FWDSTEP + 1]; | |
272 | uint64_t u64; | |
273 | } *pnum = (void *)pn; | |
274 | ||
275 | int32_t v; | |
276 | ||
277 | dp1 = _mm_cmpeq_epi16(dp1, dp2); | |
278 | dp1 = _mm_unpacklo_epi16(dp1, dp1); | |
279 | v = _mm_movemask_ps((__m128)dp1); | |
280 | ||
281 | /* update last port counter. */ | |
282 | lp[0] += gptbl[v].lpv; | |
283 | ||
284 | /* if dest port value has changed. */ | |
285 | if (v != GRPMSK) { | |
286 | pnum->u64 = gptbl[v].pnum; | |
287 | pnum->u16[FWDSTEP] = 1; | |
288 | lp = pnum->u16 + gptbl[v].idx; | |
289 | } | |
290 | ||
291 | return lp; | |
292 | } | |
293 | ||
294 | /** | |
295 | * Process one packet: | |
296 | * Update source and destination MAC addresses in the ethernet header. | |
297 | * Perform RFC1812 checks and updates for IPV4 packets. | |
298 | */ | |
299 | static inline void | |
300 | process_packet(struct rte_mbuf *pkt, uint16_t *dst_port) | |
301 | { | |
302 | struct ether_hdr *eth_hdr; | |
303 | __m128i te, ve; | |
304 | ||
305 | eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); | |
306 | ||
307 | te = _mm_loadu_si128((__m128i *)eth_hdr); | |
308 | ve = val_eth[dst_port[0]]; | |
309 | ||
310 | rfc1812_process((struct ipv4_hdr *)(eth_hdr + 1), dst_port, | |
311 | pkt->packet_type); | |
312 | ||
313 | te = _mm_blend_epi16(te, ve, MASK_ETH); | |
314 | _mm_storeu_si128((__m128i *)eth_hdr, te); | |
315 | } | |
316 | ||
317 | static inline __attribute__((always_inline)) void | |
318 | send_packetsx4(struct lcore_conf *qconf, uint8_t port, struct rte_mbuf *m[], | |
319 | uint32_t num) | |
320 | { | |
321 | uint32_t len, j, n; | |
322 | ||
323 | len = qconf->tx_mbufs[port].len; | |
324 | ||
325 | /* | |
326 | * If TX buffer for that queue is empty, and we have enough packets, | |
327 | * then send them straightway. | |
328 | */ | |
329 | if (num >= MAX_TX_BURST && len == 0) { | |
330 | n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num); | |
331 | if (unlikely(n < num)) { | |
332 | do { | |
333 | rte_pktmbuf_free(m[n]); | |
334 | } while (++n < num); | |
335 | } | |
336 | return; | |
337 | } | |
338 | ||
339 | /* | |
340 | * Put packets into TX buffer for that queue. | |
341 | */ | |
342 | ||
343 | n = len + num; | |
344 | n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num; | |
345 | ||
346 | j = 0; | |
347 | switch (n % FWDSTEP) { | |
348 | while (j < n) { | |
349 | case 0: | |
350 | qconf->tx_mbufs[port].m_table[len + j] = m[j]; | |
351 | j++; | |
352 | case 3: | |
353 | qconf->tx_mbufs[port].m_table[len + j] = m[j]; | |
354 | j++; | |
355 | case 2: | |
356 | qconf->tx_mbufs[port].m_table[len + j] = m[j]; | |
357 | j++; | |
358 | case 1: | |
359 | qconf->tx_mbufs[port].m_table[len + j] = m[j]; | |
360 | j++; | |
361 | } | |
362 | } | |
363 | ||
364 | len += n; | |
365 | ||
366 | /* enough pkts to be sent */ | |
367 | if (unlikely(len == MAX_PKT_BURST)) { | |
368 | ||
369 | send_burst(qconf, MAX_PKT_BURST, port); | |
370 | ||
371 | /* copy rest of the packets into the TX buffer. */ | |
372 | len = num - n; | |
373 | j = 0; | |
374 | switch (len % FWDSTEP) { | |
375 | while (j < len) { | |
376 | case 0: | |
377 | qconf->tx_mbufs[port].m_table[j] = m[n + j]; | |
378 | j++; | |
379 | case 3: | |
380 | qconf->tx_mbufs[port].m_table[j] = m[n + j]; | |
381 | j++; | |
382 | case 2: | |
383 | qconf->tx_mbufs[port].m_table[j] = m[n + j]; | |
384 | j++; | |
385 | case 1: | |
386 | qconf->tx_mbufs[port].m_table[j] = m[n + j]; | |
387 | j++; | |
388 | } | |
389 | } | |
390 | } | |
391 | ||
392 | qconf->tx_mbufs[port].len = len; | |
393 | } | |
394 | ||
395 | /** | |
396 | * Send packets burst from pkts_burst to the ports in dst_port array | |
397 | */ | |
398 | static inline __attribute__((always_inline)) void | |
399 | send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, | |
400 | uint16_t dst_port[MAX_PKT_BURST], int nb_rx) | |
401 | { | |
402 | int32_t k; | |
403 | int j = 0; | |
404 | uint16_t dlp; | |
405 | uint16_t *lp; | |
406 | uint16_t pnum[MAX_PKT_BURST + 1]; | |
407 | ||
408 | /* | |
409 | * Finish packet processing and group consecutive | |
410 | * packets with the same destination port. | |
411 | */ | |
412 | k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); | |
413 | if (k != 0) { | |
414 | __m128i dp1, dp2; | |
415 | ||
416 | lp = pnum; | |
417 | lp[0] = 1; | |
418 | ||
419 | processx4_step3(pkts_burst, dst_port); | |
420 | ||
421 | /* dp1: <d[0], d[1], d[2], d[3], ... > */ | |
422 | dp1 = _mm_loadu_si128((__m128i *)dst_port); | |
423 | ||
424 | for (j = FWDSTEP; j != k; j += FWDSTEP) { | |
425 | processx4_step3(&pkts_burst[j], &dst_port[j]); | |
426 | ||
427 | /* | |
428 | * dp2: | |
429 | * <d[j-3], d[j-2], d[j-1], d[j], ... > | |
430 | */ | |
431 | dp2 = _mm_loadu_si128((__m128i *) | |
432 | &dst_port[j - FWDSTEP + 1]); | |
433 | lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); | |
434 | ||
435 | /* | |
436 | * dp1: | |
437 | * <d[j], d[j+1], d[j+2], d[j+3], ... > | |
438 | */ | |
439 | dp1 = _mm_srli_si128(dp2, (FWDSTEP - 1) * | |
440 | sizeof(dst_port[0])); | |
441 | } | |
442 | ||
443 | /* | |
444 | * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... > | |
445 | */ | |
446 | dp2 = _mm_shufflelo_epi16(dp1, 0xf9); | |
447 | lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); | |
448 | ||
449 | /* | |
450 | * remove values added by the last repeated | |
451 | * dst port. | |
452 | */ | |
453 | lp[0]--; | |
454 | dlp = dst_port[j - 1]; | |
455 | } else { | |
456 | /* set dlp and lp to the never used values. */ | |
457 | dlp = BAD_PORT - 1; | |
458 | lp = pnum + MAX_PKT_BURST; | |
459 | } | |
460 | ||
461 | /* Process up to last 3 packets one by one. */ | |
462 | switch (nb_rx % FWDSTEP) { | |
463 | case 3: | |
464 | process_packet(pkts_burst[j], dst_port + j); | |
465 | GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); | |
466 | j++; | |
467 | case 2: | |
468 | process_packet(pkts_burst[j], dst_port + j); | |
469 | GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); | |
470 | j++; | |
471 | case 1: | |
472 | process_packet(pkts_burst[j], dst_port + j); | |
473 | GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); | |
474 | j++; | |
475 | } | |
476 | ||
477 | /* | |
478 | * Send packets out, through destination port. | |
479 | * Consecutive packets with the same destination port | |
480 | * are already grouped together. | |
481 | * If destination port for the packet equals BAD_PORT, | |
482 | * then free the packet without sending it out. | |
483 | */ | |
484 | for (j = 0; j < nb_rx; j += k) { | |
485 | ||
486 | int32_t m; | |
487 | uint16_t pn; | |
488 | ||
489 | pn = dst_port[j]; | |
490 | k = pnum[j]; | |
491 | ||
492 | if (likely(pn != BAD_PORT)) | |
493 | send_packetsx4(qconf, pn, pkts_burst + j, k); | |
494 | else | |
495 | for (m = j; m != j + k; m++) | |
496 | rte_pktmbuf_free(pkts_burst[m]); | |
497 | ||
498 | } | |
499 | } | |
500 | ||
501 | #endif /* _L3FWD_COMMON_H_ */ |