// SPDX-License-Identifier: GPL-2.0-only

/* PIPAPO: PIle PAcket POlicies: AVX2 packet lookup routines
 *
 * Copyright (c) 2019-2020 Red Hat GmbH
 *
 * Author: Stefano Brivio <sbrivio@redhat.com>
 */

10 #include <linux/kernel.h>
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/netlink.h>
14 #include <linux/netfilter.h>
15 #include <linux/netfilter/nf_tables.h>
16 #include <net/netfilter/nf_tables_core.h>
17 #include <uapi/linux/netfilter/nf_tables.h>
18 #include <linux/bitmap.h>
19 #include <linux/bitops.h>
21 #include <linux/compiler.h>
22 #include <asm/fpu/api.h>
24 #include "nft_set_pipapo_avx2.h"
25 #include "nft_set_pipapo.h"
#define NFT_PIPAPO_LONGS_PER_M256	(XSAVE_YMM_SIZE / BITS_PER_LONG)

/* Load from memory into YMM register with non-temporal hint ("stream load"),
 * that is, don't fetch lines from memory into the cache. This avoids pushing
 * precious packet data out of the cache hierarchy, and is appropriate when:
 *
 * - loading buckets from lookup tables, as they are not going to be used
 *   again before packets are entirely classified
 *
 * - loading the result bitmap from the previous field, as it's never used
 *   again
 */
#define NFT_PIPAPO_AVX2_LOAD(reg, loc)					\
	asm volatile("vmovntdqa %0, %%ymm" #reg : : "m" (loc))

/* Stream a single lookup table bucket into YMM register given lookup table,
 * group index, value of packet bits, bucket size.
 *
 * The bucket is found at index (group * buckets per group + value) * bucket
 * size, in longs, from the start of the lookup table.
 */
#define NFT_PIPAPO_AVX2_BUCKET_LOAD4(reg, lt, group, v, bsize)		\
	NFT_PIPAPO_AVX2_LOAD(reg,					\
			     lt[((group) * NFT_PIPAPO_BUCKETS(4) +	\
				 (v)) * (bsize)])
#define NFT_PIPAPO_AVX2_BUCKET_LOAD8(reg, lt, group, v, bsize)		\
	NFT_PIPAPO_AVX2_LOAD(reg,					\
			     lt[((group) * NFT_PIPAPO_BUCKETS(8) +	\
				 (v)) * (bsize)])

/* Bitwise AND: the staple operation of this algorithm */
#define NFT_PIPAPO_AVX2_AND(dst, a, b)					\
	asm volatile("vpand %ymm" #a ", %ymm" #b ", %ymm" #dst)

/* Jump to label if @reg is zero */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label)			\
	asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";"	\
			  "je %l[" #label "]" : : : : label)

/* Store 256 bits from YMM register into memory. Contrary to bucket load
 * operation, we don't bypass the cache here, as stored matching results
 * are always used shortly after.
 */
#define NFT_PIPAPO_AVX2_STORE(loc, reg)					\
	asm volatile("vmovdqa %%ymm" #reg ", %0" : "=m" (loc))

/* Zero out a complete YMM register, @reg */
#define NFT_PIPAPO_AVX2_ZERO(reg)					\
	asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)

/* Current working bitmap index, toggled between field matches */
static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);

/**
 * nft_pipapo_avx2_prepare() - Prepare before main algorithm body
 *
 * This zeroes out ymm15, which is later used whenever we need to clear a
 * memory location, by storing its content into memory.
 */
static void nft_pipapo_avx2_prepare(void)
{
	NFT_PIPAPO_AVX2_ZERO(15);
}

89 * nft_pipapo_avx2_fill() - Fill a bitmap region with ones
90 * @data: Base memory area
91 * @start: First bit to set
92 * @len: Count of bits to fill
94 * This is nothing else than a version of bitmap_set(), as used e.g. by
95 * pipapo_refill(), tailored for the microarchitectures using it and better
96 * suited for the specific usage: it's very likely that we'll set a small number
97 * of bits, not crossing a word boundary, and correct branch prediction is
100 * This function doesn't actually use any AVX2 instruction.
102 static void nft_pipapo_avx2_fill(unsigned long *data
, int start
, int len
)
104 int offset
= start
% BITS_PER_LONG
;
107 data
+= start
/ BITS_PER_LONG
;
109 if (likely(len
== 1)) {
110 *data
|= BIT(offset
);
114 if (likely(len
< BITS_PER_LONG
|| offset
)) {
115 if (likely(len
+ offset
<= BITS_PER_LONG
)) {
116 *data
|= GENMASK(len
- 1 + offset
, offset
);
120 *data
|= ~0UL << offset
;
121 len
-= BITS_PER_LONG
- offset
;
124 if (len
<= BITS_PER_LONG
) {
125 mask
= ~0UL >> (BITS_PER_LONG
- len
);
131 memset(data
, 0xff, len
/ BITS_PER_BYTE
);
132 data
+= len
/ BITS_PER_LONG
;
134 len
%= BITS_PER_LONG
;
136 *data
|= ~0UL >> (BITS_PER_LONG
- len
);
140 * nft_pipapo_avx2_refill() - Scan bitmap, select mapping table item, set bits
141 * @offset: Start from given bitmap (equivalent to bucket) offset, in longs
142 * @map: Bitmap to be scanned for set bits
143 * @dst: Destination bitmap
144 * @mt: Mapping table containing bit set specifiers
145 * @len: Length of bitmap in longs
146 * @last: Return index of first set bit, if this is the last field
148 * This is an alternative implementation of pipapo_refill() suitable for usage
149 * with AVX2 lookup routines: we know there are four words to be scanned, at
150 * a given offset inside the map, for each matching iteration.
152 * This function doesn't actually use any AVX2 instruction.
154 * Return: first set bit index if @last, index of first filled word otherwise.
156 static int nft_pipapo_avx2_refill(int offset
, unsigned long *map
,
158 union nft_pipapo_map_bucket
*mt
, bool last
)
162 #define NFT_PIPAPO_AVX2_REFILL_ONE_WORD(x) \
165 int r = __builtin_ctzl(map[(x)]); \
166 int i = (offset + (x)) * BITS_PER_LONG + r; \
171 nft_pipapo_avx2_fill(dst, mt[i].to, mt[i].n); \
176 map[(x)] &= ~(1UL << r); \
180 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(0);
181 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(1);
182 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(2);
183 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(3);
184 #undef NFT_PIPAPO_AVX2_REFILL_ONE_WORD
190 * nft_pipapo_avx2_lookup_4b_2() - AVX2-based lookup for 2 four-bit groups
191 * @map: Previous match result, used as initial bitmap
192 * @fill: Destination bitmap to be filled with current match result
193 * @f: Field, containing lookup and mapping tables
194 * @offset: Ignore buckets before the given index, no bits are filled there
195 * @pkt: Packet data, pointer to input nftables register
196 * @first: If this is the first field, don't source previous result
197 * @last: Last field: stop at the first match and return bit index
199 * Load buckets from lookup table corresponding to the values of each 4-bit
200 * group of packet bytes, and perform a bitwise intersection between them. If
201 * this is the first field in the set, simply AND the buckets together
202 * (equivalent to using an all-ones starting bitmap), use the provided starting
203 * bitmap otherwise. Then call nft_pipapo_avx2_refill() to generate the next
204 * working bitmap, @fill.
206 * This is used for 8-bit fields (i.e. protocol numbers).
208 * Out-of-order (and superscalar) execution is vital here, so it's critical to
209 * avoid false data dependencies. CPU and compiler could (mostly) take care of
210 * this on their own, but the operation ordering is explicitly given here with
211 * a likely execution order in mind, to highlight possible stalls. That's why
212 * a number of logically distinct operations (i.e. loading buckets, intersecting
213 * buckets) are interleaved.
215 * Return: -1 on no match, rule index of match if @last, otherwise first long
216 * word index to be checked next (i.e. first filled word).
218 static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map
, unsigned long *fill
,
219 struct nft_pipapo_field
*f
, int offset
,
220 const u8
*pkt
, bool first
, bool last
)
222 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
223 u8 pg
[2] = { pkt
[0] >> 4, pkt
[0] & 0xf };
224 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
226 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
227 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
228 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
231 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
232 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 1, pg
[1], bsize
);
233 NFT_PIPAPO_AVX2_AND(4, 0, 1);
235 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
236 NFT_PIPAPO_AVX2_LOAD(2, map
[i_ul
]);
237 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 1, pg
[1], bsize
);
238 NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nothing
);
239 NFT_PIPAPO_AVX2_AND(3, 0, 1);
240 NFT_PIPAPO_AVX2_AND(4, 2, 3);
243 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch
);
244 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 4);
246 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
250 if (unlikely(ret
== -1))
251 ret
= b
/ XSAVE_YMM_SIZE
;
255 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
264 * nft_pipapo_avx2_lookup_4b_4() - AVX2-based lookup for 4 four-bit groups
265 * @map: Previous match result, used as initial bitmap
266 * @fill: Destination bitmap to be filled with current match result
267 * @f: Field, containing lookup and mapping tables
268 * @offset: Ignore buckets before the given index, no bits are filled there
269 * @pkt: Packet data, pointer to input nftables register
270 * @first: If this is the first field, don't source previous result
271 * @last: Last field: stop at the first match and return bit index
273 * See nft_pipapo_avx2_lookup_4b_2().
275 * This is used for 16-bit fields (i.e. ports).
277 * Return: -1 on no match, rule index of match if @last, otherwise first long
278 * word index to be checked next (i.e. first filled word).
280 static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map
, unsigned long *fill
,
281 struct nft_pipapo_field
*f
, int offset
,
282 const u8
*pkt
, bool first
, bool last
)
284 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
285 u8 pg
[4] = { pkt
[0] >> 4, pkt
[0] & 0xf, pkt
[1] >> 4, pkt
[1] & 0xf };
286 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
288 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
289 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
290 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
293 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
294 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 1, pg
[1], bsize
);
295 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 2, pg
[2], bsize
);
296 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 3, pg
[3], bsize
);
297 NFT_PIPAPO_AVX2_AND(4, 0, 1);
298 NFT_PIPAPO_AVX2_AND(5, 2, 3);
299 NFT_PIPAPO_AVX2_AND(7, 4, 5);
301 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
303 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
305 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 1, pg
[1], bsize
);
306 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 2, pg
[2], bsize
);
307 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 3, pg
[3], bsize
);
308 NFT_PIPAPO_AVX2_AND(5, 0, 1);
310 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
312 NFT_PIPAPO_AVX2_AND(6, 2, 3);
313 NFT_PIPAPO_AVX2_AND(7, 4, 5);
315 NFT_PIPAPO_AVX2_AND(7, 6, 7);
319 NFT_PIPAPO_AVX2_NOMATCH_GOTO(7, nomatch
);
320 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 7);
322 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
326 if (unlikely(ret
== -1))
327 ret
= b
/ XSAVE_YMM_SIZE
;
331 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
340 * nft_pipapo_avx2_lookup_4b_8() - AVX2-based lookup for 8 four-bit groups
341 * @map: Previous match result, used as initial bitmap
342 * @fill: Destination bitmap to be filled with current match result
343 * @f: Field, containing lookup and mapping tables
344 * @offset: Ignore buckets before the given index, no bits are filled there
345 * @pkt: Packet data, pointer to input nftables register
346 * @first: If this is the first field, don't source previous result
347 * @last: Last field: stop at the first match and return bit index
349 * See nft_pipapo_avx2_lookup_4b_2().
351 * This is used for 32-bit fields (i.e. IPv4 addresses).
353 * Return: -1 on no match, rule index of match if @last, otherwise first long
354 * word index to be checked next (i.e. first filled word).
356 static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map
, unsigned long *fill
,
357 struct nft_pipapo_field
*f
, int offset
,
358 const u8
*pkt
, bool first
, bool last
)
360 u8 pg
[8] = { pkt
[0] >> 4, pkt
[0] & 0xf, pkt
[1] >> 4, pkt
[1] & 0xf,
361 pkt
[2] >> 4, pkt
[2] & 0xf, pkt
[3] >> 4, pkt
[3] & 0xf,
363 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
364 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
366 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
367 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
368 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
371 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
372 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 1, pg
[1], bsize
);
373 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 2, pg
[2], bsize
);
374 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 3, pg
[3], bsize
);
375 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 4, pg
[4], bsize
);
376 NFT_PIPAPO_AVX2_AND(5, 0, 1);
377 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt
, 5, pg
[5], bsize
);
378 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 6, pg
[6], bsize
);
379 NFT_PIPAPO_AVX2_AND(8, 2, 3);
380 NFT_PIPAPO_AVX2_AND(9, 4, 5);
381 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 7, pg
[7], bsize
);
382 NFT_PIPAPO_AVX2_AND(11, 6, 7);
383 NFT_PIPAPO_AVX2_AND(12, 8, 9);
384 NFT_PIPAPO_AVX2_AND(13, 10, 11);
387 NFT_PIPAPO_AVX2_AND(1, 12, 13);
389 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 0, pg
[0], bsize
);
390 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
391 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 1, pg
[1], bsize
);
392 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 2, pg
[2], bsize
);
393 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 3, pg
[3], bsize
);
395 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
397 NFT_PIPAPO_AVX2_AND(5, 0, 1);
398 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt
, 4, pg
[4], bsize
);
399 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 5, pg
[5], bsize
);
400 NFT_PIPAPO_AVX2_AND(8, 2, 3);
401 NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt
, 6, pg
[6], bsize
);
402 NFT_PIPAPO_AVX2_AND(10, 4, 5);
403 NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt
, 7, pg
[7], bsize
);
404 NFT_PIPAPO_AVX2_AND(12, 6, 7);
405 NFT_PIPAPO_AVX2_AND(13, 8, 9);
406 NFT_PIPAPO_AVX2_AND(14, 10, 11);
409 NFT_PIPAPO_AVX2_AND(1, 12, 13);
410 NFT_PIPAPO_AVX2_AND(1, 1, 14);
413 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nomatch
);
414 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 1);
416 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
420 if (unlikely(ret
== -1))
421 ret
= b
/ XSAVE_YMM_SIZE
;
426 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
435 * nft_pipapo_avx2_lookup_4b_12() - AVX2-based lookup for 12 four-bit groups
436 * @map: Previous match result, used as initial bitmap
437 * @fill: Destination bitmap to be filled with current match result
438 * @f: Field, containing lookup and mapping tables
439 * @offset: Ignore buckets before the given index, no bits are filled there
440 * @pkt: Packet data, pointer to input nftables register
441 * @first: If this is the first field, don't source previous result
442 * @last: Last field: stop at the first match and return bit index
444 * See nft_pipapo_avx2_lookup_4b_2().
446 * This is used for 48-bit fields (i.e. MAC addresses/EUI-48).
448 * Return: -1 on no match, rule index of match if @last, otherwise first long
449 * word index to be checked next (i.e. first filled word).
451 static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map
, unsigned long *fill
,
452 struct nft_pipapo_field
*f
, int offset
,
453 const u8
*pkt
, bool first
, bool last
)
455 u8 pg
[12] = { pkt
[0] >> 4, pkt
[0] & 0xf, pkt
[1] >> 4, pkt
[1] & 0xf,
456 pkt
[2] >> 4, pkt
[2] & 0xf, pkt
[3] >> 4, pkt
[3] & 0xf,
457 pkt
[4] >> 4, pkt
[4] & 0xf, pkt
[5] >> 4, pkt
[5] & 0xf,
459 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
460 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
462 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
463 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
464 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
467 NFT_PIPAPO_AVX2_LOAD(0, map
[i_ul
]);
469 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 0, pg
[0], bsize
);
470 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 1, pg
[1], bsize
);
471 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 2, pg
[2], bsize
);
474 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing
);
475 NFT_PIPAPO_AVX2_AND(1, 1, 0);
478 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 3, pg
[3], bsize
);
479 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt
, 4, pg
[4], bsize
);
480 NFT_PIPAPO_AVX2_AND(6, 2, 3);
481 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 5, pg
[5], bsize
);
482 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt
, 6, pg
[6], bsize
);
483 NFT_PIPAPO_AVX2_AND(9, 1, 4);
484 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 7, pg
[7], bsize
);
485 NFT_PIPAPO_AVX2_AND(11, 5, 6);
486 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt
, 8, pg
[8], bsize
);
487 NFT_PIPAPO_AVX2_AND(13, 7, 8);
488 NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt
, 9, pg
[9], bsize
);
490 NFT_PIPAPO_AVX2_AND(0, 9, 10);
491 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 10, pg
[10], bsize
);
492 NFT_PIPAPO_AVX2_AND(2, 11, 12);
493 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 11, pg
[11], bsize
);
494 NFT_PIPAPO_AVX2_AND(4, 13, 14);
495 NFT_PIPAPO_AVX2_AND(5, 0, 1);
497 NFT_PIPAPO_AVX2_AND(6, 2, 3);
500 NFT_PIPAPO_AVX2_AND(7, 4, 5);
501 NFT_PIPAPO_AVX2_AND(8, 6, 7);
503 NFT_PIPAPO_AVX2_NOMATCH_GOTO(8, nomatch
);
504 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 8);
506 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
510 if (unlikely(ret
== -1))
511 ret
= b
/ XSAVE_YMM_SIZE
;
515 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
524 * nft_pipapo_avx2_lookup_4b_32() - AVX2-based lookup for 32 four-bit groups
525 * @map: Previous match result, used as initial bitmap
526 * @fill: Destination bitmap to be filled with current match result
527 * @f: Field, containing lookup and mapping tables
528 * @offset: Ignore buckets before the given index, no bits are filled there
529 * @pkt: Packet data, pointer to input nftables register
530 * @first: If this is the first field, don't source previous result
531 * @last: Last field: stop at the first match and return bit index
533 * See nft_pipapo_avx2_lookup_4b_2().
535 * This is used for 128-bit fields (i.e. IPv6 addresses).
537 * Return: -1 on no match, rule index of match if @last, otherwise first long
538 * word index to be checked next (i.e. first filled word).
540 static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map
, unsigned long *fill
,
541 struct nft_pipapo_field
*f
, int offset
,
542 const u8
*pkt
, bool first
, bool last
)
544 u8 pg
[32] = { pkt
[0] >> 4, pkt
[0] & 0xf, pkt
[1] >> 4, pkt
[1] & 0xf,
545 pkt
[2] >> 4, pkt
[2] & 0xf, pkt
[3] >> 4, pkt
[3] & 0xf,
546 pkt
[4] >> 4, pkt
[4] & 0xf, pkt
[5] >> 4, pkt
[5] & 0xf,
547 pkt
[6] >> 4, pkt
[6] & 0xf, pkt
[7] >> 4, pkt
[7] & 0xf,
548 pkt
[8] >> 4, pkt
[8] & 0xf, pkt
[9] >> 4, pkt
[9] & 0xf,
549 pkt
[10] >> 4, pkt
[10] & 0xf, pkt
[11] >> 4, pkt
[11] & 0xf,
550 pkt
[12] >> 4, pkt
[12] & 0xf, pkt
[13] >> 4, pkt
[13] & 0xf,
551 pkt
[14] >> 4, pkt
[14] & 0xf, pkt
[15] >> 4, pkt
[15] & 0xf,
553 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
554 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
556 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
557 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
558 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
561 NFT_PIPAPO_AVX2_LOAD(0, map
[i_ul
]);
563 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 0, pg
[0], bsize
);
564 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 1, pg
[1], bsize
);
565 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 2, pg
[2], bsize
);
566 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt
, 3, pg
[3], bsize
);
568 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing
);
569 NFT_PIPAPO_AVX2_AND(1, 1, 0);
572 NFT_PIPAPO_AVX2_AND(5, 2, 3);
573 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt
, 4, pg
[4], bsize
);
574 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 5, pg
[5], bsize
);
575 NFT_PIPAPO_AVX2_AND(8, 1, 4);
576 NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt
, 6, pg
[6], bsize
);
577 NFT_PIPAPO_AVX2_AND(10, 5, 6);
578 NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt
, 7, pg
[7], bsize
);
579 NFT_PIPAPO_AVX2_AND(12, 7, 8);
580 NFT_PIPAPO_AVX2_BUCKET_LOAD4(13, lt
, 8, pg
[8], bsize
);
581 NFT_PIPAPO_AVX2_AND(14, 9, 10);
583 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 9, pg
[9], bsize
);
584 NFT_PIPAPO_AVX2_AND(1, 11, 12);
585 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt
, 10, pg
[10], bsize
);
586 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 11, pg
[11], bsize
);
587 NFT_PIPAPO_AVX2_AND(4, 13, 14);
588 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt
, 12, pg
[12], bsize
);
589 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt
, 13, pg
[13], bsize
);
590 NFT_PIPAPO_AVX2_AND(7, 0, 1);
591 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt
, 14, pg
[14], bsize
);
592 NFT_PIPAPO_AVX2_AND(9, 2, 3);
593 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 15, pg
[15], bsize
);
594 NFT_PIPAPO_AVX2_AND(11, 4, 5);
595 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt
, 16, pg
[16], bsize
);
596 NFT_PIPAPO_AVX2_AND(13, 6, 7);
597 NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt
, 17, pg
[17], bsize
);
599 NFT_PIPAPO_AVX2_AND(0, 8, 9);
600 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt
, 18, pg
[18], bsize
);
601 NFT_PIPAPO_AVX2_AND(2, 10, 11);
602 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 19, pg
[19], bsize
);
603 NFT_PIPAPO_AVX2_AND(4, 12, 13);
604 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt
, 20, pg
[20], bsize
);
605 NFT_PIPAPO_AVX2_AND(6, 14, 0);
606 NFT_PIPAPO_AVX2_AND(7, 1, 2);
607 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt
, 21, pg
[21], bsize
);
608 NFT_PIPAPO_AVX2_AND(9, 3, 4);
609 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 22, pg
[22], bsize
);
610 NFT_PIPAPO_AVX2_AND(11, 5, 6);
611 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt
, 23, pg
[23], bsize
);
612 NFT_PIPAPO_AVX2_AND(13, 7, 8);
614 NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt
, 24, pg
[24], bsize
);
615 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt
, 25, pg
[25], bsize
);
616 NFT_PIPAPO_AVX2_AND(1, 9, 10);
617 NFT_PIPAPO_AVX2_AND(2, 11, 12);
618 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt
, 26, pg
[26], bsize
);
619 NFT_PIPAPO_AVX2_AND(4, 13, 14);
620 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt
, 27, pg
[27], bsize
);
621 NFT_PIPAPO_AVX2_AND(6, 0, 1);
622 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt
, 28, pg
[28], bsize
);
623 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt
, 29, pg
[29], bsize
);
624 NFT_PIPAPO_AVX2_AND(9, 2, 3);
625 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt
, 30, pg
[30], bsize
);
626 NFT_PIPAPO_AVX2_AND(11, 4, 5);
627 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt
, 31, pg
[31], bsize
);
629 NFT_PIPAPO_AVX2_AND(0, 6, 7);
630 NFT_PIPAPO_AVX2_AND(1, 8, 9);
631 NFT_PIPAPO_AVX2_AND(2, 10, 11);
632 NFT_PIPAPO_AVX2_AND(3, 12, 0);
635 NFT_PIPAPO_AVX2_AND(4, 1, 2);
636 NFT_PIPAPO_AVX2_AND(5, 3, 4);
638 NFT_PIPAPO_AVX2_NOMATCH_GOTO(5, nomatch
);
639 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 5);
641 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
645 if (unlikely(ret
== -1))
646 ret
= b
/ XSAVE_YMM_SIZE
;
650 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
659 * nft_pipapo_avx2_lookup_8b_1() - AVX2-based lookup for one eight-bit group
660 * @map: Previous match result, used as initial bitmap
661 * @fill: Destination bitmap to be filled with current match result
662 * @f: Field, containing lookup and mapping tables
663 * @offset: Ignore buckets before the given index, no bits are filled there
664 * @pkt: Packet data, pointer to input nftables register
665 * @first: If this is the first field, don't source previous result
666 * @last: Last field: stop at the first match and return bit index
668 * See nft_pipapo_avx2_lookup_4b_2().
670 * This is used for 8-bit fields (i.e. protocol numbers).
672 * Return: -1 on no match, rule index of match if @last, otherwise first long
673 * word index to be checked next (i.e. first filled word).
675 static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map
, unsigned long *fill
,
676 struct nft_pipapo_field
*f
, int offset
,
677 const u8
*pkt
, bool first
, bool last
)
679 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
680 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
682 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
683 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
684 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
687 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 0, pkt
[0], bsize
);
689 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
690 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
691 NFT_PIPAPO_AVX2_AND(2, 0, 1);
692 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
695 NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nomatch
);
696 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 2);
698 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
702 if (unlikely(ret
== -1))
703 ret
= b
/ XSAVE_YMM_SIZE
;
707 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
716 * nft_pipapo_avx2_lookup_8b_2() - AVX2-based lookup for 2 eight-bit groups
717 * @map: Previous match result, used as initial bitmap
718 * @fill: Destination bitmap to be filled with current match result
719 * @f: Field, containing lookup and mapping tables
720 * @offset: Ignore buckets before the given index, no bits are filled there
721 * @pkt: Packet data, pointer to input nftables register
722 * @first: If this is the first field, don't source previous result
723 * @last: Last field: stop at the first match and return bit index
725 * See nft_pipapo_avx2_lookup_4b_2().
727 * This is used for 16-bit fields (i.e. ports).
729 * Return: -1 on no match, rule index of match if @last, otherwise first long
730 * word index to be checked next (i.e. first filled word).
732 static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map
, unsigned long *fill
,
733 struct nft_pipapo_field
*f
, int offset
,
734 const u8
*pkt
, bool first
, bool last
)
736 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
737 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
739 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
740 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
741 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
744 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
745 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 1, pkt
[1], bsize
);
746 NFT_PIPAPO_AVX2_AND(4, 0, 1);
748 NFT_PIPAPO_AVX2_LOAD(0, map
[i_ul
]);
749 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 0, pkt
[0], bsize
);
750 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 1, pkt
[1], bsize
);
753 NFT_PIPAPO_AVX2_AND(3, 0, 1);
754 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing
);
755 NFT_PIPAPO_AVX2_AND(4, 3, 2);
759 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch
);
760 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 4);
762 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
766 if (unlikely(ret
== -1))
767 ret
= b
/ XSAVE_YMM_SIZE
;
771 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
780 * nft_pipapo_avx2_lookup_8b_4() - AVX2-based lookup for 4 eight-bit groups
781 * @map: Previous match result, used as initial bitmap
782 * @fill: Destination bitmap to be filled with current match result
783 * @f: Field, containing lookup and mapping tables
784 * @offset: Ignore buckets before the given index, no bits are filled there
785 * @pkt: Packet data, pointer to input nftables register
786 * @first: If this is the first field, don't source previous result
787 * @last: Last field: stop at the first match and return bit index
789 * See nft_pipapo_avx2_lookup_4b_2().
791 * This is used for 32-bit fields (i.e. IPv4 addresses).
793 * Return: -1 on no match, rule index of match if @last, otherwise first long
794 * word index to be checked next (i.e. first filled word).
796 static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map
, unsigned long *fill
,
797 struct nft_pipapo_field
*f
, int offset
,
798 const u8
*pkt
, bool first
, bool last
)
800 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
801 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
803 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
804 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
805 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
808 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
809 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 1, pkt
[1], bsize
);
810 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 2, pkt
[2], bsize
);
811 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 3, pkt
[3], bsize
);
814 NFT_PIPAPO_AVX2_AND(4, 0, 1);
815 NFT_PIPAPO_AVX2_AND(5, 2, 3);
816 NFT_PIPAPO_AVX2_AND(0, 4, 5);
818 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
819 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
820 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 1, pkt
[1], bsize
);
821 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 2, pkt
[2], bsize
);
822 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt
, 3, pkt
[3], bsize
);
824 NFT_PIPAPO_AVX2_AND(5, 0, 1);
825 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
826 NFT_PIPAPO_AVX2_AND(6, 2, 3);
829 NFT_PIPAPO_AVX2_AND(7, 4, 5);
830 NFT_PIPAPO_AVX2_AND(0, 6, 7);
833 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nomatch
);
834 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 0);
836 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
840 if (unlikely(ret
== -1))
841 ret
= b
/ XSAVE_YMM_SIZE
;
846 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
855 * nft_pipapo_avx2_lookup_8b_6() - AVX2-based lookup for 6 eight-bit groups
856 * @map: Previous match result, used as initial bitmap
857 * @fill: Destination bitmap to be filled with current match result
858 * @f: Field, containing lookup and mapping tables
859 * @offset: Ignore buckets before the given index, no bits are filled there
860 * @pkt: Packet data, pointer to input nftables register
861 * @first: If this is the first field, don't source previous result
862 * @last: Last field: stop at the first match and return bit index
864 * See nft_pipapo_avx2_lookup_4b_2().
866 * This is used for 48-bit fields (i.e. MAC addresses/EUI-48).
868 * Return: -1 on no match, rule index of match if @last, otherwise first long
869 * word index to be checked next (i.e. first filled word).
871 static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map
, unsigned long *fill
,
872 struct nft_pipapo_field
*f
, int offset
,
873 const u8
*pkt
, bool first
, bool last
)
875 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
876 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
878 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
879 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
880 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
883 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
884 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 1, pkt
[1], bsize
);
885 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 2, pkt
[2], bsize
);
886 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 3, pkt
[3], bsize
);
887 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt
, 4, pkt
[4], bsize
);
889 NFT_PIPAPO_AVX2_AND(5, 0, 1);
890 NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt
, 6, pkt
[5], bsize
);
891 NFT_PIPAPO_AVX2_AND(7, 2, 3);
894 NFT_PIPAPO_AVX2_AND(0, 4, 5);
895 NFT_PIPAPO_AVX2_AND(1, 6, 7);
896 NFT_PIPAPO_AVX2_AND(4, 0, 1);
898 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt
, 0, pkt
[0], bsize
);
899 NFT_PIPAPO_AVX2_LOAD(1, map
[i_ul
]);
900 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 1, pkt
[1], bsize
);
901 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 2, pkt
[2], bsize
);
902 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt
, 3, pkt
[3], bsize
);
904 NFT_PIPAPO_AVX2_AND(5, 0, 1);
905 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing
);
907 NFT_PIPAPO_AVX2_AND(6, 2, 3);
908 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt
, 4, pkt
[4], bsize
);
909 NFT_PIPAPO_AVX2_AND(0, 4, 5);
910 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 5, pkt
[5], bsize
);
911 NFT_PIPAPO_AVX2_AND(2, 6, 7);
914 NFT_PIPAPO_AVX2_AND(3, 0, 1);
915 NFT_PIPAPO_AVX2_AND(4, 2, 3);
918 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch
);
919 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 4);
921 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
925 if (unlikely(ret
== -1))
926 ret
= b
/ XSAVE_YMM_SIZE
;
931 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
940 * nft_pipapo_avx2_lookup_8b_16() - AVX2-based lookup for 16 eight-bit groups
941 * @map: Previous match result, used as initial bitmap
942 * @fill: Destination bitmap to be filled with current match result
943 * @f: Field, containing lookup and mapping tables
944 * @offset: Ignore buckets before the given index, no bits are filled there
945 * @pkt: Packet data, pointer to input nftables register
946 * @first: If this is the first field, don't source previous result
947 * @last: Last field: stop at the first match and return bit index
949 * See nft_pipapo_avx2_lookup_4b_2().
951 * This is used for 128-bit fields (i.e. IPv6 addresses).
953 * Return: -1 on no match, rule index of match if @last, otherwise first long
954 * word index to be checked next (i.e. first filled word).
956 static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map
, unsigned long *fill
,
957 struct nft_pipapo_field
*f
, int offset
,
958 const u8
*pkt
, bool first
, bool last
)
960 int i
, ret
= -1, m256_size
= f
->bsize
/ NFT_PIPAPO_LONGS_PER_M256
, b
;
961 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
963 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
964 for (i
= offset
; i
< m256_size
; i
++, lt
+= NFT_PIPAPO_LONGS_PER_M256
) {
965 int i_ul
= i
* NFT_PIPAPO_LONGS_PER_M256
;
968 NFT_PIPAPO_AVX2_LOAD(0, map
[i_ul
]);
970 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 0, pkt
[0], bsize
);
971 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 1, pkt
[1], bsize
);
972 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 2, pkt
[2], bsize
);
974 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing
);
975 NFT_PIPAPO_AVX2_AND(1, 1, 0);
977 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt
, 3, pkt
[3], bsize
);
979 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt
, 4, pkt
[4], bsize
);
980 NFT_PIPAPO_AVX2_AND(6, 1, 2);
981 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt
, 5, pkt
[5], bsize
);
982 NFT_PIPAPO_AVX2_AND(0, 3, 4);
983 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 6, pkt
[6], bsize
);
985 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt
, 7, pkt
[7], bsize
);
986 NFT_PIPAPO_AVX2_AND(3, 5, 6);
987 NFT_PIPAPO_AVX2_AND(4, 0, 1);
988 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt
, 8, pkt
[8], bsize
);
990 NFT_PIPAPO_AVX2_AND(6, 2, 3);
991 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt
, 9, pkt
[9], bsize
);
992 NFT_PIPAPO_AVX2_AND(0, 4, 5);
993 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 10, pkt
[10], bsize
);
994 NFT_PIPAPO_AVX2_AND(2, 6, 7);
995 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 11, pkt
[11], bsize
);
996 NFT_PIPAPO_AVX2_AND(4, 0, 1);
997 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt
, 12, pkt
[12], bsize
);
998 NFT_PIPAPO_AVX2_AND(6, 2, 3);
999 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt
, 13, pkt
[13], bsize
);
1000 NFT_PIPAPO_AVX2_AND(0, 4, 5);
1001 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt
, 14, pkt
[14], bsize
);
1002 NFT_PIPAPO_AVX2_AND(2, 6, 7);
1003 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt
, 15, pkt
[15], bsize
);
1004 NFT_PIPAPO_AVX2_AND(4, 0, 1);
1007 NFT_PIPAPO_AVX2_AND(5, 2, 3);
1008 NFT_PIPAPO_AVX2_AND(6, 4, 5);
1010 NFT_PIPAPO_AVX2_NOMATCH_GOTO(6, nomatch
);
1011 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 6);
1013 b
= nft_pipapo_avx2_refill(i_ul
, &map
[i_ul
], fill
, f
->mt
, last
);
1017 if (unlikely(ret
== -1))
1018 ret
= b
/ XSAVE_YMM_SIZE
;
1023 NFT_PIPAPO_AVX2_STORE(map
[i_ul
], 15);
1032 * nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes
1033 * @map: Previous match result, used as initial bitmap
1034 * @fill: Destination bitmap to be filled with current match result
1035 * @f: Field, containing lookup and mapping tables
1036 * @offset: Ignore buckets before the given index, no bits are filled there
1037 * @pkt: Packet data, pointer to input nftables register
1038 * @first: If this is the first field, don't source previous result
1039 * @last: Last field: stop at the first match and return bit index
1041 * This function should never be called, but is provided for the case the field
1042 * size doesn't match any of the known data types. Matching rate is
1043 * substantially lower than AVX2 routines.
1045 * Return: -1 on no match, rule index of match if @last, otherwise first long
1046 * word index to be checked next (i.e. first filled word).
1048 static int nft_pipapo_avx2_lookup_slow(unsigned long *map
, unsigned long *fill
,
1049 struct nft_pipapo_field
*f
, int offset
,
1050 const u8
*pkt
, bool first
, bool last
)
1052 unsigned long *lt
= f
->lt
, bsize
= f
->bsize
;
1055 lt
+= offset
* NFT_PIPAPO_LONGS_PER_M256
;
1058 memset(map
, 0xff, bsize
* sizeof(*map
));
1060 for (i
= offset
; i
< bsize
; i
++) {
1062 pipapo_and_field_buckets_8bit(f
, map
, pkt
);
1064 pipapo_and_field_buckets_4bit(f
, map
, pkt
);
1065 NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4
;
1067 b
= pipapo_refill(map
, bsize
, f
->rules
, fill
, f
->mt
, last
);
1073 ret
= b
/ XSAVE_YMM_SIZE
;
1080 * nft_pipapo_avx2_estimate() - Set size, space and lookup complexity
1081 * @desc: Set description, element count and field description used
1082 * @features: Flags: NFT_SET_INTERVAL needs to be there
1083 * @est: Storage for estimation data
1085 * Return: true if set is compatible and AVX2 available, false otherwise.
1087 bool nft_pipapo_avx2_estimate(const struct nft_set_desc
*desc
, u32 features
,
1088 struct nft_set_estimate
*est
)
1090 if (!(features
& NFT_SET_INTERVAL
) ||
1091 desc
->field_count
< NFT_PIPAPO_MIN_FIELDS
)
1094 if (!boot_cpu_has(X86_FEATURE_AVX2
) || !boot_cpu_has(X86_FEATURE_AVX
))
1097 est
->size
= pipapo_estimate_size(desc
);
1101 est
->lookup
= NFT_SET_CLASS_O_LOG_N
;
1103 est
->space
= NFT_SET_CLASS_O_N
;
1109 * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
1110 * @net: Network namespace
1111 * @set: nftables API set representation
1112 * @elem: nftables API element representation containing key data
1113 * @ext: nftables API extension pointer, filled with matching reference
1115 * For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
1117 * This implementation exploits the repetitive characteristic of the algorithm
1118 * to provide a fast, vectorised version using the AVX2 SIMD instruction set.
1120 * Return: true on match, false otherwise.
1122 bool nft_pipapo_avx2_lookup(const struct net
*net
, const struct nft_set
*set
,
1123 const u32
*key
, const struct nft_set_ext
**ext
)
1125 struct nft_pipapo
*priv
= nft_set_priv(set
);
1126 unsigned long *res
, *fill
, *scratch
;
1127 u8 genmask
= nft_genmask_cur(net
);
1128 const u8
*rp
= (const u8
*)key
;
1129 struct nft_pipapo_match
*m
;
1130 struct nft_pipapo_field
*f
;
1134 m
= rcu_dereference(priv
->match
);
1136 /* This also protects access to all data related to scratch maps */
1139 scratch
= *raw_cpu_ptr(m
->scratch_aligned
);
1140 if (unlikely(!scratch
)) {
1144 map_index
= raw_cpu_read(nft_pipapo_avx2_scratch_index
);
1146 res
= scratch
+ (map_index
? m
->bsize_max
: 0);
1147 fill
= scratch
+ (map_index
? 0 : m
->bsize_max
);
1149 /* Starting map doesn't need to be set for this implementation */
1151 nft_pipapo_avx2_prepare();
1154 nft_pipapo_for_each_field(f
, i
, m
) {
1155 bool last
= i
== m
->field_count
- 1, first
= !i
;
1157 #define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
1158 (ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
1162 if (likely(f
->bb
== 8)) {
1163 if (f
->groups
== 1) {
1164 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 1);
1165 } else if (f
->groups
== 2) {
1166 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 2);
1167 } else if (f
->groups
== 4) {
1168 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 4);
1169 } else if (f
->groups
== 6) {
1170 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 6);
1171 } else if (f
->groups
== 16) {
1172 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
1174 ret
= nft_pipapo_avx2_lookup_slow(res
, fill
, f
,
1179 if (f
->groups
== 2) {
1180 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 2);
1181 } else if (f
->groups
== 4) {
1182 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 4);
1183 } else if (f
->groups
== 8) {
1184 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 8);
1185 } else if (f
->groups
== 12) {
1186 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 12);
1187 } else if (f
->groups
== 32) {
1188 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
1190 ret
= nft_pipapo_avx2_lookup_slow(res
, fill
, f
,
1195 NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4
;
1197 #undef NFT_SET_PIPAPO_AVX2_LOOKUP
1203 *ext
= &f
->mt
[ret
].e
->ext
;
1204 if (unlikely(nft_set_elem_expired(*ext
) ||
1205 !nft_set_elem_active(*ext
, genmask
))) {
1214 rp
+= NFT_PIPAPO_GROUPS_PADDED_SIZE(f
);
1219 raw_cpu_write(nft_pipapo_avx2_scratch_index
, !map_index
);