/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include <rte_common.h>
#include <rte_byteorder.h>

#include "bpf_impl.h"
#define A64_REG_MASK(r)		((r) & 0x1f)
#define A64_INVALID_OP_CODE	(0xffffffff)

#define TMP_REG_1		(EBPF_REG_10 + 1)
#define TMP_REG_2		(EBPF_REG_10 + 2)
#define TMP_REG_3		(EBPF_REG_10 + 3)

#define EBPF_FP			(EBPF_REG_10)
#define EBPF_OP_GET(op)		(BPF_OP(op) >> 4)

/* AArch64 register numbers (x29 = FP, x30 = LR, 31 = SP/ZR) */
#define A64_R(x)		(x)
#define A64_FP			29
#define A64_LR			30
#define A64_SP			31
#define A64_ZR			31

#define check_imm(n, val) (((val) >= 0) ? !!((val) >> (n)) : !!((~val) >> (n)))
#define mask_imm(n, val) ((val) & ((1 << (n)) - 1))
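/*
 * Example: check_imm(12, 4095) == 0 (value fits in a 12-bit field) while
 * check_imm(12, 4096) != 0 (4096 >> 12 == 1, out of range); negative values
 * are range-checked via their complement. mask_imm(12, -8) keeps only the
 * low 12 bits (0xff8) that end up in the encoded immediate.
 */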
struct ebpf_a64_map {
	uint32_t off; /* eBPF to arm64 insn offset mapping for jump */
	uint8_t off_to_b; /* Offset to branch instruction delta */
};

struct a64_jit_ctx {
	size_t stack_sz; /* Stack size */
	uint32_t *ins; /* ARM64 instructions. NULL if first pass */
	struct ebpf_a64_map *map; /* eBPF to arm64 insn mapping for jump */
	uint32_t idx; /* Current instruction index */
	uint32_t program_start; /* Program index, just after prologue */
	uint32_t program_sz; /* Program size. Found in first pass */
	uint8_t foundcall; /* Found EBPF_CALL class code in eBPF pgm */
};
static int
check_immr_imms(bool is64, uint8_t immr, uint8_t imms)
{
	const unsigned int width = is64 ? 64 : 32;

	if (immr >= width || imms >= width)
		return 1;

	return 0;
}
static int
check_mov_hw(bool is64, const uint8_t val)
{
	if (val == 16 || val == 0)
		return 0;
	else if (is64 && val != 64 && val != 48 && val != 32)
		return 1;

	return 0;
}
static int
check_ls_sz(uint8_t sz)
{
	if (sz == BPF_B || sz == BPF_H || sz == BPF_W || sz == EBPF_DW)
		return 0;

	return 1;
}
static int
check_reg(uint8_t r)
{
	return (r > 31) ? 1 : 0;
}
static int
is_first_pass(struct a64_jit_ctx *ctx)
{
	return (ctx->ins == NULL);
}
static int
check_invalid_args(struct a64_jit_ctx *ctx, uint32_t limit)
{
	uint32_t idx;

	/* Nothing to verify on the first pass */
	if (is_first_pass(ctx))
		return 0;

	for (idx = 0; idx < limit; idx++) {
		if (rte_le_to_cpu_32(ctx->ins[idx]) == A64_INVALID_OP_CODE) {
			RTE_BPF_LOG(ERR,
				"%s: invalid opcode at %u;\n", __func__, idx);
			return -EINVAL;
		}
	}
	return 0;
}
static int
jump_offset_init(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
{
	uint32_t i;

	ctx->map = malloc(bpf->prm.nb_ins * sizeof(ctx->map[0]));
	if (ctx->map == NULL)
		return -ENOMEM;

	/* Fill with fake offsets */
	for (i = 0; i != bpf->prm.nb_ins; i++) {
		ctx->map[i].off = INT32_MAX;
		ctx->map[i].off_to_b = 0;
	}
	return 0;
}
static void
jump_offset_fini(struct a64_jit_ctx *ctx)
{
	free(ctx->map);
}
static void
jump_offset_update(struct a64_jit_ctx *ctx, uint32_t ebpf_idx)
{
	if (is_first_pass(ctx))
		ctx->map[ebpf_idx].off = ctx->idx;
}
static void
jump_offset_to_branch_update(struct a64_jit_ctx *ctx, uint32_t ebpf_idx)
{
	if (is_first_pass(ctx))
		ctx->map[ebpf_idx].off_to_b = ctx->idx - ctx->map[ebpf_idx].off;
}
static int32_t
jump_offset_get(struct a64_jit_ctx *ctx, uint32_t from, int16_t offset)
{
	int32_t a64_from, a64_to;

	a64_from = ctx->map[from].off + ctx->map[from].off_to_b;
	a64_to = ctx->map[from + offset + 1].off;

	if (a64_to == INT32_MAX)
		return a64_to;

	return a64_to - a64_from;
}
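/*
 * An eBPF jump offset is relative to the instruction following the branch,
 * hence the "from + offset + 1" lookup above. INT32_MAX is the fake offset
 * installed by jump_offset_init() and simply propagates until the first
 * pass has resolved the real target.
 */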
enum a64_cond_e {
	A64_EQ = 0x0, /* == */
	A64_NE = 0x1, /* != */
	A64_CS = 0x2, /* Unsigned >= */
	A64_CC = 0x3, /* Unsigned < */
	A64_MI = 0x4, /* < 0 */
	A64_PL = 0x5, /* >= 0 */
	A64_VS = 0x6, /* Overflow */
	A64_VC = 0x7, /* No overflow */
	A64_HI = 0x8, /* Unsigned > */
	A64_LS = 0x9, /* Unsigned <= */
	A64_GE = 0xa, /* Signed >= */
	A64_LT = 0xb, /* Signed < */
	A64_GT = 0xc, /* Signed > */
	A64_LE = 0xd, /* Signed <= */
	A64_AL = 0xe, /* Always */
};
static int
check_cond(uint8_t cond)
{
	return (cond >= A64_AL) ? 1 : 0;
}
static uint8_t
ebpf_to_a64_cond(uint8_t op)
{
	switch (BPF_OP(op)) {
	case BPF_JEQ:
		return A64_EQ;
	case EBPF_JNE:
		return A64_NE;
	/* ... the remaining eBPF jump ops map analogously: unsigned compares
	 * to A64_HI/A64_CC/A64_CS/A64_LS, signed ones to A64_GT/A64_LT/
	 * A64_GE/A64_LE, BPF_JSET to A64_NE ...
	 */
	default:
		return A64_AL;
	}
}
/* Emit an instruction */
static inline void
emit_insn(struct a64_jit_ctx *ctx, uint32_t insn, int error)
{
	if (error)
		insn = A64_INVALID_OP_CODE;

	if (ctx->ins)
		ctx->ins[ctx->idx] = rte_cpu_to_le_32(insn);

	ctx->idx++;
}
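/*
 * Two-pass emission: on the first pass ctx->ins is NULL, so emit_insn()
 * only advances ctx->idx to size the program; the second pass writes the
 * encodings. An encoding error is recorded as A64_INVALID_OP_CODE and
 * rejected later by check_invalid_args() instead of aborting mid-emit.
 */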
static void
emit_ret(struct a64_jit_ctx *ctx)
{
	emit_insn(ctx, 0xd65f03c0, 0); /* ret */
}
static void
emit_add_sub_imm(struct a64_jit_ctx *ctx, bool is64, bool sub, uint8_t rd,
		 uint8_t rn, int16_t imm12)
{
	uint32_t insn, imm;

	imm = mask_imm(12, imm12);
	insn = (!!is64) << 31;
	insn |= (!!sub) << 30;
	insn |= 0x11000000;
	insn |= rd;
	insn |= rn << 5;
	insn |= imm << 10;

	emit_insn(ctx, insn,
		  check_reg(rd) || check_reg(rn) || check_imm(12, imm12));
}
static void
emit_add_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
{
	emit_add_sub_imm(ctx, 1, 0, rd, rn, imm12);
}
static void
emit_sub_imm_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn, uint16_t imm12)
{
	emit_add_sub_imm(ctx, 1, 1, rd, rn, imm12);
}
static void
emit_mov(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn)
{
	emit_add_sub_imm(ctx, is64, 0, rd, rn, 0);
}
static void
emit_mov_64(struct a64_jit_ctx *ctx, uint8_t rd, uint8_t rn)
{
	emit_mov(ctx, 1, rd, rn);
}
static void
emit_ls_pair_64(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2, uint8_t rn,
		bool push, bool load, bool pre_index)
{
	uint32_t insn;

	insn = (!!load) << 22;
	insn |= (!!pre_index) << 24;
	/* ... base STP/LDP opcode plus the rt, rt2 and rn register fields ... */
	if (push)
		insn |= 0x7e << 15; /* 0x7e means -2 with imm7 */
	else
		insn |= 0x2 << 15; /* +2 with imm7, i.e. +16 bytes */

	emit_insn(ctx, insn, check_reg(rn) || check_reg(rt) || check_reg(rt2));
}
/* Emit stp rt, rt2, [sp, #-16]! */
static void
emit_stack_push(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
{
	emit_ls_pair_64(ctx, rt, rt2, A64_SP, 1, 0, 1);
}
/* Emit ldp rt, rt2, [sp], #16 */
static void
emit_stack_pop(struct a64_jit_ctx *ctx, uint8_t rt, uint8_t rt2)
{
	emit_ls_pair_64(ctx, rt, rt2, A64_SP, 0, 1, 0);
}
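/*
 * Push pre-decrements SP by 16 before the store ([sp, #-16]!) while pop
 * post-increments it after the load ([sp], #16), so pushes followed by pops
 * in reverse order restore SP exactly.
 */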
#define A64_MOVN 0x25
#define A64_MOVZ 0xa5
#define A64_MOVK 0xe5
static void
mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t type,
	uint16_t imm16, uint8_t shift)
{
	uint32_t insn;

	insn = (!!is64) << 31;
	insn |= type << 23;
	insn |= (shift/16) << 21;
	insn |= imm16 << 5;
	insn |= rd;

	emit_insn(ctx, insn, check_reg(rd) || check_mov_hw(is64, shift));
}
static void
emit_mov_imm32(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint32_t val)
{
	uint16_t upper = val >> 16;
	uint16_t lower = val & 0xffff;

	/* Positive number */
	if ((val & 1UL << 31) == 0) {
		mov_imm(ctx, is64, rd, A64_MOVZ, lower, 0);
		if (upper)
			mov_imm(ctx, is64, rd, A64_MOVK, upper, 16);
	} else { /* Negative number */
		if (upper == 0xffff) {
			mov_imm(ctx, is64, rd, A64_MOVN, ~lower, 0);
		} else {
			mov_imm(ctx, is64, rd, A64_MOVN, ~upper, 16);
			if (lower)
				mov_imm(ctx, is64, rd, A64_MOVK, lower, 0);
		}
	}
}
static int
u16_blocks_weight(const uint64_t val, bool one)
{
	return (((val >> 0) & 0xffff) == (one ? 0xffff : 0x0000)) +
	       (((val >> 16) & 0xffff) == (one ? 0xffff : 0x0000)) +
	       (((val >> 32) & 0xffff) == (one ? 0xffff : 0x0000)) +
	       (((val >> 48) & 0xffff) == (one ? 0xffff : 0x0000));
}
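/*
 * Counts how many of the four 16-bit blocks of val are all-ones (one ==
 * true) or all-zeros (one == false). Whichever form dominates decides
 * between a MOVN-based and a MOVZ-based sequence below, since those blocks
 * then need no MOVK at all.
 */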
static void
emit_mov_imm(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint64_t val)
{
	uint64_t nval = ~val;
	int movn, sr;

	if (is64 == 0)
		return emit_mov_imm32(ctx, 0, rd, (uint32_t)(val & 0xffffffff));

	/* Find MOVN or MOVZ first */
	movn = u16_blocks_weight(val, true) > u16_blocks_weight(val, false);
	/* Find shift right value */
	sr = movn ? rte_fls_u64(nval) - 1 : rte_fls_u64(val) - 1;
	sr = RTE_ALIGN_FLOOR(sr, 16);
	sr = RTE_MAX(sr, 0);

	if (movn)
		mov_imm(ctx, 1, rd, A64_MOVN, (nval >> sr) & 0xffff, sr);
	else
		mov_imm(ctx, 1, rd, A64_MOVZ, (val >> sr) & 0xffff, sr);

	sr -= 16;
	while (sr >= 0) {
		if (((val >> sr) & 0xffff) != (movn ? 0xffff : 0x0000))
			mov_imm(ctx, 1, rd, A64_MOVK, (val >> sr) & 0xffff, sr);
		sr -= 16;
	}
}
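/*
 * Worked example: val = 0xffffffffffff1234 has three all-ones blocks, so
 * the MOVN form wins; ~val = 0xedcb, sr becomes 0, and the whole constant
 * loads with a single "movn rd, #0xedcb" and no MOVK.
 */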
static void
emit_ls(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn, uint8_t rm,
	bool load)
{
	uint32_t insn;

	insn = 0x1c1 << 21;
	if (load)
		insn |= 1 << 22;
	if (sz == BPF_B)
		insn |= 0 << 30;
	else if (sz == BPF_H)
		insn |= 1 << 30;
	else if (sz == BPF_W)
		insn |= 2 << 30;
	else if (sz == EBPF_DW)
		insn |= 3 << 30;

	insn |= rm << 16;
	insn |= 0x1a << 10; /* LSL and S = 0 */
	insn |= rn << 5;
	insn |= rt;

	emit_insn(ctx, insn, check_reg(rt) || check_reg(rn) || check_reg(rm) ||
		  check_ls_sz(sz));
}
static void
emit_str(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn,
	 uint8_t rm)
{
	emit_ls(ctx, sz, rt, rn, rm, 0);
}
static void
emit_ldr(struct a64_jit_ctx *ctx, uint8_t sz, uint8_t rt, uint8_t rn,
	 uint8_t rm)
{
	emit_ls(ctx, sz, rt, rn, rm, 1);
}
#define A64_ADD 0x58
#define A64_SUB 0x258
static void
emit_add_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
	     uint8_t rm, uint16_t op)
{
	uint32_t insn;

	insn = (!!is64) << 31;
	insn |= op << 21; /* shift == 0 */
	insn |= rm << 16;
	insn |= rn << 5;
	insn |= rd;

	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
}
static void
emit_add(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	emit_add_sub(ctx, is64, rd, rd, rm, A64_ADD);
}
static void
emit_sub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	emit_add_sub(ctx, is64, rd, rd, rm, A64_SUB);
}
static void
emit_neg(struct a64_jit_ctx *ctx, bool is64, uint8_t rd)
{
	emit_add_sub(ctx, is64, rd, A64_ZR, rd, A64_SUB);
}
static void
emit_mul(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	uint32_t insn;

	insn = (!!is64) << 31;
	insn |= 0xd8 << 21;
	insn |= rm << 16;
	insn |= A64_ZR << 10;
	insn |= rd << 5;
	insn |= rd;

	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
}
#define A64_UDIV 0x2
#define A64_LSLV 0x8
#define A64_LSRV 0x9
#define A64_ASRV 0xa
static void
emit_data_process_two_src(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
			  uint8_t rn, uint8_t rm, uint16_t op)
{
	uint32_t insn;

	insn = (!!is64) << 31;
	insn |= 0xd6 << 21;
	insn |= rm << 16;
	insn |= op << 10;
	insn |= rn << 5;
	insn |= rd;

	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
}
static void
emit_div(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_UDIV);
}
static void
emit_lslv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSLV);
}
static void
emit_lsrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_LSRV);
}
static void
emit_asrv(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	emit_data_process_two_src(ctx, is64, rd, rd, rm, A64_ASRV);
}
#define A64_SBFM 0x26
#define A64_UBFM 0xa6
static void
emit_bitfield(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
	      uint8_t immr, uint8_t imms, uint16_t op)
{
	uint32_t insn;

	insn = (!!is64) << 31;
	if (is64)
		insn |= 1 << 22; /* Set N bit when is64 is set */
	insn |= op << 23;
	insn |= immr << 16;
	insn |= imms << 10;
	insn |= rn << 5;
	insn |= rd;

	emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) ||
		  check_immr_imms(is64, immr, imms));
}
static void
emit_lsl(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
{
	const unsigned int width = is64 ? 64 : 32;
	uint8_t immr, imms;

	immr = (width - imm) & (width - 1);
	imms = width - 1 - imm;

	emit_bitfield(ctx, is64, rd, rd, immr, imms, A64_UBFM);
}
static void
emit_lsr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
{
	emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_UBFM);
}
static void
emit_asr(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t imm)
{
	emit_bitfield(ctx, is64, rd, rd, imm, is64 ? 63 : 31, A64_SBFM);
}
#define A64_OR 0x2a
#define A64_AND 0xa
#define A64_XOR 0x4a
static void
emit_logical(struct a64_jit_ctx *ctx, bool is64, uint8_t rd,
	     uint8_t rm, uint16_t op)
{
	uint32_t insn;

	insn = (!!is64) << 31;
	insn |= op << 24;
	insn |= rm << 16;
	insn |= rd << 5;
	insn |= rd;

	emit_insn(ctx, insn, check_reg(rd) || check_reg(rm));
}
static void
emit_or(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	emit_logical(ctx, is64, rd, rm, A64_OR);
}
static void
emit_and(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	emit_logical(ctx, is64, rd, rm, A64_AND);
}
static void
emit_xor(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rm)
{
	emit_logical(ctx, is64, rd, rm, A64_XOR);
}
static void
emit_msub(struct a64_jit_ctx *ctx, bool is64, uint8_t rd, uint8_t rn,
	  uint8_t rm, uint8_t ra)
{
	uint32_t insn;

	insn = (!!is64) << 31;
	insn |= 0xd8 << 21;
	insn |= rm << 16;
	insn |= 0x1 << 15;
	insn |= ra << 10;
	insn |= rn << 5;
	insn |= rd;

	emit_insn(ctx, insn, check_reg(rd) || check_reg(rn) || check_reg(rm) ||
		  check_reg(ra));
}
static void
emit_mod(struct a64_jit_ctx *ctx, bool is64, uint8_t tmp, uint8_t rd,
	 uint8_t rm)
{
	emit_data_process_two_src(ctx, is64, tmp, rd, rm, A64_UDIV);
	emit_msub(ctx, is64, rd, tmp, rm, rd);
}
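/*
 * AArch64 has no modulo instruction: tmp = rd / rm (udiv), then
 * rd = rd - tmp * rm (msub) leaves the remainder in rd.
 */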
static void
emit_blr(struct a64_jit_ctx *ctx, uint8_t rn)
{
	uint32_t insn;

	insn = 0xd63f0000;
	insn |= rn << 5;

	emit_insn(ctx, insn, check_reg(rn));
}
static void
emit_zero_extend(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
{
	switch (imm) {
	case 16:
		/* Zero-extend 16 bits into 64 bits */
		emit_bitfield(ctx, 1, rd, rd, 0, 15, A64_UBFM);
		break;
	case 32:
		/* Zero-extend 32 bits into 64 bits */
		emit_bitfield(ctx, 1, rd, rd, 0, 31, A64_UBFM);
		break;
	case 64:
		break;
	default:
		/* Generate error */
		emit_insn(ctx, 0, 1);
	}
}
static void
emit_rev(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
{
	uint32_t insn;

	insn = 0xdac00000;
	insn |= rd << 5;
	insn |= rd;

	switch (imm) {
	case 16:
		insn |= 1 << 10;
		emit_insn(ctx, insn, check_reg(rd));
		emit_zero_extend(ctx, rd, 16);
		break;
	case 32:
		insn |= 2 << 10;
		emit_insn(ctx, insn, check_reg(rd));
		/* Upper 32 bits already cleared */
		break;
	case 64:
		insn |= 3 << 10;
		emit_insn(ctx, insn, check_reg(rd));
		break;
	default:
		/* Generate error */
		emit_insn(ctx, insn, 1);
	}
}
static int
is_be(void)
{
#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
	return 1;
#endif
	return 0;
}

static void
emit_be(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
{
	if (is_be())
		emit_zero_extend(ctx, rd, imm);
	else
		emit_rev(ctx, rd, imm);
}

static void
emit_le(struct a64_jit_ctx *ctx, uint8_t rd, int32_t imm)
{
	if (is_be())
		emit_rev(ctx, rd, imm);
	else
		emit_zero_extend(ctx, rd, imm);
}
static uint8_t
ebpf_to_a64_reg(struct a64_jit_ctx *ctx, uint8_t reg)
{
	const uint32_t ebpf2a64_has_call[] = {
		/* Map A64 R7 register as EBPF return register */
		[EBPF_REG_0] = A64_R(7),
		/* Map A64 argument registers as EBPF argument registers */
		[EBPF_REG_1] = A64_R(0),
		[EBPF_REG_2] = A64_R(1),
		[EBPF_REG_3] = A64_R(2),
		[EBPF_REG_4] = A64_R(3),
		[EBPF_REG_5] = A64_R(4),
		/* Map A64 callee save registers as EBPF callee save registers */
		[EBPF_REG_6] = A64_R(19),
		[EBPF_REG_7] = A64_R(20),
		[EBPF_REG_8] = A64_R(21),
		[EBPF_REG_9] = A64_R(22),
		[EBPF_FP] = A64_R(25),
		/* Map A64 scratch registers as temporary storage */
		[TMP_REG_1] = A64_R(9),
		[TMP_REG_2] = A64_R(10),
		[TMP_REG_3] = A64_R(11),
	};

	const uint32_t ebpf2a64_no_call[] = {
		/* Map A64 R7 register as EBPF return register */
		[EBPF_REG_0] = A64_R(7),
		/* Map A64 argument registers as EBPF argument registers */
		[EBPF_REG_1] = A64_R(0),
		[EBPF_REG_2] = A64_R(1),
		[EBPF_REG_3] = A64_R(2),
		[EBPF_REG_4] = A64_R(3),
		[EBPF_REG_5] = A64_R(4),
		/*
		 * EBPF program does not have EBPF_CALL op code,
		 * Map A64 scratch registers as EBPF callee save registers.
		 */
		[EBPF_REG_6] = A64_R(9),
		[EBPF_REG_7] = A64_R(10),
		[EBPF_REG_8] = A64_R(11),
		[EBPF_REG_9] = A64_R(12),
		/* Map A64 FP register as EBPF FP register */
		[EBPF_FP] = A64_FP,
		/* Map remaining A64 scratch registers as temporary storage */
		[TMP_REG_1] = A64_R(13),
		[TMP_REG_2] = A64_R(14),
		[TMP_REG_3] = A64_R(15),
	};

	if (ctx->foundcall)
		return ebpf2a64_has_call[reg];
	else
		return ebpf2a64_no_call[reg];
}
/*
 * Procedure call standard for the arm64
 * -------------------------------------
 * R0..R7  - Parameter/result registers
 * R8      - Indirect result location register
 * R9..R15 - Scratch registers
 * R16     - First intra-procedure-call scratch register
 * R17     - Second intra-procedure-call temporary register
 * R18     - Platform register
 * R19-R28 - Callee saved registers
 * R29     - Frame pointer
 * R30     - Link register
 * R31     - Stack pointer
 */
static void
emit_prologue_has_call(struct a64_jit_ctx *ctx)
{
	uint8_t r6, r7, r8, r9, fp;

	r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
	r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
	r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
	r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
	fp = ebpf_to_a64_reg(ctx, EBPF_FP);

	/*
	 * eBPF prog stack layout
	 *
	 *                               high
	 *       eBPF prologue       0:+-----+ <= original A64_SP
	 *                             |FP/LR|
	 *                         -16:+-----+ <= current A64_FP
	 *      Callee saved registers | ... |
	 *              EBPF_FP => -64:+-----+
	 *                             |     |
	 *       eBPF prog stack       | ... |
	 *                             |     |
	 * (EBPF_FP - bpf->stack_sz)=> +-----+
	 * Pad for A64_SP 16B alignment| PAD |
	 * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
	 *                             |     |
	 *                             | ... | Function call stack
	 *                             |     |
	 *                             +-----+
	 *                              low
	 */
	emit_stack_push(ctx, A64_FP, A64_LR);
	emit_mov_64(ctx, A64_FP, A64_SP);
	emit_stack_push(ctx, r6, r7);
	emit_stack_push(ctx, r8, r9);
	/*
	 * There is no requirement to save A64_R(28) in the stack. It is done
	 * here because A64_SP needs to be 16B aligned and STR vs STP
	 * takes the same number of cycles (typically).
	 */
	emit_stack_push(ctx, fp, A64_R(28));
	emit_mov_64(ctx, fp, A64_SP);
	if (ctx->stack_sz)
		emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
}
static void
emit_epilogue_has_call(struct a64_jit_ctx *ctx)
{
	uint8_t r6, r7, r8, r9, fp, r0;

	r6 = ebpf_to_a64_reg(ctx, EBPF_REG_6);
	r7 = ebpf_to_a64_reg(ctx, EBPF_REG_7);
	r8 = ebpf_to_a64_reg(ctx, EBPF_REG_8);
	r9 = ebpf_to_a64_reg(ctx, EBPF_REG_9);
	fp = ebpf_to_a64_reg(ctx, EBPF_FP);
	r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);

	if (ctx->stack_sz)
		emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
	emit_stack_pop(ctx, fp, A64_R(28));
	emit_stack_pop(ctx, r8, r9);
	emit_stack_pop(ctx, r6, r7);
	emit_stack_pop(ctx, A64_FP, A64_LR);
	emit_mov_64(ctx, A64_R(0), r0);
	emit_ret(ctx);
}
static void
emit_prologue_no_call(struct a64_jit_ctx *ctx)
{
	/*
	 * eBPF prog stack layout without EBPF_CALL opcode
	 *
	 *                               high
	 * eBPF prologue(EBPF_FP)    0:+-----+ <= original A64_SP/current A64_FP
	 *                             |     |
	 *       eBPF prog stack       | ... |
	 *                             |     |
	 * (EBPF_FP - bpf->stack_sz)=> +-----+
	 * Pad for A64_SP 16B alignment| PAD |
	 * (EBPF_FP - ctx->stack_sz)=> +-----+ <= current A64_SP
	 *                             |     |
	 *                             | ... | Function call stack
	 *                             |     |
	 *                             +-----+
	 *                              low
	 */
	if (ctx->stack_sz) {
		emit_mov_64(ctx, A64_FP, A64_SP);
		emit_sub_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
	}
}
static void
emit_epilogue_no_call(struct a64_jit_ctx *ctx)
{
	if (ctx->stack_sz)
		emit_add_imm_64(ctx, A64_SP, A64_SP, ctx->stack_sz);
	emit_mov_64(ctx, A64_R(0), ebpf_to_a64_reg(ctx, EBPF_REG_0));
	emit_ret(ctx);
}
static void
emit_prologue(struct a64_jit_ctx *ctx)
{
	if (ctx->foundcall)
		emit_prologue_has_call(ctx);
	else
		emit_prologue_no_call(ctx);

	ctx->program_start = ctx->idx;
}
static void
emit_epilogue(struct a64_jit_ctx *ctx)
{
	ctx->program_sz = ctx->idx - ctx->program_start;

	if (ctx->foundcall)
		emit_epilogue_has_call(ctx);
	else
		emit_epilogue_no_call(ctx);
}
static void
emit_call(struct a64_jit_ctx *ctx, uint8_t tmp, void *func)
{
	uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);

	emit_mov_imm(ctx, 1, tmp, (uint64_t)func);
	emit_blr(ctx, tmp);
	emit_mov_64(ctx, r0, A64_R(0));
}
static void
emit_cbnz(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, int32_t imm19)
{
	uint32_t insn, imm;

	imm = mask_imm(19, imm19);
	insn = (!!is64) << 31;
	insn |= 0x35 << 24;
	insn |= imm << 5;
	insn |= rt;

	emit_insn(ctx, insn, check_reg(rt) || check_imm(19, imm19));
}
static void
emit_b(struct a64_jit_ctx *ctx, int32_t imm26)
{
	uint32_t insn, imm;

	imm = mask_imm(26, imm26);
	insn = 0x5 << 26;
	insn |= imm;

	emit_insn(ctx, insn, check_imm(26, imm26));
}
static void
emit_return_zero_if_src_zero(struct a64_jit_ctx *ctx, bool is64, uint8_t src)
{
	uint8_t r0 = ebpf_to_a64_reg(ctx, EBPF_REG_0);
	uint16_t jump_to_epilogue;

	emit_cbnz(ctx, is64, src, 3);
	emit_mov_imm(ctx, is64, r0, 0);
	jump_to_epilogue = (ctx->program_start + ctx->program_sz) - ctx->idx;
	emit_b(ctx, jump_to_epilogue);
}
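/*
 * When src is non-zero, the cbnz skips the two following instructions (the
 * mov of 0 into r0 and the branch into the epilogue), so an eBPF division
 * or modulo by zero returns 0 instead of faulting.
 */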
static void
emit_stadd(struct a64_jit_ctx *ctx, bool is64, uint8_t rs, uint8_t rn)
{
	uint32_t insn;

	insn = 0xb820001f;
	insn |= (!!is64) << 30;
	insn |= rs << 16;
	insn |= rn << 5;

	emit_insn(ctx, insn, check_reg(rs) || check_reg(rn));
}
static void
emit_ldxr(struct a64_jit_ctx *ctx, bool is64, uint8_t rt, uint8_t rn)
{
	uint32_t insn;

	insn = 0x885f7c00;
	insn |= (!!is64) << 30;
	insn |= rn << 5;
	insn |= rt;

	emit_insn(ctx, insn, check_reg(rt) || check_reg(rn));
}
static void
emit_stxr(struct a64_jit_ctx *ctx, bool is64, uint8_t rs, uint8_t rt,
	  uint8_t rn)
{
	uint32_t insn;

	insn = 0x88007c00;
	insn |= (!!is64) << 30;
	insn |= rs << 16;
	insn |= rn << 5;
	insn |= rt;

	emit_insn(ctx, insn, check_reg(rs) || check_reg(rt) || check_reg(rn));
}
static int
has_atomics(void)
{
	int rc = 0;

#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
	rc = 1;
#endif

	return rc;
}
static void
emit_xadd(struct a64_jit_ctx *ctx, uint8_t op, uint8_t tmp1, uint8_t tmp2,
	  uint8_t tmp3, uint8_t dst, int16_t off, uint8_t src)
{
	bool is64 = (BPF_SIZE(op) == EBPF_DW);
	uint8_t rn;

	if (off) {
		emit_mov_imm(ctx, 1, tmp1, off);
		emit_add(ctx, 1, tmp1, dst);
		rn = tmp1;
	} else {
		rn = dst;
	}

	if (has_atomics()) {
		emit_stadd(ctx, is64, src, rn);
	} else {
		emit_ldxr(ctx, is64, tmp2, rn);
		emit_add(ctx, is64, tmp2, src);
		emit_stxr(ctx, is64, tmp3, tmp2, rn);
		emit_cbnz(ctx, is64, tmp3, -3);
	}
}
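/*
 * With LSE atomics a single stadd performs the add in memory; otherwise the
 * classic ll/sc loop is used: ldxr, add, stxr, with cbnz -3 retrying from
 * the ldxr whenever the store-exclusive fails (tmp3 != 0).
 */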
#define A64_CMP 0x6b00000f
#define A64_TST 0x6a00000f
static void
emit_cmp_tst(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm,
	     uint32_t opc)
{
	uint32_t insn;

	insn = opc;
	insn |= (!!is64) << 31;
	insn |= rm << 16;
	insn |= rn << 5;

	emit_insn(ctx, insn, check_reg(rn) || check_reg(rm));
}
static void
emit_cmp(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm)
{
	emit_cmp_tst(ctx, is64, rn, rm, A64_CMP);
}
static void
emit_tst(struct a64_jit_ctx *ctx, bool is64, uint8_t rn, uint8_t rm)
{
	emit_cmp_tst(ctx, is64, rn, rm, A64_TST);
}
static void
emit_b_cond(struct a64_jit_ctx *ctx, uint8_t cond, int32_t imm19)
{
	uint32_t insn, imm;

	imm = mask_imm(19, imm19);
	insn = 0x15 << 26;
	insn |= imm << 5;
	insn |= cond;

	emit_insn(ctx, insn, check_cond(cond) || check_imm(19, imm19));
}
static void
emit_branch(struct a64_jit_ctx *ctx, uint8_t op, uint32_t i, int16_t off)
{
	jump_offset_to_branch_update(ctx, i);
	emit_b_cond(ctx, ebpf_to_a64_cond(op), jump_offset_get(ctx, i, off));
}
static void
check_program_has_call(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
{
	const struct ebpf_insn *ins;
	uint8_t op;
	uint32_t i;

	for (i = 0; i != bpf->prm.nb_ins; i++) {
		ins = bpf->prm.ins + i;
		op = ins->code;

		switch (op) {
		/* Call imm */
		case (BPF_JMP | EBPF_CALL):
			ctx->foundcall = 1;
			return;
		}
	}
}
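/*
 * The result steers ebpf_to_a64_reg(): a program without EBPF_CALL can keep
 * its eBPF callee-save registers in A64 scratch registers, letting the
 * prologue and epilogue skip the callee-save push/pop entirely.
 */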
/*
 * Walk through the eBPF code and translate it into arm64 instructions.
 */
static int
emit(struct a64_jit_ctx *ctx, struct rte_bpf *bpf)
{
	uint8_t op, dst, src, tmp1, tmp2, tmp3;
	const struct ebpf_insn *ins;
	uint64_t u64;
	int16_t off;
	int32_t imm;
	uint32_t i;
	bool is64;
	int rc;

	/* Reset context fields */
	ctx->idx = 0;
	/* arm64 SP must be aligned to 16 */
	ctx->stack_sz = RTE_ALIGN_MUL_CEIL(bpf->stack_sz, 16);
	tmp1 = ebpf_to_a64_reg(ctx, TMP_REG_1);
	tmp2 = ebpf_to_a64_reg(ctx, TMP_REG_2);
	tmp3 = ebpf_to_a64_reg(ctx, TMP_REG_3);

	emit_prologue(ctx);

	for (i = 0; i != bpf->prm.nb_ins; i++) {
		jump_offset_update(ctx, i);
		ins = bpf->prm.ins + i;
		op = ins->code;
		off = ins->off;
		imm = ins->imm;

		dst = ebpf_to_a64_reg(ctx, ins->dst_reg);
		src = ebpf_to_a64_reg(ctx, ins->src_reg);
		is64 = (BPF_CLASS(op) == EBPF_ALU64);

		switch (op) {
		/* dst = src */
		case (BPF_ALU | EBPF_MOV | BPF_X):
		case (EBPF_ALU64 | EBPF_MOV | BPF_X):
			emit_mov(ctx, is64, dst, src);
			break;
		/* dst = imm */
		case (BPF_ALU | EBPF_MOV | BPF_K):
		case (EBPF_ALU64 | EBPF_MOV | BPF_K):
			emit_mov_imm(ctx, is64, dst, imm);
			break;
		/* dst += src */
		case (BPF_ALU | BPF_ADD | BPF_X):
		case (EBPF_ALU64 | BPF_ADD | BPF_X):
			emit_add(ctx, is64, dst, src);
			break;
		/* dst += imm */
		case (BPF_ALU | BPF_ADD | BPF_K):
		case (EBPF_ALU64 | BPF_ADD | BPF_K):
			emit_mov_imm(ctx, is64, tmp1, imm);
			emit_add(ctx, is64, dst, tmp1);
			break;
		/* dst -= src */
		case (BPF_ALU | BPF_SUB | BPF_X):
		case (EBPF_ALU64 | BPF_SUB | BPF_X):
			emit_sub(ctx, is64, dst, src);
			break;
		/* dst -= imm */
		case (BPF_ALU | BPF_SUB | BPF_K):
		case (EBPF_ALU64 | BPF_SUB | BPF_K):
			emit_mov_imm(ctx, is64, tmp1, imm);
			emit_sub(ctx, is64, dst, tmp1);
			break;
		/* dst *= src */
		case (BPF_ALU | BPF_MUL | BPF_X):
		case (EBPF_ALU64 | BPF_MUL | BPF_X):
			emit_mul(ctx, is64, dst, src);
			break;
		/* dst *= imm */
		case (BPF_ALU | BPF_MUL | BPF_K):
		case (EBPF_ALU64 | BPF_MUL | BPF_K):
			emit_mov_imm(ctx, is64, tmp1, imm);
			emit_mul(ctx, is64, dst, tmp1);
			break;
		/* dst /= src */
		case (BPF_ALU | BPF_DIV | BPF_X):
		case (EBPF_ALU64 | BPF_DIV | BPF_X):
			emit_return_zero_if_src_zero(ctx, is64, src);
			emit_div(ctx, is64, dst, src);
			break;
		/* dst /= imm */
		case (BPF_ALU | BPF_DIV | BPF_K):
		case (EBPF_ALU64 | BPF_DIV | BPF_K):
			emit_mov_imm(ctx, is64, tmp1, imm);
			emit_div(ctx, is64, dst, tmp1);
			break;
		/* dst %= src */
		case (BPF_ALU | BPF_MOD | BPF_X):
		case (EBPF_ALU64 | BPF_MOD | BPF_X):
			emit_return_zero_if_src_zero(ctx, is64, src);
			emit_mod(ctx, is64, tmp1, dst, src);
			break;
		/* dst %= imm */
		case (BPF_ALU | BPF_MOD | BPF_K):
		case (EBPF_ALU64 | BPF_MOD | BPF_K):
			emit_mov_imm(ctx, is64, tmp1, imm);
			emit_mod(ctx, is64, tmp2, dst, tmp1);
			break;
		/* dst |= src */
		case (BPF_ALU | BPF_OR | BPF_X):
		case (EBPF_ALU64 | BPF_OR | BPF_X):
			emit_or(ctx, is64, dst, src);
			break;
		/* dst |= imm */
		case (BPF_ALU | BPF_OR | BPF_K):
		case (EBPF_ALU64 | BPF_OR | BPF_K):
			emit_mov_imm(ctx, is64, tmp1, imm);
			emit_or(ctx, is64, dst, tmp1);
			break;
		/* dst &= src */
		case (BPF_ALU | BPF_AND | BPF_X):
		case (EBPF_ALU64 | BPF_AND | BPF_X):
			emit_and(ctx, is64, dst, src);
			break;
		/* dst &= imm */
		case (BPF_ALU | BPF_AND | BPF_K):
		case (EBPF_ALU64 | BPF_AND | BPF_K):
			emit_mov_imm(ctx, is64, tmp1, imm);
			emit_and(ctx, is64, dst, tmp1);
			break;
		/* dst ^= src */
		case (BPF_ALU | BPF_XOR | BPF_X):
		case (EBPF_ALU64 | BPF_XOR | BPF_X):
			emit_xor(ctx, is64, dst, src);
			break;
		/* dst ^= imm */
		case (BPF_ALU | BPF_XOR | BPF_K):
		case (EBPF_ALU64 | BPF_XOR | BPF_K):
			emit_mov_imm(ctx, is64, tmp1, imm);
			emit_xor(ctx, is64, dst, tmp1);
			break;
		/* dst = -dst */
		case (BPF_ALU | BPF_NEG):
		case (EBPF_ALU64 | BPF_NEG):
			emit_neg(ctx, is64, dst);
			break;
		/* dst <<= src */
		case BPF_ALU | BPF_LSH | BPF_X:
		case EBPF_ALU64 | BPF_LSH | BPF_X:
			emit_lslv(ctx, is64, dst, src);
			break;
		/* dst <<= imm */
		case BPF_ALU | BPF_LSH | BPF_K:
		case EBPF_ALU64 | BPF_LSH | BPF_K:
			emit_lsl(ctx, is64, dst, imm);
			break;
		/* dst >>= src */
		case BPF_ALU | BPF_RSH | BPF_X:
		case EBPF_ALU64 | BPF_RSH | BPF_X:
			emit_lsrv(ctx, is64, dst, src);
			break;
		/* dst >>= imm */
		case BPF_ALU | BPF_RSH | BPF_K:
		case EBPF_ALU64 | BPF_RSH | BPF_K:
			emit_lsr(ctx, is64, dst, imm);
			break;
		/* dst >>= src (arithmetic) */
		case BPF_ALU | EBPF_ARSH | BPF_X:
		case EBPF_ALU64 | EBPF_ARSH | BPF_X:
			emit_asrv(ctx, is64, dst, src);
			break;
		/* dst >>= imm (arithmetic) */
		case BPF_ALU | EBPF_ARSH | BPF_K:
		case EBPF_ALU64 | EBPF_ARSH | BPF_K:
			emit_asr(ctx, is64, dst, imm);
			break;
		/* dst = be##imm(dst) */
		case (BPF_ALU | EBPF_END | EBPF_TO_BE):
			emit_be(ctx, dst, imm);
			break;
		/* dst = le##imm(dst) */
		case (BPF_ALU | EBPF_END | EBPF_TO_LE):
			emit_le(ctx, dst, imm);
			break;
		/* dst = *(size *) (src + off) */
		case (BPF_LDX | BPF_MEM | BPF_B):
		case (BPF_LDX | BPF_MEM | BPF_H):
		case (BPF_LDX | BPF_MEM | BPF_W):
		case (BPF_LDX | BPF_MEM | EBPF_DW):
			emit_mov_imm(ctx, 1, tmp1, off);
			emit_ldr(ctx, BPF_SIZE(op), dst, src, tmp1);
			break;
		/* dst = imm64 */
		case (BPF_LD | BPF_IMM | EBPF_DW):
			u64 = ((uint64_t)ins[1].imm << 32) | (uint32_t)imm;
			emit_mov_imm(ctx, 1, dst, u64);
			i++;
			break;
		/* *(size *)(dst + off) = src */
		case (BPF_STX | BPF_MEM | BPF_B):
		case (BPF_STX | BPF_MEM | BPF_H):
		case (BPF_STX | BPF_MEM | BPF_W):
		case (BPF_STX | BPF_MEM | EBPF_DW):
			emit_mov_imm(ctx, 1, tmp1, off);
			emit_str(ctx, BPF_SIZE(op), src, dst, tmp1);
			break;
		/* *(size *)(dst + off) = imm */
		case (BPF_ST | BPF_MEM | BPF_B):
		case (BPF_ST | BPF_MEM | BPF_H):
		case (BPF_ST | BPF_MEM | BPF_W):
		case (BPF_ST | BPF_MEM | EBPF_DW):
			emit_mov_imm(ctx, 1, tmp1, imm);
			emit_mov_imm(ctx, 1, tmp2, off);
			emit_str(ctx, BPF_SIZE(op), tmp1, dst, tmp2);
			break;
		/* STX XADD: lock *(size *)(dst + off) += src */
		case (BPF_STX | EBPF_XADD | BPF_W):
		case (BPF_STX | EBPF_XADD | EBPF_DW):
			emit_xadd(ctx, op, tmp1, tmp2, tmp3, dst, off, src);
			break;
		/* PC += off */
		case (BPF_JMP | BPF_JA):
			emit_b(ctx, jump_offset_get(ctx, i, off));
			break;
		/* PC += off if dst COND imm */
		case (BPF_JMP | BPF_JEQ | BPF_K):
		case (BPF_JMP | EBPF_JNE | BPF_K):
		case (BPF_JMP | BPF_JGT | BPF_K):
		case (BPF_JMP | EBPF_JLT | BPF_K):
		case (BPF_JMP | BPF_JGE | BPF_K):
		case (BPF_JMP | EBPF_JLE | BPF_K):
		case (BPF_JMP | EBPF_JSGT | BPF_K):
		case (BPF_JMP | EBPF_JSLT | BPF_K):
		case (BPF_JMP | EBPF_JSGE | BPF_K):
		case (BPF_JMP | EBPF_JSLE | BPF_K):
			emit_mov_imm(ctx, 1, tmp1, imm);
			emit_cmp(ctx, 1, dst, tmp1);
			emit_branch(ctx, op, i, off);
			break;
		case (BPF_JMP | BPF_JSET | BPF_K):
			emit_mov_imm(ctx, 1, tmp1, imm);
			emit_tst(ctx, 1, dst, tmp1);
			emit_branch(ctx, op, i, off);
			break;
		/* PC += off if dst COND src */
		case (BPF_JMP | BPF_JEQ | BPF_X):
		case (BPF_JMP | EBPF_JNE | BPF_X):
		case (BPF_JMP | BPF_JGT | BPF_X):
		case (BPF_JMP | EBPF_JLT | BPF_X):
		case (BPF_JMP | BPF_JGE | BPF_X):
		case (BPF_JMP | EBPF_JLE | BPF_X):
		case (BPF_JMP | EBPF_JSGT | BPF_X):
		case (BPF_JMP | EBPF_JSLT | BPF_X):
		case (BPF_JMP | EBPF_JSGE | BPF_X):
		case (BPF_JMP | EBPF_JSLE | BPF_X):
			emit_cmp(ctx, 1, dst, src);
			emit_branch(ctx, op, i, off);
			break;
		case (BPF_JMP | BPF_JSET | BPF_X):
			emit_tst(ctx, 1, dst, src);
			emit_branch(ctx, op, i, off);
			break;
		/* Call imm */
		case (BPF_JMP | EBPF_CALL):
			emit_call(ctx, tmp1, bpf->prm.xsym[ins->imm].func.val);
			break;
		/* Return r0 */
		case (BPF_JMP | EBPF_EXIT):
			emit_epilogue(ctx);
			break;
		default:
			RTE_BPF_LOG(ERR,
				"%s(%p): invalid opcode %#x at pc: %u;\n",
				__func__, bpf, ins->code, i);
			return -EINVAL;
		}
	}

	rc = check_invalid_args(ctx, ctx->idx);

	return rc;
}
/*
 * Produce a native ISA version of the given BPF code.
 */
int
bpf_jit_arm64(struct rte_bpf *bpf)
{
	struct a64_jit_ctx ctx;
	size_t size;
	int rc;

	/* Init JIT context */
	memset(&ctx, 0, sizeof(ctx));

	/* Initialize the memory for eBPF to a64 insn offset map for jump */
	rc = jump_offset_init(&ctx, bpf);
	if (rc)
		return rc;

	/* Check whether the eBPF program has a call class opcode */
	check_program_has_call(&ctx, bpf);

	/* First pass to calculate total code size and valid jump offsets */
	rc = emit(&ctx, bpf);
	if (rc)
		goto finish;

	size = ctx.idx * sizeof(uint32_t);
	/* Allocate JIT program memory */
	ctx.ins = mmap(NULL, size, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (ctx.ins == MAP_FAILED) {
		rc = -ENOMEM;
		goto finish;
	}

	/* Second pass to generate code */
	rc = emit(&ctx, bpf);
	if (rc)
		goto error;

	rc = mprotect(ctx.ins, size, PROT_READ | PROT_EXEC) != 0;
	if (rc) {
		rc = -errno;
		goto error;
	}

	/* Flush the icache */
	__builtin___clear_cache((char *)ctx.ins, (char *)(ctx.ins + ctx.idx));

	bpf->jit.func = (void *)ctx.ins;
	bpf->jit.sz = size;

	goto finish;

error:
	munmap(ctx.ins, size);
finish:
	jump_offset_fini(&ctx);
	return rc;
}
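/*
 * Note the two-pass flow above: the first emit() runs with ctx.ins == NULL
 * purely to size the program and resolve jump offsets, the second fills the
 * mmap()ed buffer, and mprotect() plus the icache flush make it executable
 * before it is published via bpf->jit.func.
 */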