]>
Commit | Line | Data |
---|---|---|
b886d83c | 1 | // SPDX-License-Identifier: GPL-2.0-only |
ddecdfce | 2 | /* |
39c13c20 | 3 | * Just-In-Time compiler for eBPF filters on 32bit ARM |
ddecdfce | 4 | * |
39c13c20 | 5 | * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> |
ddecdfce | 6 | * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> |
ddecdfce MG |
7 | */ |
8 | ||
39c13c20 | 9 | #include <linux/bpf.h> |
ddecdfce MG |
10 | #include <linux/bitops.h> |
11 | #include <linux/compiler.h> | |
12 | #include <linux/errno.h> | |
13 | #include <linux/filter.h> | |
ddecdfce MG |
14 | #include <linux/netdevice.h> |
15 | #include <linux/string.h> | |
16 | #include <linux/slab.h> | |
bf0098f2 | 17 | #include <linux/if_vlan.h> |
e8b56d55 | 18 | |
ddecdfce MG |
19 | #include <asm/cacheflush.h> |
20 | #include <asm/hwcap.h> | |
3460743e | 21 | #include <asm/opcodes.h> |
8c9602d3 | 22 | #include <asm/system_info.h> |
ddecdfce MG |
23 | |
24 | #include "bpf_jit_32.h" | |
25 | ||
70ec3a6c | 26 | /* |
0005e55a | 27 | * eBPF prog stack layout: |
70ec3a6c RK |
28 | * |
29 | * high | |
0005e55a RK |
30 | * original ARM_SP => +-----+ |
31 | * | | callee saved registers | |
32 | * +-----+ <= (BPF_FP + SCRATCH_SIZE) | |
70ec3a6c | 33 | * | ... | eBPF JIT scratch space |
0005e55a RK |
34 | * eBPF fp register => +-----+ |
35 | * (BPF_FP) | ... | eBPF prog stack | |
70ec3a6c RK |
36 | * +-----+ |
37 | * |RSVD | JIT scratchpad | |
0005e55a | 38 | * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) |
70ec3a6c RK |
39 | * | | |
40 | * | ... | Function call stack | |
41 | * | | | |
42 | * +-----+ | |
43 | * low | |
0005e55a RK |
44 | * |
45 | * The callee-saved registers depend on whether frame pointers are enabled. | |
46 | * With frame pointers (to be compliant with the ABI): | |
47 | * | |
bef8968d RK |
48 | * high |
49 | * original ARM_SP => +--------------+ \ | |
50 | * | pc | | | |
51 | * current ARM_FP => +--------------+ } callee saved registers | |
52 | * |r4-r9,fp,ip,lr| | | |
53 | * +--------------+ / | |
54 | * low | |
0005e55a RK |
55 | * |
56 | * Without frame pointers: | |
57 | * | |
bef8968d RK |
58 | * high |
59 | * original ARM_SP => +--------------+ | |
60 | * | r4-r9,fp,lr | callee saved registers | |
61 | * current ARM_FP => +--------------+ | |
62 | * low | |
02088d9b RK |
63 | * |
64 | * When popping registers off the stack at the end of a BPF function, we | |
65 | * reference them via the current ARM_FP register. | |
70ec3a6c | 66 | */ |
/*
 * Register lists (one bit per ARM register) for the callee-saved registers
 * r4-r9 and fp.  The push list additionally contains lr, the pop list
 * contains pc instead.
 */
#define CALLEE_MASK	(1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
			 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \
			 1 << ARM_FP)
#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
#define CALLEE_POP_MASK  (CALLEE_MASK | 1 << ARM_PC)
70ec3a6c | 72 | |
d449ceb1 RK |
enum {
	/*
	 * Stack layout - these are offsets from (top of stack - 4).
	 * Every 64-bit eBPF register kept on the stack owns two consecutive
	 * 32-bit slots: the high word first, then the low word.
	 */
	BPF_R2_HI,
	BPF_R2_LO,
	BPF_R3_HI,
	BPF_R3_LO,
	BPF_R4_HI,
	BPF_R4_LO,
	BPF_R5_HI,
	BPF_R5_LO,
	BPF_R7_HI,
	BPF_R7_LO,
	BPF_R8_HI,
	BPF_R8_LO,
	BPF_R9_HI,
	BPF_R9_LO,
	BPF_FP_HI,
	BPF_FP_LO,
	BPF_TC_HI,
	BPF_TC_LO,
	BPF_AX_HI,
	BPF_AX_LO,
	/* Number of 32-bit slots: stack space for BPF_REG_2, BPF_REG_3,
	 * BPF_REG_4, BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
	 * BPF_REG_FP, the tail call count and BPF_REG_AX.
	 */
	BPF_JIT_SCRATCH_REGS,
};
101 | ||
1c35ba12 RK |
/*
 * Negative "register" values indicate the register is stored on the stack
 * and are the offset from the top of the eBPF JIT scratch space.
 * Slot k therefore lives at byte offset -4 - 4 * k.
 */
#define STACK_OFFSET(k)	(-4 - (k) * 4)
/* Size in bytes of the scratch area: one 32-bit word per scratch slot. */
#define SCRATCH_SIZE	(BPF_JIT_SCRATCH_REGS * 4)
108 | ||
96cced4e RK |
/*
 * Convert a scratch-space offset (see STACK_OFFSET()) into an offset from
 * ARM_FP.  With frame pointers the offset is lowered by the size of the
 * CALLEE_PUSH_MASK registers plus one more word.
 * NOTE(review): the extra word presumably accounts for the additional
 * register saved below ARM_FP in the frame-pointer layout shown in the
 * stack diagram - confirm against the prologue.
 */
#ifdef CONFIG_FRAME_POINTER
#define EBPF_SCRATCH_TO_ARM_FP(x) ((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4)
#else
#define EBPF_SCRATCH_TO_ARM_FP(x) (x)
#endif
114 | ||
39c13c20 SB |
/* JIT-internal pseudo registers, indexed after the regular eBPF registers. */
#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)	/* TEMP Register 1 */
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)	/* TEMP Register 2 */
#define TCALL_CNT	(MAX_BPF_JIT_REG + 2)	/* Tail Call Count */

/* Set in jit_ctx::flags when a literal-pool entry is out of LDR range. */
#define FLAG_IMM_OVERFLOW	(1 << 0)
120 | ||
/*
 * Map eBPF registers to ARM 32bit registers or stack scratch space.
 *
 * 1. The first argument is passed in ARM 32bit registers and the rest of
 *    the arguments are passed in stack scratch space.
 * 2. The first callee-saved eBPF register is mapped to ARM 32bit registers
 *    and the remaining ones are mapped to scratch space on the stack.
 * 3. We need two 64 bit temp registers to do complex operations on eBPF
 *    registers.
 *
 * As the eBPF registers are all 64 bit registers and ARM has only 32 bit
 * registers, each eBPF register is mapped to two ARM 32 bit registers or
 * two scratch slots, and the 64 bit value is built from those.
 *
 * Each entry is {high word, low word}.  Negative entries are STACK_OFFSET()
 * values, i.e. the word lives in the stack scratch space.
 */
static const s8 bpf2a32[][2] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {ARM_R1, ARM_R0},
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = {ARM_R3, ARM_R2},
	/* Stored on stack scratch space */
	[BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)},
	[BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)},
	[BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)},
	[BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)},
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = {ARM_R5, ARM_R4},
	/* Stored on stack scratch space */
	[BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)},
	[BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
	[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},
	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},
	/* Temporary Register for internal BPF JIT, can be used
	 * for constant blindings and others.
	 */
	[TMP_REG_1] = {ARM_R7, ARM_R6},
	[TMP_REG_2] = {ARM_R9, ARM_R8},
	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)},
	/* temporary register for blinding constants.
	 * Stored on stack scratch space.
	 */
	[BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
};
ddecdfce | 166 | |
39c13c20 SB |
/*
 * Accessors for the {high, low} pairs of bpf2a32[]: they expect a local
 * named "dst" or "src" pointing at such a pair to be in scope.
 */
#define dst_lo	dst[1]
#define dst_hi	dst[0]
#define src_lo	src[1]
#define src_hi	src[0]
ddecdfce | 171 | |
39c13c20 SB |
/*
 * JIT Context:
 *
 * prog			:	bpf_prog
 * idx			:	index of the next instruction slot in target;
 *				advanced once per emitted instruction.
 * prologue_bytes	:	bytes used in prologue.
 * epilogue_offset	:	instruction index at which the epilogue starts.
 * cpu_architecture	:	CPU architecture level; compared against
 *				CPU_ARCH_ARMv5TE to decide whether LDRD/STRD
 *				may be emitted.
 * flags		:	FLAG_* status bits (FLAG_IMM_OVERFLOW).
 * offsets		:	array of eBPF instruction offsets in
 *				JITed code.
 * target		:	final JITed code; NULL during the first
 *				("fake") pass that only counts instructions.
 * stack_size		:	NOTE(review): not referenced in this part of
 *				the file; presumably the JITed frame size -
 *				confirm against the prologue/epilogue code.
 * epilogue_bytes	:	no of bytes used in epilogue (ARCH < 7 only).
 * imm_count		:	number of literal-pool constants, duplicates
 *				included (ARCH < 7 only).
 * imms			:	array of the constants placed in the literal
 *				pool (ARCH < 7 only).
 */

struct jit_ctx {
	const struct bpf_prog *prog;
	unsigned int idx;
	unsigned int prologue_bytes;
	unsigned int epilogue_offset;
	unsigned int cpu_architecture;
	u32 flags;
	u32 *offsets;
	u32 *target;
	u32 stack_size;
#if __LINUX_ARM_ARCH__ < 7
	u16 epilogue_bytes;
	u16 imm_count;
	u32 *imms;
#endif
};
204 | ||
ddecdfce | 205 | /* |
4560cdff | 206 | * Wrappers which handle both OABI and EABI and assures Thumb2 interworking |
ddecdfce MG |
207 | * (where the assembly routines like __aeabi_uidiv could cause problems). |
208 | */ | |
39c13c20 | 209 | static u32 jit_udiv32(u32 dividend, u32 divisor) |
ddecdfce MG |
210 | { |
211 | return dividend / divisor; | |
212 | } | |
213 | ||
39c13c20 | 214 | static u32 jit_mod32(u32 dividend, u32 divisor) |
4560cdff NS |
215 | { |
216 | return dividend % divisor; | |
217 | } | |
218 | ||
ddecdfce MG |
219 | static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) |
220 | { | |
3460743e BD |
221 | inst |= (cond << 28); |
222 | inst = __opcode_to_mem_arm(inst); | |
223 | ||
ddecdfce | 224 | if (ctx->target != NULL) |
3460743e | 225 | ctx->target[ctx->idx] = inst; |
ddecdfce MG |
226 | |
227 | ctx->idx++; | |
228 | } | |
229 | ||
230 | /* | |
231 | * Emit an instruction that will be executed unconditionally. | |
232 | */ | |
233 | static inline void emit(u32 inst, struct jit_ctx *ctx) | |
234 | { | |
235 | _emit(ARM_COND_AL, inst, ctx); | |
236 | } | |
237 | ||
1ca3b17b RK |
/*
 * This is rather horrid, but necessary to convert an integer constant
 * to an immediate operand for the opcodes, and be able to detect at
 * build time whether the constant can't be converted (iow, usable in
 * BUILD_BUG_ON()).
 *
 * imm12val(v, s) builds the operand for a value that fits in 0xff rotated
 * right by s bits: the value is rotated left by s and s / 2 ends up in
 * bits 11:8.  const_imm8m() tries every even rotation in turn; each mask
 * below is 0xff rotated right by the matching s.  It yields -1 when no
 * rotation fits.
 */
#define imm12val(v, s)		(rol32(v, (s)) | (s) << 7)
#define const_imm8m(x)					\
	({ int r;					\
	   u32 v = (x);					\
	   if (!(v & ~0x000000ff))			\
		r = imm12val(v, 0);			\
	   else if (!(v & ~0xc000003f))			\
		r = imm12val(v, 2);			\
	   else if (!(v & ~0xf000000f))			\
		r = imm12val(v, 4);			\
	   else if (!(v & ~0xfc000003))			\
		r = imm12val(v, 6);			\
	   else if (!(v & ~0xff000000))			\
		r = imm12val(v, 8);			\
	   else if (!(v & ~0x3fc00000))			\
		r = imm12val(v, 10);			\
	   else if (!(v & ~0x0ff00000))			\
		r = imm12val(v, 12);			\
	   else if (!(v & ~0x03fc0000))			\
		r = imm12val(v, 14);			\
	   else if (!(v & ~0x00ff0000))			\
		r = imm12val(v, 16);			\
	   else if (!(v & ~0x003fc000))			\
		r = imm12val(v, 18);			\
	   else if (!(v & ~0x000ff000))			\
		r = imm12val(v, 20);			\
	   else if (!(v & ~0x0003fc00))			\
		r = imm12val(v, 22);			\
	   else if (!(v & ~0x0000ff00))			\
		r = imm12val(v, 24);			\
	   else if (!(v & ~0x00003fc0))			\
		r = imm12val(v, 26);			\
	   else if (!(v & ~0x00000ff0))			\
		r = imm12val(v, 28);			\
	   else if (!(v & ~0x000003fc))			\
		r = imm12val(v, 30);			\
	   else						\
		r = -1;					\
	   r; })
283 | ||
39c13c20 SB |
284 | /* |
285 | * Checks if immediate value can be converted to imm12(12 bits) value. | |
286 | */ | |
1ca3b17b | 287 | static int imm8m(u32 x) |
ddecdfce | 288 | { |
39c13c20 | 289 | u32 rot; |
ddecdfce | 290 | |
39c13c20 SB |
291 | for (rot = 0; rot < 16; rot++) |
292 | if ((x & ~ror32(0xff, 2 * rot)) == 0) | |
293 | return rol32(x, 2 * rot) | (rot << 8); | |
294 | return -1; | |
ddecdfce MG |
295 | } |
296 | ||
1ca3b17b RK |
297 | #define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x)) |
298 | ||
a8ef95a0 RK |
299 | static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) |
300 | { | |
301 | op |= rt << 12 | rn << 16; | |
302 | if (imm12 >= 0) | |
303 | op |= ARM_INST_LDST__U; | |
304 | else | |
305 | imm12 = -imm12; | |
828e2b90 | 306 | return op | (imm12 & ARM_INST_LDST__IMM12); |
a8ef95a0 RK |
307 | } |
308 | ||
309 | static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) | |
310 | { | |
311 | op |= rt << 12 | rn << 16; | |
312 | if (imm8 >= 0) | |
313 | op |= ARM_INST_LDST__U; | |
314 | else | |
315 | imm8 = -imm8; | |
316 | return op | (imm8 & 0xf0) << 4 | (imm8 & 0x0f); | |
317 | } | |
318 | ||
/*
 * Immediate-offset loads and stores.  Word and byte accesses use the
 * 12-bit offset encoder; doubleword and halfword accesses use the split
 * 8-bit offset encoder.
 */
#define ARM_LDR_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off)
#define ARM_LDRB_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off)
#define ARM_LDRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
#define ARM_LDRH_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)

#define ARM_STR_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
#define ARM_STRB_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
#define ARM_STRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
#define ARM_STRH_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off)
328 | ||
39c13c20 SB |
329 | /* |
330 | * Initializes the JIT space with undefined instructions. | |
331 | */ | |
55309dd3 DB |
332 | static void jit_fill_hole(void *area, unsigned int size) |
333 | { | |
e8b56d55 | 334 | u32 *ptr; |
55309dd3 DB |
335 | /* We are guaranteed to have aligned memory. */ |
336 | for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) | |
e8b56d55 | 337 | *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); |
55309dd3 DB |
338 | } |
339 | ||
d1220efd RK |
#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
/* EABI requires the stack to be aligned to 64-bit boundaries */
#define STACK_ALIGNMENT	8
#else
/* Stack must be aligned to 32-bit boundaries */
#define STACK_ALIGNMENT	4
#endif

/*
 * total stack size used in JITed code: the program's own stack depth plus
 * the JIT scratch area, rounded up to STACK_ALIGNMENT.  Both macros expect
 * a "ctx" pointer to be in scope.
 */
#define _STACK_SIZE	(ctx->prog->aux->stack_depth + SCRATCH_SIZE)
#define STACK_SIZE	ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
ddecdfce | 351 | |
ddecdfce MG |
352 | #if __LINUX_ARM_ARCH__ < 7 |
353 | ||
354 | static u16 imm_offset(u32 k, struct jit_ctx *ctx) | |
355 | { | |
39c13c20 | 356 | unsigned int i = 0, offset; |
ddecdfce MG |
357 | u16 imm; |
358 | ||
359 | /* on the "fake" run we just count them (duplicates included) */ | |
360 | if (ctx->target == NULL) { | |
361 | ctx->imm_count++; | |
362 | return 0; | |
363 | } | |
364 | ||
365 | while ((i < ctx->imm_count) && ctx->imms[i]) { | |
366 | if (ctx->imms[i] == k) | |
367 | break; | |
368 | i++; | |
369 | } | |
370 | ||
371 | if (ctx->imms[i] == 0) | |
372 | ctx->imms[i] = k; | |
373 | ||
374 | /* constants go just after the epilogue */ | |
39c13c20 | 375 | offset = ctx->offsets[ctx->prog->len - 1] * 4; |
ddecdfce MG |
376 | offset += ctx->prologue_bytes; |
377 | offset += ctx->epilogue_bytes; | |
378 | offset += i * 4; | |
379 | ||
380 | ctx->target[offset / 4] = k; | |
381 | ||
382 | /* PC in ARM mode == address of the instruction + 8 */ | |
383 | imm = offset - (8 + ctx->idx * 4); | |
384 | ||
0b59d880 NS |
385 | if (imm & ~0xfff) { |
386 | /* | |
387 | * literal pool is too far, signal it into flags. we | |
388 | * can only detect it on the second pass unfortunately. | |
389 | */ | |
390 | ctx->flags |= FLAG_IMM_OVERFLOW; | |
391 | return 0; | |
392 | } | |
393 | ||
ddecdfce MG |
394 | return imm; |
395 | } | |
396 | ||
397 | #endif /* __LINUX_ARM_ARCH__ */ | |
398 | ||
39c13c20 SB |
399 | static inline int bpf2a32_offset(int bpf_to, int bpf_from, |
400 | const struct jit_ctx *ctx) { | |
401 | int to, from; | |
402 | ||
403 | if (ctx->target == NULL) | |
404 | return 0; | |
405 | to = ctx->offsets[bpf_to]; | |
406 | from = ctx->offsets[bpf_from]; | |
407 | ||
408 | return to - from - 1; | |
409 | } | |
410 | ||
ddecdfce MG |
411 | /* |
412 | * Move an immediate that's not an imm8m to a core register. | |
413 | */ | |
39c13c20 | 414 | static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx) |
ddecdfce MG |
415 | { |
416 | #if __LINUX_ARM_ARCH__ < 7 | |
417 | emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); | |
418 | #else | |
419 | emit(ARM_MOVW(rd, val & 0xffff), ctx); | |
420 | if (val > 0xffff) | |
421 | emit(ARM_MOVT(rd, val >> 16), ctx); | |
422 | #endif | |
423 | } | |
424 | ||
39c13c20 | 425 | static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) |
ddecdfce MG |
426 | { |
427 | int imm12 = imm8m(val); | |
428 | ||
429 | if (imm12 >= 0) | |
430 | emit(ARM_MOV_I(rd, imm12), ctx); | |
431 | else | |
432 | emit_mov_i_no8m(rd, val, ctx); | |
433 | } | |
434 | ||
e9062481 | 435 | static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) |
ddecdfce | 436 | { |
39c13c20 SB |
437 | if (elf_hwcap & HWCAP_THUMB) |
438 | emit(ARM_BX(tgt_reg), ctx); | |
439 | else | |
440 | emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); | |
e9062481 RK |
441 | } |
442 | ||
443 | static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) | |
444 | { | |
e9062481 RK |
445 | #if __LINUX_ARM_ARCH__ < 5 |
446 | emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); | |
447 | emit_bx_r(tgt_reg, ctx); | |
39c13c20 SB |
448 | #else |
449 | emit(ARM_BLX_R(tgt_reg), ctx); | |
450 | #endif | |
ddecdfce MG |
451 | } |
452 | ||
39c13c20 | 453 | static inline int epilogue_offset(const struct jit_ctx *ctx) |
ddecdfce | 454 | { |
39c13c20 SB |
455 | int to, from; |
456 | /* No need for 1st dummy run */ | |
457 | if (ctx->target == NULL) | |
458 | return 0; | |
459 | to = ctx->epilogue_offset; | |
460 | from = ctx->idx; | |
461 | ||
462 | return to - from - 2; | |
ddecdfce MG |
463 | } |
464 | ||
/*
 * Emit code for rd = rm / rn (op == BPF_DIV) or rd = rm % rn (otherwise).
 *
 * When built for ARMv7 and the CPU reports HWCAP_IDIVA, the hardware UDIV
 * instruction is used (followed by MLS through ARM_IP for the remainder).
 * Otherwise the operands are moved into ARM_R0/ARM_R1 and jit_udiv32() or
 * jit_mod32() is called; the previous ARM_R0/ARM_R1 contents are parked in
 * the TMP_REG_1 pair around the call.
 *
 * NOTE(review): if rd is ARM_R0 while rm is not, the final restore of
 * ARM_R0 overwrites the result; callers presumably never pass that
 * combination - confirm.
 */
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
	const s8 *tmp = bpf2a32[TMP_REG_1];

#if __LINUX_ARM_ARCH__ == 7
	if (elf_hwcap & HWCAP_IDIVA) {
		if (op == BPF_DIV)
			emit(ARM_UDIV(rd, rm, rn), ctx);
		else {
			/* quotient into ARM_IP, then multiply-subtract */
			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
		}
		return;
	}
#endif

	/*
	 * For BPF_ALU | BPF_DIV | BPF_K instructions
	 * As ARM_R1 and ARM_R0 contains 1st argument of bpf
	 * function, we need to save it on caller side to save
	 * it from getting destroyed within callee.
	 * After the return from the callee, we restore ARM_R0
	 * ARM_R1.
	 */
	if (rn != ARM_R1) {
		emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
		emit(ARM_MOV_R(ARM_R1, rn), ctx);
	}
	if (rm != ARM_R0) {
		emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
		emit(ARM_MOV_R(ARM_R0, rm), ctx);
	}

	/* Call appropriate function */
	emit_mov_i(ARM_IP, op == BPF_DIV ?
		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Save return value */
	if (rd != ARM_R0)
		emit(ARM_MOV_R(rd, ARM_R0), ctx);

	/* Restore ARM_R0 and ARM_R1 */
	if (rn != ARM_R1)
		emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
	if (rm != ARM_R0)
		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}
513 | ||
47b9c3bf RK |
514 | /* Is the translated BPF register on stack? */ |
515 | static bool is_stacked(s8 reg) | |
ddecdfce | 516 | { |
47b9c3bf | 517 | return reg < 0; |
ddecdfce MG |
518 | } |
519 | ||
7a987025 RK |
520 | /* If a BPF register is on the stack (stk is true), load it to the |
521 | * supplied temporary register and return the temporary register | |
522 | * for subsequent operations, otherwise just use the CPU register. | |
523 | */ | |
524 | static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) | |
525 | { | |
526 | if (is_stacked(reg)) { | |
96cced4e | 527 | emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); |
7a987025 RK |
528 | reg = tmp; |
529 | } | |
530 | return reg; | |
531 | } | |
532 | ||
a6eccac5 RK |
533 | static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, |
534 | struct jit_ctx *ctx) | |
535 | { | |
536 | if (is_stacked(reg[1])) { | |
8c9602d3 RK |
537 | if (__LINUX_ARM_ARCH__ >= 6 || |
538 | ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { | |
539 | emit(ARM_LDRD_I(tmp[1], ARM_FP, | |
540 | EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); | |
541 | } else { | |
542 | emit(ARM_LDR_I(tmp[1], ARM_FP, | |
543 | EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); | |
544 | emit(ARM_LDR_I(tmp[0], ARM_FP, | |
545 | EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); | |
546 | } | |
a6eccac5 RK |
547 | reg = tmp; |
548 | } | |
549 | return reg; | |
550 | } | |
551 | ||
7a987025 RK |
552 | /* If a BPF register is on the stack (stk is true), save the register |
553 | * back to the stack. If the source register is not the same, then | |
554 | * move it into the correct register. | |
555 | */ | |
556 | static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) | |
557 | { | |
558 | if (is_stacked(reg)) | |
96cced4e | 559 | emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); |
7a987025 RK |
560 | else if (reg != src) |
561 | emit(ARM_MOV_R(reg, src), ctx); | |
562 | } | |
563 | ||
a6eccac5 RK |
564 | static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, |
565 | struct jit_ctx *ctx) | |
566 | { | |
567 | if (is_stacked(reg[1])) { | |
8c9602d3 RK |
568 | if (__LINUX_ARM_ARCH__ >= 6 || |
569 | ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { | |
570 | emit(ARM_STRD_I(src[1], ARM_FP, | |
571 | EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); | |
572 | } else { | |
573 | emit(ARM_STR_I(src[1], ARM_FP, | |
574 | EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); | |
575 | emit(ARM_STR_I(src[0], ARM_FP, | |
576 | EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); | |
577 | } | |
a6eccac5 RK |
578 | } else { |
579 | if (reg[1] != src[1]) | |
580 | emit(ARM_MOV_R(reg[1], src[1]), ctx); | |
581 | if (reg[0] != src[0]) | |
582 | emit(ARM_MOV_R(reg[0], src[0]), ctx); | |
583 | } | |
584 | } | |
585 | ||
1c35ba12 | 586 | static inline void emit_a32_mov_i(const s8 dst, const u32 val, |
47b9c3bf | 587 | struct jit_ctx *ctx) |
ddecdfce | 588 | { |
1c35ba12 | 589 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
39c13c20 | 590 | |
47b9c3bf | 591 | if (is_stacked(dst)) { |
39c13c20 | 592 | emit_mov_i(tmp[1], val, ctx); |
7a987025 | 593 | arm_bpf_put_reg32(dst, tmp[1], ctx); |
39c13c20 SB |
594 | } else { |
595 | emit_mov_i(dst, val, ctx); | |
596 | } | |
ddecdfce MG |
597 | } |
598 | ||
f9ff5018 RK |
599 | static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx) |
600 | { | |
601 | const s8 *tmp = bpf2a32[TMP_REG_1]; | |
602 | const s8 *rd = is_stacked(dst_lo) ? tmp : dst; | |
603 | ||
604 | emit_mov_i(rd[1], (u32)val, ctx); | |
605 | emit_mov_i(rd[0], val >> 32, ctx); | |
606 | ||
607 | arm_bpf_put_reg64(dst, rd, ctx); | |
608 | } | |
609 | ||
39c13c20 | 610 | /* Sign extended move */ |
f9ff5018 RK |
611 | static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[], |
612 | const u32 val, struct jit_ctx *ctx) { | |
077513b8 | 613 | u64 val64 = val; |
ddecdfce | 614 | |
39c13c20 | 615 | if (is64 && (val & (1<<31))) |
077513b8 RK |
616 | val64 |= 0xffffffff00000000ULL; |
617 | emit_a32_mov_i64(dst, val64, ctx); | |
39c13c20 | 618 | } |
ddecdfce | 619 | |
39c13c20 SB |
620 | static inline void emit_a32_add_r(const u8 dst, const u8 src, |
621 | const bool is64, const bool hi, | |
622 | struct jit_ctx *ctx) { | |
623 | /* 64 bit : | |
624 | * adds dst_lo, dst_lo, src_lo | |
625 | * adc dst_hi, dst_hi, src_hi | |
626 | * 32 bit : | |
627 | * add dst_lo, dst_lo, src_lo | |
628 | */ | |
629 | if (!hi && is64) | |
630 | emit(ARM_ADDS_R(dst, dst, src), ctx); | |
631 | else if (hi && is64) | |
632 | emit(ARM_ADC_R(dst, dst, src), ctx); | |
633 | else | |
634 | emit(ARM_ADD_R(dst, dst, src), ctx); | |
635 | } | |
ddecdfce | 636 | |
39c13c20 SB |
637 | static inline void emit_a32_sub_r(const u8 dst, const u8 src, |
638 | const bool is64, const bool hi, | |
639 | struct jit_ctx *ctx) { | |
640 | /* 64 bit : | |
641 | * subs dst_lo, dst_lo, src_lo | |
642 | * sbc dst_hi, dst_hi, src_hi | |
643 | * 32 bit : | |
644 | * sub dst_lo, dst_lo, src_lo | |
ddecdfce | 645 | */ |
39c13c20 SB |
646 | if (!hi && is64) |
647 | emit(ARM_SUBS_R(dst, dst, src), ctx); | |
648 | else if (hi && is64) | |
649 | emit(ARM_SBC_R(dst, dst, src), ctx); | |
650 | else | |
651 | emit(ARM_SUB_R(dst, dst, src), ctx); | |
652 | } | |
ddecdfce | 653 | |
39c13c20 SB |
654 | static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, |
655 | const bool hi, const u8 op, struct jit_ctx *ctx){ | |
656 | switch (BPF_OP(op)) { | |
657 | /* dst = dst + src */ | |
658 | case BPF_ADD: | |
659 | emit_a32_add_r(dst, src, is64, hi, ctx); | |
660 | break; | |
661 | /* dst = dst - src */ | |
662 | case BPF_SUB: | |
663 | emit_a32_sub_r(dst, src, is64, hi, ctx); | |
664 | break; | |
665 | /* dst = dst | src */ | |
666 | case BPF_OR: | |
667 | emit(ARM_ORR_R(dst, dst, src), ctx); | |
668 | break; | |
669 | /* dst = dst & src */ | |
670 | case BPF_AND: | |
671 | emit(ARM_AND_R(dst, dst, src), ctx); | |
672 | break; | |
673 | /* dst = dst ^ src */ | |
674 | case BPF_XOR: | |
675 | emit(ARM_EOR_R(dst, dst, src), ctx); | |
676 | break; | |
677 | /* dst = dst * src */ | |
678 | case BPF_MUL: | |
679 | emit(ARM_MUL(dst, dst, src), ctx); | |
680 | break; | |
681 | /* dst = dst << src */ | |
682 | case BPF_LSH: | |
683 | emit(ARM_LSL_R(dst, dst, src), ctx); | |
684 | break; | |
685 | /* dst = dst >> src */ | |
686 | case BPF_RSH: | |
687 | emit(ARM_LSR_R(dst, dst, src), ctx); | |
688 | break; | |
689 | /* dst = dst >> src (signed)*/ | |
690 | case BPF_ARSH: | |
691 | emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx); | |
692 | break; | |
693 | } | |
ddecdfce MG |
694 | } |
695 | ||
39c13c20 SB |
696 | /* ALU operation (32 bit) |
697 | * dst = dst (op) src | |
698 | */ | |
1c35ba12 | 699 | static inline void emit_a32_alu_r(const s8 dst, const s8 src, |
39c13c20 SB |
700 | struct jit_ctx *ctx, const bool is64, |
701 | const bool hi, const u8 op) { | |
1c35ba12 | 702 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
7a987025 | 703 | s8 rn, rd; |
39c13c20 | 704 | |
7a987025 RK |
705 | rn = arm_bpf_get_reg32(src, tmp[1], ctx); |
706 | rd = arm_bpf_get_reg32(dst, tmp[0], ctx); | |
39c13c20 | 707 | /* ALU operation */ |
7a987025 RK |
708 | emit_alu_r(rd, rn, is64, hi, op, ctx); |
709 | arm_bpf_put_reg32(dst, rd, ctx); | |
ddecdfce MG |
710 | } |
711 | ||
/*
 * ALU operation on a whole BPF register: dst = dst (op) src.
 *
 * With is64 the operation is applied to the low word and then to the high
 * word.  Without is64 only the low words are combined and the high word of
 * dst is set to zero.  dst is fetched into TMP_REG_1 and src into TMP_REG_2
 * when they live on the stack.
 */
static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
				  const s8 src[], struct jit_ctx *ctx,
				  const u8 op) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd;

	rd = arm_bpf_get_reg64(dst, tmp, ctx);
	if (is64) {
		const s8 *rs;

		rs = arm_bpf_get_reg64(src, tmp2, ctx);

		/* ALU operation */
		emit_alu_r(rd[1], rs[1], true, false, op, ctx);
		emit_alu_r(rd[0], rs[0], true, true, op, ctx);
	} else {
		s8 rs;

		rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);

		/*
		 * ALU operation.  Note that is64 == true is passed here as
		 * well, so add/sub use their flag-setting low-word form.
		 */
		emit_alu_r(rd[1], rs, true, false, op, ctx);
		emit_a32_mov_i(rd[0], 0, ctx);
	}

	arm_bpf_put_reg64(dst, rd, ctx);
}
ddecdfce | 741 | |
7a987025 | 742 | /* dst = src (4 bytes)*/ |
1c35ba12 | 743 | static inline void emit_a32_mov_r(const s8 dst, const s8 src, |
39c13c20 | 744 | struct jit_ctx *ctx) { |
1c35ba12 | 745 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
7a987025 | 746 | s8 rt; |
39c13c20 | 747 | |
7a987025 RK |
748 | rt = arm_bpf_get_reg32(src, tmp[0], ctx); |
749 | arm_bpf_put_reg32(dst, rt, ctx); | |
ddecdfce MG |
750 | } |
751 | ||
39c13c20 | 752 | /* dst = src */ |
1c35ba12 | 753 | static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], |
47b9c3bf RK |
754 | const s8 src[], |
755 | struct jit_ctx *ctx) { | |
8c9602d3 RK |
756 | if (!is64) { |
757 | emit_a32_mov_r(dst_lo, src_lo, ctx); | |
758 | /* Zero out high 4 bytes */ | |
759 | emit_a32_mov_i(dst_hi, 0, ctx); | |
760 | } else if (__LINUX_ARM_ARCH__ < 6 && | |
761 | ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { | |
39c13c20 | 762 | /* complete 8 byte move */ |
8c9602d3 | 763 | emit_a32_mov_r(dst_lo, src_lo, ctx); |
47b9c3bf | 764 | emit_a32_mov_r(dst_hi, src_hi, ctx); |
8c9602d3 RK |
765 | } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { |
766 | const u8 *tmp = bpf2a32[TMP_REG_1]; | |
767 | ||
768 | emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); | |
769 | emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); | |
770 | } else if (is_stacked(src_lo)) { | |
771 | emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); | |
772 | } else if (is_stacked(dst_lo)) { | |
773 | emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); | |
39c13c20 | 774 | } else { |
8c9602d3 RK |
775 | emit(ARM_MOV_R(dst[0], src[0]), ctx); |
776 | emit(ARM_MOV_R(dst[1], src[1]), ctx); | |
ddecdfce | 777 | } |
39c13c20 | 778 | } |
19fc99d0 | 779 | |
39c13c20 | 780 | /* Shift operations */ |
47b9c3bf | 781 | static inline void emit_a32_alu_i(const s8 dst, const u32 val, |
39c13c20 | 782 | struct jit_ctx *ctx, const u8 op) { |
1c35ba12 | 783 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
7a987025 | 784 | s8 rd; |
39c13c20 | 785 | |
7a987025 | 786 | rd = arm_bpf_get_reg32(dst, tmp[0], ctx); |
39c13c20 SB |
787 | |
788 | /* Do shift operation */ | |
789 | switch (op) { | |
790 | case BPF_LSH: | |
791 | emit(ARM_LSL_I(rd, rd, val), ctx); | |
792 | break; | |
793 | case BPF_RSH: | |
794 | emit(ARM_LSR_I(rd, rd, val), ctx); | |
795 | break; | |
796 | case BPF_NEG: | |
797 | emit(ARM_RSB_I(rd, rd, val), ctx); | |
798 | break; | |
799 | } | |
ddecdfce | 800 | |
7a987025 | 801 | arm_bpf_put_reg32(dst, rd, ctx); |
39c13c20 SB |
802 | } |
803 | ||
804 | /* dst = ~dst (64 bit) */ | |
47b9c3bf | 805 | static inline void emit_a32_neg64(const s8 dst[], |
39c13c20 | 806 | struct jit_ctx *ctx){ |
1c35ba12 | 807 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
a6eccac5 | 808 | const s8 *rd; |
39c13c20 SB |
809 | |
810 | /* Setup Operand */ | |
a6eccac5 | 811 | rd = arm_bpf_get_reg64(dst, tmp, ctx); |
39c13c20 SB |
812 | |
813 | /* Do Negate Operation */ | |
a6eccac5 RK |
814 | emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx); |
815 | emit(ARM_RSC_I(rd[0], rd[0], 0), ctx); | |
39c13c20 | 816 | |
a6eccac5 | 817 | arm_bpf_put_reg64(dst, rd, ctx); |
39c13c20 SB |
818 | } |
819 | ||
/*
 * dst = dst << src (64 bit, shift amount taken from the low word of src).
 *
 * The shift amount is loaded into the low register of TMP_REG_2 if it is
 * stacked, and dst into TMP_REG_1.  ARM_IP, ARM_LR and the high register of
 * TMP_REG_2 are used as scratch; the result is written back word by word.
 */
static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[],
				    struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *rd;
	s8 rt;

	/* Setup Operands */
	rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
	rd = arm_bpf_get_reg64(dst, tmp, ctx);

	/* Do LSH operation */
	/* ip = rt - 32, tmp2[0] = 32 - rt */
	emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
	/* new high word: (hi << rt) | (lo << (rt - 32)) | (lo >> (32 - rt)) */
	emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx);
	emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx);
	/* new low word: lo << rt */
	emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx);

	arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
	arm_bpf_put_reg32(dst_hi, ARM_IP, ctx);
}
ddecdfce | 843 | |
39c13c20 | 844 | /* dst = dst >> src (signed)*/ |
47b9c3bf RK |
845 | static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], |
846 | struct jit_ctx *ctx) { | |
1c35ba12 RK |
847 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
848 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; | |
a6eccac5 RK |
849 | const s8 *rd; |
850 | s8 rt; | |
7a987025 | 851 | |
39c13c20 | 852 | /* Setup Operands */ |
7a987025 | 853 | rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); |
a6eccac5 | 854 | rd = arm_bpf_get_reg64(dst, tmp, ctx); |
39c13c20 SB |
855 | |
856 | /* Do the ARSH operation */ | |
857 | emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); | |
858 | emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); | |
a6eccac5 RK |
859 | emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); |
860 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); | |
39c13c20 | 861 | _emit(ARM_COND_MI, ARM_B(0), ctx); |
a6eccac5 RK |
862 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); |
863 | emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx); | |
7a987025 RK |
864 | |
865 | arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); | |
866 | arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); | |
39c13c20 SB |
867 | } |
868 | ||
869 | /* dst = dst >> src */ | |
47b9c3bf RK |
870 | static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], |
871 | struct jit_ctx *ctx) { | |
1c35ba12 RK |
872 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
873 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; | |
a6eccac5 RK |
874 | const s8 *rd; |
875 | s8 rt; | |
7a987025 | 876 | |
39c13c20 | 877 | /* Setup Operands */ |
7a987025 | 878 | rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); |
a6eccac5 | 879 | rd = arm_bpf_get_reg64(dst, tmp, ctx); |
39c13c20 | 880 | |
68565a1a | 881 | /* Do RSH operation */ |
39c13c20 SB |
882 | emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); |
883 | emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); | |
a6eccac5 RK |
884 | emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); |
885 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); | |
886 | emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx); | |
887 | emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx); | |
7a987025 RK |
888 | |
889 | arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); | |
890 | arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); | |
ddecdfce MG |
891 | } |
892 | ||
39c13c20 | 893 | /* dst = dst << val */ |
47b9c3bf RK |
894 | static inline void emit_a32_lsh_i64(const s8 dst[], |
895 | const u32 val, struct jit_ctx *ctx){ | |
1c35ba12 RK |
896 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
897 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; | |
a6eccac5 | 898 | const s8 *rd; |
39c13c20 | 899 | |
7a987025 | 900 | /* Setup operands */ |
a6eccac5 | 901 | rd = arm_bpf_get_reg64(dst, tmp, ctx); |
39c13c20 SB |
902 | |
903 | /* Do LSH operation */ | |
904 | if (val < 32) { | |
a6eccac5 RK |
905 | emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx); |
906 | emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx); | |
907 | emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx); | |
39c13c20 SB |
908 | } else { |
909 | if (val == 32) | |
a6eccac5 | 910 | emit(ARM_MOV_R(rd[0], rd[1]), ctx); |
39c13c20 | 911 | else |
a6eccac5 RK |
912 | emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx); |
913 | emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx); | |
39c13c20 SB |
914 | } |
915 | ||
a6eccac5 | 916 | arm_bpf_put_reg64(dst, rd, ctx); |
39c13c20 SB |
917 | } |
918 | ||
919 | /* dst = dst >> val */ | |
47b9c3bf | 920 | static inline void emit_a32_rsh_i64(const s8 dst[], |
39c13c20 | 921 | const u32 val, struct jit_ctx *ctx) { |
1c35ba12 RK |
922 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
923 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; | |
a6eccac5 | 924 | const s8 *rd; |
39c13c20 | 925 | |
7a987025 | 926 | /* Setup operands */ |
a6eccac5 | 927 | rd = arm_bpf_get_reg64(dst, tmp, ctx); |
39c13c20 SB |
928 | |
929 | /* Do LSR operation */ | |
930 | if (val < 32) { | |
a6eccac5 RK |
931 | emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); |
932 | emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); | |
933 | emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); | |
39c13c20 | 934 | } else if (val == 32) { |
a6eccac5 RK |
935 | emit(ARM_MOV_R(rd[1], rd[0]), ctx); |
936 | emit(ARM_MOV_I(rd[0], 0), ctx); | |
39c13c20 | 937 | } else { |
a6eccac5 RK |
938 | emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx); |
939 | emit(ARM_MOV_I(rd[0], 0), ctx); | |
39c13c20 SB |
940 | } |
941 | ||
a6eccac5 | 942 | arm_bpf_put_reg64(dst, rd, ctx); |
39c13c20 SB |
943 | } |
944 | ||
945 | /* dst = dst >> val (signed) */ | |
47b9c3bf | 946 | static inline void emit_a32_arsh_i64(const s8 dst[], |
39c13c20 | 947 | const u32 val, struct jit_ctx *ctx){ |
1c35ba12 RK |
948 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
949 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; | |
a6eccac5 | 950 | const s8 *rd; |
39c13c20 | 951 | |
7a987025 | 952 | /* Setup operands */ |
a6eccac5 | 953 | rd = arm_bpf_get_reg64(dst, tmp, ctx); |
39c13c20 SB |
954 | |
955 | /* Do ARSH operation */ | |
956 | if (val < 32) { | |
a6eccac5 RK |
957 | emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); |
958 | emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); | |
959 | emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); | |
39c13c20 | 960 | } else if (val == 32) { |
a6eccac5 RK |
961 | emit(ARM_MOV_R(rd[1], rd[0]), ctx); |
962 | emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); | |
39c13c20 | 963 | } else { |
a6eccac5 RK |
964 | emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx); |
965 | emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); | |
39c13c20 SB |
966 | } |
967 | ||
a6eccac5 | 968 | arm_bpf_put_reg64(dst, rd, ctx); |
39c13c20 SB |
969 | } |
970 | ||
47b9c3bf RK |
971 | static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], |
972 | struct jit_ctx *ctx) { | |
1c35ba12 RK |
973 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
974 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; | |
a6eccac5 | 975 | const s8 *rd, *rt; |
7a987025 | 976 | |
39c13c20 | 977 | /* Setup operands for multiplication */ |
a6eccac5 RK |
978 | rd = arm_bpf_get_reg64(dst, tmp, ctx); |
979 | rt = arm_bpf_get_reg64(src, tmp2, ctx); | |
39c13c20 SB |
980 | |
981 | /* Do Multiplication */ | |
a6eccac5 RK |
982 | emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx); |
983 | emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx); | |
39c13c20 SB |
984 | emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); |
985 | ||
a6eccac5 RK |
986 | emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx); |
987 | emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx); | |
7a987025 RK |
988 | |
989 | arm_bpf_put_reg32(dst_lo, ARM_IP, ctx); | |
a6eccac5 | 990 | arm_bpf_put_reg32(dst_hi, rd[0], ctx); |
39c13c20 SB |
991 | } |
992 | ||
993 | /* *(size *)(dst + off) = src */ | |
c5eae692 RK |
994 | static inline void emit_str_r(const s8 dst, const s8 src[], |
995 | s32 off, struct jit_ctx *ctx, const u8 sz){ | |
1c35ba12 | 996 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
c5eae692 | 997 | s32 off_max; |
7a987025 | 998 | s8 rd; |
39c13c20 | 999 | |
7a987025 | 1000 | rd = arm_bpf_get_reg32(dst, tmp[1], ctx); |
c5eae692 RK |
1001 | |
1002 | if (sz == BPF_H) | |
1003 | off_max = 0xff; | |
1004 | else | |
1005 | off_max = 0xfff; | |
1006 | ||
1007 | if (off < 0 || off > off_max) { | |
47b9c3bf | 1008 | emit_a32_mov_i(tmp[0], off, ctx); |
c5eae692 | 1009 | emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); |
39c13c20 | 1010 | rd = tmp[0]; |
c5eae692 | 1011 | off = 0; |
39c13c20 SB |
1012 | } |
1013 | switch (sz) { | |
c5eae692 RK |
1014 | case BPF_B: |
1015 | /* Store a Byte */ | |
1016 | emit(ARM_STRB_I(src_lo, rd, off), ctx); | |
39c13c20 SB |
1017 | break; |
1018 | case BPF_H: | |
1019 | /* Store a HalfWord */ | |
c5eae692 | 1020 | emit(ARM_STRH_I(src_lo, rd, off), ctx); |
39c13c20 | 1021 | break; |
c5eae692 RK |
1022 | case BPF_W: |
1023 | /* Store a Word */ | |
1024 | emit(ARM_STR_I(src_lo, rd, off), ctx); | |
1025 | break; | |
1026 | case BPF_DW: | |
1027 | /* Store a Double Word */ | |
1028 | emit(ARM_STR_I(src_lo, rd, off), ctx); | |
1029 | emit(ARM_STR_I(src_hi, rd, off + 4), ctx); | |
39c13c20 SB |
1030 | break; |
1031 | } | |
1032 | } | |
1033 | ||
1034 | /* dst = *(size*)(src + off) */ | |
47b9c3bf | 1035 | static inline void emit_ldx_r(const s8 dst[], const s8 src, |
ec19e02b | 1036 | s32 off, struct jit_ctx *ctx, const u8 sz){ |
1c35ba12 | 1037 | const s8 *tmp = bpf2a32[TMP_REG_1]; |
47b9c3bf | 1038 | const s8 *rd = is_stacked(dst_lo) ? tmp : dst; |
1c35ba12 | 1039 | s8 rm = src; |
ec19e02b | 1040 | s32 off_max; |
39c13c20 | 1041 | |
ec19e02b RK |
1042 | if (sz == BPF_H) |
1043 | off_max = 0xff; | |
1044 | else | |
1045 | off_max = 0xfff; | |
1046 | ||
1047 | if (off < 0 || off > off_max) { | |
47b9c3bf | 1048 | emit_a32_mov_i(tmp[0], off, ctx); |
39c13c20 SB |
1049 | emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); |
1050 | rm = tmp[0]; | |
ec19e02b RK |
1051 | off = 0; |
1052 | } else if (rd[1] == rm) { | |
1053 | emit(ARM_MOV_R(tmp[0], rm), ctx); | |
1054 | rm = tmp[0]; | |
39c13c20 SB |
1055 | } |
1056 | switch (sz) { | |
ec19e02b RK |
1057 | case BPF_B: |
1058 | /* Load a Byte */ | |
1059 | emit(ARM_LDRB_I(rd[1], rm, off), ctx); | |
a6eccac5 | 1060 | emit_a32_mov_i(rd[0], 0, ctx); |
39c13c20 SB |
1061 | break; |
1062 | case BPF_H: | |
1063 | /* Load a HalfWord */ | |
ec19e02b | 1064 | emit(ARM_LDRH_I(rd[1], rm, off), ctx); |
a6eccac5 | 1065 | emit_a32_mov_i(rd[0], 0, ctx); |
39c13c20 | 1066 | break; |
ec19e02b RK |
1067 | case BPF_W: |
1068 | /* Load a Word */ | |
1069 | emit(ARM_LDR_I(rd[1], rm, off), ctx); | |
a6eccac5 | 1070 | emit_a32_mov_i(rd[0], 0, ctx); |
ec19e02b RK |
1071 | break; |
1072 | case BPF_DW: | |
1073 | /* Load a Double Word */ | |
1074 | emit(ARM_LDR_I(rd[1], rm, off), ctx); | |
1075 | emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); | |
39c13c20 SB |
1076 | break; |
1077 | } | |
a6eccac5 | 1078 | arm_bpf_put_reg64(dst, rd, ctx); |
39c13c20 SB |
1079 | } |
1080 | ||
1081 | /* Arithmatic Operation */ | |
1082 | static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, | |
b85062ac JW |
1083 | const u8 rn, struct jit_ctx *ctx, u8 op, |
1084 | bool is_jmp64) { | |
39c13c20 SB |
1085 | switch (op) { |
1086 | case BPF_JSET: | |
b85062ac JW |
1087 | if (is_jmp64) { |
1088 | emit(ARM_AND_R(ARM_IP, rt, rn), ctx); | |
1089 | emit(ARM_AND_R(ARM_LR, rd, rm), ctx); | |
1090 | emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); | |
1091 | } else { | |
1092 | emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx); | |
1093 | } | |
39c13c20 SB |
1094 | break; |
1095 | case BPF_JEQ: | |
1096 | case BPF_JNE: | |
1097 | case BPF_JGT: | |
1098 | case BPF_JGE: | |
1099 | case BPF_JLE: | |
1100 | case BPF_JLT: | |
b85062ac JW |
1101 | if (is_jmp64) { |
1102 | emit(ARM_CMP_R(rd, rm), ctx); | |
1103 | /* Only compare low halve if high halve are equal. */ | |
1104 | _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx); | |
1105 | } else { | |
1106 | emit(ARM_CMP_R(rt, rn), ctx); | |
1107 | } | |
39c13c20 SB |
1108 | break; |
1109 | case BPF_JSLE: | |
1110 | case BPF_JSGT: | |
1111 | emit(ARM_CMP_R(rn, rt), ctx); | |
b85062ac JW |
1112 | if (is_jmp64) |
1113 | emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx); | |
39c13c20 SB |
1114 | break; |
1115 | case BPF_JSLT: | |
1116 | case BPF_JSGE: | |
1117 | emit(ARM_CMP_R(rt, rn), ctx); | |
b85062ac JW |
1118 | if (is_jmp64) |
1119 | emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx); | |
39c13c20 SB |
1120 | break; |
1121 | } | |
1122 | } | |
1123 | ||
/*
 * Length, in emitted instructions, from the start of the tail-call sequence
 * to its "out" label. It is recorded the first time emit_bpf_tail_call()
 * runs and checked on every later run.
 */
static int out_offset = -1; /* initialized on the first pass of build_body() */
/*
 * Emit the code for a BPF tail call.
 *
 * The sequence bails out to the "out" label (falling through to the next
 * eBPF instruction) when the index is out of range, the tail-call counter
 * has exceeded MAX_TAIL_CALL_CNT, or the selected array slot is NULL;
 * otherwise it jumps into the target program just past its prologue.
 *
 * Returns 0 on success, or -1 if the emitted sequence length differs from
 * the length recorded in out_offset.
 */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{

	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
	const s8 *r2 = bpf2a32[BPF_REG_2];
	const s8 *r3 = bpf2a32[BPF_REG_3];
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *tmp2 = bpf2a32[TMP_REG_2];
	const s8 *tcc = bpf2a32[TCALL_CNT];
	const s8 *tc;
	const int idx0 = ctx->idx;
/* Instructions emitted so far by this call. */
#define cur_offset (ctx->idx - idx0)
/*
 * Branch distance from the current instruction to the "out" label.
 * NOTE(review): the "- 2" presumably accounts for the ARM PC reading two
 * instructions ahead of the branch -- confirm against ARM_B().
 */
#define jmp_offset (out_offset - (cur_offset) - 2)
	u32 lo, hi;
	s8 r_array, r_index;
	int off;

	/* if (index >= array->map.max_entries)
	 *	goto out;
	 */
	BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) >
		     ARM_INST_LDST__IMM12);
	off = offsetof(struct bpf_array, map.max_entries);
	r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx);
	/* index is 32-bit for arrays */
	r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx);
	/* array->map.max_entries */
	emit(ARM_LDR_I(tmp[1], r_array, off), ctx);
	/* index >= array->map.max_entries */
	emit(ARM_CMP_R(r_index, tmp[1]), ctx);
	_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);

	/* tmp2[0] = array, tmp2[1] = index */

	/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *	goto out;
	 * tail_call_cnt++;
	 */
	lo = (u32)MAX_TAIL_CALL_CNT;
	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
	tc = arm_bpf_get_reg64(tcc, tmp, ctx);
	/* 64-bit compare: high words first, low words only if those are equal */
	emit(ARM_CMP_I(tc[0], hi), ctx);
	_emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx);
	_emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
	/* 64-bit increment with carry into the high word */
	emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx);
	emit(ARM_ADC_I(tc[0], tc[0], 0), ctx);
	/*
	 * NOTE(review): writes back from tmp rather than tc; this presumably
	 * relies on tc aliasing tmp for the counter -- confirm.
	 */
	arm_bpf_put_reg64(tcc, tmp, ctx);

	/* prog = array->ptrs[index]
	 * if (prog == NULL)
	 *	goto out;
	 */
	BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0);
	off = imm8m(offsetof(struct bpf_array, ptrs));
	emit(ARM_ADD_I(tmp[1], r_array, off), ctx);
	/* index scaled by 4 (ASL #2) */
	emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx);
	emit(ARM_CMP_I(tmp[1], 0), ctx);
	_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);

	/* goto *(prog->bpf_func + prologue_size); */
	BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) >
		     ARM_INST_LDST__IMM12);
	off = offsetof(struct bpf_prog, bpf_func);
	emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx);
	emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
	emit_bx_r(tmp[1], ctx);

	/* out: */
	/* Record the sequence length on the first run, verify it afterwards. */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}
	return 0;
#undef cur_offset
#undef jmp_offset
}
1204 | ||
39c13c20 SB |
1205 | /* 0xabcd => 0xcdab */ |
1206 | static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) | |
ddecdfce | 1207 | { |
39c13c20 | 1208 | #if __LINUX_ARM_ARCH__ < 6 |
1c35ba12 | 1209 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; |
39c13c20 SB |
1210 | |
1211 | emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); | |
1212 | emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx); | |
1213 | emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); | |
1214 | emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx); | |
1215 | #else /* ARMv6+ */ | |
1216 | emit(ARM_REV16(rd, rn), ctx); | |
1217 | #endif | |
1218 | } | |
ddecdfce | 1219 | |
39c13c20 SB |
1220 | /* 0xabcdefgh => 0xghefcdab */ |
1221 | static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) | |
1222 | { | |
1223 | #if __LINUX_ARM_ARCH__ < 6 | |
1c35ba12 | 1224 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; |
39c13c20 SB |
1225 | |
1226 | emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); | |
1227 | emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx); | |
1228 | emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx); | |
1229 | ||
1230 | emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx); | |
1231 | emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx); | |
1232 | emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx); | |
1233 | emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); | |
1234 | emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx); | |
1235 | emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx); | |
1236 | emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx); | |
1237 | ||
1238 | #else /* ARMv6+ */ | |
1239 | emit(ARM_REV(rd, rn), ctx); | |
1240 | #endif | |
1241 | } | |
34805931 | 1242 | |
39c13c20 | 1243 | // push the scratch stack register on top of the stack |
96cced4e | 1244 | static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx) |
39c13c20 | 1245 | { |
1c35ba12 | 1246 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; |
96cced4e | 1247 | const s8 *rt; |
39c13c20 | 1248 | u16 reg_set = 0; |
ddecdfce | 1249 | |
96cced4e | 1250 | rt = arm_bpf_get_reg64(src, tmp2, ctx); |
39c13c20 | 1251 | |
96cced4e | 1252 | reg_set = (1 << rt[1]) | (1 << rt[0]); |
39c13c20 SB |
1253 | emit(ARM_PUSH(reg_set), ctx); |
1254 | } | |
1255 | ||
1256 | static void build_prologue(struct jit_ctx *ctx) | |
1257 | { | |
1c35ba12 RK |
1258 | const s8 r0 = bpf2a32[BPF_REG_0][1]; |
1259 | const s8 r2 = bpf2a32[BPF_REG_1][1]; | |
1260 | const s8 r3 = bpf2a32[BPF_REG_1][0]; | |
1261 | const s8 r4 = bpf2a32[BPF_REG_6][1]; | |
1262 | const s8 fplo = bpf2a32[BPF_REG_FP][1]; | |
1263 | const s8 fphi = bpf2a32[BPF_REG_FP][0]; | |
1264 | const s8 *tcc = bpf2a32[TCALL_CNT]; | |
39c13c20 | 1265 | |
39c13c20 | 1266 | /* Save callee saved registers. */ |
39c13c20 | 1267 | #ifdef CONFIG_FRAME_POINTER |
02088d9b RK |
1268 | u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC; |
1269 | emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); | |
39c13c20 SB |
1270 | emit(ARM_PUSH(reg_set), ctx); |
1271 | emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); | |
1272 | #else | |
02088d9b RK |
1273 | emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx); |
1274 | emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx); | |
39c13c20 SB |
1275 | #endif |
1276 | /* Save frame pointer for later */ | |
02088d9b | 1277 | emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx); |
39c13c20 SB |
1278 | |
1279 | ctx->stack_size = imm8m(STACK_SIZE); | |
1280 | ||
1281 | /* Set up function call stack */ | |
1282 | emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); | |
ddecdfce | 1283 | |
39c13c20 | 1284 | /* Set up BPF prog stack base register */ |
47b9c3bf RK |
1285 | emit_a32_mov_r(fplo, ARM_IP, ctx); |
1286 | emit_a32_mov_i(fphi, 0, ctx); | |
39c13c20 SB |
1287 | |
1288 | /* mov r4, 0 */ | |
1289 | emit(ARM_MOV_I(r4, 0), ctx); | |
1290 | ||
1291 | /* Move BPF_CTX to BPF_R1 */ | |
1292 | emit(ARM_MOV_R(r3, r4), ctx); | |
1293 | emit(ARM_MOV_R(r2, r0), ctx); | |
1294 | /* Initialize Tail Count */ | |
96cced4e RK |
1295 | emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx); |
1296 | emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx); | |
39c13c20 SB |
1297 | /* end of prologue */ |
1298 | } | |
1299 | ||
02088d9b | 1300 | /* restore callee saved registers. */ |
39c13c20 SB |
1301 | static void build_epilogue(struct jit_ctx *ctx) |
1302 | { | |
39c13c20 | 1303 | #ifdef CONFIG_FRAME_POINTER |
02088d9b RK |
1304 | /* When using frame pointers, some additional registers need to |
1305 | * be loaded. */ | |
1306 | u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP; | |
1307 | emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx); | |
39c13c20 SB |
1308 | emit(ARM_LDM(ARM_SP, reg_set), ctx); |
1309 | #else | |
39c13c20 | 1310 | /* Restore callee saved registers. */ |
02088d9b RK |
1311 | emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx); |
1312 | emit(ARM_POP(CALLEE_POP_MASK), ctx); | |
39c13c20 SB |
1313 | #endif |
1314 | } | |
1315 | ||
1316 | /* | |
1317 | * Convert an eBPF instruction to native instruction, i.e | |
1318 | * JITs an eBPF instruction. | |
1319 | * Returns : | |
1320 | * 0 - Successfully JITed an 8-byte eBPF instruction | |
1321 | * >0 - Successfully JITed a 16-byte eBPF instruction | |
1322 | * <0 - Failed to JIT. | |
1323 | */ | |
1324 | static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |
1325 | { | |
1326 | const u8 code = insn->code; | |
1c35ba12 RK |
1327 | const s8 *dst = bpf2a32[insn->dst_reg]; |
1328 | const s8 *src = bpf2a32[insn->src_reg]; | |
1329 | const s8 *tmp = bpf2a32[TMP_REG_1]; | |
1330 | const s8 *tmp2 = bpf2a32[TMP_REG_2]; | |
39c13c20 SB |
1331 | const s16 off = insn->off; |
1332 | const s32 imm = insn->imm; | |
1333 | const int i = insn - ctx->prog->insnsi; | |
1334 | const bool is64 = BPF_CLASS(code) == BPF_ALU64; | |
a6eccac5 RK |
1335 | const s8 *rd, *rs; |
1336 | s8 rd_lo, rt, rm, rn; | |
39c13c20 SB |
1337 | s32 jmp_offset; |
1338 | ||
1339 | #define check_imm(bits, imm) do { \ | |
2b589a7e WY |
1340 | if ((imm) >= (1 << ((bits) - 1)) || \ |
1341 | (imm) < -(1 << ((bits) - 1))) { \ | |
39c13c20 SB |
1342 | pr_info("[%2d] imm=%d(0x%x) out of range\n", \ |
1343 | i, imm, imm); \ | |
1344 | return -EINVAL; \ | |
1345 | } \ | |
1346 | } while (0) | |
1347 | #define check_imm24(imm) check_imm(24, imm) | |
1348 | ||
1349 | switch (code) { | |
1350 | /* ALU operations */ | |
1351 | ||
1352 | /* dst = src */ | |
1353 | case BPF_ALU | BPF_MOV | BPF_K: | |
1354 | case BPF_ALU | BPF_MOV | BPF_X: | |
1355 | case BPF_ALU64 | BPF_MOV | BPF_K: | |
1356 | case BPF_ALU64 | BPF_MOV | BPF_X: | |
1357 | switch (BPF_SRC(code)) { | |
1358 | case BPF_X: | |
47b9c3bf | 1359 | emit_a32_mov_r64(is64, dst, src, ctx); |
ddecdfce | 1360 | break; |
39c13c20 SB |
1361 | case BPF_K: |
1362 | /* Sign-extend immediate value to destination reg */ | |
f9ff5018 | 1363 | emit_a32_mov_se_i64(is64, dst, imm, ctx); |
ddecdfce | 1364 | break; |
39c13c20 SB |
1365 | } |
1366 | break; | |
1367 | /* dst = dst + src/imm */ | |
1368 | /* dst = dst - src/imm */ | |
1369 | /* dst = dst | src/imm */ | |
1370 | /* dst = dst & src/imm */ | |
1371 | /* dst = dst ^ src/imm */ | |
1372 | /* dst = dst * src/imm */ | |
1373 | /* dst = dst << src */ | |
1374 | /* dst = dst >> src */ | |
1375 | case BPF_ALU | BPF_ADD | BPF_K: | |
1376 | case BPF_ALU | BPF_ADD | BPF_X: | |
1377 | case BPF_ALU | BPF_SUB | BPF_K: | |
1378 | case BPF_ALU | BPF_SUB | BPF_X: | |
1379 | case BPF_ALU | BPF_OR | BPF_K: | |
1380 | case BPF_ALU | BPF_OR | BPF_X: | |
1381 | case BPF_ALU | BPF_AND | BPF_K: | |
1382 | case BPF_ALU | BPF_AND | BPF_X: | |
1383 | case BPF_ALU | BPF_XOR | BPF_K: | |
1384 | case BPF_ALU | BPF_XOR | BPF_X: | |
1385 | case BPF_ALU | BPF_MUL | BPF_K: | |
1386 | case BPF_ALU | BPF_MUL | BPF_X: | |
1387 | case BPF_ALU | BPF_LSH | BPF_X: | |
1388 | case BPF_ALU | BPF_RSH | BPF_X: | |
1389 | case BPF_ALU | BPF_ARSH | BPF_K: | |
1390 | case BPF_ALU | BPF_ARSH | BPF_X: | |
1391 | case BPF_ALU64 | BPF_ADD | BPF_K: | |
1392 | case BPF_ALU64 | BPF_ADD | BPF_X: | |
1393 | case BPF_ALU64 | BPF_SUB | BPF_K: | |
1394 | case BPF_ALU64 | BPF_SUB | BPF_X: | |
1395 | case BPF_ALU64 | BPF_OR | BPF_K: | |
1396 | case BPF_ALU64 | BPF_OR | BPF_X: | |
1397 | case BPF_ALU64 | BPF_AND | BPF_K: | |
1398 | case BPF_ALU64 | BPF_AND | BPF_X: | |
1399 | case BPF_ALU64 | BPF_XOR | BPF_K: | |
1400 | case BPF_ALU64 | BPF_XOR | BPF_X: | |
1401 | switch (BPF_SRC(code)) { | |
1402 | case BPF_X: | |
47b9c3bf | 1403 | emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code)); |
ddecdfce | 1404 | break; |
39c13c20 SB |
1405 | case BPF_K: |
1406 | /* Move immediate value to the temporary register | |
1407 | * and then do the ALU operation on the temporary | |
1408 | * register as this will sign-extend the immediate | |
1409 | * value into temporary reg and then it would be | |
1410 | * safe to do the operation on it. | |
6d715e30 | 1411 | */ |
f9ff5018 | 1412 | emit_a32_mov_se_i64(is64, tmp2, imm, ctx); |
47b9c3bf | 1413 | emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code)); |
ddecdfce | 1414 | break; |
39c13c20 SB |
1415 | } |
1416 | break; | |
1417 | /* dst = dst / src(imm) */ | |
1418 | /* dst = dst % src(imm) */ | |
1419 | case BPF_ALU | BPF_DIV | BPF_K: | |
1420 | case BPF_ALU | BPF_DIV | BPF_X: | |
1421 | case BPF_ALU | BPF_MOD | BPF_K: | |
1422 | case BPF_ALU | BPF_MOD | BPF_X: | |
a6eccac5 | 1423 | rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); |
39c13c20 SB |
1424 | switch (BPF_SRC(code)) { |
1425 | case BPF_X: | |
7a987025 | 1426 | rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx); |
ddecdfce | 1427 | break; |
39c13c20 SB |
1428 | case BPF_K: |
1429 | rt = tmp2[0]; | |
47b9c3bf RK |
1430 | emit_a32_mov_i(rt, imm, ctx); |
1431 | break; | |
1432 | default: | |
1433 | rt = src_lo; | |
ddecdfce | 1434 | break; |
39c13c20 | 1435 | } |
a6eccac5 RK |
1436 | emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); |
1437 | arm_bpf_put_reg32(dst_lo, rd_lo, ctx); | |
47b9c3bf | 1438 | emit_a32_mov_i(dst_hi, 0, ctx); |
39c13c20 SB |
1439 | break; |
1440 | case BPF_ALU64 | BPF_DIV | BPF_K: | |
1441 | case BPF_ALU64 | BPF_DIV | BPF_X: | |
1442 | case BPF_ALU64 | BPF_MOD | BPF_K: | |
1443 | case BPF_ALU64 | BPF_MOD | BPF_X: | |
1444 | goto notyet; | |
1445 | /* dst = dst >> imm */ | |
1446 | /* dst = dst << imm */ | |
1447 | case BPF_ALU | BPF_RSH | BPF_K: | |
1448 | case BPF_ALU | BPF_LSH | BPF_K: | |
1449 | if (unlikely(imm > 31)) | |
1450 | return -EINVAL; | |
1451 | if (imm) | |
47b9c3bf RK |
1452 | emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); |
1453 | emit_a32_mov_i(dst_hi, 0, ctx); | |
39c13c20 SB |
1454 | break; |
1455 | /* dst = dst << imm */ | |
1456 | case BPF_ALU64 | BPF_LSH | BPF_K: | |
1457 | if (unlikely(imm > 63)) | |
1458 | return -EINVAL; | |
47b9c3bf | 1459 | emit_a32_lsh_i64(dst, imm, ctx); |
39c13c20 SB |
1460 | break; |
1461 | /* dst = dst >> imm */ | |
1462 | case BPF_ALU64 | BPF_RSH | BPF_K: | |
1463 | if (unlikely(imm > 63)) | |
1464 | return -EINVAL; | |
47b9c3bf | 1465 | emit_a32_rsh_i64(dst, imm, ctx); |
39c13c20 SB |
1466 | break; |
1467 | /* dst = dst << src */ | |
1468 | case BPF_ALU64 | BPF_LSH | BPF_X: | |
47b9c3bf | 1469 | emit_a32_lsh_r64(dst, src, ctx); |
39c13c20 SB |
1470 | break; |
1471 | /* dst = dst >> src */ | |
1472 | case BPF_ALU64 | BPF_RSH | BPF_X: | |
47b9c3bf | 1473 | emit_a32_rsh_r64(dst, src, ctx); |
39c13c20 SB |
1474 | break; |
1475 | /* dst = dst >> src (signed) */ | |
1476 | case BPF_ALU64 | BPF_ARSH | BPF_X: | |
47b9c3bf | 1477 | emit_a32_arsh_r64(dst, src, ctx); |
39c13c20 SB |
1478 | break; |
1479 | /* dst = dst >> imm (signed) */ | |
1480 | case BPF_ALU64 | BPF_ARSH | BPF_K: | |
1481 | if (unlikely(imm > 63)) | |
1482 | return -EINVAL; | |
47b9c3bf | 1483 | emit_a32_arsh_i64(dst, imm, ctx); |
39c13c20 SB |
1484 | break; |
1485 | /* dst = ~dst */ | |
1486 | case BPF_ALU | BPF_NEG: | |
47b9c3bf RK |
1487 | emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); |
1488 | emit_a32_mov_i(dst_hi, 0, ctx); | |
39c13c20 SB |
1489 | break; |
1490 | /* dst = ~dst (64 bit) */ | |
1491 | case BPF_ALU64 | BPF_NEG: | |
47b9c3bf | 1492 | emit_a32_neg64(dst, ctx); |
39c13c20 SB |
1493 | break; |
1494 | /* dst = dst * src/imm */ | |
1495 | case BPF_ALU64 | BPF_MUL | BPF_X: | |
1496 | case BPF_ALU64 | BPF_MUL | BPF_K: | |
1497 | switch (BPF_SRC(code)) { | |
1498 | case BPF_X: | |
47b9c3bf | 1499 | emit_a32_mul_r64(dst, src, ctx); |
ddecdfce | 1500 | break; |
39c13c20 SB |
1501 | case BPF_K: |
1502 | /* Move immediate value to the temporary register | |
1503 | * and then do the multiplication on it as this | |
1504 | * will sign-extend the immediate value into temp | |
1505 | * reg then it would be safe to do the operation | |
1506 | * on it. | |
ddecdfce | 1507 | */ |
f9ff5018 | 1508 | emit_a32_mov_se_i64(is64, tmp2, imm, ctx); |
47b9c3bf | 1509 | emit_a32_mul_r64(dst, tmp2, ctx); |
ddecdfce | 1510 | break; |
39c13c20 SB |
1511 | } |
1512 | break; | |
1513 | /* dst = htole(dst) */ | |
1514 | /* dst = htobe(dst) */ | |
1515 | case BPF_ALU | BPF_END | BPF_FROM_LE: | |
1516 | case BPF_ALU | BPF_END | BPF_FROM_BE: | |
a6eccac5 | 1517 | rd = arm_bpf_get_reg64(dst, tmp, ctx); |
39c13c20 SB |
1518 | if (BPF_SRC(code) == BPF_FROM_LE) |
1519 | goto emit_bswap_uxt; | |
1520 | switch (imm) { | |
1521 | case 16: | |
a6eccac5 | 1522 | emit_rev16(rd[1], rd[1], ctx); |
39c13c20 SB |
1523 | goto emit_bswap_uxt; |
1524 | case 32: | |
a6eccac5 | 1525 | emit_rev32(rd[1], rd[1], ctx); |
39c13c20 SB |
1526 | goto emit_bswap_uxt; |
1527 | case 64: | |
a6eccac5 RK |
1528 | emit_rev32(ARM_LR, rd[1], ctx); |
1529 | emit_rev32(rd[1], rd[0], ctx); | |
1530 | emit(ARM_MOV_R(rd[0], ARM_LR), ctx); | |
4560cdff | 1531 | break; |
39c13c20 SB |
1532 | } |
1533 | goto exit; | |
1534 | emit_bswap_uxt: | |
1535 | switch (imm) { | |
1536 | case 16: | |
1537 | /* zero-extend 16 bits into 64 bits */ | |
1538 | #if __LINUX_ARM_ARCH__ < 6 | |
47b9c3bf | 1539 | emit_a32_mov_i(tmp2[1], 0xffff, ctx); |
a6eccac5 | 1540 | emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx); |
39c13c20 | 1541 | #else /* ARMv6+ */ |
a6eccac5 | 1542 | emit(ARM_UXTH(rd[1], rd[1]), ctx); |
39c13c20 | 1543 | #endif |
a6eccac5 | 1544 | emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); |
4560cdff | 1545 | break; |
39c13c20 SB |
1546 | case 32: |
1547 | /* zero-extend 32 bits into 64 bits */ | |
a6eccac5 | 1548 | emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); |
ddecdfce | 1549 | break; |
39c13c20 SB |
1550 | case 64: |
1551 | /* nop */ | |
ddecdfce | 1552 | break; |
39c13c20 SB |
1553 | } |
1554 | exit: | |
a6eccac5 | 1555 | arm_bpf_put_reg64(dst, rd, ctx); |
39c13c20 SB |
1556 | break; |
1557 | /* dst = imm64 */ | |
1558 | case BPF_LD | BPF_IMM | BPF_DW: | |
1559 | { | |
f9ff5018 | 1560 | u64 val = (u32)imm | (u64)insn[1].imm << 32; |
39c13c20 | 1561 | |
f9ff5018 | 1562 | emit_a32_mov_i64(dst, val, ctx); |
39c13c20 SB |
1563 | |
1564 | return 1; | |
1565 | } | |
1566 | /* LDX: dst = *(size *)(src + off) */ | |
1567 | case BPF_LDX | BPF_MEM | BPF_W: | |
1568 | case BPF_LDX | BPF_MEM | BPF_H: | |
1569 | case BPF_LDX | BPF_MEM | BPF_B: | |
1570 | case BPF_LDX | BPF_MEM | BPF_DW: | |
7a987025 | 1571 | rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); |
47b9c3bf | 1572 | emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); |
39c13c20 | 1573 | break; |
39c13c20 SB |
1574 | /* ST: *(size *)(dst + off) = imm */ |
1575 | case BPF_ST | BPF_MEM | BPF_W: | |
1576 | case BPF_ST | BPF_MEM | BPF_H: | |
1577 | case BPF_ST | BPF_MEM | BPF_B: | |
1578 | case BPF_ST | BPF_MEM | BPF_DW: | |
1579 | switch (BPF_SIZE(code)) { | |
1580 | case BPF_DW: | |
1581 | /* Sign-extend immediate value into temp reg */ | |
f9ff5018 | 1582 | emit_a32_mov_se_i64(true, tmp2, imm, ctx); |
ddecdfce | 1583 | break; |
39c13c20 SB |
1584 | case BPF_W: |
1585 | case BPF_H: | |
1586 | case BPF_B: | |
47b9c3bf | 1587 | emit_a32_mov_i(tmp2[1], imm, ctx); |
ddecdfce | 1588 | break; |
39c13c20 | 1589 | } |
c5eae692 | 1590 | emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code)); |
39c13c20 SB |
1591 | break; |
1592 | /* STX XADD: lock *(u32 *)(dst + off) += src */ | |
1593 | case BPF_STX | BPF_XADD | BPF_W: | |
1594 | /* STX XADD: lock *(u64 *)(dst + off) += src */ | |
1595 | case BPF_STX | BPF_XADD | BPF_DW: | |
1596 | goto notyet; | |
1597 | /* STX: *(size *)(dst + off) = src */ | |
1598 | case BPF_STX | BPF_MEM | BPF_W: | |
1599 | case BPF_STX | BPF_MEM | BPF_H: | |
1600 | case BPF_STX | BPF_MEM | BPF_B: | |
1601 | case BPF_STX | BPF_MEM | BPF_DW: | |
a6eccac5 | 1602 | rs = arm_bpf_get_reg64(src, tmp2, ctx); |
c5eae692 | 1603 | emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code)); |
39c13c20 | 1604 | break; |
39c13c20 SB |
1605 | /* PC += off if dst == src */ |
1606 | /* PC += off if dst > src */ | |
1607 | /* PC += off if dst >= src */ | |
1608 | /* PC += off if dst < src */ | |
1609 | /* PC += off if dst <= src */ | |
1610 | /* PC += off if dst != src */ | |
1611 | /* PC += off if dst > src (signed) */ | |
1612 | /* PC += off if dst >= src (signed) */ | |
1613 | /* PC += off if dst < src (signed) */ | |
1614 | /* PC += off if dst <= src (signed) */ | |
1615 | /* PC += off if dst & src */ | |
1616 | case BPF_JMP | BPF_JEQ | BPF_X: | |
1617 | case BPF_JMP | BPF_JGT | BPF_X: | |
1618 | case BPF_JMP | BPF_JGE | BPF_X: | |
1619 | case BPF_JMP | BPF_JNE | BPF_X: | |
1620 | case BPF_JMP | BPF_JSGT | BPF_X: | |
1621 | case BPF_JMP | BPF_JSGE | BPF_X: | |
1622 | case BPF_JMP | BPF_JSET | BPF_X: | |
1623 | case BPF_JMP | BPF_JLE | BPF_X: | |
1624 | case BPF_JMP | BPF_JLT | BPF_X: | |
1625 | case BPF_JMP | BPF_JSLT | BPF_X: | |
1626 | case BPF_JMP | BPF_JSLE | BPF_X: | |
b85062ac JW |
1627 | case BPF_JMP32 | BPF_JEQ | BPF_X: |
1628 | case BPF_JMP32 | BPF_JGT | BPF_X: | |
1629 | case BPF_JMP32 | BPF_JGE | BPF_X: | |
1630 | case BPF_JMP32 | BPF_JNE | BPF_X: | |
1631 | case BPF_JMP32 | BPF_JSGT | BPF_X: | |
1632 | case BPF_JMP32 | BPF_JSGE | BPF_X: | |
1633 | case BPF_JMP32 | BPF_JSET | BPF_X: | |
1634 | case BPF_JMP32 | BPF_JLE | BPF_X: | |
1635 | case BPF_JMP32 | BPF_JLT | BPF_X: | |
1636 | case BPF_JMP32 | BPF_JSLT | BPF_X: | |
1637 | case BPF_JMP32 | BPF_JSLE | BPF_X: | |
39c13c20 | 1638 | /* Setup source registers */ |
7a987025 RK |
1639 | rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); |
1640 | rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); | |
39c13c20 SB |
1641 | goto go_jmp; |
1642 | /* PC += off if dst == imm */ | |
1643 | /* PC += off if dst > imm */ | |
1644 | /* PC += off if dst >= imm */ | |
1645 | /* PC += off if dst < imm */ | |
1646 | /* PC += off if dst <= imm */ | |
1647 | /* PC += off if dst != imm */ | |
1648 | /* PC += off if dst > imm (signed) */ | |
1649 | /* PC += off if dst >= imm (signed) */ | |
1650 | /* PC += off if dst < imm (signed) */ | |
1651 | /* PC += off if dst <= imm (signed) */ | |
1652 | /* PC += off if dst & imm */ | |
1653 | case BPF_JMP | BPF_JEQ | BPF_K: | |
1654 | case BPF_JMP | BPF_JGT | BPF_K: | |
1655 | case BPF_JMP | BPF_JGE | BPF_K: | |
1656 | case BPF_JMP | BPF_JNE | BPF_K: | |
1657 | case BPF_JMP | BPF_JSGT | BPF_K: | |
1658 | case BPF_JMP | BPF_JSGE | BPF_K: | |
1659 | case BPF_JMP | BPF_JSET | BPF_K: | |
1660 | case BPF_JMP | BPF_JLT | BPF_K: | |
1661 | case BPF_JMP | BPF_JLE | BPF_K: | |
1662 | case BPF_JMP | BPF_JSLT | BPF_K: | |
1663 | case BPF_JMP | BPF_JSLE | BPF_K: | |
b85062ac JW |
1664 | case BPF_JMP32 | BPF_JEQ | BPF_K: |
1665 | case BPF_JMP32 | BPF_JGT | BPF_K: | |
1666 | case BPF_JMP32 | BPF_JGE | BPF_K: | |
1667 | case BPF_JMP32 | BPF_JNE | BPF_K: | |
1668 | case BPF_JMP32 | BPF_JSGT | BPF_K: | |
1669 | case BPF_JMP32 | BPF_JSGE | BPF_K: | |
1670 | case BPF_JMP32 | BPF_JSET | BPF_K: | |
1671 | case BPF_JMP32 | BPF_JLT | BPF_K: | |
1672 | case BPF_JMP32 | BPF_JLE | BPF_K: | |
1673 | case BPF_JMP32 | BPF_JSLT | BPF_K: | |
1674 | case BPF_JMP32 | BPF_JSLE | BPF_K: | |
39c13c20 | 1675 | if (off == 0) |
ddecdfce | 1676 | break; |
39c13c20 SB |
1677 | rm = tmp2[0]; |
1678 | rn = tmp2[1]; | |
1679 | /* Sign-extend immediate value */ | |
f9ff5018 | 1680 | emit_a32_mov_se_i64(true, tmp2, imm, ctx); |
39c13c20 SB |
1681 | go_jmp: |
1682 | /* Setup destination register */ | |
a6eccac5 | 1683 | rd = arm_bpf_get_reg64(dst, tmp, ctx); |
39c13c20 SB |
1684 | |
1685 | /* Check for the condition */ | |
b85062ac JW |
1686 | emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code), |
1687 | BPF_CLASS(code) == BPF_JMP); | |
39c13c20 SB |
1688 | |
1689 | /* Setup JUMP instruction */ | |
1690 | jmp_offset = bpf2a32_offset(i+off, i, ctx); | |
1691 | switch (BPF_OP(code)) { | |
1692 | case BPF_JNE: | |
1693 | case BPF_JSET: | |
1694 | _emit(ARM_COND_NE, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1695 | break; |
39c13c20 SB |
1696 | case BPF_JEQ: |
1697 | _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1698 | break; |
39c13c20 SB |
1699 | case BPF_JGT: |
1700 | _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1701 | break; |
39c13c20 SB |
1702 | case BPF_JGE: |
1703 | _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1704 | break; |
39c13c20 SB |
1705 | case BPF_JSGT: |
1706 | _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1707 | break; |
39c13c20 SB |
1708 | case BPF_JSGE: |
1709 | _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1710 | break; |
39c13c20 SB |
1711 | case BPF_JLE: |
1712 | _emit(ARM_COND_LS, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1713 | break; |
39c13c20 SB |
1714 | case BPF_JLT: |
1715 | _emit(ARM_COND_CC, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1716 | break; |
39c13c20 SB |
1717 | case BPF_JSLT: |
1718 | _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1719 | break; |
39c13c20 SB |
1720 | case BPF_JSLE: |
1721 | _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); | |
bf0098f2 | 1722 | break; |
39c13c20 SB |
1723 | } |
1724 | break; | |
1725 | /* JMP OFF */ | |
1726 | case BPF_JMP | BPF_JA: | |
1727 | { | |
1728 | if (off == 0) | |
1447f93f | 1729 | break; |
39c13c20 SB |
1730 | jmp_offset = bpf2a32_offset(i+off, i, ctx); |
1731 | check_imm24(jmp_offset); | |
1732 | emit(ARM_B(jmp_offset), ctx); | |
1733 | break; | |
1734 | } | |
1735 | /* tail call */ | |
1736 | case BPF_JMP | BPF_TAIL_CALL: | |
1737 | if (emit_bpf_tail_call(ctx)) | |
1738 | return -EFAULT; | |
1739 | break; | |
1740 | /* function call */ | |
1741 | case BPF_JMP | BPF_CALL: | |
1742 | { | |
1c35ba12 RK |
1743 | const s8 *r0 = bpf2a32[BPF_REG_0]; |
1744 | const s8 *r1 = bpf2a32[BPF_REG_1]; | |
1745 | const s8 *r2 = bpf2a32[BPF_REG_2]; | |
1746 | const s8 *r3 = bpf2a32[BPF_REG_3]; | |
1747 | const s8 *r4 = bpf2a32[BPF_REG_4]; | |
1748 | const s8 *r5 = bpf2a32[BPF_REG_5]; | |
39c13c20 SB |
1749 | const u32 func = (u32)__bpf_call_base + (u32)imm; |
1750 | ||
47b9c3bf RK |
1751 | emit_a32_mov_r64(true, r0, r1, ctx); |
1752 | emit_a32_mov_r64(true, r1, r2, ctx); | |
96cced4e RK |
1753 | emit_push_r64(r5, ctx); |
1754 | emit_push_r64(r4, ctx); | |
1755 | emit_push_r64(r3, ctx); | |
39c13c20 | 1756 | |
47b9c3bf | 1757 | emit_a32_mov_i(tmp[1], func, ctx); |
39c13c20 SB |
1758 | emit_blx_r(tmp[1], ctx); |
1759 | ||
1760 | emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean | |
1761 | break; | |
1762 | } | |
1763 | /* function return */ | |
1764 | case BPF_JMP | BPF_EXIT: | |
1765 | /* Optimization: when last instruction is EXIT | |
1766 | * simply fallthrough to epilogue. | |
1767 | */ | |
1768 | if (i == ctx->prog->len - 1) | |
ddecdfce | 1769 | break; |
39c13c20 SB |
1770 | jmp_offset = epilogue_offset(ctx); |
1771 | check_imm24(jmp_offset); | |
1772 | emit(ARM_B(jmp_offset), ctx); | |
1773 | break; | |
1774 | notyet: | |
1775 | pr_info_once("*** NOT YET: opcode %02x ***\n", code); | |
1776 | return -EFAULT; | |
1777 | default: | |
1778 | pr_err_once("unknown opcode %02x\n", code); | |
1779 | return -EINVAL; | |
1780 | } | |
303249ab | 1781 | |
39c13c20 SB |
1782 | if (ctx->flags & FLAG_IMM_OVERFLOW) |
1783 | /* | |
1784 | * this instruction generated an overflow when | |
1785 | * trying to access the literal pool, so | |
1786 | * delegate this filter to the kernel interpreter. | |
1787 | */ | |
1788 | return -1; | |
1789 | return 0; | |
1790 | } | |
1791 | ||
1792 | static int build_body(struct jit_ctx *ctx) | |
1793 | { | |
1794 | const struct bpf_prog *prog = ctx->prog; | |
1795 | unsigned int i; | |
1796 | ||
1797 | for (i = 0; i < prog->len; i++) { | |
1798 | const struct bpf_insn *insn = &(prog->insnsi[i]); | |
1799 | int ret; | |
1800 | ||
1801 | ret = build_insn(insn, ctx); | |
1802 | ||
1803 | /* It's used with loading the 64 bit immediate value. */ | |
1804 | if (ret > 0) { | |
1805 | i++; | |
1806 | if (ctx->target == NULL) | |
1807 | ctx->offsets[i] = ctx->idx; | |
1808 | continue; | |
ddecdfce | 1809 | } |
0b59d880 | 1810 | |
39c13c20 SB |
1811 | if (ctx->target == NULL) |
1812 | ctx->offsets[i] = ctx->idx; | |
1813 | ||
1814 | /* If unsuccesfull, return with error code */ | |
1815 | if (ret) | |
1816 | return ret; | |
ddecdfce | 1817 | } |
39c13c20 SB |
1818 | return 0; |
1819 | } | |
ddecdfce | 1820 | |
39c13c20 SB |
1821 | static int validate_code(struct jit_ctx *ctx) |
1822 | { | |
1823 | int i; | |
1824 | ||
1825 | for (i = 0; i < ctx->idx; i++) { | |
1826 | if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF)) | |
1827 | return -1; | |
1828 | } | |
ddecdfce MG |
1829 | |
1830 | return 0; | |
1831 | } | |
1832 | ||
39c13c20 SB |
/*
 * Intentionally empty: this JIT supports internal BPF, so there is
 * nothing to do in this hook.
 */
void bpf_jit_compile(struct bpf_prog *prog)
{
}
ddecdfce | 1837 | |
39c13c20 | 1838 | struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) |
ddecdfce | 1839 | { |
39c13c20 | 1840 | struct bpf_prog *tmp, *orig_prog = prog; |
55309dd3 | 1841 | struct bpf_binary_header *header; |
39c13c20 | 1842 | bool tmp_blinded = false; |
ddecdfce | 1843 | struct jit_ctx ctx; |
39c13c20 SB |
1844 | unsigned int tmp_idx; |
1845 | unsigned int image_size; | |
1846 | u8 *image_ptr; | |
ddecdfce | 1847 | |
39c13c20 SB |
1848 | /* If BPF JIT was not enabled then we must fall back to |
1849 | * the interpreter. | |
1850 | */ | |
60b58afc | 1851 | if (!prog->jit_requested) |
39c13c20 | 1852 | return orig_prog; |
ddecdfce | 1853 | |
39c13c20 SB |
1854 | /* If constant blinding was enabled and we failed during blinding |
1855 | * then we must fall back to the interpreter. Otherwise, we save | |
1856 | * the new JITed code. | |
1857 | */ | |
1858 | tmp = bpf_jit_blind_constants(prog); | |
ddecdfce | 1859 | |
39c13c20 SB |
1860 | if (IS_ERR(tmp)) |
1861 | return orig_prog; | |
1862 | if (tmp != prog) { | |
1863 | tmp_blinded = true; | |
1864 | prog = tmp; | |
1865 | } | |
ddecdfce | 1866 | |
39c13c20 SB |
1867 | memset(&ctx, 0, sizeof(ctx)); |
1868 | ctx.prog = prog; | |
8c9602d3 | 1869 | ctx.cpu_architecture = cpu_architecture(); |
39c13c20 SB |
1870 | |
1871 | /* Not able to allocate memory for offsets[] , then | |
1872 | * we must fall back to the interpreter | |
1873 | */ | |
1874 | ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL); | |
1875 | if (ctx.offsets == NULL) { | |
1876 | prog = orig_prog; | |
ddecdfce | 1877 | goto out; |
39c13c20 SB |
1878 | } |
1879 | ||
1880 | /* 1) fake pass to find in the length of the JITed code, | |
1881 | * to compute ctx->offsets and other context variables | |
1882 | * needed to compute final JITed code. | |
1883 | * Also, calculate random starting pointer/start of JITed code | |
1884 | * which is prefixed by random number of fault instructions. | |
1885 | * | |
1886 | * If the first pass fails then there is no chance of it | |
1887 | * being successful in the second pass, so just fall back | |
1888 | * to the interpreter. | |
1889 | */ | |
1890 | if (build_body(&ctx)) { | |
1891 | prog = orig_prog; | |
1892 | goto out_off; | |
1893 | } | |
ddecdfce MG |
1894 | |
1895 | tmp_idx = ctx.idx; | |
1896 | build_prologue(&ctx); | |
1897 | ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; | |
1898 | ||
39c13c20 SB |
1899 | ctx.epilogue_offset = ctx.idx; |
1900 | ||
ddecdfce MG |
1901 | #if __LINUX_ARM_ARCH__ < 7 |
1902 | tmp_idx = ctx.idx; | |
1903 | build_epilogue(&ctx); | |
1904 | ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4; | |
1905 | ||
1906 | ctx.idx += ctx.imm_count; | |
1907 | if (ctx.imm_count) { | |
39c13c20 SB |
1908 | ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL); |
1909 | if (ctx.imms == NULL) { | |
1910 | prog = orig_prog; | |
1911 | goto out_off; | |
1912 | } | |
ddecdfce MG |
1913 | } |
1914 | #else | |
39c13c20 | 1915 | /* there's nothing about the epilogue on ARMv7 */ |
ddecdfce MG |
1916 | build_epilogue(&ctx); |
1917 | #endif | |
39c13c20 SB |
1918 | /* Now we can get the actual image size of the JITed arm code. |
1919 | * Currently, we are not considering the THUMB-2 instructions | |
1920 | * for jit, although it can decrease the size of the image. | |
1921 | * | |
1922 | * As each arm instruction is of length 32bit, we are translating | |
1923 | * number of JITed intructions into the size required to store these | |
1924 | * JITed code. | |
1925 | */ | |
1926 | image_size = sizeof(u32) * ctx.idx; | |
ddecdfce | 1927 | |
39c13c20 SB |
1928 | /* Now we know the size of the structure to make */ |
1929 | header = bpf_jit_binary_alloc(image_size, &image_ptr, | |
1930 | sizeof(u32), jit_fill_hole); | |
1931 | /* Not able to allocate memory for the structure then | |
1932 | * we must fall back to the interpretation | |
1933 | */ | |
1934 | if (header == NULL) { | |
1935 | prog = orig_prog; | |
1936 | goto out_imms; | |
1937 | } | |
1938 | ||
1939 | /* 2.) Actual pass to generate final JIT code */ | |
1940 | ctx.target = (u32 *) image_ptr; | |
ddecdfce | 1941 | ctx.idx = 0; |
55309dd3 | 1942 | |
ddecdfce | 1943 | build_prologue(&ctx); |
39c13c20 SB |
1944 | |
1945 | /* If building the body of the JITed code fails somehow, | |
1946 | * we fall back to the interpretation. | |
1947 | */ | |
0b59d880 | 1948 | if (build_body(&ctx) < 0) { |
39c13c20 | 1949 | image_ptr = NULL; |
0b59d880 | 1950 | bpf_jit_binary_free(header); |
39c13c20 SB |
1951 | prog = orig_prog; |
1952 | goto out_imms; | |
0b59d880 | 1953 | } |
ddecdfce MG |
1954 | build_epilogue(&ctx); |
1955 | ||
39c13c20 SB |
1956 | /* 3.) Extra pass to validate JITed Code */ |
1957 | if (validate_code(&ctx)) { | |
1958 | image_ptr = NULL; | |
1959 | bpf_jit_binary_free(header); | |
1960 | prog = orig_prog; | |
1961 | goto out_imms; | |
1962 | } | |
ebaef649 | 1963 | flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx)); |
ddecdfce | 1964 | |
ddecdfce | 1965 | if (bpf_jit_enable > 1) |
79617801 | 1966 | /* there are 2 passes here */ |
39c13c20 | 1967 | bpf_jit_dump(prog->len, image_size, 2, ctx.target); |
ddecdfce | 1968 | |
18d405af | 1969 | bpf_jit_binary_lock_ro(header); |
39c13c20 SB |
1970 | prog->bpf_func = (void *)ctx.target; |
1971 | prog->jited = 1; | |
1972 | prog->jited_len = image_size; | |
1973 | ||
1974 | out_imms: | |
1975 | #if __LINUX_ARM_ARCH__ < 7 | |
1976 | if (ctx.imm_count) | |
1977 | kfree(ctx.imms); | |
1978 | #endif | |
1979 | out_off: | |
ddecdfce | 1980 | kfree(ctx.offsets); |
39c13c20 SB |
1981 | out: |
1982 | if (tmp_blinded) | |
1983 | bpf_jit_prog_release_other(prog, prog == orig_prog ? | |
1984 | tmp : orig_prog); | |
1985 | return prog; | |
ddecdfce MG |
1986 | } |
1987 |