]>
Commit | Line | Data |
---|---|---|
ddecdfce | 1 | /* |
39c13c20 | 2 | * Just-In-Time compiler for eBPF filters on 32bit ARM |
ddecdfce | 3 | * |
39c13c20 | 4 | * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> |
ddecdfce MG |
5 | * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify it | |
8 | * under the terms of the GNU General Public License as published by the | |
9 | * Free Software Foundation; version 2 of the License. | |
10 | */ | |
11 | ||
39c13c20 | 12 | #include <linux/bpf.h> |
ddecdfce MG |
13 | #include <linux/bitops.h> |
14 | #include <linux/compiler.h> | |
15 | #include <linux/errno.h> | |
16 | #include <linux/filter.h> | |
ddecdfce MG |
17 | #include <linux/netdevice.h> |
18 | #include <linux/string.h> | |
19 | #include <linux/slab.h> | |
bf0098f2 | 20 | #include <linux/if_vlan.h> |
e8b56d55 | 21 | |
ddecdfce MG |
22 | #include <asm/cacheflush.h> |
23 | #include <asm/hwcap.h> | |
3460743e | 24 | #include <asm/opcodes.h> |
ddecdfce MG |
25 | |
26 | #include "bpf_jit_32.h" | |
27 | ||
70ec3a6c | 28 | /* |
0005e55a | 29 | * eBPF prog stack layout: |
70ec3a6c RK |
30 | * |
31 | * high | |
0005e55a RK |
32 | * original ARM_SP => +-----+ |
33 | * | | callee saved registers | |
34 | * +-----+ <= (BPF_FP + SCRATCH_SIZE) | |
70ec3a6c | 35 | * | ... | eBPF JIT scratch space |
0005e55a RK |
36 | * eBPF fp register => +-----+ |
37 | * (BPF_FP) | ... | eBPF prog stack | |
70ec3a6c RK |
38 | * +-----+ |
39 | * |RSVD | JIT scratchpad | |
0005e55a | 40 | * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) |
70ec3a6c RK |
41 | * | | |
42 | * | ... | Function call stack | |
43 | * | | | |
44 | * +-----+ | |
45 | * low | |
0005e55a RK |
46 | * |
47 | * The callee saved registers depends on whether frame pointers are enabled. | |
48 | * With frame pointers (to be compliant with the ABI): | |
49 | * | |
50 | * high | |
51 | * original ARM_SP => +------------------+ \ | |
52 | * | pc | | | |
53 | * current ARM_FP => +------------------+ } callee saved registers | |
54 | * |r4-r8,r10,fp,ip,lr| | | |
55 | * +------------------+ / | |
56 | * low | |
57 | * | |
58 | * Without frame pointers: | |
59 | * | |
60 | * high | |
61 | * original ARM_SP => +------------------+ | |
02088d9b RK |
62 | * | r4-r8,r10,fp,lr | callee saved registers |
63 | * current ARM_FP => +------------------+ | |
0005e55a | 64 | * low |
02088d9b RK |
65 | * |
66 | * When popping registers off the stack at the end of a BPF function, we | |
67 | * reference them via the current ARM_FP register. | |
70ec3a6c | 68 | */ |
02088d9b RK |
69 | #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ |
70 | 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \ | |
71 | 1 << ARM_FP) | |
72 | #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) | |
73 | #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) | |
70ec3a6c | 74 | |
39c13c20 SB |
75 | #define STACK_OFFSET(k) (k) |
76 | #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ | |
77 | #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ | |
78 | #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ | |
79 | ||
39c13c20 SB |
80 | #define FLAG_IMM_OVERFLOW (1 << 0) |
81 | ||
/*
 * Map eBPF registers to ARM 32bit registers or stack scratch space.
 *
 * 1. First argument is passed using the arm 32bit registers and rest of the
 * arguments are passed on stack scratch space.
 * 2. First callee-saved argument is mapped to arm 32 bit registers and rest
 * arguments are mapped to scratch space on stack.
 * 3. We need two 64 bit temp registers to do complex operations on eBPF
 * registers.
 *
 * As the eBPF registers are all 64 bit registers and arm has only 32 bit
 * registers, we have to map each eBPF registers with two arm 32 bit regs or
 * scratch memory space and we have to build eBPF 64 bit register from those.
 *
 * Each entry is {hi, lo}: either two ARM core registers or two 4-byte
 * STACK_OFFSET scratch slots (disambiguated at use sites via is_on_stack()).
 */
static const u8 bpf2a32[][2] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {ARM_R1, ARM_R0},
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = {ARM_R3, ARM_R2},
	/* Stored on stack scratch space */
	[BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)},
	[BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)},
	[BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)},
	[BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)},
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = {ARM_R5, ARM_R4},
	/* Stored on stack scratch space */
	[BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)},
	[BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)},
	[BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)},
	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)},
	/* Temporary Register for internal BPF JIT, can be used
	 * for constant blindings and others.
	 */
	[TMP_REG_1] = {ARM_R7, ARM_R6},
	[TMP_REG_2] = {ARM_R10, ARM_R8},
	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)},
	/* temporary register for blinding constants.
	 * Stored on stack scratch space.
	 */
	[BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)},
};
ddecdfce | 127 | |
39c13c20 SB |
128 | #define dst_lo dst[1] |
129 | #define dst_hi dst[0] | |
130 | #define src_lo src[1] | |
131 | #define src_hi src[0] | |
ddecdfce | 132 | |
/*
 * JIT Context:
 *
 * prog			: bpf_prog
 * idx			: index of current last JITed instruction.
 * prologue_bytes	: bytes used in prologue.
 * epilogue_offset	: offset of epilogue starting.
 * offsets		: array of eBPF instruction offsets in
 *			  JITed code.
 * target		: final JITed code.
 * epilogue_bytes	: no of bytes used in epilogue.
 * imm_count		: no of immediate counts used for global
 *			  variables.
 * imms			: array of global variable addresses.
 */

struct jit_ctx {
	const struct bpf_prog *prog;
	unsigned int idx;
	unsigned int prologue_bytes;
	unsigned int epilogue_offset;
	u32 flags;		/* e.g. FLAG_IMM_OVERFLOW */
	u32 *offsets;
	u32 *target;		/* NULL on the first (sizing) pass */
	u32 stack_size;
#if __LINUX_ARM_ARCH__ < 7
	/* pre-v7 cores cannot build arbitrary 32-bit immediates with
	 * movw/movt, so large constants go to a literal pool after the
	 * epilogue (see imm_offset()).
	 */
	u16 epilogue_bytes;
	u16 imm_count;
	u32 *imms;
#endif
};
164 | ||
ddecdfce | 165 | /* |
4560cdff | 166 | * Wrappers which handle both OABI and EABI and assures Thumb2 interworking |
ddecdfce MG |
167 | * (where the assembly routines like __aeabi_uidiv could cause problems). |
168 | */ | |
39c13c20 | 169 | static u32 jit_udiv32(u32 dividend, u32 divisor) |
ddecdfce MG |
170 | { |
171 | return dividend / divisor; | |
172 | } | |
173 | ||
39c13c20 | 174 | static u32 jit_mod32(u32 dividend, u32 divisor) |
4560cdff NS |
175 | { |
176 | return dividend % divisor; | |
177 | } | |
178 | ||
/*
 * Emit one instruction word predicated on @cond.  The condition code is
 * placed in bits 31:28 of the encoding.  On the first (sizing) pass
 * ctx->target is NULL and we only advance ctx->idx; on the second pass
 * the word is actually written out (byte-swapped for BE8 if needed by
 * __opcode_to_mem_arm()).
 */
static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
	inst |= (cond << 28);
	inst = __opcode_to_mem_arm(inst);

	if (ctx->target != NULL)
		ctx->target[ctx->idx] = inst;

	ctx->idx++;
}
189 | ||
/*
 * Emit an instruction that will be executed unconditionally
 * (condition code AL).
 */
static inline void emit(u32 inst, struct jit_ctx *ctx)
{
	_emit(ARM_COND_AL, inst, ctx);
}
197 | ||
39c13c20 SB |
198 | /* |
199 | * Checks if immediate value can be converted to imm12(12 bits) value. | |
200 | */ | |
201 | static int16_t imm8m(u32 x) | |
ddecdfce | 202 | { |
39c13c20 | 203 | u32 rot; |
ddecdfce | 204 | |
39c13c20 SB |
205 | for (rot = 0; rot < 16; rot++) |
206 | if ((x & ~ror32(0xff, 2 * rot)) == 0) | |
207 | return rol32(x, 2 * rot) | (rot << 8); | |
208 | return -1; | |
ddecdfce MG |
209 | } |
210 | ||
39c13c20 SB |
211 | /* |
212 | * Initializes the JIT space with undefined instructions. | |
213 | */ | |
55309dd3 DB |
214 | static void jit_fill_hole(void *area, unsigned int size) |
215 | { | |
e8b56d55 | 216 | u32 *ptr; |
55309dd3 DB |
217 | /* We are guaranteed to have aligned memory. */ |
218 | for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) | |
e8b56d55 | 219 | *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); |
55309dd3 DB |
220 | } |
221 | ||
d1220efd RK |
222 | #if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) |
223 | /* EABI requires the stack to be aligned to 64-bit boundaries */ | |
224 | #define STACK_ALIGNMENT 8 | |
225 | #else | |
226 | /* Stack must be aligned to 32-bit boundaries */ | |
227 | #define STACK_ALIGNMENT 4 | |
228 | #endif | |
ddecdfce | 229 | |
39c13c20 SB |
230 | /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, |
231 | * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, | |
232 | * BPF_REG_FP and Tail call counts. | |
233 | */ | |
234 | #define SCRATCH_SIZE 80 | |
ddecdfce | 235 | |
/* Total stack size used in JITed code: the BPF program's own stack,
 * the register scratch area, plus a small buffer for skb_copy_bits().
 * (Fixed: the original had stray unary '+' operators on the
 * continuation lines — "... + \ + SCRATCH_SIZE" — harmless but
 * misleading double-plus.)
 */
#define _STACK_SIZE \
	(ctx->prog->aux->stack_depth + \
	 SCRATCH_SIZE + \
	 4 /* extra for skb_copy_bits buffer */)
ddecdfce | 241 | |
d1220efd | 242 | #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) |
ddecdfce | 243 | |
39c13c20 SB |
244 | /* Get the offset of eBPF REGISTERs stored on scratch space. */ |
245 | #define STACK_VAR(off) (STACK_SIZE-off-4) | |
ddecdfce | 246 | |
39c13c20 SB |
247 | /* Offset of skb_copy_bits buffer */ |
248 | #define SKB_BUFFER STACK_VAR(SCRATCH_SIZE) | |
ddecdfce MG |
249 | |
250 | #if __LINUX_ARM_ARCH__ < 7 | |
251 | ||
252 | static u16 imm_offset(u32 k, struct jit_ctx *ctx) | |
253 | { | |
39c13c20 | 254 | unsigned int i = 0, offset; |
ddecdfce MG |
255 | u16 imm; |
256 | ||
257 | /* on the "fake" run we just count them (duplicates included) */ | |
258 | if (ctx->target == NULL) { | |
259 | ctx->imm_count++; | |
260 | return 0; | |
261 | } | |
262 | ||
263 | while ((i < ctx->imm_count) && ctx->imms[i]) { | |
264 | if (ctx->imms[i] == k) | |
265 | break; | |
266 | i++; | |
267 | } | |
268 | ||
269 | if (ctx->imms[i] == 0) | |
270 | ctx->imms[i] = k; | |
271 | ||
272 | /* constants go just after the epilogue */ | |
39c13c20 | 273 | offset = ctx->offsets[ctx->prog->len - 1] * 4; |
ddecdfce MG |
274 | offset += ctx->prologue_bytes; |
275 | offset += ctx->epilogue_bytes; | |
276 | offset += i * 4; | |
277 | ||
278 | ctx->target[offset / 4] = k; | |
279 | ||
280 | /* PC in ARM mode == address of the instruction + 8 */ | |
281 | imm = offset - (8 + ctx->idx * 4); | |
282 | ||
0b59d880 NS |
283 | if (imm & ~0xfff) { |
284 | /* | |
285 | * literal pool is too far, signal it into flags. we | |
286 | * can only detect it on the second pass unfortunately. | |
287 | */ | |
288 | ctx->flags |= FLAG_IMM_OVERFLOW; | |
289 | return 0; | |
290 | } | |
291 | ||
ddecdfce MG |
292 | return imm; |
293 | } | |
294 | ||
295 | #endif /* __LINUX_ARM_ARCH__ */ | |
296 | ||
39c13c20 SB |
297 | static inline int bpf2a32_offset(int bpf_to, int bpf_from, |
298 | const struct jit_ctx *ctx) { | |
299 | int to, from; | |
300 | ||
301 | if (ctx->target == NULL) | |
302 | return 0; | |
303 | to = ctx->offsets[bpf_to]; | |
304 | from = ctx->offsets[bpf_from]; | |
305 | ||
306 | return to - from - 1; | |
307 | } | |
308 | ||
ddecdfce MG |
/*
 * Move an immediate that's not an imm8m to a core register.
 * Pre-v7: load from the literal pool; v7+: movw/movt pair (the movt is
 * skipped when the high half is zero).
 */
static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 7
	emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
#else
	emit(ARM_MOVW(rd, val & 0xffff), ctx);
	if (val > 0xffff)
		emit(ARM_MOVT(rd, val >> 16), ctx);
#endif
}
322 | ||
39c13c20 | 323 | static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) |
ddecdfce MG |
324 | { |
325 | int imm12 = imm8m(val); | |
326 | ||
327 | if (imm12 >= 0) | |
328 | emit(ARM_MOV_I(rd, imm12), ctx); | |
329 | else | |
330 | emit_mov_i_no8m(rd, val, ctx); | |
331 | } | |
332 | ||
/* Indirect jump to @tgt_reg.  Use bx when the CPU supports Thumb
 * (so interworking works); otherwise a plain mov to pc.
 */
static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
	if (elf_hwcap & HWCAP_THUMB)
		emit(ARM_BX(tgt_reg), ctx);
	else
		emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
}
340 | ||
/* Indirect call to @tgt_reg.  Pre-v5 has no blx, so set up lr manually
 * (pc reads as current insn + 8, i.e. the insn after the branch).
 */
static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 5
	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
	emit_bx_r(tgt_reg, ctx);
#else
	emit(ARM_BLX_R(tgt_reg), ctx);
#endif
}
350 | ||
39c13c20 | 351 | static inline int epilogue_offset(const struct jit_ctx *ctx) |
ddecdfce | 352 | { |
39c13c20 SB |
353 | int to, from; |
354 | /* No need for 1st dummy run */ | |
355 | if (ctx->target == NULL) | |
356 | return 0; | |
357 | to = ctx->epilogue_offset; | |
358 | from = ctx->idx; | |
359 | ||
360 | return to - from - 2; | |
ddecdfce MG |
361 | } |
362 | ||
/*
 * Emit code for 32-bit unsigned divide/modulo: rd = rm / rn (BPF_DIV)
 * or rd = rm % rn (otherwise).  Divide-by-zero makes the program return
 * 0 via the epilogue.  Uses hardware udiv when available (v7 + IDIVA),
 * with mls computing the remainder; otherwise falls back to calling
 * jit_udiv32/jit_mod32 through the EABI calling convention.
 */
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
	const u8 *tmp = bpf2a32[TMP_REG_1];
	s32 jmp_offset;

	/* checks if divisor is zero or not. If it is, then
	 * exit directly.
	 */
	emit(ARM_CMP_I(rn, 0), ctx);
	_emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
	jmp_offset = epilogue_offset(ctx);
	_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
#if __LINUX_ARM_ARCH__ == 7
	if (elf_hwcap & HWCAP_IDIVA) {
		if (op == BPF_DIV)
			emit(ARM_UDIV(rd, rm, rn), ctx);
		else {
			/* remainder = rm - (rm / rn) * rn, via mls */
			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
		}
		return;
	}
#endif

	/*
	 * For BPF_ALU | BPF_DIV | BPF_K instructions
	 * As ARM_R1 and ARM_R0 contains 1st argument of bpf
	 * function, we need to save it on caller side to save
	 * it from getting destroyed within callee.
	 * After the return from the callee, we restore ARM_R0
	 * ARM_R1.
	 */
	if (rn != ARM_R1) {
		emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
		emit(ARM_MOV_R(ARM_R1, rn), ctx);
	}
	if (rm != ARM_R0) {
		emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
		emit(ARM_MOV_R(ARM_R0, rm), ctx);
	}

	/* Call appropriate function */
	emit_mov_i(ARM_IP, op == BPF_DIV ?
		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Save return value */
	if (rd != ARM_R0)
		emit(ARM_MOV_R(rd, ARM_R0), ctx);

	/* Restore ARM_R0 and ARM_R1 */
	if (rn != ARM_R1)
		emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
	if (rm != ARM_R0)
		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}
419 | ||
39c13c20 SB |
420 | /* Checks whether BPF register is on scratch stack space or not. */ |
421 | static inline bool is_on_stack(u8 bpf_reg) | |
ddecdfce | 422 | { |
39c13c20 SB |
423 | static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5, |
424 | BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT, | |
425 | BPF_REG_2, BPF_REG_FP}; | |
426 | int i, reg_len = sizeof(stack_regs); | |
427 | ||
428 | for (i = 0 ; i < reg_len ; i++) { | |
429 | if (bpf_reg == stack_regs[i]) | |
430 | return true; | |
431 | } | |
432 | return false; | |
ddecdfce MG |
433 | } |
434 | ||
/* Load immediate @val into the 32-bit half-register @dst.  If @dstk the
 * destination lives in a stack scratch slot: build the value in a temp
 * register and store it out.
 */
static inline void emit_a32_mov_i(const u8 dst, const u32 val,
				  bool dstk, struct jit_ctx *ctx)
{
	const u8 *tmp = bpf2a32[TMP_REG_1];

	if (dstk) {
		emit_mov_i(tmp[1], val, ctx);
		emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx);
	} else {
		emit_mov_i(dst, val, ctx);
	}
}
447 | ||
/* Sign extended move: load @val into the 64-bit register pair @dst.
 * For 64-bit ops a negative immediate sign-extends into the high word;
 * otherwise the high word is zeroed.
 */
static inline void emit_a32_mov_i64(const bool is64, const u8 dst[],
				    const u32 val, bool dstk,
				    struct jit_ctx *ctx) {
	u32 hi = 0;

	if (is64 && (val & (1<<31)))
		hi = (u32)~0;
	emit_a32_mov_i(dst_lo, val, dstk, ctx);
	emit_a32_mov_i(dst_hi, hi, dstk, ctx);
}
ddecdfce | 459 | |
39c13c20 SB |
460 | static inline void emit_a32_add_r(const u8 dst, const u8 src, |
461 | const bool is64, const bool hi, | |
462 | struct jit_ctx *ctx) { | |
463 | /* 64 bit : | |
464 | * adds dst_lo, dst_lo, src_lo | |
465 | * adc dst_hi, dst_hi, src_hi | |
466 | * 32 bit : | |
467 | * add dst_lo, dst_lo, src_lo | |
468 | */ | |
469 | if (!hi && is64) | |
470 | emit(ARM_ADDS_R(dst, dst, src), ctx); | |
471 | else if (hi && is64) | |
472 | emit(ARM_ADC_R(dst, dst, src), ctx); | |
473 | else | |
474 | emit(ARM_ADD_R(dst, dst, src), ctx); | |
475 | } | |
ddecdfce | 476 | |
39c13c20 SB |
477 | static inline void emit_a32_sub_r(const u8 dst, const u8 src, |
478 | const bool is64, const bool hi, | |
479 | struct jit_ctx *ctx) { | |
480 | /* 64 bit : | |
481 | * subs dst_lo, dst_lo, src_lo | |
482 | * sbc dst_hi, dst_hi, src_hi | |
483 | * 32 bit : | |
484 | * sub dst_lo, dst_lo, src_lo | |
ddecdfce | 485 | */ |
39c13c20 SB |
486 | if (!hi && is64) |
487 | emit(ARM_SUBS_R(dst, dst, src), ctx); | |
488 | else if (hi && is64) | |
489 | emit(ARM_SBC_R(dst, dst, src), ctx); | |
490 | else | |
491 | emit(ARM_SUB_R(dst, dst, src), ctx); | |
492 | } | |
ddecdfce | 493 | |
/* Emit one 32-bit ALU instruction for dst = dst (op) src.  @is64/@hi
 * only matter for ADD/SUB, which must propagate carry/borrow across the
 * two halves of a 64-bit value.
 */
static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
			      const bool hi, const u8 op, struct jit_ctx *ctx){
	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		emit_a32_add_r(dst, src, is64, hi, ctx);
		break;
	/* dst = dst - src */
	case BPF_SUB:
		emit_a32_sub_r(dst, src, is64, hi, ctx);
		break;
	/* dst = dst | src */
	case BPF_OR:
		emit(ARM_ORR_R(dst, dst, src), ctx);
		break;
	/* dst = dst & src */
	case BPF_AND:
		emit(ARM_AND_R(dst, dst, src), ctx);
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		emit(ARM_EOR_R(dst, dst, src), ctx);
		break;
	/* dst = dst * src */
	case BPF_MUL:
		emit(ARM_MUL(dst, dst, src), ctx);
		break;
	/* dst = dst << src */
	case BPF_LSH:
		emit(ARM_LSL_R(dst, dst, src), ctx);
		break;
	/* dst = dst >> src */
	case BPF_RSH:
		emit(ARM_LSR_R(dst, dst, src), ctx);
		break;
	/* dst = dst >> src (signed)*/
	case BPF_ARSH:
		emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
		break;
	}
}
535 | ||
/* ALU operation (32 bit)
 *	dst = dst (op) src
 * @dstk/@sstk indicate the operand lives in a stack scratch slot; such
 * operands are loaded into temporaries first and (for dst) stored back.
 */
static inline void emit_a32_alu_r(const u8 dst, const u8 src,
				  bool dstk, bool sstk,
				  struct jit_ctx *ctx, const bool is64,
				  const bool hi, const u8 op) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rn = sstk ? tmp[1] : src;

	if (sstk)
		emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx);

	/* ALU operation */
	if (dstk) {
		emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
		emit_alu_r(tmp[0], rn, is64, hi, op, ctx);
		emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
	} else {
		emit_alu_r(dst, rn, is64, hi, op, ctx);
	}
}
558 | ||
/* ALU operation (64 bit): apply @op to the low halves, then either the
 * high halves (64-bit op, carry flows via emit_alu_r) or zero the high
 * word (32-bit op semantics).
 */
static inline void emit_a32_alu_r64(const bool is64, const u8 dst[],
				  const u8 src[], bool dstk,
				  bool sstk, struct jit_ctx *ctx,
				  const u8 op) {
	emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op);
	if (is64)
		emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op);
	else
		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
}
ddecdfce | 570 | |
/* dst = src (4 bytes) — single 32-bit half-register move; either side
 * may live in a stack scratch slot.  (Header previously said "dst = imm",
 * which was wrong: this copies a register/slot, not an immediate.)
 */
static inline void emit_a32_mov_r(const u8 dst, const u8 src,
				  bool dstk, bool sstk,
				  struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rt = sstk ? tmp[0] : src;

	if (sstk)
		emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx);
	if (dstk)
		emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx);
	else
		emit(ARM_MOV_R(dst, rt), ctx);
}
585 | ||
/* dst = src : copy low half always; copy high half for 64-bit moves,
 * zero it for 32-bit moves (eBPF 32-bit mov zero-extends).
 */
static inline void emit_a32_mov_r64(const bool is64, const u8 dst[],
				  const u8 src[], bool dstk,
				  bool sstk, struct jit_ctx *ctx) {
	emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx);
	if (is64) {
		/* complete 8 byte move */
		emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx);
	} else {
		/* Zero out high 4 bytes */
		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
	}
}
19fc99d0 | 599 | |
/* 32-bit immediate ALU helper: shift-by-constant (BPF_LSH/BPF_RSH) and
 * reverse-subtract (BPF_NEG: rd = val - rd, i.e. negation when val == 0).
 * (Header previously said only "Shift operations".)
 */
static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
				struct jit_ctx *ctx, const u8 op) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rd = dstk ? tmp[0] : dst;

	if (dstk)
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);

	/* Do shift operation */
	switch (op) {
	case BPF_LSH:
		emit(ARM_LSL_I(rd, rd, val), ctx);
		break;
	case BPF_RSH:
		emit(ARM_LSR_I(rd, rd, val), ctx);
		break;
	case BPF_NEG:
		emit(ARM_RSB_I(rd, rd, val), ctx);
		break;
	}

	if (dstk)
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
}
625 | ||
/* dst = -dst (64 bit): arithmetic negation via rsbs/rsc with borrow
 * propagation.  (Header previously said "~dst", but rsbs/rsc against 0
 * compute two's-complement negation, not bitwise NOT.)
 */
static inline void emit_a32_neg64(const u8 dst[], bool dstk,
				struct jit_ctx *ctx){
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rd = dstk ? tmp[1] : dst[1];
	u8 rm = dstk ? tmp[0] : dst[0];

	/* Setup Operand */
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do Negate Operation */
	emit(ARM_RSBS_I(rd, rd, 0), ctx);
	emit(ARM_RSC_I(rm, rm, 0), ctx);

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}
648 | ||
/* dst = dst << src (64 bit, shift amount in src_lo).
 * Composes the new high word from three shifted terms so the sequence
 * works for any shift 0..63 without branches; results are built in
 * LR (new lo) and IP (new hi) before being written back.
 */
static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];

	/* Setup Operands */
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (sstk)
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do LSH operation */
	emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);		/* ip = shift - 32 */
	emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);		/* tmp = 32 - shift */
	emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
	emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx);

	if (dstk) {
		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_LR), ctx);
		emit(ARM_MOV_R(rm, ARM_IP), ctx);
	}
}
ddecdfce | 683 | |
/* dst = dst >> src (signed, 64 bit; shift amount in src_lo).
 * The conditional (MI) branch skips the ">= 32" correction term when
 * shift - 32 is negative, i.e. for shifts below 32.
 */
static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup Operands */
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (sstk)
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do the ARSH operation */
	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);		/* ip = 32 - shift */
	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);		/* tmp = shift - 32, sets N */
	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
	_emit(ARM_COND_MI, ARM_B(0), ctx);		/* skip next insn if shift < 32 */
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx);
	if (dstk) {
		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_LR), ctx);
		emit(ARM_MOV_R(rm, ARM_IP), ctx);
	}
}
717 | ||
/* dst = dst >> src (logical, 64 bit; shift amount in src_lo).
 * Builds the new low word in LR from lo >> n, hi << (32-n) and
 * hi >> (n-32); the new high word in IP is simply hi >> n.
 */
static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup Operands */
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (sstk)
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do RSH operation */
	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);		/* ip = 32 - shift */
	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);		/* tmp = shift - 32 */
	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx);
	if (dstk) {
		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_LR), ctx);
		emit(ARM_MOV_R(rm, ARM_IP), ctx);
	}
}
750 | ||
/* dst = dst << val (64 bit, constant shift).  The constant lets us pick
 * the exact instruction sequence: cross-word composition for val < 32,
 * a plain move/shifted-move plus lo = 0 for val >= 32.
 */
static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
				    const u32 val, struct jit_ctx *ctx){
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do LSH operation */
	if (val < 32) {
		/* hi = (hi << val) | (lo >> (32 - val)); lo <<= val */
		emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx);
		emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx);
		emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx);
	} else {
		/* hi = lo << (val - 32); lo = 0 */
		if (val == 32)
			emit(ARM_MOV_R(rm, rd), ctx);
		else
			emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx);
		emit(ARM_EOR_R(rd, rd, rd), ctx);
	}

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}
783 | ||
/* dst = dst >> val (logical, 64 bit, constant shift).  Mirror image of
 * emit_a32_lsh_i64: cross-word composition below 32, move + hi = 0 for
 * 32 and above.
 */
static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk,
				    const u32 val, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do LSR operation */
	if (val < 32) {
		/* lo = (lo >> val) | (hi << (32 - val)); hi >>= val */
		emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
		emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
		emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx);
	} else if (val == 32) {
		emit(ARM_MOV_R(rd, rm), ctx);
		emit(ARM_MOV_I(rm, 0), ctx);
	} else {
		/* lo = hi >> (val - 32); hi = 0 */
		emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx);
		emit(ARM_MOV_I(rm, 0), ctx);
	}

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}
816 | ||
/* dst = dst >> val (arithmetic, 64 bit, constant shift).  Like the
 * logical variant but the vacated high word is filled with the sign
 * (hi asr 31) instead of zero.
 */
static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk,
				     const u32 val, struct jit_ctx *ctx){
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do ARSH operation */
	if (val < 32) {
		emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
		emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
		emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx);
	} else if (val == 32) {
		emit(ARM_MOV_R(rd, rm), ctx);
		emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
	} else {
		emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx);
		emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
	}

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}
849 | ||
/* dst = dst * src (64 bit, low 64 bits of the product):
 *	ip = dst_lo * src_hi
 *	lr = dst_hi * src_lo
 *	lr = ip + lr			(sum of cross terms)
 *	{rm, ip} = dst_lo * src_lo	(umull: ip = low 32, rm = high 32)
 *	rm = lr + rm			(fold cross terms into high word)
 * Result: low word in IP, high word in rm.  In the register (non-dstk)
 * case rm already is dst_hi, so only the low word needs a final move.
 */
static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands for multiplication */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rn = sstk ? tmp2[0] : src_hi;

	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
	if (sstk) {
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
		emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx);
	}

	/* Do Multiplication */
	emit(ARM_MUL(ARM_IP, rd, rn), ctx);
	emit(ARM_MUL(ARM_LR, rm, rt), ctx);
	emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);

	emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
	emit(ARM_ADD_R(rm, ARM_LR, rm), ctx);
	if (dstk) {
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_IP), ctx);
	}
}
883 | ||
884 | /* *(size *)(dst + off) = src */ | |
885 | static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, | |
886 | const s32 off, struct jit_ctx *ctx, const u8 sz){ | |
887 | const u8 *tmp = bpf2a32[TMP_REG_1]; | |
888 | u8 rd = dstk ? tmp[1] : dst; | |
889 | ||
890 | if (dstk) | |
891 | emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); | |
892 | if (off) { | |
893 | emit_a32_mov_i(tmp[0], off, false, ctx); | |
894 | emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx); | |
895 | rd = tmp[0]; | |
896 | } | |
897 | switch (sz) { | |
898 | case BPF_W: | |
899 | /* Store a Word */ | |
900 | emit(ARM_STR_I(src, rd, 0), ctx); | |
901 | break; | |
902 | case BPF_H: | |
903 | /* Store a HalfWord */ | |
904 | emit(ARM_STRH_I(src, rd, 0), ctx); | |
905 | break; | |
906 | case BPF_B: | |
907 | /* Store a Byte */ | |
908 | emit(ARM_STRB_I(src, rd, 0), ctx); | |
909 | break; | |
910 | } | |
911 | } | |
912 | ||
913 | /* dst = *(size*)(src + off) */ | |
ec19e02b RK |
914 | static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, |
915 | s32 off, struct jit_ctx *ctx, const u8 sz){ | |
46a3323d | 916 | const u8 *tmp = bpf2a32[TMP_REG_2]; |
ec19e02b | 917 | const u8 *rd = dstk ? tmp : dst; |
39c13c20 | 918 | u8 rm = src; |
ec19e02b | 919 | s32 off_max; |
39c13c20 | 920 | |
ec19e02b RK |
921 | if (sz == BPF_H) |
922 | off_max = 0xff; | |
923 | else | |
924 | off_max = 0xfff; | |
925 | ||
926 | if (off < 0 || off > off_max) { | |
39c13c20 SB |
927 | emit_a32_mov_i(tmp[0], off, false, ctx); |
928 | emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); | |
929 | rm = tmp[0]; | |
ec19e02b RK |
930 | off = 0; |
931 | } else if (rd[1] == rm) { | |
932 | emit(ARM_MOV_R(tmp[0], rm), ctx); | |
933 | rm = tmp[0]; | |
39c13c20 SB |
934 | } |
935 | switch (sz) { | |
ec19e02b RK |
936 | case BPF_B: |
937 | /* Load a Byte */ | |
938 | emit(ARM_LDRB_I(rd[1], rm, off), ctx); | |
939 | emit_a32_mov_i(dst[0], 0, dstk, ctx); | |
39c13c20 SB |
940 | break; |
941 | case BPF_H: | |
942 | /* Load a HalfWord */ | |
ec19e02b RK |
943 | emit(ARM_LDRH_I(rd[1], rm, off), ctx); |
944 | emit_a32_mov_i(dst[0], 0, dstk, ctx); | |
39c13c20 | 945 | break; |
ec19e02b RK |
946 | case BPF_W: |
947 | /* Load a Word */ | |
948 | emit(ARM_LDR_I(rd[1], rm, off), ctx); | |
949 | emit_a32_mov_i(dst[0], 0, dstk, ctx); | |
950 | break; | |
951 | case BPF_DW: | |
952 | /* Load a Double Word */ | |
953 | emit(ARM_LDR_I(rd[1], rm, off), ctx); | |
954 | emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); | |
39c13c20 SB |
955 | break; |
956 | } | |
957 | if (dstk) | |
ec19e02b RK |
958 | emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx); |
959 | if (dstk && sz == BPF_DW) | |
960 | emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx); | |
39c13c20 SB |
961 | } |
962 | ||
/* Arithmatic Operation
 *
 * Emit a 64-bit compare for a conditional jump, leaving the result in the
 * CPU flags for the caller to branch on.  Register roles (see the go_jmp
 * call site): rd = dst_hi, rt = dst_lo, rm = src_hi, rn = src_lo.
 */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
			     const u8 rn, struct jit_ctx *ctx, u8 op) {
	switch (op) {
	case BPF_JSET:
		/* Set Z iff (dst & src) == 0 over all 64 bits:
		 * OR together the ANDs of the low and high halves.
		 */
		emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
		emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
		emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
		break;
	case BPF_JEQ:
	case BPF_JNE:
	case BPF_JGT:
	case BPF_JGE:
	case BPF_JLE:
	case BPF_JLT:
		/* Equality / unsigned compares: compare the high words;
		 * only if they are equal, compare the low words.
		 */
		emit(ARM_CMP_R(rd, rm), ctx);
		_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
		break;
	case BPF_JSLE:
	case BPF_JSGT:
		/* Signed compare via 64-bit subtract src - dst (CMP on the
		 * low words, SBCS on the high); the caller branches LT for
		 * JSGT (src < dst) and GE for JSLE.
		 */
		emit(ARM_CMP_R(rn, rt), ctx);
		emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
		break;
	case BPF_JSLT:
	case BPF_JSGE:
		/* Signed compare via 64-bit subtract dst - src; the caller
		 * branches LT for JSLT and GE for JSGE.
		 */
		emit(ARM_CMP_R(rt, rn), ctx);
		emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
		break;
	}
}
993 | ||
/* Distance (in instructions) from the start of the tail-call sequence to
 * its "out" label.  Learned on the first JIT pass and verified to be
 * identical on later passes so that the forward branches stay correct.
 */
static int out_offset = -1; /* initialized on the first pass of build_body() */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{

	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
	const u8 *r2 = bpf2a32[BPF_REG_2];	/* array pointer */
	const u8 *r3 = bpf2a32[BPF_REG_3];	/* index */
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	const u8 *tcc = bpf2a32[TCALL_CNT];	/* 64-bit tail-call counter */
	const int idx0 = ctx->idx;
	/* Instruction-counting helpers for the three forward branches to
	 * the "out" label below.
	 */
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset) - 2)
	u32 off, lo, hi;

	/* if (index >= array->map.max_entries)
	 *	goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	/* array->map.max_entries */
	emit_a32_mov_i(tmp[1], off, false, ctx);
	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
	emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
	/* index is 32-bit for arrays */
	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
	/* index >= array->map.max_entries */
	emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
	_emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);

	/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *	goto out;
	 * tail_call_cnt++;
	 *
	 * The counter is kept as a 64-bit value in the JIT stack frame;
	 * compare high word first, low word only on equality, then do a
	 * 64-bit increment with ADDS/ADC.
	 * NOTE(review): the strict '>' allows MAX_TAIL_CALL_CNT + 1 runs;
	 * confirm against the intended bpf_tail_call limit semantics.
	 */
	lo = (u32)MAX_TAIL_CALL_CNT;
	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
	emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
	emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
	emit(ARM_CMP_I(tmp[0], hi), ctx);
	_emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx);
	_emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
	emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx);
	emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx);
	emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
	emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);

	/* prog = array->ptrs[index]
	 * if (prog == NULL)
	 *	goto out;
	 */
	off = offsetof(struct bpf_array, ptrs);
	emit_a32_mov_i(tmp[1], off, false, ctx);
	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
	emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx);
	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
	emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx);	/* index * 4 */
	emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx);
	emit(ARM_CMP_I(tmp[1], 0), ctx);
	_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);

	/* goto *(prog->bpf_func + prologue_size);
	 * Skip the target program's prologue: the current frame and
	 * tail-call counter are reused.
	 */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_a32_mov_i(tmp2[1], off, false, ctx);
	emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
	emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
	emit_bx_r(tmp[1], ctx);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		/* The sequence length changed between passes: the branch
		 * offsets above would be wrong — fail the JIT.
		 */
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}
	return 0;
#undef cur_offset
#undef jmp_offset
}
1072 | ||
39c13c20 SB |
/* 0xabcd => 0xcdab
 *
 * Byte-swap the low halfword of rn into rd (16-bit endianness swap).
 */
static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 6
	/* Pre-ARMv6 has no REV16: swap the two low bytes by hand. */
	const u8 *tmp2 = bpf2a32[TMP_REG_2];

	emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);		/* low byte */
	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx);
	emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);		/* high byte */
	emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx);
#else /* ARMv6+ */
	emit(ARM_REV16(rd, rn), ctx);
#endif
}
ddecdfce | 1087 | |
39c13c20 SB |
/* 0xabcdefgh => 0xghefcdab
 *
 * Full 32-bit byte swap of rn into rd.
 */
static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 6
	/* Pre-ARMv6 has no REV: assemble the swap from shifts and masks. */
	const u8 *tmp2 = bpf2a32[TMP_REG_2];

	/* ip = (byte0 << 24) | byte3 */
	emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx);
	emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx);

	/* tmp2[0] = (byte1 << 16) | (byte2 << 8) */
	emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx);
	emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx);
	emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
	emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx);
	emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx);

	/* rd = combine all four repositioned bytes */
	emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx);

#else /* ARMv6+ */
	emit(ARM_REV(rd, rn), ctx);
#endif
}
34805931 | 1110 | |
39c13c20 SB |
1111 | // push the scratch stack register on top of the stack |
1112 | static inline void emit_push_r64(const u8 src[], const u8 shift, | |
1113 | struct jit_ctx *ctx) | |
1114 | { | |
1115 | const u8 *tmp2 = bpf2a32[TMP_REG_2]; | |
1116 | u16 reg_set = 0; | |
ddecdfce | 1117 | |
39c13c20 SB |
1118 | emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx); |
1119 | emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx); | |
1120 | ||
1121 | reg_set = (1 << tmp2[1]) | (1 << tmp2[0]); | |
1122 | emit(ARM_PUSH(reg_set), ctx); | |
1123 | } | |
1124 | ||
/* Emit the JIT prologue: save callee-saved registers, carve out the stack
 * frame (scratch space + BPF prog stack + call area), initialize the BPF
 * frame pointer and tail-call counter, and move the incoming context
 * argument into BPF R1.
 */
static void build_prologue(struct jit_ctx *ctx)
{
	const u8 r0 = bpf2a32[BPF_REG_0][1];	/* ARM r0: incoming ctx arg */
	const u8 r2 = bpf2a32[BPF_REG_1][1];	/* BPF R1 low */
	const u8 r3 = bpf2a32[BPF_REG_1][0];	/* BPF R1 high */
	const u8 r4 = bpf2a32[BPF_REG_6][1];	/* scratch, zeroed below */
	const u8 fplo = bpf2a32[BPF_REG_FP][1];
	const u8 fphi = bpf2a32[BPF_REG_FP][0];
	const u8 *tcc = bpf2a32[TCALL_CNT];

	/* Save callee saved registers. */
#ifdef CONFIG_FRAME_POINTER
	/* ABI-compliant frame: also push ip/pc and point fp at the
	 * saved-register record.
	 */
	u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
	emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
	emit(ARM_PUSH(reg_set), ctx);
	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
#else
	emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
	emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
#endif
	/* Save frame pointer for later: ip = base of the BPF prog stack
	 * (below the JIT scratch space) — see the layout diagram at the
	 * top of this file.
	 */
	emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);

	/* Round the frame size up to something encodable as an ARM
	 * immediate so a single SUB suffices.
	 */
	ctx->stack_size = imm8m(STACK_SIZE);

	/* Set up function call stack */
	emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);

	/* Set up BPF prog stack base register (BPF_FP = saved ip, high
	 * word zero).
	 */
	emit_a32_mov_r(fplo, ARM_IP, true, false, ctx);
	emit_a32_mov_i(fphi, 0, true, ctx);

	/* mov r4, 0 */
	emit(ARM_MOV_I(r4, 0), ctx);

	/* Move BPF_CTX to BPF_R1: low word from ARM r0, high word zero. */
	emit(ARM_MOV_R(r3, r4), ctx);
	emit(ARM_MOV_R(r2, r0), ctx);
	/* Initialize Tail Count (64-bit zero in the stack frame). */
	emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx);
	emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx);
	/* end of prologue */
}
1168 | ||
/* restore callee saved registers.
 *
 * Emit the JIT epilogue: unwind the frame set up by build_prologue() and
 * return to the caller (pc is part of the pop/ldm register set).
 */
static void build_epilogue(struct jit_ctx *ctx)
{
#ifdef CONFIG_FRAME_POINTER
	/* When using frame pointers, some additional registers need to
	 * be loaded. */
	u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
	/* Rewind sp to the base of the saved-register record (fp points
	 * just past it — see build_prologue), then reload everything,
	 * including sp and pc, in one LDM.
	 */
	emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
	emit(ARM_LDM(ARM_SP, reg_set), ctx);
#else
	/* Restore callee saved registers. */
	emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
	emit(ARM_POP(CALLEE_POP_MASK), ctx);
#endif
}
1184 | ||
1185 | /* | |
1186 | * Convert an eBPF instruction to native instruction, i.e | |
1187 | * JITs an eBPF instruction. | |
1188 | * Returns : | |
1189 | * 0 - Successfully JITed an 8-byte eBPF instruction | |
1190 | * >0 - Successfully JITed a 16-byte eBPF instruction | |
1191 | * <0 - Failed to JIT. | |
1192 | */ | |
1193 | static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | |
1194 | { | |
1195 | const u8 code = insn->code; | |
1196 | const u8 *dst = bpf2a32[insn->dst_reg]; | |
1197 | const u8 *src = bpf2a32[insn->src_reg]; | |
1198 | const u8 *tmp = bpf2a32[TMP_REG_1]; | |
1199 | const u8 *tmp2 = bpf2a32[TMP_REG_2]; | |
1200 | const s16 off = insn->off; | |
1201 | const s32 imm = insn->imm; | |
1202 | const int i = insn - ctx->prog->insnsi; | |
1203 | const bool is64 = BPF_CLASS(code) == BPF_ALU64; | |
1204 | const bool dstk = is_on_stack(insn->dst_reg); | |
1205 | const bool sstk = is_on_stack(insn->src_reg); | |
1206 | u8 rd, rt, rm, rn; | |
1207 | s32 jmp_offset; | |
1208 | ||
1209 | #define check_imm(bits, imm) do { \ | |
1210 | if ((((imm) > 0) && ((imm) >> (bits))) || \ | |
1211 | (((imm) < 0) && (~(imm) >> (bits)))) { \ | |
1212 | pr_info("[%2d] imm=%d(0x%x) out of range\n", \ | |
1213 | i, imm, imm); \ | |
1214 | return -EINVAL; \ | |
1215 | } \ | |
1216 | } while (0) | |
1217 | #define check_imm24(imm) check_imm(24, imm) | |
1218 | ||
1219 | switch (code) { | |
1220 | /* ALU operations */ | |
1221 | ||
1222 | /* dst = src */ | |
1223 | case BPF_ALU | BPF_MOV | BPF_K: | |
1224 | case BPF_ALU | BPF_MOV | BPF_X: | |
1225 | case BPF_ALU64 | BPF_MOV | BPF_K: | |
1226 | case BPF_ALU64 | BPF_MOV | BPF_X: | |
1227 | switch (BPF_SRC(code)) { | |
1228 | case BPF_X: | |
1229 | emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx); | |
ddecdfce | 1230 | break; |
39c13c20 SB |
1231 | case BPF_K: |
1232 | /* Sign-extend immediate value to destination reg */ | |
1233 | emit_a32_mov_i64(is64, dst, imm, dstk, ctx); | |
ddecdfce | 1234 | break; |
39c13c20 SB |
1235 | } |
1236 | break; | |
1237 | /* dst = dst + src/imm */ | |
1238 | /* dst = dst - src/imm */ | |
1239 | /* dst = dst | src/imm */ | |
1240 | /* dst = dst & src/imm */ | |
1241 | /* dst = dst ^ src/imm */ | |
1242 | /* dst = dst * src/imm */ | |
1243 | /* dst = dst << src */ | |
1244 | /* dst = dst >> src */ | |
1245 | case BPF_ALU | BPF_ADD | BPF_K: | |
1246 | case BPF_ALU | BPF_ADD | BPF_X: | |
1247 | case BPF_ALU | BPF_SUB | BPF_K: | |
1248 | case BPF_ALU | BPF_SUB | BPF_X: | |
1249 | case BPF_ALU | BPF_OR | BPF_K: | |
1250 | case BPF_ALU | BPF_OR | BPF_X: | |
1251 | case BPF_ALU | BPF_AND | BPF_K: | |
1252 | case BPF_ALU | BPF_AND | BPF_X: | |
1253 | case BPF_ALU | BPF_XOR | BPF_K: | |
1254 | case BPF_ALU | BPF_XOR | BPF_X: | |
1255 | case BPF_ALU | BPF_MUL | BPF_K: | |
1256 | case BPF_ALU | BPF_MUL | BPF_X: | |
1257 | case BPF_ALU | BPF_LSH | BPF_X: | |
1258 | case BPF_ALU | BPF_RSH | BPF_X: | |
1259 | case BPF_ALU | BPF_ARSH | BPF_K: | |
1260 | case BPF_ALU | BPF_ARSH | BPF_X: | |
1261 | case BPF_ALU64 | BPF_ADD | BPF_K: | |
1262 | case BPF_ALU64 | BPF_ADD | BPF_X: | |
1263 | case BPF_ALU64 | BPF_SUB | BPF_K: | |
1264 | case BPF_ALU64 | BPF_SUB | BPF_X: | |
1265 | case BPF_ALU64 | BPF_OR | BPF_K: | |
1266 | case BPF_ALU64 | BPF_OR | BPF_X: | |
1267 | case BPF_ALU64 | BPF_AND | BPF_K: | |
1268 | case BPF_ALU64 | BPF_AND | BPF_X: | |
1269 | case BPF_ALU64 | BPF_XOR | BPF_K: | |
1270 | case BPF_ALU64 | BPF_XOR | BPF_X: | |
1271 | switch (BPF_SRC(code)) { | |
1272 | case BPF_X: | |
1273 | emit_a32_alu_r64(is64, dst, src, dstk, sstk, | |
1274 | ctx, BPF_OP(code)); | |
ddecdfce | 1275 | break; |
39c13c20 SB |
1276 | case BPF_K: |
1277 | /* Move immediate value to the temporary register | |
1278 | * and then do the ALU operation on the temporary | |
1279 | * register as this will sign-extend the immediate | |
1280 | * value into temporary reg and then it would be | |
1281 | * safe to do the operation on it. | |
6d715e30 | 1282 | */ |
39c13c20 SB |
1283 | emit_a32_mov_i64(is64, tmp2, imm, false, ctx); |
1284 | emit_a32_alu_r64(is64, dst, tmp2, dstk, false, | |
1285 | ctx, BPF_OP(code)); | |
ddecdfce | 1286 | break; |
39c13c20 SB |
1287 | } |
1288 | break; | |
1289 | /* dst = dst / src(imm) */ | |
1290 | /* dst = dst % src(imm) */ | |
1291 | case BPF_ALU | BPF_DIV | BPF_K: | |
1292 | case BPF_ALU | BPF_DIV | BPF_X: | |
1293 | case BPF_ALU | BPF_MOD | BPF_K: | |
1294 | case BPF_ALU | BPF_MOD | BPF_X: | |
1295 | rt = src_lo; | |
1296 | rd = dstk ? tmp2[1] : dst_lo; | |
1297 | if (dstk) | |
1298 | emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); | |
1299 | switch (BPF_SRC(code)) { | |
1300 | case BPF_X: | |
1301 | rt = sstk ? tmp2[0] : rt; | |
1302 | if (sstk) | |
1303 | emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), | |
1304 | ctx); | |
ddecdfce | 1305 | break; |
39c13c20 SB |
1306 | case BPF_K: |
1307 | rt = tmp2[0]; | |
1308 | emit_a32_mov_i(rt, imm, false, ctx); | |
ddecdfce | 1309 | break; |
39c13c20 SB |
1310 | } |
1311 | emit_udivmod(rd, rd, rt, ctx, BPF_OP(code)); | |
1312 | if (dstk) | |
1313 | emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); | |
1314 | emit_a32_mov_i(dst_hi, 0, dstk, ctx); | |
1315 | break; | |
1316 | case BPF_ALU64 | BPF_DIV | BPF_K: | |
1317 | case BPF_ALU64 | BPF_DIV | BPF_X: | |
1318 | case BPF_ALU64 | BPF_MOD | BPF_K: | |
1319 | case BPF_ALU64 | BPF_MOD | BPF_X: | |
1320 | goto notyet; | |
1321 | /* dst = dst >> imm */ | |
1322 | /* dst = dst << imm */ | |
1323 | case BPF_ALU | BPF_RSH | BPF_K: | |
1324 | case BPF_ALU | BPF_LSH | BPF_K: | |
1325 | if (unlikely(imm > 31)) | |
1326 | return -EINVAL; | |
1327 | if (imm) | |
1328 | emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code)); | |
1329 | emit_a32_mov_i(dst_hi, 0, dstk, ctx); | |
1330 | break; | |
1331 | /* dst = dst << imm */ | |
1332 | case BPF_ALU64 | BPF_LSH | BPF_K: | |
1333 | if (unlikely(imm > 63)) | |
1334 | return -EINVAL; | |
1335 | emit_a32_lsh_i64(dst, dstk, imm, ctx); | |
1336 | break; | |
1337 | /* dst = dst >> imm */ | |
1338 | case BPF_ALU64 | BPF_RSH | BPF_K: | |
1339 | if (unlikely(imm > 63)) | |
1340 | return -EINVAL; | |
d1ab73c4 | 1341 | emit_a32_rsh_i64(dst, dstk, imm, ctx); |
39c13c20 SB |
1342 | break; |
1343 | /* dst = dst << src */ | |
1344 | case BPF_ALU64 | BPF_LSH | BPF_X: | |
1345 | emit_a32_lsh_r64(dst, src, dstk, sstk, ctx); | |
1346 | break; | |
1347 | /* dst = dst >> src */ | |
1348 | case BPF_ALU64 | BPF_RSH | BPF_X: | |
d1ab73c4 | 1349 | emit_a32_rsh_r64(dst, src, dstk, sstk, ctx); |
39c13c20 SB |
1350 | break; |
1351 | /* dst = dst >> src (signed) */ | |
1352 | case BPF_ALU64 | BPF_ARSH | BPF_X: | |
1353 | emit_a32_arsh_r64(dst, src, dstk, sstk, ctx); | |
1354 | break; | |
1355 | /* dst = dst >> imm (signed) */ | |
1356 | case BPF_ALU64 | BPF_ARSH | BPF_K: | |
1357 | if (unlikely(imm > 63)) | |
1358 | return -EINVAL; | |
1359 | emit_a32_arsh_i64(dst, dstk, imm, ctx); | |
1360 | break; | |
1361 | /* dst = ~dst */ | |
1362 | case BPF_ALU | BPF_NEG: | |
1363 | emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code)); | |
1364 | emit_a32_mov_i(dst_hi, 0, dstk, ctx); | |
1365 | break; | |
1366 | /* dst = ~dst (64 bit) */ | |
1367 | case BPF_ALU64 | BPF_NEG: | |
1368 | emit_a32_neg64(dst, dstk, ctx); | |
1369 | break; | |
1370 | /* dst = dst * src/imm */ | |
1371 | case BPF_ALU64 | BPF_MUL | BPF_X: | |
1372 | case BPF_ALU64 | BPF_MUL | BPF_K: | |
1373 | switch (BPF_SRC(code)) { | |
1374 | case BPF_X: | |
1375 | emit_a32_mul_r64(dst, src, dstk, sstk, ctx); | |
ddecdfce | 1376 | break; |
39c13c20 SB |
1377 | case BPF_K: |
1378 | /* Move immediate value to the temporary register | |
1379 | * and then do the multiplication on it as this | |
1380 | * will sign-extend the immediate value into temp | |
1381 | * reg then it would be safe to do the operation | |
1382 | * on it. | |
ddecdfce | 1383 | */ |
39c13c20 SB |
1384 | emit_a32_mov_i64(is64, tmp2, imm, false, ctx); |
1385 | emit_a32_mul_r64(dst, tmp2, dstk, false, ctx); | |
ddecdfce | 1386 | break; |
39c13c20 SB |
1387 | } |
1388 | break; | |
1389 | /* dst = htole(dst) */ | |
1390 | /* dst = htobe(dst) */ | |
1391 | case BPF_ALU | BPF_END | BPF_FROM_LE: | |
1392 | case BPF_ALU | BPF_END | BPF_FROM_BE: | |
1393 | rd = dstk ? tmp[0] : dst_hi; | |
1394 | rt = dstk ? tmp[1] : dst_lo; | |
1395 | if (dstk) { | |
1396 | emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); | |
1397 | emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); | |
1398 | } | |
1399 | if (BPF_SRC(code) == BPF_FROM_LE) | |
1400 | goto emit_bswap_uxt; | |
1401 | switch (imm) { | |
1402 | case 16: | |
1403 | emit_rev16(rt, rt, ctx); | |
1404 | goto emit_bswap_uxt; | |
1405 | case 32: | |
1406 | emit_rev32(rt, rt, ctx); | |
1407 | goto emit_bswap_uxt; | |
1408 | case 64: | |
39c13c20 SB |
1409 | emit_rev32(ARM_LR, rt, ctx); |
1410 | emit_rev32(rt, rd, ctx); | |
1411 | emit(ARM_MOV_R(rd, ARM_LR), ctx); | |
4560cdff | 1412 | break; |
39c13c20 SB |
1413 | } |
1414 | goto exit; | |
1415 | emit_bswap_uxt: | |
1416 | switch (imm) { | |
1417 | case 16: | |
1418 | /* zero-extend 16 bits into 64 bits */ | |
1419 | #if __LINUX_ARM_ARCH__ < 6 | |
1420 | emit_a32_mov_i(tmp2[1], 0xffff, false, ctx); | |
1421 | emit(ARM_AND_R(rt, rt, tmp2[1]), ctx); | |
1422 | #else /* ARMv6+ */ | |
1423 | emit(ARM_UXTH(rt, rt), ctx); | |
1424 | #endif | |
1425 | emit(ARM_EOR_R(rd, rd, rd), ctx); | |
4560cdff | 1426 | break; |
39c13c20 SB |
1427 | case 32: |
1428 | /* zero-extend 32 bits into 64 bits */ | |
1429 | emit(ARM_EOR_R(rd, rd, rd), ctx); | |
ddecdfce | 1430 | break; |
39c13c20 SB |
1431 | case 64: |
1432 | /* nop */ | |
ddecdfce | 1433 | break; |
39c13c20 SB |
1434 | } |
1435 | exit: | |
1436 | if (dstk) { | |
1437 | emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); | |
1438 | emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); | |
1439 | } | |
1440 | break; | |
1441 | /* dst = imm64 */ | |
1442 | case BPF_LD | BPF_IMM | BPF_DW: | |
1443 | { | |
1444 | const struct bpf_insn insn1 = insn[1]; | |
1445 | u32 hi, lo = imm; | |
1446 | ||
1447 | hi = insn1.imm; | |
1448 | emit_a32_mov_i(dst_lo, lo, dstk, ctx); | |
1449 | emit_a32_mov_i(dst_hi, hi, dstk, ctx); | |
1450 | ||
1451 | return 1; | |
1452 | } | |
1453 | /* LDX: dst = *(size *)(src + off) */ | |
1454 | case BPF_LDX | BPF_MEM | BPF_W: | |
1455 | case BPF_LDX | BPF_MEM | BPF_H: | |
1456 | case BPF_LDX | BPF_MEM | BPF_B: | |
1457 | case BPF_LDX | BPF_MEM | BPF_DW: | |
1458 | rn = sstk ? tmp2[1] : src_lo; | |
1459 | if (sstk) | |
1460 | emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); | |
ec19e02b | 1461 | emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); |
39c13c20 SB |
1462 | break; |
1463 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ | |
1464 | case BPF_LD | BPF_ABS | BPF_W: | |
1465 | case BPF_LD | BPF_ABS | BPF_H: | |
1466 | case BPF_LD | BPF_ABS | BPF_B: | |
1467 | /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ | |
1468 | case BPF_LD | BPF_IND | BPF_W: | |
1469 | case BPF_LD | BPF_IND | BPF_H: | |
1470 | case BPF_LD | BPF_IND | BPF_B: | |
1471 | { | |
1472 | const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */ | |
1473 | const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/ | |
1474 | /* rtn value */ | |
1475 | const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */ | |
1476 | const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */ | |
1477 | const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */ | |
1478 | const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */ | |
1479 | int size; | |
1480 | ||
1481 | /* Setting up first argument */ | |
1482 | emit(ARM_MOV_R(r0, r4), ctx); | |
1483 | ||
1484 | /* Setting up second argument */ | |
1485 | emit_a32_mov_i(r1, imm, false, ctx); | |
1486 | if (BPF_MODE(code) == BPF_IND) | |
1487 | emit_a32_alu_r(r1, src_lo, false, sstk, ctx, | |
1488 | false, false, BPF_ADD); | |
1489 | ||
1490 | /* Setting up third argument */ | |
1491 | switch (BPF_SIZE(code)) { | |
1492 | case BPF_W: | |
1493 | size = 4; | |
3cbe2041 | 1494 | break; |
39c13c20 SB |
1495 | case BPF_H: |
1496 | size = 2; | |
ddecdfce | 1497 | break; |
39c13c20 SB |
1498 | case BPF_B: |
1499 | size = 1; | |
ddecdfce | 1500 | break; |
39c13c20 SB |
1501 | default: |
1502 | return -EINVAL; | |
1503 | } | |
1504 | emit_a32_mov_i(r2, size, false, ctx); | |
1505 | ||
1506 | /* Setting up fourth argument */ | |
1507 | emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx); | |
1508 | ||
1509 | /* Setting up function pointer to call */ | |
1510 | emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx); | |
1511 | emit_blx_r(r6, ctx); | |
1512 | ||
1513 | emit(ARM_EOR_R(r1, r1, r1), ctx); | |
1514 | /* Check if return address is NULL or not. | |
1515 | * if NULL then jump to epilogue | |
1516 | * else continue to load the value from retn address | |
1517 | */ | |
1518 | emit(ARM_CMP_I(r0, 0), ctx); | |
1519 | jmp_offset = epilogue_offset(ctx); | |
1520 | check_imm24(jmp_offset); | |
1521 | _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); | |
1522 | ||
1523 | /* Load value from the address */ | |
1524 | switch (BPF_SIZE(code)) { | |
1525 | case BPF_W: | |
1526 | emit(ARM_LDR_I(r0, r0, 0), ctx); | |
1527 | emit_rev32(r0, r0, ctx); | |
ddecdfce | 1528 | break; |
39c13c20 SB |
1529 | case BPF_H: |
1530 | emit(ARM_LDRH_I(r0, r0, 0), ctx); | |
1531 | emit_rev16(r0, r0, ctx); | |
ddecdfce | 1532 | break; |
39c13c20 SB |
1533 | case BPF_B: |
1534 | emit(ARM_LDRB_I(r0, r0, 0), ctx); | |
1535 | /* No need to reverse */ | |
ddecdfce | 1536 | break; |
39c13c20 SB |
1537 | } |
1538 | break; | |
1539 | } | |
1540 | /* ST: *(size *)(dst + off) = imm */ | |
1541 | case BPF_ST | BPF_MEM | BPF_W: | |
1542 | case BPF_ST | BPF_MEM | BPF_H: | |
1543 | case BPF_ST | BPF_MEM | BPF_B: | |
1544 | case BPF_ST | BPF_MEM | BPF_DW: | |
1545 | switch (BPF_SIZE(code)) { | |
1546 | case BPF_DW: | |
1547 | /* Sign-extend immediate value into temp reg */ | |
1548 | emit_a32_mov_i64(true, tmp2, imm, false, ctx); | |
1549 | emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W); | |
1550 | emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W); | |
ddecdfce | 1551 | break; |
39c13c20 SB |
1552 | case BPF_W: |
1553 | case BPF_H: | |
1554 | case BPF_B: | |
1555 | emit_a32_mov_i(tmp2[1], imm, false, ctx); | |
1556 | emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, | |
1557 | BPF_SIZE(code)); | |
ddecdfce | 1558 | break; |
39c13c20 SB |
1559 | } |
1560 | break; | |
1561 | /* STX XADD: lock *(u32 *)(dst + off) += src */ | |
1562 | case BPF_STX | BPF_XADD | BPF_W: | |
1563 | /* STX XADD: lock *(u64 *)(dst + off) += src */ | |
1564 | case BPF_STX | BPF_XADD | BPF_DW: | |
1565 | goto notyet; | |
1566 | /* STX: *(size *)(dst + off) = src */ | |
1567 | case BPF_STX | BPF_MEM | BPF_W: | |
1568 | case BPF_STX | BPF_MEM | BPF_H: | |
1569 | case BPF_STX | BPF_MEM | BPF_B: | |
1570 | case BPF_STX | BPF_MEM | BPF_DW: | |
1571 | { | |
1572 | u8 sz = BPF_SIZE(code); | |
1573 | ||
1574 | rn = sstk ? tmp2[1] : src_lo; | |
1575 | rm = sstk ? tmp2[0] : src_hi; | |
1576 | if (sstk) { | |
1577 | emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); | |
1578 | emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); | |
1579 | } | |
1580 | ||
1581 | /* Store the value */ | |
1582 | if (BPF_SIZE(code) == BPF_DW) { | |
1583 | emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W); | |
1584 | emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W); | |
1585 | } else { | |
1586 | emit_str_r(dst_lo, rn, dstk, off, ctx, sz); | |
1587 | } | |
1588 | break; | |
1589 | } | |
1590 | /* PC += off if dst == src */ | |
1591 | /* PC += off if dst > src */ | |
1592 | /* PC += off if dst >= src */ | |
1593 | /* PC += off if dst < src */ | |
1594 | /* PC += off if dst <= src */ | |
1595 | /* PC += off if dst != src */ | |
1596 | /* PC += off if dst > src (signed) */ | |
1597 | /* PC += off if dst >= src (signed) */ | |
1598 | /* PC += off if dst < src (signed) */ | |
1599 | /* PC += off if dst <= src (signed) */ | |
1600 | /* PC += off if dst & src */ | |
1601 | case BPF_JMP | BPF_JEQ | BPF_X: | |
1602 | case BPF_JMP | BPF_JGT | BPF_X: | |
1603 | case BPF_JMP | BPF_JGE | BPF_X: | |
1604 | case BPF_JMP | BPF_JNE | BPF_X: | |
1605 | case BPF_JMP | BPF_JSGT | BPF_X: | |
1606 | case BPF_JMP | BPF_JSGE | BPF_X: | |
1607 | case BPF_JMP | BPF_JSET | BPF_X: | |
1608 | case BPF_JMP | BPF_JLE | BPF_X: | |
1609 | case BPF_JMP | BPF_JLT | BPF_X: | |
1610 | case BPF_JMP | BPF_JSLT | BPF_X: | |
1611 | case BPF_JMP | BPF_JSLE | BPF_X: | |
1612 | /* Setup source registers */ | |
1613 | rm = sstk ? tmp2[0] : src_hi; | |
1614 | rn = sstk ? tmp2[1] : src_lo; | |
1615 | if (sstk) { | |
1616 | emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); | |
1617 | emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); | |
1618 | } | |
1619 | goto go_jmp; | |
1620 | /* PC += off if dst == imm */ | |
1621 | /* PC += off if dst > imm */ | |
1622 | /* PC += off if dst >= imm */ | |
1623 | /* PC += off if dst < imm */ | |
1624 | /* PC += off if dst <= imm */ | |
1625 | /* PC += off if dst != imm */ | |
1626 | /* PC += off if dst > imm (signed) */ | |
1627 | /* PC += off if dst >= imm (signed) */ | |
1628 | /* PC += off if dst < imm (signed) */ | |
1629 | /* PC += off if dst <= imm (signed) */ | |
1630 | /* PC += off if dst & imm */ | |
1631 | case BPF_JMP | BPF_JEQ | BPF_K: | |
1632 | case BPF_JMP | BPF_JGT | BPF_K: | |
1633 | case BPF_JMP | BPF_JGE | BPF_K: | |
1634 | case BPF_JMP | BPF_JNE | BPF_K: | |
1635 | case BPF_JMP | BPF_JSGT | BPF_K: | |
1636 | case BPF_JMP | BPF_JSGE | BPF_K: | |
1637 | case BPF_JMP | BPF_JSET | BPF_K: | |
1638 | case BPF_JMP | BPF_JLT | BPF_K: | |
1639 | case BPF_JMP | BPF_JLE | BPF_K: | |
1640 | case BPF_JMP | BPF_JSLT | BPF_K: | |
1641 | case BPF_JMP | BPF_JSLE | BPF_K: | |
1642 | if (off == 0) | |
ddecdfce | 1643 | break; |
39c13c20 SB |
1644 | rm = tmp2[0]; |
1645 | rn = tmp2[1]; | |
1646 | /* Sign-extend immediate value */ | |
1647 | emit_a32_mov_i64(true, tmp2, imm, false, ctx); | |
1648 | go_jmp: | |
1649 | /* Setup destination register */ | |
1650 | rd = dstk ? tmp[0] : dst_hi; | |
1651 | rt = dstk ? tmp[1] : dst_lo; | |
1652 | if (dstk) { | |
1653 | emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); | |
1654 | emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); | |
1655 | } | |
1656 | ||
1657 | /* Check for the condition */ | |
1658 | emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code)); | |
1659 | ||
1660 | /* Setup JUMP instruction */ | |
1661 | jmp_offset = bpf2a32_offset(i+off, i, ctx); | |
1662 | switch (BPF_OP(code)) { | |
1663 | case BPF_JNE: | |
1664 | case BPF_JSET: | |
1665 | _emit(ARM_COND_NE, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1666 | break; |
39c13c20 SB |
1667 | case BPF_JEQ: |
1668 | _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1669 | break; |
39c13c20 SB |
1670 | case BPF_JGT: |
1671 | _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1672 | break; |
39c13c20 SB |
1673 | case BPF_JGE: |
1674 | _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1675 | break; |
39c13c20 SB |
1676 | case BPF_JSGT: |
1677 | _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1678 | break; |
39c13c20 SB |
1679 | case BPF_JSGE: |
1680 | _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1681 | break; |
39c13c20 SB |
1682 | case BPF_JLE: |
1683 | _emit(ARM_COND_LS, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1684 | break; |
39c13c20 SB |
1685 | case BPF_JLT: |
1686 | _emit(ARM_COND_CC, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1687 | break; |
39c13c20 SB |
1688 | case BPF_JSLT: |
1689 | _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); | |
ddecdfce | 1690 | break; |
39c13c20 SB |
1691 | case BPF_JSLE: |
1692 | _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); | |
bf0098f2 | 1693 | break; |
39c13c20 SB |
1694 | } |
1695 | break; | |
1696 | /* JMP OFF */ | |
1697 | case BPF_JMP | BPF_JA: | |
1698 | { | |
1699 | if (off == 0) | |
1447f93f | 1700 | break; |
39c13c20 SB |
1701 | jmp_offset = bpf2a32_offset(i+off, i, ctx); |
1702 | check_imm24(jmp_offset); | |
1703 | emit(ARM_B(jmp_offset), ctx); | |
1704 | break; | |
1705 | } | |
1706 | /* tail call */ | |
1707 | case BPF_JMP | BPF_TAIL_CALL: | |
1708 | if (emit_bpf_tail_call(ctx)) | |
1709 | return -EFAULT; | |
1710 | break; | |
1711 | /* function call */ | |
1712 | case BPF_JMP | BPF_CALL: | |
1713 | { | |
1714 | const u8 *r0 = bpf2a32[BPF_REG_0]; | |
1715 | const u8 *r1 = bpf2a32[BPF_REG_1]; | |
1716 | const u8 *r2 = bpf2a32[BPF_REG_2]; | |
1717 | const u8 *r3 = bpf2a32[BPF_REG_3]; | |
1718 | const u8 *r4 = bpf2a32[BPF_REG_4]; | |
1719 | const u8 *r5 = bpf2a32[BPF_REG_5]; | |
1720 | const u32 func = (u32)__bpf_call_base + (u32)imm; | |
1721 | ||
1722 | emit_a32_mov_r64(true, r0, r1, false, false, ctx); | |
1723 | emit_a32_mov_r64(true, r1, r2, false, true, ctx); | |
1724 | emit_push_r64(r5, 0, ctx); | |
1725 | emit_push_r64(r4, 8, ctx); | |
1726 | emit_push_r64(r3, 16, ctx); | |
1727 | ||
1728 | emit_a32_mov_i(tmp[1], func, false, ctx); | |
1729 | emit_blx_r(tmp[1], ctx); | |
1730 | ||
1731 | emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean | |
1732 | break; | |
1733 | } | |
1734 | /* function return */ | |
1735 | case BPF_JMP | BPF_EXIT: | |
1736 | /* Optimization: when last instruction is EXIT | |
1737 | * simply fallthrough to epilogue. | |
1738 | */ | |
1739 | if (i == ctx->prog->len - 1) | |
ddecdfce | 1740 | break; |
39c13c20 SB |
1741 | jmp_offset = epilogue_offset(ctx); |
1742 | check_imm24(jmp_offset); | |
1743 | emit(ARM_B(jmp_offset), ctx); | |
1744 | break; | |
1745 | notyet: | |
1746 | pr_info_once("*** NOT YET: opcode %02x ***\n", code); | |
1747 | return -EFAULT; | |
1748 | default: | |
1749 | pr_err_once("unknown opcode %02x\n", code); | |
1750 | return -EINVAL; | |
1751 | } | |
303249ab | 1752 | |
39c13c20 SB |
1753 | if (ctx->flags & FLAG_IMM_OVERFLOW) |
1754 | /* | |
1755 | * this instruction generated an overflow when | |
1756 | * trying to access the literal pool, so | |
1757 | * delegate this filter to the kernel interpreter. | |
1758 | */ | |
1759 | return -1; | |
1760 | return 0; | |
1761 | } | |
1762 | ||
1763 | static int build_body(struct jit_ctx *ctx) | |
1764 | { | |
1765 | const struct bpf_prog *prog = ctx->prog; | |
1766 | unsigned int i; | |
1767 | ||
1768 | for (i = 0; i < prog->len; i++) { | |
1769 | const struct bpf_insn *insn = &(prog->insnsi[i]); | |
1770 | int ret; | |
1771 | ||
1772 | ret = build_insn(insn, ctx); | |
1773 | ||
1774 | /* It's used with loading the 64 bit immediate value. */ | |
1775 | if (ret > 0) { | |
1776 | i++; | |
1777 | if (ctx->target == NULL) | |
1778 | ctx->offsets[i] = ctx->idx; | |
1779 | continue; | |
ddecdfce | 1780 | } |
0b59d880 | 1781 | |
39c13c20 SB |
1782 | if (ctx->target == NULL) |
1783 | ctx->offsets[i] = ctx->idx; | |
1784 | ||
1785 | /* If unsuccesfull, return with error code */ | |
1786 | if (ret) | |
1787 | return ret; | |
ddecdfce | 1788 | } |
39c13c20 SB |
1789 | return 0; |
1790 | } | |
ddecdfce | 1791 | |
39c13c20 SB |
1792 | static int validate_code(struct jit_ctx *ctx) |
1793 | { | |
1794 | int i; | |
1795 | ||
1796 | for (i = 0; i < ctx->idx; i++) { | |
1797 | if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF)) | |
1798 | return -1; | |
1799 | } | |
ddecdfce MG |
1800 | |
1801 | return 0; | |
1802 | } | |
1803 | ||
39c13c20 SB |
/*
 * bpf_jit_compile() - classic BPF JIT entry point (unused stub).
 *
 * This architecture only JITs eBPF (internal BPF) programs, handled by
 * bpf_int_jit_compile(); classic filters are converted to eBPF by the
 * core before reaching the JIT, so nothing is required here.
 */
void bpf_jit_compile(struct bpf_prog *prog)
{
}
ddecdfce | 1808 | |
/*
 * bpf_int_jit_compile() - JIT-compile an eBPF program for 32-bit ARM.
 *
 * Two-pass compiler driver: the first pass (ctx.target == NULL) measures
 * the image and fills ctx.offsets[]; the second pass emits into the
 * executable buffer.  On any failure the original program is returned so
 * the caller falls back to the interpreter.  Resource unwinding follows
 * the kernel goto-cleanup idiom (out_imms / out_off / out).
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	bool tmp_blinded = false;
	struct jit_ctx ctx;
	unsigned int tmp_idx;
	unsigned int image_size;
	u8 *image_ptr;

	/* If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!bpf_jit_enable)
		return orig_prog;

	/* If constant blinding was enabled and we failed during blinding
	 * then we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	tmp = bpf_jit_blind_constants(prog);

	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		/* Remember to release the blinded clone on exit. */
		tmp_blinded = true;
		prog = tmp;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	/* Not able to allocate memory for offsets[] , then
	 * we must fall back to the interpreter
	 */
	ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
	if (ctx.offsets == NULL) {
		prog = orig_prog;
		goto out;
	}

	/* 1) fake pass to find in the length of the JITed code,
	 * to compute ctx->offsets and other context variables
	 * needed to compute final JITed code.
	 * Also, calculate random starting pointer/start of JITed code
	 * which is prefixed by random number of fault instructions.
	 *
	 * If the first pass fails then there is no chance of it
	 * being successful in the second pass, so just fall back
	 * to the interpreter.
	 */
	if (build_body(&ctx)) {
		prog = orig_prog;
		goto out_off;
	}

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;

	ctx.epilogue_offset = ctx.idx;

#if __LINUX_ARM_ARCH__ < 7
	tmp_idx = ctx.idx;
	build_epilogue(&ctx);
	ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4;

	/* Pre-v7 cores load large immediates from a literal pool appended
	 * after the code; reserve one word per collected immediate.
	 */
	ctx.idx += ctx.imm_count;
	if (ctx.imm_count) {
		ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
		if (ctx.imms == NULL) {
			prog = orig_prog;
			goto out_off;
		}
	}
#else
	/* there's nothing about the epilogue on ARMv7 */
	build_epilogue(&ctx);
#endif
	/* Now we can get the actual image size of the JITed arm code.
	 * Currently, we are not considering the THUMB-2 instructions
	 * for jit, although it can decrease the size of the image.
	 *
	 * As each arm instruction is of length 32bit, we are translating
	 * number of JITed intructions into the size required to store these
	 * JITed code.
	 */
	image_size = sizeof(u32) * ctx.idx;

	/* Now we know the size of the structure to make */
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	/* Not able to allocate memory for the structure then
	 * we must fall back to the interpretation
	 */
	if (header == NULL) {
		prog = orig_prog;
		goto out_imms;
	}

	/* 2.) Actual pass to generate final JIT code */
	ctx.target = (u32 *) image_ptr;
	ctx.idx = 0;

	build_prologue(&ctx);

	/* If building the body of the JITed code fails somehow,
	 * we fall back to the interpretation.
	 */
	if (build_body(&ctx) < 0) {
		image_ptr = NULL;
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_imms;
	}
	build_epilogue(&ctx);

	/* 3.) Extra pass to validate JITed Code */
	if (validate_code(&ctx)) {
		image_ptr = NULL;
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_imms;
	}
	/* Make the emitted instructions visible to the instruction stream. */
	flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));

	if (bpf_jit_enable > 1)
		/* there are 2 passes here */
		bpf_jit_dump(prog->len, image_size, 2, ctx.target);

	/* Image is final: mark it read-only and publish it to the prog. */
	bpf_jit_binary_lock_ro(header);
	prog->bpf_func = (void *)ctx.target;
	prog->jited = 1;
	prog->jited_len = image_size;

out_imms:
#if __LINUX_ARM_ARCH__ < 7
	if (ctx.imm_count)
		kfree(ctx.imms);
#endif
out_off:
	kfree(ctx.offsets);
out:
	/* If we blinded, release whichever program we are not returning. */
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}
1957 |