/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <asm/unaligned.h>
/* Registers */
#define BPF_R0	regs[BPF_REG_0]
#define BPF_R1	regs[BPF_REG_1]
#define BPF_R2	regs[BPF_REG_2]
#define BPF_R3	regs[BPF_REG_3]
#define BPF_R4	regs[BPF_REG_4]
#define BPF_R5	regs[BPF_REG_5]
#define BPF_R6	regs[BPF_REG_6]
#define BPF_R7	regs[BPF_REG_7]
#define BPF_R8	regs[BPF_REG_8]
#define BPF_R9	regs[BPF_REG_9]
#define BPF_R10	regs[BPF_REG_10]
/* Named registers */
#define DST	regs[insn->dst_reg]
#define SRC	regs[insn->src_reg]
#define FP	regs[BPF_REG_FP]
#define ARG1	regs[BPF_REG_ARG1]
#define CTX	regs[BPF_REG_CTX]
#define IMM	insn->imm
/* No hurry in this branch.
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
		return ptr;

	return NULL;
}
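/* Example (sketch): classic BPF reaches this helper through its special
 * negative offsets, e.g. a filter instruction such as
 *
 *	BPF_STMT(BPF_LD | BPF_H | BPF_ABS, SKF_LL_OFF + 12)
 *
 * loads the 16-bit EtherType from the MAC header no matter where
 * skb->data currently points.
 */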
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
			  gfp_extra_flags;
	struct bpf_work_struct *ws;
	struct bpf_prog *fp;

	size = round_up(size, PAGE_SIZE);
	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
	if (fp == NULL)
		return NULL;

	ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags);
	if (ws == NULL) {
		vfree(fp);
		return NULL;
	}

	fp->pages = size / PAGE_SIZE;
	fp->work = ws;

	return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_alloc);
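/* Usage sketch, assuming the caller has `len` instructions in `insns`
 * (bpf_prog_size() converts an instruction count to a byte size):
 *
 *	struct bpf_prog *fp = bpf_prog_alloc(bpf_prog_size(len), 0);
 *
 *	if (fp == NULL)
 *		return -ENOMEM;
 *	fp->len = len;
 *	memcpy(fp->insnsi, insns, len * sizeof(fp->insnsi[0]));
 */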
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
				  gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
			  gfp_extra_flags;
	struct bpf_prog *fp;

	BUG_ON(fp_old == NULL);

	size = round_up(size, PAGE_SIZE);
	if (size <= fp_old->pages * PAGE_SIZE)
		return fp_old;

	fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
	if (fp != NULL) {
		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
		fp->pages = size / PAGE_SIZE;

		/* We keep fp->work from fp_old around in the new
		 * reallocated structure.
		 */
		fp_old->work = NULL;
		__bpf_prog_free(fp_old);
	}

	return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_realloc);
void __bpf_prog_free(struct bpf_prog *fp)
{
	kfree(fp->work);
	vfree(fp);
}
EXPORT_SYMBOL_GPL(__bpf_prog_free);
/* Base function for offset calculation. Needs to go into .text section,
 * therefore keeping it non-static as well; will also be used by JITs
 * anyway later on, so do not let the compiler omit it.
 */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	return 0;
}
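/* Sketch of the other half of this trick: at fixup time a call
 * instruction's imm is rewritten to a helper's distance from this base,
 * roughly
 *
 *	insn->imm = helper_fn - __bpf_call_base;
 *
 * (helper_fn being whatever function gets patched in), which the
 * interpreter's JMP_CALL handler below undoes by computing
 * __bpf_call_base + insn->imm.
 */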
/**
 *	__bpf_prog_run - run eBPF program on a given context
 *	@ctx: is the data we are operating on
 *	@insn: is the array of eBPF instructions
 *
 * Decode and execute eBPF instructions.
 */
static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
{
	u64 stack[MAX_BPF_STACK / sizeof(u64)];
	u64 regs[MAX_BPF_REG], tmp;
	static const void *jumptable[256] = {
		[0 ... 255] = &&default_label,
		/* Now overwrite non-defaults ... */
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
		[BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
		[BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
		[BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
		[BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
		[BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
		[BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
		[BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
		[BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
		[BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
		[BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
		[BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
		[BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
		[BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
		[BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
		[BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
		[BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
		[BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
		[BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
		[BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
		[BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
		[BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
		[BPF_ALU | BPF_NEG] = &&ALU_NEG,
		[BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
		[BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
		/* 64 bit ALU operations */
		[BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
		[BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
		[BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
		[BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
		[BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
		[BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
		[BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
		[BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
		[BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
		[BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
		[BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
		[BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
		[BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
		[BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
		[BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
		[BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
		[BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
		[BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
		[BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
		[BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
		[BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
		[BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
		[BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
		[BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
		[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
		/* Call instruction */
		[BPF_JMP | BPF_CALL] = &&JMP_CALL,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = &&JMP_JA,
		[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
		[BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
		[BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
		[BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
		[BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
		[BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
		[BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
		[BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
		[BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
		[BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
		[BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
		[BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
		[BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
		[BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
		/* Program return */
		[BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
		/* Store instructions */
		[BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
		[BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
		[BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
		[BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
		[BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
		[BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
		[BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
		[BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
		[BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
		[BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
		/* Load instructions */
		[BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
		[BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
		[BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
		[BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
		[BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
		[BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
		[BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
		[BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
		[BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
		[BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
	};
	void *ptr;
	int off;
#define CONT	 ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })
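/* Every handler below ends in CONT, which advances to the next
 * instruction and dispatches through the jumptable again: a computed
 * goto per opcode rather than a switch in a loop. CONT_JMP is identical
 * here but kept distinct for the branch handlers.
 */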
	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
	ARG1 = (u64) (unsigned long) ctx;

	/* Registers used in classic BPF programs need to be reset first. */
	regs[BPF_REG_A] = 0;
	regs[BPF_REG_X] = 0;

select_insn:
	goto *jumptable[insn->code];
	/* ALU */
#define ALU(OPCODE, OP)			\
	ALU64_##OPCODE##_X:		\
		DST = DST OP SRC;	\
		CONT;			\
	ALU_##OPCODE##_X:		\
		DST = (u32) DST OP (u32) SRC;	\
		CONT;			\
	ALU64_##OPCODE##_K:		\
		DST = DST OP IMM;	\
		CONT;			\
	ALU_##OPCODE##_K:		\
		DST = (u32) DST OP (u32) IMM;	\
		CONT;

	ALU(ADD,  +)
	ALU(SUB,  -)
	ALU(AND,  &)
	ALU(OR,   |)
	ALU(LSH, <<)
	ALU(RSH, >>)
	ALU(XOR,  ^)
	ALU(MUL,  *)
#undef ALU
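	/* Each ALU(OPCODE, OP) expansion above emits the four handlers
	 * the jumptable refers to, e.g. ALU(ADD, +) yields ALU64_ADD_X,
	 * ALU_ADD_X, ALU64_ADD_K and ALU_ADD_K, with the 32-bit variants
	 * truncating both operands via the (u32) casts.
	 */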
	ALU64_ARSH_X:
		(*(s64 *) &DST) >>= SRC;
		CONT;
	ALU64_ARSH_K:
		(*(s64 *) &DST) >>= IMM;
		CONT;
	ALU64_MOD_X:
		if (unlikely(SRC == 0))
			return 0;
		tmp = DST;
		DST = do_div(tmp, SRC);
		CONT;
	ALU_MOD_X:
		if (unlikely(SRC == 0))
			return 0;
		tmp = (u32) DST;
		DST = do_div(tmp, (u32) SRC);
		CONT;
	ALU64_MOD_K:
		tmp = DST;
		DST = do_div(tmp, IMM);
		CONT;
	ALU_MOD_K:
		tmp = (u32) DST;
		DST = do_div(tmp, (u32) IMM);
		CONT;
	ALU64_DIV_X:
		if (unlikely(SRC == 0))
			return 0;
		do_div(DST, SRC);
		CONT;
	ALU_DIV_X:
		if (unlikely(SRC == 0))
			return 0;
		tmp = (u32) DST;
		do_div(tmp, (u32) SRC);
		DST = (u32) tmp;
		CONT;
	ALU64_DIV_K:
		do_div(DST, IMM);
		CONT;
	ALU_DIV_K:
		tmp = (u32) DST;
		do_div(tmp, (u32) IMM);
		DST = (u32) tmp;
		CONT;
	ALU_END_TO_BE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_be16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_be32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_be64(DST);
			break;
		}
		CONT;
	ALU_END_TO_LE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_le16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_le32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_le64(DST);
			break;
		}
		CONT;
	JMP_CALL:
		/* Function call scratches BPF_R1-BPF_R5 registers,
		 * preserves BPF_R6-BPF_R9, and stores return value
		 * into BPF_R0.
		 */
		BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
						       BPF_R4, BPF_R5);
		CONT;
	JMP_JSGT_X:
		if (((s64) DST) > ((s64) SRC)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGT_K:
		if (((s64) DST) > ((s64) IMM)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGE_X:
		if (((s64) DST) >= ((s64) SRC)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGE_K:
		if (((s64) DST) >= ((s64) IMM)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	/* STX and ST and LDX */
#define LDST(SIZEOP, SIZE)						\
	STX_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
		CONT;							\
	ST_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = IMM;	\
		CONT;							\
	LDX_MEM_##SIZEOP:						\
		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
		CONT;

	LDST(B,   u8)
	LDST(H,  u16)
	LDST(W,  u32)
	LDST(DW, u64)
#undef LDST
	STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
		atomic_add((u32) SRC, (atomic_t *)(unsigned long)
			   (DST + insn->off));
		CONT;
	STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
		atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
			     (DST + insn->off));
		CONT;
: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
506 /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
507 * only appearing in the programs where ctx ==
508 * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
509 * == BPF_R6, bpf_convert_filter() saves it in BPF_R6,
510 * internal BPF verifier will check that BPF_R6 ==
513 * BPF_ABS and BPF_IND are wrappers of function calls,
514 * so they scratch BPF_R1-BPF_R5 registers, preserve
515 * BPF_R6-BPF_R9, and store return value into BPF_R0.
518 * ctx == skb == BPF_R6 == CTX
521 * SRC == any register
522 * IMM == 32-bit immediate
525 * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
528 ptr
= bpf_load_pointer((struct sk_buff
*) (unsigned long) CTX
, off
, 4, &tmp
);
529 if (likely(ptr
!= NULL
)) {
530 BPF_R0
= get_unaligned_be32(ptr
);
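	/* Example (sketch): on an Ethernet device, a classic socket
	 * filter that inspects the IPv4 protocol field is translated to
	 * something like
	 *
	 *	BPF_LD | BPF_ABS | BPF_B,
	 *	imm32 = ETH_HLEN + offsetof(struct iphdr, protocol)
	 *
	 * which lands at LD_ABS_B below and leaves the byte in BPF_R0.
	 */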
	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
		off = IMM;
load_half:
		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = get_unaligned_be16(ptr);
			CONT;
		}

		return 0;
	LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
		off = IMM;
load_byte:
		ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = *(u8 *)ptr;
			CONT;
		}

		return 0;
	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
		off = IMM + SRC;
		goto load_word;
	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
		off = IMM + SRC;
		goto load_half;
	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
		off = IMM + SRC;
		goto load_byte;
	default_label:
		/* If we ever reach this, we have a bug somewhere. */
		WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
		return 0;
}
void __weak bpf_int_jit_compile(struct bpf_prog *prog)
{
}
/**
 * bpf_prog_select_runtime - select execution runtime for BPF program
 * @fp: bpf_prog populated with internal BPF program
 *
 * Try to JIT the internal BPF program; if no JIT is available, fall back
 * to the interpreter. The program will be executed via the BPF_PROG_RUN()
 * macro.
 */
void bpf_prog_select_runtime(struct bpf_prog *fp)
{
	fp->bpf_func = (void *) __bpf_prog_run;

	/* Probe if internal BPF can be JITed */
	bpf_int_jit_compile(fp);
	/* Lock whole bpf_prog as read-only */
	bpf_prog_lock_ro(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
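/* Usage sketch of the whole lifecycle, assuming `insns`/`len` hold a
 * valid eBPF program and `ctx` is the context the program expects:
 *
 *	fp = bpf_prog_alloc(bpf_prog_size(len), 0);
 *	fp->len = len;
 *	memcpy(fp->insnsi, insns, len * sizeof(fp->insnsi[0]));
 *	bpf_prog_select_runtime(fp);
 *	ret = BPF_PROG_RUN(fp, ctx);
 *	bpf_prog_free(fp);
 */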
static void bpf_prog_free_deferred(struct work_struct *work)
{
	struct bpf_work_struct *ws;

	ws = container_of(work, struct bpf_work_struct, work);
	bpf_jit_free(ws->prog);
}
/* Free internal BPF program */
void bpf_prog_free(struct bpf_prog *fp)
{
	struct bpf_work_struct *ws = fp->work;

	INIT_WORK(&ws->work, bpf_prog_free_deferred);
	ws->prog = fp;
	schedule_work(&ws->work);
}
EXPORT_SYMBOL_GPL(bpf_prog_free);
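/* Freeing is punted to a workqueue because callers may drop their last
 * reference to a program from atomic context, while bpf_jit_free() must
 * be able to sleep when returning a JIT image's memory.
 */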