/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/sched/signal.h>

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
 * types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 */

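/* For instance (illustrative example added for clarity, not from the original
 * comment), a NULL check after a lookup:
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),  // R0 == NULL -> skip the store
 *    BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),   // ok: R0 is PTR_TO_MAP_VALUE here
 *    BPF_EXIT_INSN(),
 * In the fall-through (non-NULL) branch R0 has type PTR_TO_MAP_VALUE, so the
 * store is accepted; without the preceding NULL check it would be rejected.
 */
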
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
};

#define BPF_COMPLEXITY_LIMIT_INSNS	131072
#define BPF_COMPLEXITY_LIMIT_STACK	1024
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
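/* The low bit of map_state carries the "unpriv" flag; this relies on struct
 * bpf_map allocations being at least word-aligned, so bit 0 of the pointer is
 * always zero.  E.g. (illustrative) map_state = (unsigned long)map | 1UL
 * still yields 'map' once BPF_MAP_PTR() masks BPF_MAP_PTR_UNPRIV back off.
 */
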
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_state = (unsigned long)map |
			 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	int regno;
	int access_size;
};

static DEFINE_MUTEX(bpf_verifier_lock);

/* log_level controls verbosity level of eBPF verifier.
 * verbose() is used to dump the verification trace to the log, so the user
 * can figure out what's wrong with the program
 */
static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
				   const char *fmt, ...)
{
	struct bpf_verifer_log *log = &env->log;
	unsigned int n;
	va_list args;

	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
		return;

	va_start(args, fmt);
	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
	va_end(args);

	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
		  "verifier log line truncated - local buffer too short\n");

	n = min(log->len_total - log->len_used - 1, n);
	log->kbuf[n] = '\0';

	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
		log->len_used += n;
	else
		log->ubuf = NULL;
}

static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_PACKET ||
	       type == PTR_TO_PACKET_META;
}

/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
	[NOT_INIT]		= "?",
	[SCALAR_VALUE]		= "inv",
	[PTR_TO_CTX]		= "ctx",
	[CONST_PTR_TO_MAP]	= "map_ptr",
	[PTR_TO_MAP_VALUE]	= "map_value",
	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
	[PTR_TO_STACK]		= "fp",
	[PTR_TO_PACKET]		= "pkt",
	[PTR_TO_PACKET_META]	= "pkt_meta",
	[PTR_TO_PACKET_END]	= "pkt_end",
};

static void print_verifier_state(struct bpf_verifier_env *env,
				 struct bpf_verifier_state *state)
{
	struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		verbose(env, " R%d=%s", i, reg_type_str[t]);
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* reg->off should be 0 for SCALAR_VALUE */
			verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			verbose(env, "(id=%d", reg->id);
			if (t != SCALAR_VALUE)
				verbose(env, ",off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose(env, ",r=%d", reg->range);
			else if (t == CONST_PTR_TO_MAP ||
				 t == PTR_TO_MAP_VALUE ||
				 t == PTR_TO_MAP_VALUE_OR_NULL)
				verbose(env, ",ks=%d,vs=%d",
					reg->map_ptr->key_size,
					reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose(env, ",imm=%llx", reg->var_off.value);
			} else {
				if (reg->smin_value != reg->umin_value &&
				    reg->smin_value != S64_MIN)
					verbose(env, ",smin_value=%lld",
						(long long)reg->smin_value);
				if (reg->smax_value != reg->umax_value &&
				    reg->smax_value != S64_MAX)
					verbose(env, ",smax_value=%lld",
						(long long)reg->smax_value);
				if (reg->umin_value != 0)
					verbose(env, ",umin_value=%llu",
						(unsigned long long)reg->umin_value);
				if (reg->umax_value != U64_MAX)
					verbose(env, ",umax_value=%llu",
						(unsigned long long)reg->umax_value);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose(env, ",var_off=%s", tn_buf);
				}
			}
			verbose(env, ")");
		}
	}
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].slot_type[0] == STACK_SPILL)
			verbose(env, " fp%d=%s",
				(-i - 1) * BPF_REG_SIZE,
				reg_type_str[state->stack[i].spilled_ptr.type]);
	}
	verbose(env, "\n");
}

static int copy_stack_state(struct bpf_verifier_state *dst,
			    const struct bpf_verifier_state *src)
{
	if (!src->stack)
		return 0;
	if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
		/* internal bug, make state invalid to reject the program */
		memset(dst, 0, sizeof(*dst));
		return -EFAULT;
	}
	memcpy(dst->stack, src->stack,
	       sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
	return 0;
}

/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 * make it consume minimal amount of memory. check_stack_write() access from
 * the program calls into realloc_verifier_state() to grow the stack size.
 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 * which this function copies over. It points to previous bpf_verifier_state
 * which is never reallocated
 */
static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
				  bool copy_old)
{
	u32 old_size = state->allocated_stack;
	struct bpf_stack_state *new_stack;
	int slot = size / BPF_REG_SIZE;

	if (size <= old_size || !size) {
		if (!size)
			size = old_size;
		state->allocated_stack = slot * BPF_REG_SIZE;
		if (!size && old_size) {
			kfree(state->stack);
			state->stack = NULL;
		}
		return 0;
	}
	new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
				  GFP_KERNEL);
	if (!new_stack)
		return -ENOMEM;
	if (copy_old) {
		if (state->stack)
			memcpy(new_stack, state->stack,
			       sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
		memset(new_stack + old_size / BPF_REG_SIZE, 0,
		       sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
	}
	state->allocated_stack = slot * BPF_REG_SIZE;
	kfree(state->stack);
	state->stack = new_stack;
	return 0;
}

static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	kfree(state->stack);
	if (free_self)
		kfree(state);
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_verifier_state(struct bpf_verifier_state *dst,
			       const struct bpf_verifier_state *src)
{
	int err;

	err = realloc_verifier_state(dst, src->allocated_stack, false);
	if (err)
		return err;
	memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack));
	return copy_stack_state(dst, src);
}

static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}

static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		goto err;
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
		verbose(env, "BPF program is too complex\n");
		goto err;
	}
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL));
	return NULL;
}

#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};

static void __mark_reg_not_init(struct bpf_reg_state *reg);

/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear id, off, and union(map_ptr, range) */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;
}

/* Mark the 'variable offset' part of a register as zero. This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_known_zero(regs + regno);
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	return reg->type == which &&
	       reg->id == 0 &&
	       reg->off == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}

/* Attempts to improve min/max values based on var_off information */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);
}

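/* Worked example (illustrative, not from the original source): for
 * var_off = (value=0x2, mask=0xfc) the register is known to look like
 * 0bxxxxxx10, so umin_value can be raised to 0x2 (all unknown bits zero)
 * and umax_value lowered to 0xfe (all unknown bits one).
 */
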
/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->smin_value >= 0 || reg->smax_value < 0) {
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
		return;
	}
	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s64)reg->umax_value >= 0) {
		/* Positive.  We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->smin_value = reg->umin_value;
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
	} else if ((s64)reg->umin_value < 0) {
		/* Negative.  We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value;
	}
}

/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_intersect(reg->var_off,
				      tnum_range(reg->umin_value,
						 reg->umax_value));
}

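/* Worked example (illustrative): umin_value=16, umax_value=31 means every
 * possible value is 0b1xxxx, so tnum_range() yields (value=0x10, mask=0xf)
 * and the intersection can mark the top bit of var_off as known-one.
 */
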
/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(struct bpf_reg_state *reg)
{
	/*
	 * Clear type, id, off, and union(map_ptr, range) and
	 * padding between 'type' and union
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	reg->var_off = tnum_unknown;
	__mark_reg_unbounded(reg);
}

static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_unknown(regs + regno);
}

static void __mark_reg_not_init(struct bpf_reg_state *reg)
{
	__mark_reg_unknown(reg);
	reg->type = NOT_INIT;
}

static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_not_init(regs + regno);
}

static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_reg_state *regs)
{
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		mark_reg_not_init(env, regs, i);
		regs[i].live = REG_LIVE_NONE;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);

	/* 1st arg to a function */
	regs[BPF_REG_1].type = PTR_TO_CTX;
	mark_reg_known_zero(env, regs, BPF_REG_1);
}

enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};

static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno)
{
	struct bpf_verifier_state *parent = state->parent;

	if (regno == BPF_REG_FP)
		/* We don't need to worry about FP liveness because it's read-only */
		return;

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
		if (state->regs[regno].live & REG_LIVE_WRITTEN)
			break;
		/* ... then we depend on parent's value */
		parent->regs[regno].live |= REG_LIVE_READ;
		state = parent;
		parent = state->parent;
	}
}

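/* Liveness, by example (illustrative): if the current state wrote R6 itself,
 * a later read of R6 is "screened" by REG_LIVE_WRITTEN and the walk stops;
 * otherwise REG_LIVE_READ propagates up the parent chain so state pruning
 * knows R6 in the parent must still match for the states to be equivalent.
 */
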
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum reg_arg_type t)
{
	struct bpf_reg_state *regs = env->cur_state->regs;

	if (regno >= MAX_BPF_REG) {
		verbose(env, "R%d is invalid\n", regno);
		return -EINVAL;
	}

	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (regs[regno].type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		mark_reg_read(env->cur_state, regno);
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		regs[regno].live |= REG_LIVE_WRITTEN;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}

static bool is_spillable_regtype(enum bpf_reg_type type)
{
	switch (type) {
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MAP_VALUE_OR_NULL:
	case PTR_TO_STACK:
	case PTR_TO_CTX:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
	case CONST_PTR_TO_MAP:
		return true;
	default:
		return false;
	}
}

/* check_stack_read/write functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 */
static int check_stack_write(struct bpf_verifier_env *env,
			     struct bpf_verifier_state *state, int off,
			     int size, int value_regno, int insn_idx)
{
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;

	err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE),
				     true);
	if (err)
		return err;
	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	if (!env->allow_ptr_leaks &&
	    state->stack[spi].slot_type[0] == STACK_SPILL &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	if (value_regno >= 0 &&
	    is_spillable_regtype(state->regs[value_regno].type)) {

		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}

		/* save register state */
		state->stack[spi].spilled_ptr = state->regs[value_regno];
		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

		for (i = 0; i < BPF_REG_SIZE; i++) {
			if (state->stack[spi].slot_type[i] == STACK_MISC &&
			    !env->allow_ptr_leaks) {
				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
				int soff = (-spi - 1) * BPF_REG_SIZE;

				/* detected reuse of integer stack slot with a pointer
				 * which means either llvm is reusing stack slot or
				 * an attacker is trying to exploit CVE-2018-3639
				 * (speculative store bypass)
				 * Have to sanitize that slot with preemptive
				 * store of zero.
				 */
				if (*poff && *poff != soff) {
					/* disallow programs where single insn stores
					 * into two different stack slots, since verifier
					 * cannot sanitize them
					 */
					verbose(env,
						"insn %d cannot access two stack slots fp%d and fp%d",
						insn_idx, *poff, soff);
					return -EINVAL;
				}
				*poff = soff;
			}
			state->stack[spi].slot_type[i] = STACK_SPILL;
		}
	} else {
		/* regular write of data into stack */
		state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};

		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
				STACK_MISC;
	}
	return 0;
}

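/* Example (illustrative): spilling R1 with type PTR_TO_MAP_VALUE via
 *    BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8)
 * copies the full register state into stack[spi].spilled_ptr and marks all
 * eight slot_type bytes STACK_SPILL; a later 8-byte fill restores the exact
 * pointer state.  A narrower store over the same slot is rejected above for
 * unprivileged programs, since it would corrupt the spilled pointer.
 */
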
static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slot)
{
	struct bpf_verifier_state *parent = state->parent;

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
		if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
			break;
		/* ... then we depend on parent's value */
		parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
		state = parent;
		parent = state->parent;
	}
}

static int check_stack_read(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *state, int off, int size,
			    int value_regno)
{
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	u8 *stype;

	if (state->allocated_stack <= slot) {
		verbose(env, "invalid read from stack off %d+0 size %d\n",
			off, size);
		return -EACCES;
	}
	stype = state->stack[spi].slot_type;

	if (stype[0] == STACK_SPILL) {
		if (size != BPF_REG_SIZE) {
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		for (i = 1; i < BPF_REG_SIZE; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
				verbose(env, "corrupted spill memory\n");
				return -EACCES;
			}
		}

		if (value_regno >= 0) {
			/* restore register state from stack */
			state->regs[value_regno] = state->stack[spi].spilled_ptr;
			mark_stack_slot_read(state, spi);
		}
		return 0;
	} else {
		for (i = 0; i < size; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) {
				verbose(env, "invalid read from stack off %d+%d size %d\n",
					off, i, size);
				return -EACCES;
			}
		}
		if (value_regno >= 0)
			/* have read misc data from the stack */
			mark_reg_unknown(env, state->regs, value_regno);
		return 0;
	}
}

static int check_stack_access(struct bpf_verifier_env *env,
			      const struct bpf_reg_state *reg,
			      int off, int size)
{
	/* Stack accesses must be at a fixed offset, so that we
	 * can determine what type of data were returned. See
	 * check_stack_read().
	 */
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable stack access var_off=%s off=%d size=%d",
			tn_buf, off, size);
		return -EACCES;
	}

	if (off >= 0 || off < -MAX_BPF_STACK) {
		verbose(env, "invalid stack off=%d size=%d\n", off, size);
		return -EACCES;
	}

	return 0;
}

/* check read/write into map element returned by bpf_map_lookup_elem() */
static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
			      int size, bool zero_size_allowed)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_map *map = regs[regno].map_ptr;

	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
	    off + size > map->value_size) {
		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}
	return 0;
}

/* check read/write into a map element with possible variable offset */
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
			    int off, int size, bool zero_size_allowed)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reg_state *reg = &state->regs[regno];
	int err;

	/* We may have adjusted the register to this map value, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 */
	if (env->log.level)
		print_verifier_state(env, state);

	/* The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0.  If we are using signed variables for our
	 * index'es we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	if (reg->smin_value < 0 &&
	    (reg->smin_value == S64_MIN ||
	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
	      reg->smin_value + off < 0)) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_map_access(env, regno, reg->smin_value + off, size,
				 zero_size_allowed);
	if (err) {
		verbose(env, "R%d min value is outside of the array range\n",
			regno);
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg->umax_value + off could overflow, treat that as unbounded too.
	 */
	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
			regno);
		return -EACCES;
	}
	err = __check_map_access(env, regno, reg->umax_value + off, size,
				 zero_size_allowed);
	if (err)
		verbose(env, "R%d max value is outside of the array range\n",
			regno);
	return err;
}

#define MAX_PACKET_OFF 0xffff

static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
				       const struct bpf_call_arg_meta *meta,
				       enum bpf_access_type t)
{
	switch (env->prog->type) {
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
		/* dst_input() and dst_output() can't write for now */
		if (t == BPF_WRITE)
			return false;
		/* fallthrough */
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_SK_SKB:
		if (meta)
			return meta->pkt_access;

		env->seen_direct_write = true;
		return true;
	default:
		return false;
	}
}

static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
				 int off, int size, bool zero_size_allowed)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];

	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
	    (u64)off + size > reg->range) {
		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
			off, size, regno, reg->id, reg->off, reg->range);
		return -EACCES;
	}
	return 0;
}

static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
			       int size, bool zero_size_allowed)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];
	int err;

	/* We may have added a variable offset to the packet pointer; but any
	 * reg->range we have comes after that.  We are only checking the fixed
	 * offset.
	 */

	/* We don't allow negative numbers, because we aren't tracking enough
	 * detail to prove they're safe.
	 */
	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_packet_access(env, regno, off, size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d offset is outside of the packet\n", regno);
		return err;
	}
	return err;
}

/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
{
	struct bpf_insn_access_aux info = {
		.reg_type = *reg_type,
	};

	if (env->ops->is_valid_access &&
	    env->ops->is_valid_access(off, size, t, &info)) {
		/* A non zero info.ctx_field_size indicates that this field is a
		 * candidate for later verifier transformation to load the whole
		 * field and then apply a mask when accessed with a narrower
		 * access than actual ctx access size. A zero info.ctx_field_size
		 * will only allow for whole field access and rejects any other
		 * type of narrower access.
		 */
		*reg_type = info.reg_type;

		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
		/* remember the offset of last byte accessed in ctx */
		if (env->prog->aux->max_ctx_offset < off + size)
			env->prog->aux->max_ctx_offset = off + size;
		return 0;
	}

	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
	return -EACCES;
}

static bool __is_pointer_value(bool allow_ptr_leaks,
			       const struct bpf_reg_state *reg)
{
	if (allow_ptr_leaks)
		return false;

	return reg->type != SCALAR_VALUE;
}

static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
{
	return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
}

static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = cur_regs(env) + regno;

	return reg->type == PTR_TO_CTX;
}

static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = cur_regs(env) + regno;

	return type_is_pkt_pointer(reg->type);
}

static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg,
				   int off, int size, bool strict)
{
	struct tnum reg_off;
	int ip_align;

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	/* For platforms that do not have a Kconfig enabling
	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
	 * to this code only in strict mode where we want to emulate
	 * the NET_IP_ALIGN==2 checking.  Therefore use an
	 * unconditional IP align value of '2'.
	 */
	ip_align = 2;

	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
	if (!tnum_is_aligned(reg_off, size)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"misaligned packet access off %d+%s+%d+%d size %d\n",
			ip_align, tn_buf, reg->off, off, size);
		return -EACCES;
	}

	return 0;
}

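/* Example (illustrative): with a constant-zero var_off and reg->off == 0, a
 * 4-byte load at pkt+2 passes in strict mode (ip_align 2 + 2 == 4 is 4-byte
 * aligned), while the same load at pkt+1 fails because 2 + 1 == 3 is not.
 */
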
static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
				       const struct bpf_reg_state *reg,
				       const char *pointer_desc,
				       int off, int size, bool strict)
{
	struct tnum reg_off;

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
	if (!tnum_is_aligned(reg_off, size)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
			pointer_desc, tn_buf, reg->off, off, size);
		return -EACCES;
	}

	return 0;
}

static int check_ptr_alignment(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int off,
			       int size, bool strict_alignment_once)
{
	bool strict = env->strict_alignment || strict_alignment_once;
	const char *pointer_desc = "";

	switch (reg->type) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		/* Special case, because of NET_IP_ALIGN. Given metadata sits
		 * right in front, treat it the very same way.
		 */
		return check_pkt_ptr_alignment(env, reg, off, size, strict);
	case PTR_TO_MAP_VALUE:
		pointer_desc = "value ";
		break;
	case PTR_TO_CTX:
		pointer_desc = "context ";
		break;
	case PTR_TO_STACK:
		pointer_desc = "stack ";
		/* The stack spill tracking logic in check_stack_write()
		 * and check_stack_read() relies on stack accesses being
		 * aligned.
		 */
		strict = true;
		break;
	default:
		break;
	}
	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
					   strict);
}

*env
,
1175 const struct bpf_reg_state
*reg
, int regno
)
1177 /* Access to ctx or passing it to a helper is only allowed in
1178 * its original, unmodified form.
1182 verbose(env
, "dereference of modified ctx ptr R%d off=%d disallowed\n",
1187 if (!tnum_is_const(reg
->var_off
) || reg
->var_off
.value
) {
1190 tnum_strn(tn_buf
, sizeof(tn_buf
), reg
->var_off
);
1191 verbose(env
, "variable ctx access var_off=%s disallowed\n", tn_buf
);
/* truncate register to smaller size (in bytes)
 * must be called with size < BPF_REG_SIZE
 */
static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
{
	u64 mask;

	/* clear high bits in bit representation */
	reg->var_off = tnum_cast(reg->var_off, size);

	/* fix arithmetic bounds */
	mask = ((u64)1 << (size * 8)) - 1;
	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
		reg->umin_value &= mask;
		reg->umax_value &= mask;
	} else {
		reg->umin_value = 0;
		reg->umax_value = mask;
	}
	reg->smin_value = reg->umin_value;
	reg->smax_value = reg->umax_value;
}

/* check whether memory at (regno + off) is accessible for t = (read | write)
 * if t==write, value_regno is a register which value is stored into memory
 * if t==read, value_regno is a register which will receive the value from memory
 * if t==write && value_regno==-1, some unknown value is stored into memory
 * if t==read && value_regno==-1, don't care what we read from memory
 */
static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
			    int off, int bpf_size, enum bpf_access_type t,
			    int value_regno, bool strict_alignment_once)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = regs + regno;
	int size, err = 0;

	size = bpf_size_to_bytes(bpf_size);
	if (size < 0)
		return size;

	/* alignment checks will add in reg->off themselves */
	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
	if (err)
		return err;

	/* for access checks, reg->off is just part of off */
	off += reg->off;

	if (reg->type == PTR_TO_MAP_VALUE) {
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into map\n", value_regno);
			return -EACCES;
		}

		err = check_map_access(env, regno, off, size, false);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);

	} else if (reg->type == PTR_TO_CTX) {
		enum bpf_reg_type reg_type = SCALAR_VALUE;

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into ctx\n", value_regno);
			return -EACCES;
		}

		err = check_ctx_reg(env, reg, regno);
		if (err < 0)
			return err;

		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
		if (!err && t == BPF_READ && value_regno >= 0) {
			/* ctx access returns either a scalar, or a
			 * PTR_TO_PACKET[_META,_END].  In the latter
			 * case, we know the offset is zero.
			 */
			if (reg_type == SCALAR_VALUE)
				mark_reg_unknown(env, regs, value_regno);
			else
				mark_reg_known_zero(env, regs,
						    value_regno);
			regs[value_regno].type = reg_type;
		}

	} else if (reg->type == PTR_TO_STACK) {
		off += reg->var_off.value;
		err = check_stack_access(env, reg, off, size);
		if (err)
			return err;

		if (env->prog->aux->stack_depth < -off)
			env->prog->aux->stack_depth = -off;

		if (t == BPF_WRITE)
			err = check_stack_write(env, state, off, size,
						value_regno, insn_idx);
		else
			err = check_stack_read(env, state, off, size,
					       value_regno);
	} else if (reg_is_pkt_pointer(reg)) {
		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
			verbose(env, "cannot write into packet\n");
			return -EACCES;
		}
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into packet\n",
				value_regno);
			return -EACCES;
		}
		err = check_packet_access(env, regno, off, size, false);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else {
		verbose(env, "R%d invalid mem access '%s'\n", regno,
			reg_type_str[reg->type]);
		return -EACCES;
	}

	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
	    regs[value_regno].type == SCALAR_VALUE) {
		/* b/h/w load zero-extends, mark upper bits as known 0 */
		coerce_reg_to_size(&regs[value_regno], size);
	}
	return err;
}

static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
{
	int err;

	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
	    insn->imm != 0) {
		verbose(env, "BPF_XADD uses reserved fields\n");
		return -EINVAL;
	}

	/* check src1 operand */
	err = check_reg_arg(env, insn->src_reg, SRC_OP);
	if (err)
		return err;

	/* check src2 operand */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	if (is_pointer_value(env, insn->src_reg)) {
		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
		return -EACCES;
	}

	if (is_ctx_reg(env, insn->dst_reg) ||
	    is_pkt_reg(env, insn->dst_reg)) {
		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
			insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
			"context" : "packet");
		return -EACCES;
	}

	/* check whether atomic_add can read the memory */
	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_READ, -1, true);
	if (err)
		return err;

	/* check whether atomic_add can write into the same memory */
	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
}

/* Does this register contain a constant zero? */
static bool register_is_null(struct bpf_reg_state reg)
{
	return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0);
}

/* when register 'regno' is passed into function that will read 'access_size'
 * bytes from that pointer, make sure that it's within stack boundary
 * and all elements of stack are initialized.
 * Unlike most pointer bounds-checking functions, this one doesn't take an
 * 'off' argument, so it has to add in reg->off itself.
 */
static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
				int access_size, bool zero_size_allowed,
				struct bpf_call_arg_meta *meta)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reg_state *regs = state->regs;
	int off, i, slot, spi;

	if (regs[regno].type != PTR_TO_STACK) {
		/* Allow zero-byte read from NULL, regardless of pointer type */
		if (zero_size_allowed && access_size == 0 &&
		    register_is_null(regs[regno]))
			return 0;

		verbose(env, "R%d type=%s expected=%s\n", regno,
			reg_type_str[regs[regno].type],
			reg_type_str[PTR_TO_STACK]);
		return -EACCES;
	}

	/* Only allow fixed-offset stack reads */
	if (!tnum_is_const(regs[regno].var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
		verbose(env, "invalid variable stack read R%d var_off=%s\n",
			regno, tn_buf);
		return -EACCES;
	}
	off = regs[regno].off + regs[regno].var_off.value;
	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
		verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
			regno, off, access_size);
		return -EACCES;
	}

	if (env->prog->aux->stack_depth < -off)
		env->prog->aux->stack_depth = -off;

	if (meta && meta->raw_mode) {
		meta->access_size = access_size;
		meta->regno = regno;
		return 0;
	}

	for (i = 0; i < access_size; i++) {
		slot = -(off + i) - 1;
		spi = slot / BPF_REG_SIZE;
		if (state->allocated_stack <= slot ||
		    state->stack[spi].slot_type[slot % BPF_REG_SIZE] !=
			STACK_MISC) {
			verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
				off, i, access_size);
			return -EACCES;
		}
	}
	return 0;
}

static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
				   int access_size, bool zero_size_allowed,
				   struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];

	switch (reg->type) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		return check_packet_access(env, regno, reg->off, access_size,
					   zero_size_allowed);
	case PTR_TO_MAP_VALUE:
		return check_map_access(env, regno, reg->off, access_size,
					zero_size_allowed);
	default: /* scalar_value|ptr_to_stack or invalid ptr */
		return check_stack_boundary(env, regno, access_size,
					    zero_size_allowed, meta);
	}
}

static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
			  enum bpf_arg_type arg_type,
			  struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
	enum bpf_reg_type expected_type, type = reg->type;
	int err = 0;

	if (arg_type == ARG_DONTCARE)
		return 0;

	err = check_reg_arg(env, regno, SRC_OP);
	if (err)
		return err;

	if (arg_type == ARG_ANYTHING) {
		if (is_pointer_value(env, regno)) {
			verbose(env, "R%d leaks addr into helper function\n",
				regno);
			return -EACCES;
		}
		return 0;
	}

	if (type_is_pkt_pointer(type) &&
	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
		verbose(env, "helper access to the packet is not allowed\n");
		return -EACCES;
	}

	if (arg_type == ARG_PTR_TO_MAP_KEY ||
	    arg_type == ARG_PTR_TO_MAP_VALUE) {
		expected_type = PTR_TO_STACK;
		if (!type_is_pkt_pointer(type) &&
		    type != expected_type)
			goto err_type;
	} else if (arg_type == ARG_CONST_SIZE ||
		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
		expected_type = SCALAR_VALUE;
		if (type != expected_type)
			goto err_type;
	} else if (arg_type == ARG_CONST_MAP_PTR) {
		expected_type = CONST_PTR_TO_MAP;
		if (type != expected_type)
			goto err_type;
	} else if (arg_type == ARG_PTR_TO_CTX) {
		expected_type = PTR_TO_CTX;
		if (type != expected_type)
			goto err_type;
		err = check_ctx_reg(env, reg, regno);
		if (err < 0)
			return err;
	} else if (arg_type == ARG_PTR_TO_MEM ||
		   arg_type == ARG_PTR_TO_MEM_OR_NULL ||
		   arg_type == ARG_PTR_TO_UNINIT_MEM) {
		expected_type = PTR_TO_STACK;
		/* One exception here. In case function allows for NULL to be
		 * passed in as argument, it's a SCALAR_VALUE type. Final test
		 * happens during stack boundary checking.
		 */
		if (register_is_null(*reg) &&
		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
			/* final test in check_stack_boundary() */;
		else if (!type_is_pkt_pointer(type) &&
			 type != PTR_TO_MAP_VALUE &&
			 type != expected_type)
			goto err_type;
		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
	} else {
		verbose(env, "unsupported arg_type %d\n", arg_type);
		return -EFAULT;
	}

	if (arg_type == ARG_CONST_MAP_PTR) {
		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
		meta->map_ptr = reg->map_ptr;
	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
		/* bpf_map_xxx(..., map_ptr, ..., key) call:
		 * check that [key, key + map->key_size) are within
		 * stack limits and initialized
		 */
		if (!meta->map_ptr) {
			/* in function declaration map_ptr must come before
			 * map_key, so that it's verified and known before
			 * we have to check map_key here. Otherwise it means
			 * that kernel subsystem misconfigured verifier
			 */
			verbose(env, "invalid map_ptr to access map->key\n");
			return -EACCES;
		}
		if (type_is_pkt_pointer(type))
			err = check_packet_access(env, regno, reg->off,
						  meta->map_ptr->key_size,
						  false);
		else
			err = check_stack_boundary(env, regno,
						   meta->map_ptr->key_size,
						   false, NULL);
	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
		/* bpf_map_xxx(..., map_ptr, ..., value) call:
		 * check [value, value + map->value_size) validity
		 */
		if (!meta->map_ptr) {
			/* kernel subsystem misconfigured verifier */
			verbose(env, "invalid map_ptr to access map->value\n");
			return -EACCES;
		}
		if (type_is_pkt_pointer(type))
			err = check_packet_access(env, regno, reg->off,
						  meta->map_ptr->value_size,
						  false);
		else
			err = check_stack_boundary(env, regno,
						   meta->map_ptr->value_size,
						   false, NULL);
	} else if (arg_type == ARG_CONST_SIZE ||
		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);

		/* bpf_xxx(..., buf, len) call will access 'len' bytes
		 * from stack pointer 'buf'. Check it
		 * note: regno == len, regno - 1 == buf
		 */
		if (regno == 0) {
			/* kernel subsystem misconfigured verifier */
			verbose(env,
				"ARG_CONST_SIZE cannot be first argument\n");
			return -EACCES;
		}

		/* The register is SCALAR_VALUE; the access check
		 * happens using its boundaries.
		 */

		if (!tnum_is_const(reg->var_off))
			/* For unprivileged variable accesses, disable raw
			 * mode so that the program is required to
			 * initialize all the memory that the helper could
			 * just partially fill up.
			 */
			meta = NULL;

		if (reg->smin_value < 0) {
			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
				regno);
			return -EACCES;
		}

		if (reg->umin_value == 0) {
			err = check_helper_mem_access(env, regno - 1, 0,
						      zero_size_allowed,
						      meta);
			if (err)
				return err;
		}

		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
				regno);
			return -EACCES;
		}
		err = check_helper_mem_access(env, regno - 1,
					      reg->umax_value,
					      zero_size_allowed, meta);
	}

	return err;
err_type:
	verbose(env, "R%d type=%s expected=%s\n", regno,
		reg_type_str[type], reg_type_str[expected_type]);
	return -EACCES;
}

static int check_map_func_compatibility(struct bpf_verifier_env *env,
					struct bpf_map *map, int func_id)
{
	if (!map)
		return 0;

	/* We need a two way check, first is from map perspective ... */
	switch (map->map_type) {
	case BPF_MAP_TYPE_PROG_ARRAY:
		if (func_id != BPF_FUNC_tail_call)
			goto error;
		break;
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
		if (func_id != BPF_FUNC_perf_event_read &&
		    func_id != BPF_FUNC_perf_event_output &&
		    func_id != BPF_FUNC_perf_event_read_value)
			goto error;
		break;
	case BPF_MAP_TYPE_STACK_TRACE:
		if (func_id != BPF_FUNC_get_stackid)
			goto error;
		break;
	case BPF_MAP_TYPE_CGROUP_ARRAY:
		if (func_id != BPF_FUNC_skb_under_cgroup &&
		    func_id != BPF_FUNC_current_task_under_cgroup)
			goto error;
		break;
	/* devmap returns a pointer to a live net_device ifindex that we cannot
	 * allow to be modified from bpf side. So do not allow lookup elements
	 * for now.
	 */
	case BPF_MAP_TYPE_DEVMAP:
		if (func_id != BPF_FUNC_redirect_map)
			goto error;
		break;
	/* Restrict bpf side of cpumap, open when use-cases appear */
	case BPF_MAP_TYPE_CPUMAP:
		if (func_id != BPF_FUNC_redirect_map)
			goto error;
		break;
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		if (func_id != BPF_FUNC_map_lookup_elem)
			goto error;
		break;
	case BPF_MAP_TYPE_SOCKMAP:
		if (func_id != BPF_FUNC_sk_redirect_map &&
		    func_id != BPF_FUNC_sock_map_update &&
		    func_id != BPF_FUNC_map_delete_elem)
			goto error;
		break;
	default:
		break;
	}

	/* ... and second from the function itself. */
	switch (func_id) {
	case BPF_FUNC_tail_call:
		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
			goto error;
		break;
	case BPF_FUNC_perf_event_read:
	case BPF_FUNC_perf_event_output:
	case BPF_FUNC_perf_event_read_value:
		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
			goto error;
		break;
	case BPF_FUNC_get_stackid:
		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
			goto error;
		break;
	case BPF_FUNC_current_task_under_cgroup:
	case BPF_FUNC_skb_under_cgroup:
		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
			goto error;
		break;
	case BPF_FUNC_redirect_map:
		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
		    map->map_type != BPF_MAP_TYPE_CPUMAP)
			goto error;
		break;
	case BPF_FUNC_sk_redirect_map:
		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
			goto error;
		break;
	case BPF_FUNC_sock_map_update:
		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
			goto error;
		break;
	default:
		break;
	}

	return 0;
error:
	verbose(env, "cannot pass map_type %d into func %s#%d\n",
		map->map_type, func_id_name(func_id), func_id);
	return -EINVAL;
}

static int check_raw_mode(const struct bpf_func_proto *fn)
{
	int count = 0;

	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
		count++;

	return count > 1 ? -EINVAL : 0;
}

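/* Example (illustrative): bpf_probe_read(dst, size, unsafe_ptr) declares
 * arg1_type = ARG_PTR_TO_UNINIT_MEM, so exactly one raw-mode argument is
 * fine; a proto declaring two such arguments would be rejected here with
 * -EINVAL, since only a single raw-mode buffer is currently supported.
 */
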
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
 * are now invalid, so turn them into unknown SCALAR_VALUE.
 */
static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reg_state *regs = state->regs, *reg;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++)
		if (reg_is_pkt_pointer_any(&regs[i]))
			mark_reg_unknown(env, regs, i);

	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].slot_type[0] != STACK_SPILL)
			continue;
		reg = &state->stack[i].spilled_ptr;
		if (reg_is_pkt_pointer_any(reg))
			__mark_reg_unknown(reg);
	}
}

static int
record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
		int func_id, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];

	if (func_id != BPF_FUNC_tail_call &&
	    func_id != BPF_FUNC_map_lookup_elem)
		return 0;
	if (meta->map_ptr == NULL) {
		verbose(env, "kernel subsystem misconfigured verifier\n");
		return -EINVAL;
	}

	if (!BPF_MAP_PTR(aux->map_state))
		bpf_map_ptr_store(aux, meta->map_ptr,
				  meta->map_ptr->unpriv_array);
	else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
				  meta->map_ptr->unpriv_array);
	return 0;
}

static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
	const struct bpf_func_proto *fn = NULL;
	struct bpf_reg_state *regs;
	struct bpf_call_arg_meta meta;
	bool changes_data;
	int i, err;

	/* find function prototype */
	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
			func_id);
		return -EINVAL;
	}

	if (env->ops->get_func_proto)
		fn = env->ops->get_func_proto(func_id);

	if (!fn) {
		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
			func_id);
		return -EINVAL;
	}

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	if (!env->prog->gpl_compatible && fn->gpl_only) {
		verbose(env, "cannot call GPL only function from proprietary program\n");
		return -EINVAL;
	}

	/* With LD_ABS/IND some JITs save/restore skb from r1. */
	changes_data = bpf_helper_changes_pkt_data(fn->func);
	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
			func_id_name(func_id), func_id);
		return -EINVAL;
	}

	memset(&meta, 0, sizeof(meta));
	meta.pkt_access = fn->pkt_access;

	/* We only support one arg being in raw mode at the moment, which
	 * is sufficient for the helper functions we have right now.
	 */
	err = check_raw_mode(fn);
	if (err) {
		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
			func_id_name(func_id), func_id);
		return err;
	}

	/* check args */
	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
	if (err)
		return err;
	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
	if (err)
		return err;
	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
	if (err)
		return err;
	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
	if (err)
		return err;
	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
	if (err)
		return err;

	err = record_func_map(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	/* Mark slots with STACK_MISC in case of raw mode, stack offset
	 * is inferred from register state.
	 */
	for (i = 0; i < meta.access_size; i++) {
		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
				       BPF_WRITE, -1, false);
		if (err)
			return err;
	}

	regs = cur_regs(env);
	/* reset caller saved regs */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		mark_reg_not_init(env, regs, caller_saved[i]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}

	/* update return register (already marked as written above) */
	if (fn->ret_type == RET_INTEGER) {
		/* sets type to SCALAR_VALUE */
		mark_reg_unknown(env, regs, BPF_REG_0);
	} else if (fn->ret_type == RET_VOID) {
		regs[BPF_REG_0].type = NOT_INIT;
	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
		/* There is no offset yet applied, variable or fixed */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		/* remember map_ptr, so that check_map_access()
		 * can check 'value_size' boundary of memory access
		 * to map element returned from bpf_map_lookup_elem()
		 */
		if (meta.map_ptr == NULL) {
			verbose(env,
				"kernel subsystem misconfigured verifier\n");
			return -EINVAL;
		}
		regs[BPF_REG_0].map_ptr = meta.map_ptr;
		regs[BPF_REG_0].id = ++env->id_gen;
	} else {
		verbose(env, "unknown return type %d of func %s#%d\n",
			fn->ret_type, func_id_name(func_id), func_id);
		return -EINVAL;
	}

	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
	if (err)
		return err;

	if (changes_data)
		clear_all_pkt_pointers(env);
	return 0;
}

static bool signed_add_overflows(s64 a, s64 b)
{
	/* Do the add in u64, where overflow is well-defined */
	s64 res = (s64)((u64)a + (u64)b);

	if (b < 0)
		return res > a;
	return res < a;
}

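/* Example (illustrative): signed_add_overflows(S64_MAX, 1) computes
 * res == S64_MIN in well-defined u64 arithmetic; since b > 0 and res < a,
 * it correctly reports overflow.
 */
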
static bool signed_sub_overflows(s64 a, s64 b)
{
	/* Do the sub in u64, where overflow is well-defined */
	s64 res = (s64)((u64)a - (u64)b);

	if (b < 0)
		return res < a;
	return res > a;
}

static bool check_reg_sane_offset(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg,
				  enum bpf_reg_type type)
{
	bool known = tnum_is_const(reg->var_off);
	s64 val = reg->var_off.value;
	s64 smin = reg->smin_value;

	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
		verbose(env, "math between %s pointer and %lld is not allowed\n",
			reg_type_str[type], val);
		return false;
	}

	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
		verbose(env, "%s pointer offset %d is not allowed\n",
			reg_type_str[type], reg->off);
		return false;
	}

	if (smin == S64_MIN) {
		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
			reg_type_str[type]);
		return false;
	}

	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
		verbose(env, "value %lld makes %s pointer be out of bounds\n",
			smin, reg_type_str[type]);
		return false;
	}

	return true;
}

*cur_aux(struct bpf_verifier_env
*env
)
1983 return &env
->insn_aux_data
[env
->insn_idx
];
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
			      u32 *ptr_limit, u8 opcode, bool off_is_neg)
{
	bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
			    (opcode == BPF_SUB && !off_is_neg);
	u32 off;

	switch (ptr_reg->type) {
	case PTR_TO_STACK:
		off = ptr_reg->off + ptr_reg->var_off.value;
		if (mask_to_left)
			*ptr_limit = MAX_BPF_STACK + off;
		else
			*ptr_limit = -off;
		return 0;
	case PTR_TO_MAP_VALUE:
		if (mask_to_left) {
			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
		} else {
			off = ptr_reg->smin_value + ptr_reg->off;
			*ptr_limit = ptr_reg->map_ptr->value_size - off;
		}
		return 0;
	default:
		return -EINVAL;
	}
}

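/* Example (illustrative): for a PTR_TO_STACK with constant offset -16 and a
 * BPF_ADD of a negative value, mask_to_left is true and the limit becomes
 * MAX_BPF_STACK + (-16), i.e. the distance from the pointer down to the
 * bottom of the stack that the masked offset may legally cover.
 */
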
static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
				    const struct bpf_insn *insn)
{
	return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
}

static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
				       u32 alu_state, u32 alu_limit)
{
	/* If we arrived here from different branches with different
	 * state or limits to sanitize, then this won't work.
	 */
	if (aux->alu_state &&
	    (aux->alu_state != alu_state ||
	     aux->alu_limit != alu_limit))
		return -EACCES;

	/* Corresponding fixup done in fixup_bpf_calls(). */
	aux->alu_state = alu_state;
	aux->alu_limit = alu_limit;
	return 0;
}

static int sanitize_val_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn)
{
	struct bpf_insn_aux_data *aux = cur_aux(env);

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
}
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn,
			    const struct bpf_reg_state *ptr_reg,
			    struct bpf_reg_state *dst_reg,
			    bool off_is_neg)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_insn_aux_data *aux = cur_aux(env);
	bool ptr_is_dst_reg = ptr_reg == dst_reg;
	u8 opcode = BPF_OP(insn->code);
	u32 alu_state, alu_limit;
	struct bpf_reg_state tmp;
	bool ret;

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	/* We already marked aux for masking from non-speculative
	 * paths, thus we got here in the first place. We only care
	 * to explore bad access from here.
	 */
	if (vstate->speculative)
		goto do_sim;

	alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
	alu_state |= ptr_is_dst_reg ?
		     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;

	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
		return 0;
	if (update_alu_sanitation_state(aux, alu_state, alu_limit))
		return -EACCES;
do_sim:
	/* Simulate and find potential out-of-bounds access under
	 * speculative execution from truncation as a result of
	 * masking when off was not within expected range. If off
	 * sits in dst, then we temporarily need to move ptr there
	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
	 * for cases where we use K-based arithmetic in one direction
	 * and truncated reg-based in the other in order to explore
	 * bad access.
	 */
	if (!ptr_is_dst_reg) {
		tmp = *dst_reg;
		*dst_reg = *ptr_reg;
	}
	ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
	if (!ptr_is_dst_reg)
		*dst_reg = tmp;
	return !ret ? -EFAULT : 0;
}
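/* Note: push_stack() is called above with speculative == true, so the
 * verifier re-walks the following instructions as if the truncated (masked)
 * value had been used, and rejects the program only if that speculative
 * path would perform a bad access.
 */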
/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
 * Caller should also handle BPF_MOV case separately.
 * If we return -EACCES, caller may want to try again treating pointer as a
 * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
 */
static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn,
				   const struct bpf_reg_state *ptr_reg,
				   const struct bpf_reg_state *off_reg)
{
	struct bpf_reg_state *regs = cur_regs(env), *dst_reg;
	bool known = tnum_is_const(off_reg->var_off);
	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
	u32 dst = insn->dst_reg, src = insn->src_reg;
	u8 opcode = BPF_OP(insn->code);
	int ret;

	dst_reg = &regs[dst];

	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
	    smin_val > smax_val || umin_val > umax_val) {
		/* Taint dst register if offset had invalid bounds derived from
		 * e.g. dead branches.
		 */
		__mark_reg_unknown(dst_reg);
		return 0;
	}

	if (BPF_CLASS(insn->code) != BPF_ALU64) {
		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
		verbose(env,
			"R%d 32-bit pointer arithmetic prohibited\n",
			dst);
		return -EACCES;
	}

	if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
		verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
			dst);
		return -EACCES;
	}
	if (ptr_reg->type == CONST_PTR_TO_MAP) {
		verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
			dst);
		return -EACCES;
	}
	if (ptr_reg->type == PTR_TO_PACKET_END) {
		verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
			dst);
		return -EACCES;
	}
	if (ptr_reg->type == PTR_TO_MAP_VALUE) {
		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
				off_reg == dst_reg ? dst : src);
			return -EACCES;
		}
	}

	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
	 * The id may be overwritten later if we create a new variable offset.
	 */
	dst_reg->type = ptr_reg->type;
	dst_reg->id = ptr_reg->id;

	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
		return -EINVAL;

	switch (opcode) {
	case BPF_ADD:
		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
		if (ret < 0) {
			verbose(env, "R%d tried to add from different maps or paths\n", dst);
			return ret;
		}
		/* We can take a fixed offset as long as it doesn't overflow
		 * the s32 'off' field
		 */
		if (known && (ptr_reg->off + smin_val ==
			      (s64)(s32)(ptr_reg->off + smin_val))) {
			/* pointer += K.  Accumulate it into fixed offset */
			dst_reg->smin_value = smin_ptr;
			dst_reg->smax_value = smax_ptr;
			dst_reg->umin_value = umin_ptr;
			dst_reg->umax_value = umax_ptr;
			dst_reg->var_off = ptr_reg->var_off;
			dst_reg->off = ptr_reg->off + smin_val;
			dst_reg->raw = ptr_reg->raw;
			break;
		}
		/* A new variable offset is created.  Note that off_reg->off
		 * == 0, since it's a scalar.
		 * dst_reg gets the pointer type and since some positive
		 * integer value was added to the pointer, give it a new 'id'
		 * if it's a PTR_TO_PACKET.
		 * this creates a new 'base' pointer, off_reg (variable) gets
		 * added into the variable offset, and we copy the fixed offset
		 * from ptr_reg.
		 */
		if (signed_add_overflows(smin_ptr, smin_val) ||
		    signed_add_overflows(smax_ptr, smax_val)) {
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		} else {
			dst_reg->smin_value = smin_ptr + smin_val;
			dst_reg->smax_value = smax_ptr + smax_val;
		}
		if (umin_ptr + umin_val < umin_ptr ||
		    umax_ptr + umax_val < umax_ptr) {
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		} else {
			dst_reg->umin_value = umin_ptr + umin_val;
			dst_reg->umax_value = umax_ptr + umax_val;
		}
		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
		dst_reg->off = ptr_reg->off;
		dst_reg->raw = ptr_reg->raw;
		if (reg_is_pkt_pointer(ptr_reg)) {
			dst_reg->id = ++env->id_gen;
			/* something was added to pkt_ptr, set range to zero */
			dst_reg->raw = 0;
		}
		break;
	case BPF_SUB:
		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
		if (ret < 0) {
			verbose(env, "R%d tried to sub from different maps or paths\n", dst);
			return ret;
		}
		if (dst_reg == off_reg) {
			/* scalar -= pointer.  Creates an unknown scalar */
			verbose(env, "R%d tried to subtract pointer from scalar\n",
				dst);
			return -EACCES;
		}
		/* We don't allow subtraction from FP, because (according to
		 * the test_verifier.c test "invalid fp arithmetic") JITs might
		 * not be able to deal with it.
		 */
		if (ptr_reg->type == PTR_TO_STACK) {
			verbose(env, "R%d subtraction from stack pointer prohibited\n",
				dst);
			return -EACCES;
		}
		if (known && (ptr_reg->off - smin_val ==
			      (s64)(s32)(ptr_reg->off - smin_val))) {
			/* pointer -= K.  Subtract it from fixed offset */
			dst_reg->smin_value = smin_ptr;
			dst_reg->smax_value = smax_ptr;
			dst_reg->umin_value = umin_ptr;
			dst_reg->umax_value = umax_ptr;
			dst_reg->var_off = ptr_reg->var_off;
			dst_reg->id = ptr_reg->id;
			dst_reg->off = ptr_reg->off - smin_val;
			dst_reg->raw = ptr_reg->raw;
			break;
		}
		/* A new variable offset is created.  If the subtrahend is known
		 * nonnegative, then any reg->range we had before is still good.
		 */
		if (signed_sub_overflows(smin_ptr, smax_val) ||
		    signed_sub_overflows(smax_ptr, smin_val)) {
			/* Overflow possible, we know nothing */
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		} else {
			dst_reg->smin_value = smin_ptr - smax_val;
			dst_reg->smax_value = smax_ptr - smin_val;
		}
		if (umin_ptr < umax_val) {
			/* Overflow possible, we know nothing */
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		} else {
			/* Cannot overflow (as long as bounds are consistent) */
			dst_reg->umin_value = umin_ptr - umax_val;
			dst_reg->umax_value = umax_ptr - umin_val;
		}
		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
		dst_reg->off = ptr_reg->off;
		dst_reg->raw = ptr_reg->raw;
		if (reg_is_pkt_pointer(ptr_reg)) {
			dst_reg->id = ++env->id_gen;
			/* something was added to pkt_ptr, set range to zero */
			if (smin_val < 0)
				dst_reg->raw = 0;
		}
		break;
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
		/* bitwise ops on pointers are troublesome, prohibit. */
		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
			dst, bpf_alu_string[opcode >> 4]);
		return -EACCES;
	default:
		/* other operators (e.g. MUL,LSH) produce non-pointer results */
		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
			dst, bpf_alu_string[opcode >> 4]);
		return -EACCES;
	}

	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
		return -EINVAL;

	__update_reg_bounds(dst_reg);
	__reg_deduce_bounds(dst_reg);
	__reg_bound_offset(dst_reg);

	/* For unprivileged we require that resulting offset must be in bounds
	 * in order to be able to sanitize access later on.
	 */
	if (!env->allow_ptr_leaks) {
		if (dst_reg->type == PTR_TO_MAP_VALUE &&
		    check_map_access(env, dst, dst_reg->off, 1, false)) {
			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
				"prohibited for !root\n", dst);
			return -EACCES;
		} else if (dst_reg->type == PTR_TO_STACK &&
			   check_stack_access(env, dst_reg, dst_reg->off +
					      dst_reg->var_off.value, 1)) {
			verbose(env, "R%d stack pointer arithmetic goes out of range, "
				"prohibited for !root\n", dst);
			return -EACCES;
		}
	}

	return 0;
}
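/* Example: 'pkt_ptr += 8' with a constant 8 takes the known-offset path in
 * the BPF_ADD case above: smin_val == smax_val == 8, the constant is folded
 * into dst_reg->off and var_off is untouched.  Only adding an unknown scalar
 * creates a variable offset via tnum_add(), which also hands packet pointers
 * a fresh id so stale range knowledge is not reused.
 */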
/* WARNING: This function does calculations on 64-bit values, but the actual
 * execution may occur on 32-bit values. Therefore, things like bitshifts
 * need extra checks in the 32-bit case.
 */
static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
				      struct bpf_insn *insn,
				      struct bpf_reg_state *dst_reg,
				      struct bpf_reg_state src_reg)
{
	struct bpf_reg_state *regs = cur_regs(env);
	u8 opcode = BPF_OP(insn->code);
	bool src_known, dst_known;
	s64 smin_val, smax_val;
	u64 umin_val, umax_val;
	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
	u32 dst = insn->dst_reg;
	int ret;

	if (insn_bitness == 32) {
		/* Relevant for 32-bit RSH: Information can propagate towards
		 * LSB, so it isn't sufficient to only truncate the output to
		 * 32 bits.
		 */
		coerce_reg_to_size(dst_reg, 4);
		coerce_reg_to_size(&src_reg, 4);
	}

	smin_val = src_reg.smin_value;
	smax_val = src_reg.smax_value;
	umin_val = src_reg.umin_value;
	umax_val = src_reg.umax_value;
	src_known = tnum_is_const(src_reg.var_off);
	dst_known = tnum_is_const(dst_reg->var_off);

	if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
	    smin_val > smax_val || umin_val > umax_val) {
		/* Taint dst register if offset had invalid bounds derived from
		 * e.g. dead branches.
		 */
		__mark_reg_unknown(dst_reg);
		return 0;
	}

	if (!src_known &&
	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
		__mark_reg_unknown(dst_reg);
		return 0;
	}

	switch (opcode) {
	case BPF_ADD:
		ret = sanitize_val_alu(env, insn);
		if (ret < 0) {
			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
			return ret;
		}
		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		} else {
			dst_reg->smin_value += smin_val;
			dst_reg->smax_value += smax_val;
		}
		if (dst_reg->umin_value + umin_val < umin_val ||
		    dst_reg->umax_value + umax_val < umax_val) {
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		} else {
			dst_reg->umin_value += umin_val;
			dst_reg->umax_value += umax_val;
		}
		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_SUB:
		ret = sanitize_val_alu(env, insn);
		if (ret < 0) {
			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
			return ret;
		}
		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
			/* Overflow possible, we know nothing */
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		} else {
			dst_reg->smin_value -= smax_val;
			dst_reg->smax_value -= smin_val;
		}
		if (dst_reg->umin_value < umax_val) {
			/* Overflow possible, we know nothing */
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		} else {
			/* Cannot overflow (as long as bounds are consistent) */
			dst_reg->umin_value -= umax_val;
			dst_reg->umax_value -= umin_val;
		}
		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_MUL:
		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
		if (smin_val < 0 || dst_reg->smin_value < 0) {
			/* Ain't nobody got time to multiply that sign */
			__mark_reg_unbounded(dst_reg);
			__update_reg_bounds(dst_reg);
			break;
		}
		/* Both values are positive, so we can work with unsigned and
		 * copy the result to signed (unless it exceeds S64_MAX).
		 */
		if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
			/* Potential overflow, we know nothing */
			__mark_reg_unbounded(dst_reg);
			/* (except what we can learn from the var_off) */
			__update_reg_bounds(dst_reg);
			break;
		}
		dst_reg->umin_value *= umin_val;
		dst_reg->umax_value *= umax_val;
		if (dst_reg->umax_value > S64_MAX) {
			/* Overflow possible, we know nothing */
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		} else {
			dst_reg->smin_value = dst_reg->umin_value;
			dst_reg->smax_value = dst_reg->umax_value;
		}
		break;
	case BPF_AND:
		if (src_known && dst_known) {
			__mark_reg_known(dst_reg, dst_reg->var_off.value &
						  src_reg.var_off.value);
			break;
		}
		/* We get our minimum from the var_off, since that's inherently
		 * bitwise.  Our maximum is the minimum of the operands' maxima.
		 */
		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
		dst_reg->umin_value = dst_reg->var_off.value;
		dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
		if (dst_reg->smin_value < 0 || smin_val < 0) {
			/* Lose signed bounds when ANDing negative numbers,
			 * ain't nobody got time for that.
			 */
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		} else {
			/* ANDing two positives gives a positive, so safe to
			 * cast result into s64.
			 */
			dst_reg->smin_value = dst_reg->umin_value;
			dst_reg->smax_value = dst_reg->umax_value;
		}
		/* We may learn something more from the var_off */
		__update_reg_bounds(dst_reg);
		break;
	case BPF_OR:
		if (src_known && dst_known) {
			__mark_reg_known(dst_reg, dst_reg->var_off.value |
						  src_reg.var_off.value);
			break;
		}
		/* We get our maximum from the var_off, and our minimum is the
		 * maximum of the operands' minima
		 */
		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
		dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
		dst_reg->umax_value = dst_reg->var_off.value |
				      dst_reg->var_off.mask;
		if (dst_reg->smin_value < 0 || smin_val < 0) {
			/* Lose signed bounds when ORing negative numbers,
			 * ain't nobody got time for that.
			 */
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		} else {
			/* ORing two positives gives a positive, so safe to
			 * cast result into s64.
			 */
			dst_reg->smin_value = dst_reg->umin_value;
			dst_reg->smax_value = dst_reg->umax_value;
		}
		/* We may learn something more from the var_off */
		__update_reg_bounds(dst_reg);
		break;
	case BPF_LSH:
		if (umax_val >= insn_bitness) {
			/* Shifts greater than 31 or 63 are undefined.
			 * This includes shifts by a negative number.
			 */
			mark_reg_unknown(env, regs, insn->dst_reg);
			break;
		}
		/* We lose all sign bit information (except what we can pick
		 * up from var_off)
		 */
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
		/* If we might shift our top bit out, then we know nothing */
		if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		} else {
			dst_reg->umin_value <<= umin_val;
			dst_reg->umax_value <<= umax_val;
		}
		if (src_known)
			dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
		else
			dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
		/* We may learn something more from the var_off */
		__update_reg_bounds(dst_reg);
		break;
	case BPF_RSH:
		if (umax_val >= insn_bitness) {
			/* Shifts greater than 31 or 63 are undefined.
			 * This includes shifts by a negative number.
			 */
			mark_reg_unknown(env, regs, insn->dst_reg);
			break;
		}
		/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
		 * be negative, then either:
		 * 1) src_reg might be zero, so the sign bit of the result is
		 *    unknown, so we lose our signed bounds
		 * 2) it's known negative, thus the unsigned bounds capture the
		 *    signed bounds
		 * 3) the signed bounds cross zero, so they tell us nothing
		 *    about the result
		 * If the value in dst_reg is known nonnegative, then again the
		 * unsigned bounds capture the signed bounds.
		 * Thus, in all cases it suffices to blow away our signed bounds
		 * and rely on inferring new ones from the unsigned bounds and
		 * var_off of the result.
		 */
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
		if (src_known)
			dst_reg->var_off = tnum_rshift(dst_reg->var_off,
						       umin_val);
		else
			dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
		dst_reg->umin_value >>= umax_val;
		dst_reg->umax_value >>= umin_val;
		/* We may learn something more from the var_off */
		__update_reg_bounds(dst_reg);
		break;
	default:
		mark_reg_unknown(env, regs, insn->dst_reg);
		break;
	}

	if (BPF_CLASS(insn->code) != BPF_ALU64) {
		/* 32-bit ALU ops are (32,32)->32 */
		coerce_reg_to_size(dst_reg, 4);
	}

	__reg_deduce_bounds(dst_reg);
	__reg_bound_offset(dst_reg);
	return 0;
}
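/* Example for the BPF_AND case above: ANDing an unknown dst_reg with the
 * constant 0xff yields var_off = (value 0; mask 0xff), so umin_value becomes
 * 0 and umax_value is capped at 0xff.  The signed bounds are first discarded
 * (an unknown dst may be negative) and then re-learned from var_off by
 * __update_reg_bounds(), ending at [0, 0xff].
 */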
/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
 * and var_off.
 */
static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn)
{
	struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
	u8 opcode = BPF_OP(insn->code);

	dst_reg = &regs[insn->dst_reg];
	src_reg = NULL;
	if (dst_reg->type != SCALAR_VALUE)
		ptr_reg = dst_reg;
	if (BPF_SRC(insn->code) == BPF_X) {
		src_reg = &regs[insn->src_reg];
		if (src_reg->type != SCALAR_VALUE) {
			if (dst_reg->type != SCALAR_VALUE) {
				/* Combining two pointers by any ALU op yields
				 * an arbitrary scalar. Disallow all math except
				 * pointer subtraction
				 */
				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
					mark_reg_unknown(env, regs, insn->dst_reg);
					return 0;
				}
				verbose(env, "R%d pointer %s pointer prohibited\n",
					insn->dst_reg,
					bpf_alu_string[opcode >> 4]);
				return -EACCES;
			} else {
				/* scalar += pointer
				 * This is legal, but we have to reverse our
				 * src/dest handling in computing the range
				 */
				return adjust_ptr_min_max_vals(env, insn,
							       src_reg, dst_reg);
			}
		} else if (ptr_reg) {
			/* pointer += scalar */
			return adjust_ptr_min_max_vals(env, insn,
						       dst_reg, src_reg);
		}
	} else {
		/* Pretend the src is a reg with a known value, since we only
		 * need to be able to read from this state.
		 */
		off_reg.type = SCALAR_VALUE;
		__mark_reg_known(&off_reg, insn->imm);
		src_reg = &off_reg;
		if (ptr_reg) /* pointer += K */
			return adjust_ptr_min_max_vals(env, insn,
						       ptr_reg, src_reg);
	}

	/* Got here implies adding two SCALAR_VALUEs */
	if (WARN_ON_ONCE(ptr_reg)) {
		print_verifier_state(env, env->cur_state);
		verbose(env, "verifier internal error: unexpected ptr_reg\n");
		return -EINVAL;
	}
	if (WARN_ON(!src_reg)) {
		print_verifier_state(env, env->cur_state);
		verbose(env, "verifier internal error: no src_reg\n");
		return -EINVAL;
	}
	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
}
/* check validity of 32-bit and 64-bit arithmetic operations */
static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_reg_state *regs = cur_regs(env);
	u8 opcode = BPF_OP(insn->code);
	int err;

	if (opcode == BPF_END || opcode == BPF_NEG) {
		if (opcode == BPF_NEG) {
			if (BPF_SRC(insn->code) != 0 ||
			    insn->src_reg != BPF_REG_0 ||
			    insn->off != 0 || insn->imm != 0) {
				verbose(env, "BPF_NEG uses reserved fields\n");
				return -EINVAL;
			}
		} else {
			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
			    BPF_CLASS(insn->code) == BPF_ALU64) {
				verbose(env, "BPF_END uses reserved fields\n");
				return -EINVAL;
			}
		}

		/* check src operand */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		if (is_pointer_value(env, insn->dst_reg)) {
			verbose(env, "R%d pointer arithmetic prohibited\n",
				insn->dst_reg);
			return -EACCES;
		}

		/* check dest operand */
		err = check_reg_arg(env, insn->dst_reg, DST_OP);
		if (err)
			return err;

	} else if (opcode == BPF_MOV) {

		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->imm != 0 || insn->off != 0) {
				verbose(env, "BPF_MOV uses reserved fields\n");
				return -EINVAL;
			}

			/* check src operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
		} else {
			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
				verbose(env, "BPF_MOV uses reserved fields\n");
				return -EINVAL;
			}
		}

		/* check dest operand */
		err = check_reg_arg(env, insn->dst_reg, DST_OP);
		if (err)
			return err;

		if (BPF_SRC(insn->code) == BPF_X) {
			struct bpf_reg_state *src_reg = regs + insn->src_reg;
			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;

			if (BPF_CLASS(insn->code) == BPF_ALU64) {
				/* case: R1 = R2
				 * copy register state to dest reg
				 */
				*dst_reg = *src_reg;
				dst_reg->live |= REG_LIVE_WRITTEN;
			} else {
				/* R1 = (u32) R2 */
				if (is_pointer_value(env, insn->src_reg)) {
					verbose(env,
						"R%d partial copy of pointer\n",
						insn->src_reg);
					return -EACCES;
				} else if (src_reg->type == SCALAR_VALUE) {
					*dst_reg = *src_reg;
					dst_reg->live |= REG_LIVE_WRITTEN;
				} else {
					mark_reg_unknown(env, regs,
							 insn->dst_reg);
				}
				coerce_reg_to_size(dst_reg, 4);
			}
		} else {
			/* case: R = imm
			 * remember the value we stored into this reg
			 */
			regs[insn->dst_reg].type = SCALAR_VALUE;
			if (BPF_CLASS(insn->code) == BPF_ALU64) {
				__mark_reg_known(regs + insn->dst_reg,
						 insn->imm);
			} else {
				__mark_reg_known(regs + insn->dst_reg,
						 (u32)insn->imm);
			}
		}

	} else if (opcode > BPF_END) {
		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
		return -EINVAL;

	} else {	/* all other ALU ops: and, sub, xor, add, ... */

		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->imm != 0 || insn->off != 0) {
				verbose(env, "BPF_ALU uses reserved fields\n");
				return -EINVAL;
			}
			/* check src1 operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
		} else {
			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
				verbose(env, "BPF_ALU uses reserved fields\n");
				return -EINVAL;
			}
		}

		/* check src2 operand */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
			verbose(env, "div by zero\n");
			return -EINVAL;
		}

		if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
			verbose(env, "BPF_ARSH not supported for 32 bit ALU\n");
			return -EINVAL;
		}

		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;

			if (insn->imm < 0 || insn->imm >= size) {
				verbose(env, "invalid shift %d\n", insn->imm);
				return -EINVAL;
			}
		}

		/* check dest operand */
		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
		if (err)
			return err;

		return adjust_reg_min_max_vals(env, insn);
	}

	return 0;
}
static void find_good_pkt_pointers(struct bpf_verifier_state *state,
				   struct bpf_reg_state *dst_reg,
				   enum bpf_reg_type type,
				   bool range_right_open)
{
	struct bpf_reg_state *regs = state->regs, *reg;
	u16 new_range;
	int i;

	if (dst_reg->off < 0 ||
	    (dst_reg->off == 0 && range_right_open))
		/* This doesn't give us any range */
		return;

	if (dst_reg->umax_value > MAX_PACKET_OFF ||
	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
		 * than pkt_end, but that's because it's also less than pkt.
		 */
		return;

	new_range = dst_reg->off;
	if (range_right_open)
		new_range--;

	/* Examples for register markings:
	 *
	 * pkt_data in dst register:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 > pkt_end) goto <handle exception>
	 *   <access okay>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 < pkt_end) goto <access okay>
	 *   <handle exception>
	 *
	 *   Where:
	 *     r2 == dst_reg, pkt_end == src_reg
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * pkt_data in src register:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end >= r2) goto <access okay>
	 *   <handle exception>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end <= r2) goto <handle exception>
	 *   <access okay>
	 *
	 *   Where:
	 *     pkt_end == dst_reg, r2 == src_reg
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
	 * and [r3, r3 + 8-1) respectively is safe to access depending on
	 * the check.
	 */

	/* If our ids match, then we must have the same max_value.  And we
	 * don't care about the other reg's fixed offset, since if it's too big
	 * the range won't allow anything.
	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
	 */
	for (i = 0; i < MAX_BPF_REG; i++)
		if (regs[i].type == type && regs[i].id == dst_reg->id)
			/* keep the maximum range already checked */
			regs[i].range = max(regs[i].range, new_range);

	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].slot_type[0] != STACK_SPILL)
			continue;
		reg = &state->stack[i].spilled_ptr;
		if (reg->type == type && reg->id == dst_reg->id)
			reg->range = max(reg->range, new_range);
	}
}
/* compute branch direction of the expression "if (reg opcode val) goto target;"
 * and return:
 *  1 - branch will be taken and "goto target" will be executed
 *  0 - branch will not be taken and fall-through to next insn
 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
 *      value range is [0,10]
 */
static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
{
	if (__is_pointer_value(false, reg))
		return -1;

	switch (opcode) {
	case BPF_JEQ:
		if (tnum_is_const(reg->var_off))
			return !!tnum_equals_const(reg->var_off, val);
		break;
	case BPF_JNE:
		if (tnum_is_const(reg->var_off))
			return !tnum_equals_const(reg->var_off, val);
		break;
	case BPF_JGT:
		if (reg->umin_value > val)
			return 1;
		else if (reg->umax_value <= val)
			return 0;
		break;
	case BPF_JSGT:
		if (reg->smin_value > (s64)val)
			return 1;
		else if (reg->smax_value < (s64)val)
			return 0;
		break;
	case BPF_JLT:
		if (reg->umax_value < val)
			return 1;
		else if (reg->umin_value >= val)
			return 0;
		break;
	case BPF_JSLT:
		if (reg->smax_value < (s64)val)
			return 1;
		else if (reg->smin_value >= (s64)val)
			return 0;
		break;
	case BPF_JGE:
		if (reg->umin_value >= val)
			return 1;
		else if (reg->umax_value < val)
			return 0;
		break;
	case BPF_JSGE:
		if (reg->smin_value >= (s64)val)
			return 1;
		else if (reg->smax_value < (s64)val)
			return 0;
		break;
	case BPF_JLE:
		if (reg->umax_value <= val)
			return 1;
		else if (reg->umin_value > val)
			return 0;
		break;
	case BPF_JSLE:
		if (reg->smax_value <= (s64)val)
			return 1;
		else if (reg->smin_value > (s64)val)
			return 0;
		break;
	}

	return -1;
}
/* Adjusts the register min/max values in the case that the dst_reg is the
 * variable register that we are working on, and src_reg is a constant or we're
 * simply doing a BPF_K check.
 * In JEQ/JNE cases we also adjust the var_off values.
 */
static void reg_set_min_max(struct bpf_reg_state *true_reg,
			    struct bpf_reg_state *false_reg, u64 val,
			    u8 opcode)
{
	/* If the dst_reg is a pointer, we can't learn anything about its
	 * variable offset from the compare (unless src_reg were a pointer into
	 * the same object, but we don't bother with that).
	 * Since false_reg and true_reg have the same type by construction, we
	 * only need to check one of them for pointerness.
	 */
	if (__is_pointer_value(false, false_reg))
		return;

	switch (opcode) {
	case BPF_JEQ:
		/* If this is false then we know nothing Jon Snow, but if it is
		 * true then we know for sure.
		 */
		__mark_reg_known(true_reg, val);
		break;
	case BPF_JNE:
		/* If this is true we know nothing Jon Snow, but if it is false
		 * we know the value for sure.
		 */
		__mark_reg_known(false_reg, val);
		break;
	case BPF_JGT:
		false_reg->umax_value = min(false_reg->umax_value, val);
		true_reg->umin_value = max(true_reg->umin_value, val + 1);
		break;
	case BPF_JSGT:
		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
		break;
	case BPF_JLT:
		false_reg->umin_value = max(false_reg->umin_value, val);
		true_reg->umax_value = min(true_reg->umax_value, val - 1);
		break;
	case BPF_JSLT:
		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
		break;
	case BPF_JGE:
		false_reg->umax_value = min(false_reg->umax_value, val - 1);
		true_reg->umin_value = max(true_reg->umin_value, val);
		break;
	case BPF_JSGE:
		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
		break;
	case BPF_JLE:
		false_reg->umin_value = max(false_reg->umin_value, val + 1);
		true_reg->umax_value = min(true_reg->umax_value, val);
		break;
	case BPF_JSLE:
		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
		break;
	default:
		break;
	}

	__reg_deduce_bounds(false_reg);
	__reg_deduce_bounds(true_reg);
	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(false_reg);
	__reg_bound_offset(true_reg);
	/* Intersecting with the old var_off might have improved our bounds
	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(false_reg);
	__update_reg_bounds(true_reg);
}
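/* Example: for "if reg > 5" (BPF_JGT, val == 5) on an unknown scalar, the
 * false branch learns umax_value = 5 and the true branch umin_value = 6;
 * the deduce/bound-offset calls then refine the signed bounds and var_off
 * from those unsigned limits.
 */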
/* Same as above, but for the case that dst_reg holds a constant and src_reg is
 * the variable reg.
 */
static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
				struct bpf_reg_state *false_reg, u64 val,
				u8 opcode)
{
	if (__is_pointer_value(false, false_reg))
		return;

	switch (opcode) {
	case BPF_JEQ:
		/* If this is false then we know nothing Jon Snow, but if it is
		 * true then we know for sure.
		 */
		__mark_reg_known(true_reg, val);
		break;
	case BPF_JNE:
		/* If this is true we know nothing Jon Snow, but if it is false
		 * we know the value for sure.
		 */
		__mark_reg_known(false_reg, val);
		break;
	case BPF_JGT:
		true_reg->umax_value = min(true_reg->umax_value, val - 1);
		false_reg->umin_value = max(false_reg->umin_value, val);
		break;
	case BPF_JSGT:
		true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
		false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
		break;
	case BPF_JLT:
		true_reg->umin_value = max(true_reg->umin_value, val + 1);
		false_reg->umax_value = min(false_reg->umax_value, val);
		break;
	case BPF_JSLT:
		true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
		false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
		break;
	case BPF_JGE:
		true_reg->umax_value = min(true_reg->umax_value, val);
		false_reg->umin_value = max(false_reg->umin_value, val + 1);
		break;
	case BPF_JSGE:
		true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
		false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
		break;
	case BPF_JLE:
		true_reg->umin_value = max(true_reg->umin_value, val);
		false_reg->umax_value = min(false_reg->umax_value, val - 1);
		break;
	case BPF_JSLE:
		true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
		false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
		break;
	default:
		break;
	}

	__reg_deduce_bounds(false_reg);
	__reg_deduce_bounds(true_reg);
	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(false_reg);
	__reg_bound_offset(true_reg);
	/* Intersecting with the old var_off might have improved our bounds
	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(false_reg);
	__update_reg_bounds(true_reg);
}
/* Regs are known to be equal, so intersect their min/max/var_off */
static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
				  struct bpf_reg_state *dst_reg)
{
	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
							dst_reg->umin_value);
	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
							dst_reg->umax_value);
	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
							dst_reg->smin_value);
	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
							dst_reg->smax_value);
	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
							     dst_reg->var_off);
	/* We might have learned new bounds from the var_off. */
	__update_reg_bounds(src_reg);
	__update_reg_bounds(dst_reg);
	/* We might have learned something about the sign bit. */
	__reg_deduce_bounds(src_reg);
	__reg_deduce_bounds(dst_reg);
	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(src_reg);
	__reg_bound_offset(dst_reg);
	/* Intersecting with the old var_off might have improved our bounds
	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(src_reg);
	__update_reg_bounds(dst_reg);
}
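/* Example: if r1 is known to be in [0, 100] and r2 in [50, 200], then after
 * a taken "if (r1 == r2)" both registers end up in [50, 100] and share the
 * intersection of their var_offs.
 */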
static void reg_combine_min_max(struct bpf_reg_state *true_src,
				struct bpf_reg_state *true_dst,
				struct bpf_reg_state *false_src,
				struct bpf_reg_state *false_dst,
				u8 opcode)
{
	switch (opcode) {
	case BPF_JEQ:
		__reg_combine_min_max(true_src, true_dst);
		break;
	case BPF_JNE:
		__reg_combine_min_max(false_src, false_dst);
		break;
	}
}
static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
			 bool is_null)
{
	struct bpf_reg_state *reg = &regs[regno];

	if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
		/* Old offset (both fixed and variable parts) should
		 * have been known-zero, because we don't allow pointer
		 * arithmetic on pointers that might be NULL.
		 */
		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
				 !tnum_equals_const(reg->var_off, 0) ||
				 reg->off)) {
			__mark_reg_known_zero(reg);
			reg->off = 0;
		}
		if (is_null) {
			reg->type = SCALAR_VALUE;
		} else if (reg->map_ptr->inner_map_meta) {
			reg->type = CONST_PTR_TO_MAP;
			reg->map_ptr = reg->map_ptr->inner_map_meta;
		} else {
			reg->type = PTR_TO_MAP_VALUE;
		}
		/* We don't need id from this point onwards anymore, thus we
		 * should better reset it, so that state pruning has chances
		 * to take effect.
		 */
		reg->id = 0;
	}
}
/* The logic is similar to find_good_pkt_pointers(), both could eventually
 * be folded together at some point.
 */
static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
			  bool is_null)
{
	struct bpf_reg_state *regs = state->regs;
	u32 id = regs[regno].id;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++)
		mark_map_reg(regs, i, id, is_null);

	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].slot_type[0] != STACK_SPILL)
			continue;
		mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
	}
}
static bool try_match_pkt_pointers(const struct bpf_insn *insn,
				   struct bpf_reg_state *dst_reg,
				   struct bpf_reg_state *src_reg,
				   struct bpf_verifier_state *this_branch,
				   struct bpf_verifier_state *other_branch)
{
	if (BPF_SRC(insn->code) != BPF_X)
		return false;

	switch (BPF_OP(insn->code)) {
	case BPF_JGT:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
			find_good_pkt_pointers(this_branch, dst_reg,
					       dst_reg->type, false);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
			find_good_pkt_pointers(other_branch, src_reg,
					       src_reg->type, true);
		} else {
			return false;
		}
		break;
	case BPF_JLT:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
			find_good_pkt_pointers(other_branch, dst_reg,
					       dst_reg->type, true);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
			find_good_pkt_pointers(this_branch, src_reg,
					       src_reg->type, false);
		} else {
			return false;
		}
		break;
	case BPF_JGE:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
			find_good_pkt_pointers(this_branch, dst_reg,
					       dst_reg->type, true);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
			find_good_pkt_pointers(other_branch, src_reg,
					       src_reg->type, false);
		} else {
			return false;
		}
		break;
	case BPF_JLE:
		if ((dst_reg->type == PTR_TO_PACKET &&
		     src_reg->type == PTR_TO_PACKET_END) ||
		    (dst_reg->type == PTR_TO_PACKET_META &&
		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
			find_good_pkt_pointers(other_branch, dst_reg,
					       dst_reg->type, false);
		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
			    src_reg->type == PTR_TO_PACKET) ||
			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
			    src_reg->type == PTR_TO_PACKET_META)) {
			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
			find_good_pkt_pointers(this_branch, src_reg,
					       src_reg->type, true);
		} else {
			return false;
		}
		break;
	default:
		return false;
	}

	return true;
}
static int check_cond_jmp_op(struct bpf_verifier_env *env,
			     struct bpf_insn *insn, int *insn_idx)
{
	struct bpf_verifier_state *other_branch, *this_branch = env->cur_state;
	struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
	u8 opcode = BPF_OP(insn->code);
	int err;

	if (opcode > BPF_JSLE) {
		verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
		return -EINVAL;
	}

	if (BPF_SRC(insn->code) == BPF_X) {
		if (insn->imm != 0) {
			verbose(env, "BPF_JMP uses reserved fields\n");
			return -EINVAL;
		}

		/* check src1 operand */
		err = check_reg_arg(env, insn->src_reg, SRC_OP);
		if (err)
			return err;

		if (is_pointer_value(env, insn->src_reg)) {
			verbose(env, "R%d pointer comparison prohibited\n",
				insn->src_reg);
			return -EACCES;
		}
	} else {
		if (insn->src_reg != BPF_REG_0) {
			verbose(env, "BPF_JMP uses reserved fields\n");
			return -EINVAL;
		}
	}

	/* check src2 operand */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	dst_reg = &regs[insn->dst_reg];

	if (BPF_SRC(insn->code) == BPF_K) {
		int pred = is_branch_taken(dst_reg, insn->imm, opcode);

		if (pred == 1) {
			/* only follow the goto, ignore fall-through */
			*insn_idx += insn->off;
			return 0;
		} else if (pred == 0) {
			/* only follow fall-through branch, since
			 * that's where the program will go
			 */
			return 0;
		}
	}

	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
				  false);
	if (!other_branch)
		return -EFAULT;

	/* detect if we are comparing against a constant value so we can adjust
	 * our min/max values for our dst register.
	 * this is only legit if both are scalars (or pointers to the same
	 * object, I suppose, but we don't support that right now), because
	 * otherwise the different base pointers mean the offsets aren't
	 * comparable.
	 */
	if (BPF_SRC(insn->code) == BPF_X) {
		if (dst_reg->type == SCALAR_VALUE &&
		    regs[insn->src_reg].type == SCALAR_VALUE) {
			if (tnum_is_const(regs[insn->src_reg].var_off))
				reg_set_min_max(&other_branch->regs[insn->dst_reg],
						dst_reg, regs[insn->src_reg].var_off.value,
						opcode);
			else if (tnum_is_const(dst_reg->var_off))
				reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
						    &regs[insn->src_reg],
						    dst_reg->var_off.value, opcode);
			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
				/* Comparing for equality, we can combine knowledge */
				reg_combine_min_max(&other_branch->regs[insn->src_reg],
						    &other_branch->regs[insn->dst_reg],
						    &regs[insn->src_reg],
						    &regs[insn->dst_reg], opcode);
		}
	} else if (dst_reg->type == SCALAR_VALUE) {
		reg_set_min_max(&other_branch->regs[insn->dst_reg],
				dst_reg, insn->imm, opcode);
	}

	/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
	if (BPF_SRC(insn->code) == BPF_K &&
	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
	    dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
		/* Mark all identical map registers in each branch as either
		 * safe or unknown depending on the R == 0 or R != 0 conditional.
		 */
		mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
		mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
					   this_branch, other_branch) &&
		   is_pointer_value(env, insn->dst_reg)) {
		verbose(env, "R%d pointer comparison prohibited\n",
			insn->dst_reg);
		return -EACCES;
	}
	if (env->log.level)
		print_verifier_state(env, this_branch);
	return 0;
}
/* return the map pointer stored inside BPF_LD_IMM64 instruction */
static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
{
	u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;

	return (struct bpf_map *) (unsigned long) imm64;
}
/* verify BPF_LD_IMM64 instruction */
static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_reg_state *regs = cur_regs(env);
	int err;

	if (BPF_SIZE(insn->code) != BPF_DW) {
		verbose(env, "invalid BPF_LD_IMM insn\n");
		return -EINVAL;
	}
	if (insn->off != 0) {
		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
		return -EINVAL;
	}

	err = check_reg_arg(env, insn->dst_reg, DST_OP);
	if (err)
		return err;

	if (insn->src_reg == 0) {
		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;

		regs[insn->dst_reg].type = SCALAR_VALUE;
		__mark_reg_known(&regs[insn->dst_reg], imm);
		return 0;
	}

	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);

	regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
	regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
	return 0;
}
static bool may_access_skb(enum bpf_prog_type type)
{
	switch (type) {
	case BPF_PROG_TYPE_SOCKET_FILTER:
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
		return true;
	default:
		return false;
	}
}
/* verify safety of LD_ABS|LD_IND instructions:
 * - they can only appear in the programs where ctx == skb
 * - since they are wrappers of function calls, they scratch R1-R5 registers,
 *   preserve R6-R9, and store return value into R0
 *
 * Implicit input:
 *   ctx == skb == R6 == CTX
 *
 * Explicit input:
 *   SRC == any register
 *   IMM == 32-bit immediate
 *
 * Output:
 *   R0 - 8/16/32-bit skb data converted to cpu endianness
 */
static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_reg_state *regs = cur_regs(env);
	u8 mode = BPF_MODE(insn->code);
	int i, err;

	if (!may_access_skb(env->prog->type)) {
		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
		return -EINVAL;
	}

	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
	    BPF_SIZE(insn->code) == BPF_DW ||
	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
		return -EINVAL;
	}

	/* check whether implicit source operand (register R6) is readable */
	err = check_reg_arg(env, BPF_REG_6, SRC_OP);
	if (err)
		return err;

	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
		verbose(env,
			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
		return -EINVAL;
	}

	if (mode == BPF_IND) {
		/* check explicit source operand */
		err = check_reg_arg(env, insn->src_reg, SRC_OP);
		if (err)
			return err;
	}

	/* reset caller saved regs to unreadable */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		mark_reg_not_init(env, regs, caller_saved[i]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}

	/* mark destination R0 register as readable, since it contains
	 * the value fetched from the packet.
	 * Already marked as written above.
	 */
	mark_reg_unknown(env, regs, BPF_REG_0);
	return 0;
}
static int check_return_code(struct bpf_verifier_env *env)
{
	struct bpf_reg_state *reg;
	struct tnum range = tnum_range(0, 1);

	switch (env->prog->type) {
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
		break;
	default:
		return 0;
	}

	reg = cur_regs(env) + BPF_REG_0;
	if (reg->type != SCALAR_VALUE) {
		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
			reg_type_str[reg->type]);
		return -EINVAL;
	}

	if (!tnum_in(range, reg->var_off)) {
		verbose(env, "At program exit the register R0 ");
		if (!tnum_is_unknown(reg->var_off)) {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "has value %s", tn_buf);
		} else {
			verbose(env, "has unknown scalar value");
		}
		verbose(env, " should have been 0 or 1\n");
		return -EINVAL;
	}
	return 0;
}
/* non-recursive DFS pseudo code
 * 1  procedure DFS-iterative(G,v):
 * 2      label v as discovered
 * 3      let S be a stack
 * 4      S.push(v)
 * 5      while S is not empty
 * 6            t <- S.pop()
 * 7            if t is what we're looking for:
 * 8                return t
 * 9            for all edges e in G.adjacentEdges(t) do
 * 10               if edge e is already labelled
 * 11                   continue with the next edge
 * 12               w <- G.adjacentVertex(t,e)
 * 13               if vertex w is not discovered and not explored
 * 14                   label e as tree-edge
 * 15                   label w as discovered
 * 16                   S.push(w)
 * 17                   continue at 5
 * 18               else if vertex w is discovered
 * 19                   label e as back-edge
 * 20               else
 * 21                   // vertex w is explored
 * 22                   label e as forward- or cross-edge
 * 23           label t as explored
 * 24           S.pop()
 *
 * convention:
 * 0x10 - discovered
 * 0x11 - discovered and fall-through edge labelled
 * 0x12 - discovered and fall-through and branch edges labelled
 * 0x20 - explored
 */

enum {
	DISCOVERED = 0x10,
	EXPLORED = 0x20,
	FALLTHROUGH = 1,
	BRANCH = 2,
};

#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)

static int *insn_stack;	/* stack of insns to process */
static int cur_stack;	/* current stack index */
static int *insn_state;
/* t, w, e - match pseudo-code above:
 * t - index of current instruction
 * w - next instruction
 * e - edge
 */
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
{
	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
		return 0;

	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
		return 0;

	if (w < 0 || w >= env->prog->len) {
		verbose(env, "jump out of range from insn %d to %d\n", t, w);
		return -EINVAL;
	}

	if (e == BRANCH)
		/* mark branch target for state pruning */
		env->explored_states[w] = STATE_LIST_MARK;

	if (insn_state[w] == 0) {
		/* tree-edge */
		insn_state[t] = DISCOVERED | e;
		insn_state[w] = DISCOVERED;
		if (cur_stack >= env->prog->len)
			return -E2BIG;
		insn_stack[cur_stack++] = w;
		return 1;
	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
		verbose(env, "back-edge from insn %d to %d\n", t, w);
		return -EINVAL;
	} else if (insn_state[w] == EXPLORED) {
		/* forward- or cross-edge */
		insn_state[t] = DISCOVERED | e;
	} else {
		verbose(env, "insn state internal bug\n");
		return -EFAULT;
	}
	return 0;
}
/* non-recursive depth-first-search to detect loops in BPF program
 * loop == back-edge in directed graph
 */
static int check_cfg(struct bpf_verifier_env *env)
{
	struct bpf_insn *insns = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int ret = 0;
	int i, t;

	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_state)
		return -ENOMEM;

	insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_stack) {
		kfree(insn_state);
		return -ENOMEM;
	}

	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
	insn_stack[0] = 0; /* 0 is the first instruction */
	cur_stack = 1;

peek_stack:
	if (cur_stack == 0)
		goto check_state;
	t = insn_stack[cur_stack - 1];

	if (BPF_CLASS(insns[t].code) == BPF_JMP) {
		u8 opcode = BPF_OP(insns[t].code);

		if (opcode == BPF_EXIT) {
			goto mark_explored;
		} else if (opcode == BPF_CALL) {
			ret = push_insn(t, t + 1, FALLTHROUGH, env);
			if (ret == 1)
				goto peek_stack;
			else if (ret < 0)
				goto err_free;
			if (t + 1 < insn_cnt)
				env->explored_states[t + 1] = STATE_LIST_MARK;
		} else if (opcode == BPF_JA) {
			if (BPF_SRC(insns[t].code) != BPF_K) {
				ret = -EINVAL;
				goto err_free;
			}
			/* unconditional jump with single edge */
			ret = push_insn(t, t + insns[t].off + 1,
					FALLTHROUGH, env);
			if (ret == 1)
				goto peek_stack;
			else if (ret < 0)
				goto err_free;
			/* tell verifier to check for equivalent states
			 * after every call and jump
			 */
			if (t + 1 < insn_cnt)
				env->explored_states[t + 1] = STATE_LIST_MARK;
		} else {
			/* conditional jump with two edges */
			env->explored_states[t] = STATE_LIST_MARK;
			ret = push_insn(t, t + 1, FALLTHROUGH, env);
			if (ret == 1)
				goto peek_stack;
			else if (ret < 0)
				goto err_free;

			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
			if (ret == 1)
				goto peek_stack;
			else if (ret < 0)
				goto err_free;
		}
	} else {
		/* all other non-branch instructions with single
		 * fall-through edge
		 */
		ret = push_insn(t, t + 1, FALLTHROUGH, env);
		if (ret == 1)
			goto peek_stack;
		else if (ret < 0)
			goto err_free;
	}

mark_explored:
	insn_state[t] = EXPLORED;
	if (cur_stack-- <= 0) {
		verbose(env, "pop stack internal bug\n");
		ret = -EFAULT;
		goto err_free;
	}
	goto peek_stack;

check_state:
	for (i = 0; i < insn_cnt; i++) {
		if (insn_state[i] != EXPLORED) {
			verbose(env, "unreachable insn %d\n", i);
			ret = -EINVAL;
			goto err_free;
		}
	}
	ret = 0; /* cfg looks good */

err_free:
	kfree(insn_state);
	kfree(insn_stack);
	return ret;
}
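/* Example: a one-instruction loop such as "0: goto 0" makes push_insn() find
 * insn_state[0] still DISCOVERED when the BPF_JA edge leads back to it, so
 * check_cfg() fails with "back-edge from insn 0 to 0".
 */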
/* check %cur's range satisfies %old's */
static bool range_within(struct bpf_reg_state *old,
			 struct bpf_reg_state *cur)
{
	return old->umin_value <= cur->umin_value &&
	       old->umax_value >= cur->umax_value &&
	       old->smin_value <= cur->smin_value &&
	       old->smax_value >= cur->smax_value;
}
/* Maximum number of register states that can exist at once */
#define ID_MAP_SIZE	(MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
struct idpair {
	u32 old;
	u32 cur;
};

/* If in the old state two registers had the same id, then they need to have
 * the same id in the new state as well.  But that id could be different from
 * the old state, so we need to track the mapping from old to new ids.
 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
 * regs with old id 5 must also have new id 9 for the new state to be safe.  But
 * regs with a different old id could still have new id 9, we don't care about
 * that.
 * So we look through our idmap to see if this old id has been seen before.  If
 * so, we require the new id to match; otherwise, we add the id pair to the map.
 */
static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
{
	unsigned int i;

	for (i = 0; i < ID_MAP_SIZE; i++) {
		if (!idmap[i].old) {
			/* Reached an empty slot; haven't seen this id before */
			idmap[i].old = old_id;
			idmap[i].cur = cur_id;
			return true;
		}
		if (idmap[i].old == old_id)
			return idmap[i].cur == cur_id;
	}
	/* We ran out of idmap slots, which should be impossible */
	WARN_ON_ONCE(1);
	return false;
}
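/* Example: if the old state had r1.id == r2.id == 5 and the current state
 * has r1.id == r2.id == 9, check_ids() records the 5 -> 9 mapping on the
 * first query and accepts the second; a current state where r2.id were 7
 * instead would not be considered equivalent.
 */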
/* Returns true if (rold safe implies rcur safe) */
static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
		    struct idpair *idmap)
{
	if (!(rold->live & REG_LIVE_READ))
		/* explored state didn't use this */
		return true;

	if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, live)) == 0)
		return true;

	if (rold->type == NOT_INIT)
		/* explored state can't have used this */
		return true;
	if (rcur->type == NOT_INIT)
		return false;
	switch (rold->type) {
	case SCALAR_VALUE:
		if (rcur->type == SCALAR_VALUE) {
			/* new val must satisfy old val knowledge */
			return range_within(rold, rcur) &&
			       tnum_in(rold->var_off, rcur->var_off);
		} else {
			/* We're trying to use a pointer in place of a scalar.
			 * Even if the scalar was unbounded, this could lead to
			 * pointer leaks because scalars are allowed to leak
			 * while pointers are not. We could make this safe in
			 * special cases if root is calling us, but it's
			 * probably not worth the hassle.
			 */
			return false;
		}
	case PTR_TO_MAP_VALUE:
		/* If the new min/max/var_off satisfy the old ones and
		 * everything else matches, we are OK.
		 * We don't care about the 'id' value, because nothing
		 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
		 */
		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
		       range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off);
	case PTR_TO_MAP_VALUE_OR_NULL:
		/* a PTR_TO_MAP_VALUE could be safe to use as a
		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
		 * checked, doing so could have affected others with the same
		 * id, and we can't check for that because we lost the id when
		 * we converted to a PTR_TO_MAP_VALUE.
		 */
		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
			return false;
		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
			return false;
		/* Check our ids match any regs they're supposed to */
		return check_ids(rold->id, rcur->id, idmap);
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET:
		if (rcur->type != rold->type)
			return false;
		/* We must have at least as much range as the old ptr
		 * did, so that any accesses which were safe before are
		 * still safe.  This is true even if old range < old off,
		 * since someone could have accessed through (ptr - k), or
		 * even done ptr -= k in a register, to get a safe access.
		 */
		if (rold->range > rcur->range)
			return false;
		/* If the offsets don't match, we can't trust our alignment;
		 * nor can we be sure that we won't fall out of range.
		 */
		if (rold->off != rcur->off)
			return false;
		/* id relations must be preserved */
		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
			return false;
		/* new val must satisfy old val knowledge */
		return range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off);
	case PTR_TO_CTX:
	case CONST_PTR_TO_MAP:
	case PTR_TO_STACK:
	case PTR_TO_PACKET_END:
		/* Only valid matches are exact, which memcmp() above
		 * would have accepted
		 */
	default:
		/* Don't know what's going on, just say it's not safe */
		return false;
	}

	/* Shouldn't get here; if we do, say it's not safe */
	WARN_ON_ONCE(1);
	return false;
}
static bool stacksafe(struct bpf_verifier_state *old,
		      struct bpf_verifier_state *cur,
		      struct idpair *idmap)
{
	int i, spi;

	/* if explored stack has more populated slots than current stack
	 * such stacks are not equivalent
	 */
	if (old->allocated_stack > cur->allocated_stack)
		return false;

	/* walk slots of the explored stack and ignore any additional
	 * slots in the current stack, since explored(safe) state
	 * didn't use them
	 */
	for (i = 0; i < old->allocated_stack; i++) {
		spi = i / BPF_REG_SIZE;

		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
			continue;
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
			/* Ex: old explored (safe) state has STACK_SPILL in
			 * this stack slot, but current has STACK_MISC ->
			 * these verifier states are not equivalent,
			 * return false to continue verification of this path
			 */
			return false;
		if (i % BPF_REG_SIZE)
			continue;
		if (old->stack[spi].slot_type[0] != STACK_SPILL)
			continue;
		if (!regsafe(&old->stack[spi].spilled_ptr,
			     &cur->stack[spi].spilled_ptr,
			     idmap))
			/* when explored and current stack slot are both storing
			 * spilled registers, check that stored pointers types
			 * are the same as well.
			 * Ex: explored safe path could have stored
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
			 * but current path has stored:
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
			 * such verifier states are not equivalent.
			 * return false to continue verification of this path
			 */
			return false;
	}
	return true;
}
/* compare two verifier states
 *
 * all states stored in state_list are known to be valid, since
 * verifier reached 'bpf_exit' instruction through them
 *
 * this function is called when the verifier explores different branches of
 * execution popped from the state stack. If it sees an old state that has
 * a more strict register state and a more strict stack state, then this
 * execution branch doesn't need to be explored further, since the verifier
 * already concluded that the more strict state leads to a valid finish.
 *
 * Therefore two states are equivalent if register state is more conservative
 * and explored stack state is more conservative than the current one.
 * Example:
 *        explored                   current
 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
 *
 * In other words if current stack state (one being explored) has more
 * valid slots than old one that already passed validation, it means
 * the verifier can stop exploring and conclude that current state is valid too
 *
 * Similarly with registers. If explored state has register type as invalid
 * whereas register type in current state is meaningful, it means that
 * the current state will reach 'bpf_exit' instruction safely
 */
static bool states_equal(struct bpf_verifier_env *env,
			 struct bpf_verifier_state *old,
			 struct bpf_verifier_state *cur)
{
	struct idpair *idmap;
	bool ret = false;
	int i;

	/* Verification state from speculative execution simulation
	 * must never prune a non-speculative execution one.
	 */
	if (old->speculative && !cur->speculative)
		return false;

	idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
	/* If we failed to allocate the idmap, just say it's not safe */
	if (!idmap)
		return false;

	for (i = 0; i < MAX_BPF_REG; i++) {
		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
			goto out_free;
	}

	if (!stacksafe(old, cur, idmap))
		goto out_free;
	ret = true;
out_free:
	kfree(idmap);
	return ret;
}
/* A write screens off any subsequent reads; but write marks come from the
 * straight-line code between a state and its parent. When we arrive at a
 * jump target (in the first iteration of the propagate_liveness() loop),
 * we didn't arrive by the straight-line code, so read marks in state must
 * propagate to parent regardless of state's write marks.
 */
static bool do_propagate_liveness(const struct bpf_verifier_state *state,
				  struct bpf_verifier_state *parent)
{
	bool writes = parent == state->parent; /* Observe write marks */
	bool touched = false; /* any changes made? */
	int i;

	if (!parent)
		return touched;
	/* Propagate read liveness of registers... */
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
	/* We don't need to worry about FP liveness because it's read-only */
	for (i = 0; i < BPF_REG_FP; i++) {
		if (parent->regs[i].live & REG_LIVE_READ)
			continue;
		if (writes && (state->regs[i].live & REG_LIVE_WRITTEN))
			continue;
		if (state->regs[i].live & REG_LIVE_READ) {
			parent->regs[i].live |= REG_LIVE_READ;
			touched = true;
		}
	}
	/* ... and stack slots */
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
		    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
		if (parent->stack[i].slot_type[0] != STACK_SPILL)
			continue;
		if (state->stack[i].slot_type[0] != STACK_SPILL)
			continue;
		if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
			continue;
		if (writes &&
		    (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN))
			continue;
		if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) {
			parent->stack[i].spilled_ptr.live |= REG_LIVE_READ;
			touched = true;
		}
	}
	return touched;
}
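/* Illustrative example (editor's addition, not in the original source):
 * if the straight-line code between parent and state did "r1 = 0"
 * (REG_LIVE_WRITTEN on r1 in state), a later read of r1 in the
 * continuation must not mark r1 REG_LIVE_READ in parent: the parent's
 * value of r1 can never be observed, so the write screens it off and
 * parent's r1 stays prunable as dead.
 */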
4102 /* "parent" is "a state from which we reach the current state", but initially
4103 * it is not the state->parent (i.e. "the state whose straight-line code leads
4104 * to the current state"), instead it is the state that happened to arrive at
4105 * a (prunable) equivalent of the current state. See comment above
4106 * do_propagate_liveness() for consequences of this.
4107 * This function is just a more efficient way of calling mark_reg_read() or
4108 * mark_stack_slot_read() on each reg in "parent" that is read in "state",
4109 * though it requires that parent != state->parent in the call arguments.
4111 static void propagate_liveness(const struct bpf_verifier_state
*state
,
4112 struct bpf_verifier_state
*parent
)
4114 while (do_propagate_liveness(state
, parent
)) {
4115 /* Something changed, so we need to feed those changes onward */
4117 parent
= state
->parent
;
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state_list *new_sl;
	struct bpf_verifier_state_list *sl;
	struct bpf_verifier_state *cur = env->cur_state;
	int i, err, states_cnt = 0;

	sl = env->explored_states[insn_idx];
	if (!sl)
		/* this 'insn_idx' instruction wasn't marked, so we will not
		 * be doing state search here
		 */
		return 0;

	while (sl != STATE_LIST_MARK) {
		if (states_equal(env, &sl->state, cur)) {
			/* reached equivalent register/stack state,
			 * prune the search.
			 * Registers read by the continuation are read by us.
			 * If we have any write marks in env->cur_state, they
			 * will prevent corresponding reads in the continuation
			 * from reaching our parent (an explored_state). Our
			 * own state will get the read marks recorded, but
			 * they'll be immediately forgotten as we're pruning
			 * this state and will pop a new one.
			 */
			propagate_liveness(&sl->state, cur);
			return 1;
		}
		sl = sl->next;
		states_cnt++;
	}

	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
		return 0;

	/* there were no equivalent states, remember current one.
	 * technically the current state is not proven to be safe yet,
	 * but it will either reach bpf_exit (which means it's safe) or
	 * it will be rejected. Since there are no loops, we won't be
	 * seeing this 'insn_idx' instruction again on the way to bpf_exit
	 */
	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
	if (!new_sl)
		return -ENOMEM;

	/* add new state to the head of linked list */
	err = copy_verifier_state(&new_sl->state, cur);
	if (err) {
		free_verifier_state(&new_sl->state, false);
		kfree(new_sl);
		return err;
	}
	new_sl->next = env->explored_states[insn_idx];
	env->explored_states[insn_idx] = new_sl;
	/* connect new state to parentage chain */
	cur->parent = &new_sl->state;
	/* clear write marks in current state: the writes we did are not writes
	 * our child did, so they don't screen off its reads from us.
	 * (There are no read marks in current state, because reads always mark
	 * their parent and current state never has children yet.  Only
	 * explored_states can get read marks.)
	 */
	for (i = 0; i < BPF_REG_FP; i++)
		cur->regs[i].live = REG_LIVE_NONE;
	for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++)
		if (cur->stack[i].slot_type[0] == STACK_SPILL)
			cur->stack[i].spilled_ptr.live = REG_LIVE_NONE;
	return 0;
}
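/* Illustrative note (editor's addition, not in the original source):
 * after this function returns 0, walking cur->parent->parent->...
 * visits the copies stored in explored_states, which is the same chain
 * that mark_reg_read()/mark_stack_slot_read() and propagate_liveness()
 * climb when recording read marks.
 */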
static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
				  int insn_idx, int prev_insn_idx)
{
	if (env->dev_ops && env->dev_ops->insn_hook)
		return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);

	return 0;
}
static int do_check(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state *state;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_reg_state *regs;
	int insn_cnt = env->prog->len;
	int insn_processed = 0;
	bool do_print_state = false;

	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
	if (!state)
		return -ENOMEM;
	env->cur_state = state;
	init_reg_state(env, state->regs);
	state->parent = NULL;
	state->speculative = false;

	for (;;) {
		struct bpf_insn *insn;
		u8 class;
		int err;

		if (env->insn_idx >= insn_cnt) {
			verbose(env, "invalid insn idx %d insn_cnt %d\n",
				env->insn_idx, insn_cnt);
			return -EFAULT;
		}

		insn = &insns[env->insn_idx];
		class = BPF_CLASS(insn->code);

		if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				insn_processed);
			return -E2BIG;
		}

		err = is_state_visited(env, env->insn_idx);
		if (err < 0)
			return err;
		if (err == 1) {
			/* found equivalent state, can prune the search */
			if (env->log.level) {
				if (do_print_state)
					verbose(env, "\nfrom %d to %d%s: safe\n",
						env->prev_insn_idx, env->insn_idx,
						env->cur_state->speculative ?
						" (speculative execution)" : "");
				else
					verbose(env, "%d: safe\n", env->insn_idx);
			}
			goto process_bpf_exit;
		}

		if (signal_pending(current))
			return -EAGAIN;

		if (need_resched())
			cond_resched();

		if (env->log.level > 1 || (env->log.level && do_print_state)) {
			if (env->log.level > 1)
				verbose(env, "%d:", env->insn_idx);
			else
				verbose(env, "\nfrom %d to %d%s:",
					env->prev_insn_idx, env->insn_idx,
					env->cur_state->speculative ?
					" (speculative execution)" : "");
			print_verifier_state(env, state);
			do_print_state = false;
		}

		if (env->log.level) {
			verbose(env, "%d: ", env->insn_idx);
			print_bpf_insn(verbose, env, insn,
				       env->allow_ptr_leaks);
		}

		err = ext_analyzer_insn_hook(env, env->insn_idx, env->prev_insn_idx);
		if (err)
			return err;

		regs = cur_regs(env);
		env->insn_aux_data[env->insn_idx].seen = true;

		if (class == BPF_ALU || class == BPF_ALU64) {
			err = check_alu_op(env, insn);
			if (err)
				return err;

		} else if (class == BPF_LDX) {
			enum bpf_reg_type *prev_src_type, src_reg_type;

			/* check for reserved fields is already done */

			/* check src operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;

			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
			if (err)
				return err;

			src_reg_type = regs[insn->src_reg].type;

			/* check that memory (src_reg + off) is readable,
			 * the state of dst_reg will be updated by this func
			 */
			err = check_mem_access(env, env->insn_idx, insn->src_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_READ, insn->dst_reg, false);
			if (err)
				return err;

			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;

			if (*prev_src_type == NOT_INIT) {
				/* saw a valid insn
				 * dst_reg = *(u32 *)(src_reg + off)
				 * save type to validate intersecting paths
				 */
				*prev_src_type = src_reg_type;

			} else if (src_reg_type != *prev_src_type &&
				   (src_reg_type == PTR_TO_CTX ||
				    *prev_src_type == PTR_TO_CTX)) {
				/* Abuser program is trying to use the same insn
				 * dst_reg = *(u32 *)(src_reg + off)
				 * with different pointer types:
				 * src_reg == ctx in one branch and
				 * src_reg == stack|map in some other branch.
				 * Reject it.
				 */
				verbose(env, "same insn cannot be used with different pointers\n");
				return -EINVAL;
			}

		} else if (class == BPF_STX) {
			enum bpf_reg_type *prev_dst_type, dst_reg_type;

			if (BPF_MODE(insn->code) == BPF_XADD) {
				err = check_xadd(env, env->insn_idx, insn);
				if (err)
					return err;
				env->insn_idx++;
				continue;
			}

			/* check src1 operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
			/* check src2 operand */
			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
			if (err)
				return err;

			dst_reg_type = regs[insn->dst_reg].type;

			/* check that memory (dst_reg + off) is writeable */
			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_WRITE, insn->src_reg, false);
			if (err)
				return err;

			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;

			if (*prev_dst_type == NOT_INIT) {
				*prev_dst_type = dst_reg_type;
			} else if (dst_reg_type != *prev_dst_type &&
				   (dst_reg_type == PTR_TO_CTX ||
				    *prev_dst_type == PTR_TO_CTX)) {
				verbose(env, "same insn cannot be used with different pointers\n");
				return -EINVAL;
			}

		} else if (class == BPF_ST) {
			if (BPF_MODE(insn->code) != BPF_MEM ||
			    insn->src_reg != BPF_REG_0) {
				verbose(env, "BPF_ST uses reserved fields\n");
				return -EINVAL;
			}
			/* check src operand */
			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
			if (err)
				return err;

			if (is_ctx_reg(env, insn->dst_reg)) {
				verbose(env, "BPF_ST stores into R%d context is not allowed\n",
					insn->dst_reg);
				return -EACCES;
			}

			/* check that memory (dst_reg + off) is writeable */
			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_WRITE, -1, false);
			if (err)
				return err;

		} else if (class == BPF_JMP) {
			u8 opcode = BPF_OP(insn->code);

			if (opcode == BPF_CALL) {
				if (BPF_SRC(insn->code) != BPF_K ||
				    insn->off != 0 ||
				    insn->src_reg != BPF_REG_0 ||
				    insn->dst_reg != BPF_REG_0) {
					verbose(env, "BPF_CALL uses reserved fields\n");
					return -EINVAL;
				}

				err = check_call(env, insn->imm, env->insn_idx);
				if (err)
					return err;

			} else if (opcode == BPF_JA) {
				if (BPF_SRC(insn->code) != BPF_K ||
				    insn->imm != 0 ||
				    insn->src_reg != BPF_REG_0 ||
				    insn->dst_reg != BPF_REG_0) {
					verbose(env, "BPF_JA uses reserved fields\n");
					return -EINVAL;
				}

				env->insn_idx += insn->off + 1;
				continue;

			} else if (opcode == BPF_EXIT) {
				if (BPF_SRC(insn->code) != BPF_K ||
				    insn->imm != 0 ||
				    insn->src_reg != BPF_REG_0 ||
				    insn->dst_reg != BPF_REG_0) {
					verbose(env, "BPF_EXIT uses reserved fields\n");
					return -EINVAL;
				}

				/* eBPF calling convention is such that R0 is used
				 * to return the value from eBPF program.
				 * Make sure that it's readable at this time
				 * of bpf_exit, which means that program wrote
				 * something into it earlier
				 */
				err = check_reg_arg(env, BPF_REG_0, SRC_OP);
				if (err)
					return err;

				if (is_pointer_value(env, BPF_REG_0)) {
					verbose(env, "R0 leaks addr as return value\n");
					return -EACCES;
				}

				err = check_return_code(env);
				if (err)
					return err;
process_bpf_exit:
				err = pop_stack(env, &env->prev_insn_idx,
						&env->insn_idx);
				if (err < 0) {
					if (err != -ENOENT)
						return err;
					break;
				} else {
					do_print_state = true;
					continue;
				}
			} else {
				err = check_cond_jmp_op(env, insn, &env->insn_idx);
				if (err)
					return err;
			}
		} else if (class == BPF_LD) {
			u8 mode = BPF_MODE(insn->code);

			if (mode == BPF_ABS || mode == BPF_IND) {
				err = check_ld_abs(env, insn);
				if (err)
					return err;

			} else if (mode == BPF_IMM) {
				err = check_ld_imm(env, insn);
				if (err)
					return err;

				env->insn_idx++;
				env->insn_aux_data[env->insn_idx].seen = true;
			} else {
				verbose(env, "invalid BPF_LD mode\n");
				return -EINVAL;
			}
		} else {
			verbose(env, "unknown insn class %d\n", class);
			return -EINVAL;
		}

		env->insn_idx++;
	}

	verbose(env, "processed %d insns, stack depth %d\n", insn_processed,
		env->prog->aux->stack_depth);
	return 0;
}
static int check_map_prealloc(struct bpf_map *map)
{
	return (map->map_type != BPF_MAP_TYPE_HASH &&
		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
		!(map->map_flags & BPF_F_NO_PREALLOC);
}
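/* Illustrative note (editor's addition, not in the original source):
 * a BPF_MAP_TYPE_HASH created without BPF_F_NO_PREALLOC passes this
 * check, the same map created with BPF_F_NO_PREALLOC fails it, and
 * e.g. a BPF_MAP_TYPE_ARRAY always passes, since only the three hash
 * map types are restricted.
 */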
static int check_map_prog_compatibility(struct bpf_verifier_env *env,
					struct bpf_map *map,
					struct bpf_prog *prog)

{
	/* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
	 * preallocated hash maps, since doing memory allocation
	 * in overflow_handler can crash depending on where nmi got
	 * triggered.
	 */
	if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
		if (!check_map_prealloc(map)) {
			verbose(env, "perf_event programs can only use preallocated hash map\n");
			return -EINVAL;
		}
		if (map->inner_map_meta &&
		    !check_map_prealloc(map->inner_map_meta)) {
			verbose(env, "perf_event programs can only use preallocated inner hash map\n");
			return -EINVAL;
		}
	}
	return 0;
}
/* look for pseudo eBPF instructions that access map FDs and
 * replace them with actual map pointers
 */
static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i, j, err;

	err = bpf_prog_calc_tag(env->prog);
	if (err)
		return err;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (BPF_CLASS(insn->code) == BPF_LDX &&
		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
			verbose(env, "BPF_LDX uses reserved fields\n");
			return -EINVAL;
		}

		if (BPF_CLASS(insn->code) == BPF_STX &&
		    ((BPF_MODE(insn->code) != BPF_MEM &&
		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
			verbose(env, "BPF_STX uses reserved fields\n");
			return -EINVAL;
		}

		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
			struct bpf_map *map;
			struct fd f;

			if (i == insn_cnt - 1 || insn[1].code != 0 ||
			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
			    insn[1].off != 0) {
				verbose(env, "invalid bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			if (insn->src_reg == 0)
				/* valid generic load 64-bit imm */
				goto next_insn;

			if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
				verbose(env,
					"unrecognized bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			f = fdget(insn->imm);
			map = __bpf_map_get(f);
			if (IS_ERR(map)) {
				verbose(env, "fd %d is not pointing to valid bpf_map\n",
					insn->imm);
				return PTR_ERR(map);
			}

			err = check_map_prog_compatibility(env, map, env->prog);
			if (err) {
				fdput(f);
				return err;
			}

			/* store map pointer inside BPF_LD_IMM64 instruction */
			insn[0].imm = (u32) (unsigned long) map;
			insn[1].imm = ((u64) (unsigned long) map) >> 32;

			/* check whether we recorded this map already */
			for (j = 0; j < env->used_map_cnt; j++)
				if (env->used_maps[j] == map) {
					fdput(f);
					goto next_insn;
				}

			if (env->used_map_cnt >= MAX_USED_MAPS) {
				fdput(f);
				return -E2BIG;
			}

			/* hold the map. If the program is rejected by verifier,
			 * the map will be released by release_maps() or it
			 * will be used by the valid program until it's unloaded
			 * and all maps are released in free_used_maps()
			 */
			map = bpf_map_inc(map, false);
			if (IS_ERR(map)) {
				fdput(f);
				return PTR_ERR(map);
			}
			env->used_maps[env->used_map_cnt++] = map;

			fdput(f);
next_insn:
			insn++;
			i++;
		}
	}

	/* now all pseudo BPF_LD_IMM64 instructions load valid
	 * 'struct bpf_map *' into a register instead of user map_fd.
	 * These pointers will be used later by verifier to validate map access.
	 */
	return 0;
}

/* drop refcnt of maps used by the rejected program */
static void release_maps(struct bpf_verifier_env *env)
{
	int i;

	for (i = 0; i < env->used_map_cnt; i++)
		bpf_map_put(env->used_maps[i]);
}
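/* Illustrative note (editor's addition, not in the original source):
 * loaders typically emit the pseudo instruction via a userspace helper
 * such as BPF_LD_MAP_FD(BPF_REG_1, map_fd) (e.g. from samples/bpf),
 * i.e. a two-slot BPF_LD | BPF_IMM | BPF_DW with
 * src_reg == BPF_PSEUDO_MAP_FD and the fd in insn[0].imm; after the
 * loop above, insn[0].imm and insn[1].imm hold the low and high 32
 * bits of the kernel 'struct bpf_map *' instead.
 */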
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++, insn++)
		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
			insn->src_reg = 0;
}
/* single env->prog->insnsi[off] instruction was replaced with the range
 * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
 * [0, off) and [off, end) to new locations, so the patched range stays zero
 */
static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
				u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
	int i;

	if (cnt == 1)
		return 0;
	new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
	if (!new_data)
		return -ENOMEM;
	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
	memcpy(new_data + off + cnt - 1, old_data + off,
	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
	for (i = off; i < off + cnt - 1; i++)
		new_data[i].seen = true;
	env->insn_aux_data = new_data;
	vfree(old_data);
	return 0;
}

static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
					    const struct bpf_insn *patch, u32 len)
{
	struct bpf_prog *new_prog;

	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
	if (!new_prog)
		return NULL;
	if (adjust_insn_aux_data(env, new_prog->len, off, len))
		return NULL;
	return new_prog;
}
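/* Illustrative example (editor's addition, not in the original source):
 * patching the single insn at off == 3 with a cnt == 2 sequence gives
 *   new aux[0..2] = old aux[0..2]
 *   new aux[4..]  = old aux[3..]
 * while new aux[3] (the extra patched slot) stays zeroed and is marked
 * seen, so sanitize_dead_code() never turns freshly emitted patch
 * instructions into nops.
 */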
/* The verifier does more data flow analysis than llvm and will not explore
 * branches that are dead at run time. Malicious programs can have dead code
 * too. Therefore replace all dead at-run-time code with nops.
 */
static void sanitize_dead_code(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
	struct bpf_insn *insn = env->prog->insnsi;
	const int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++) {
		if (aux_data[i].seen)
			continue;
		memcpy(insn + i, &nop, sizeof(nop));
	}
}
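/* Illustrative example (editor's addition, not in the original source):
 * given
 *   0: r1 = 0;
 *   1: if r1 != 0 goto +1;
 *   2: goto +1;
 *   3: r0 = *(u64 *)(r9 + 0);   // never reached
 *   4: exit;
 * check_cond_jmp_op() proves the branch at insn 1 is never taken, so
 * insn 3 is never marked seen and is overwritten with the 'r0 = r0'
 * nop above instead of surviving as unverified dead code.
 */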
/* convert load instructions that access fields of 'struct __sk_buff'
 * into sequence of instructions that access fields of 'struct sk_buff'
 */
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
	const struct bpf_verifier_ops *ops = env->ops;
	int i, cnt, size, ctx_field_size, delta = 0;
	const int insn_cnt = env->prog->len;
	struct bpf_insn insn_buf[16], *insn;
	struct bpf_prog *new_prog;
	enum bpf_access_type type;
	bool is_narrower_load;
	u32 target_size;

	if (ops->gen_prologue) {
		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
					env->prog);
		if (cnt >= ARRAY_SIZE(insn_buf)) {
			verbose(env, "bpf verifier is misconfigured\n");
			return -EINVAL;
		} else if (cnt) {
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			env->prog = new_prog;
			delta += cnt - 1;
		}
	}

	if (!ops->convert_ctx_access)
		return 0;

	insn = env->prog->insnsi + delta;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
			type = BPF_READ;
		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
			type = BPF_WRITE;
		else
			continue;

		if (type == BPF_WRITE &&
		    env->insn_aux_data[i + delta].sanitize_stack_off) {
			struct bpf_insn patch[] = {
				/* Sanitize suspicious stack slot with zero.
				 * There are no memory dependencies for this store,
				 * since it's only using frame pointer and immediate
				 * constant of zero
				 */
				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
					   env->insn_aux_data[i + delta].sanitize_stack_off,
					   0),
				/* the original STX instruction will immediately
				 * overwrite the same stack slot with appropriate value
				 */
				*insn,
			};

			cnt = ARRAY_SIZE(patch);
			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
			continue;

		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
		size = BPF_LDST_BYTES(insn);

		/* If the read access is a narrower load of the field,
		 * convert to a 4/8-byte load, to minimize program type
		 * specific convert_ctx_access changes. If conversion is
		 * successful, we will apply proper mask to the result.
		 */
		is_narrower_load = size < ctx_field_size;
		if (is_narrower_load) {
			u32 off = insn->off;
			u8 size_code;

			if (type == BPF_WRITE) {
				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
				return -EINVAL;
			}

			size_code = BPF_H;
			if (ctx_field_size == 4)
				size_code = BPF_W;
			else if (ctx_field_size == 8)
				size_code = BPF_DW;

			insn->off = off & ~(ctx_field_size - 1);
			insn->code = BPF_LDX | BPF_MEM | size_code;
		}

		target_size = 0;
		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
					      &target_size);
		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
		    (ctx_field_size && !target_size)) {
			verbose(env, "bpf verifier is misconfigured\n");
			return -EINVAL;
		}

		if (is_narrower_load && size < target_size) {
			if (ctx_field_size <= 4)
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1 << size * 8) - 1);
			else
				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
								(1 << size * 8) - 1);
		}

		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
		if (!new_prog)
			return -ENOMEM;

		delta += cnt - 1;

		/* keep walking new program and skip insns we just inserted */
		env->prog = new_prog;
		insn      = new_prog->insnsi + i + delta;
	}

	return 0;
}
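/* Illustrative example (editor's addition, not in the original source):
 * for a 4-byte ctx field at offset 0, the 1-byte program load
 *   r0 = *(u8 *)(r1 + 0)
 * is widened above to BPF_LDX | BPF_MEM | BPF_W at the aligned offset,
 * run through convert_ctx_access(), and then masked with
 *   BPF_ALU32_IMM(BPF_AND, r0, 0xff)
 * so the program still observes only the byte it asked for.
 */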
/* fixup insn->imm field of bpf_call instructions
 * and inline eligible helpers as explicit sequence of BPF instructions
 *
 * this function is called after eBPF program passed verification
 */
static int fixup_bpf_calls(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog;
	struct bpf_insn *insn = prog->insnsi;
	const struct bpf_func_proto *fn;
	const int insn_cnt = prog->len;
	struct bpf_insn_aux_data *aux;
	struct bpf_insn insn_buf[16];
	struct bpf_prog *new_prog;
	struct bpf_map *map_ptr;
	int i, cnt, delta = 0;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
			/* due to JIT bugs clear upper 32-bits of src register
			 * before div/mod operation
			 */
			insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
			insn_buf[1] = *insn;
			cnt = 2;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
			struct bpf_insn insn_buf[16];
			struct bpf_insn *patch = &insn_buf[0];
			bool issrc, isneg;
			u32 off_reg;

			aux = &env->insn_aux_data[i + delta];
			if (!aux->alu_state ||
			    aux->alu_state == BPF_ALU_NON_POINTER)
				continue;

			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
				BPF_ALU_SANITIZE_SRC;

			off_reg = issrc ? insn->src_reg : insn->dst_reg;
			if (isneg)
				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
			*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
			*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
			*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
			*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
			*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
			if (issrc) {
				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
							 off_reg);
				insn->src_reg = BPF_REG_AX;
			} else {
				*patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
							 BPF_REG_AX);
			}
			if (isneg)
				insn->code = insn->code == code_add ?
					     code_sub : code_add;
			*patch++ = *insn;
			if (issrc && isneg)
				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
			cnt = patch - insn_buf;

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		if (insn->code != (BPF_JMP | BPF_CALL))
			continue;

		if (insn->imm == BPF_FUNC_get_route_realm)
			prog->dst_needed = 1;
		if (insn->imm == BPF_FUNC_get_prandom_u32)
			bpf_user_rnd_init_once();
		if (insn->imm == BPF_FUNC_tail_call) {
			/* If we tail call into other programs, we
			 * cannot make any assumptions since they can
			 * be replaced dynamically during runtime in
			 * the program array.
			 */
			prog->cb_access = 1;
			env->prog->aux->stack_depth = MAX_BPF_STACK;

			/* mark bpf_tail_call as different opcode to avoid
			 * conditional branch in the interpreter for every normal
			 * call and to prevent accidental JITing by JIT compiler
			 * that doesn't support bpf_tail_call yet
			 */
			insn->imm = 0;
			insn->code = BPF_JMP | BPF_TAIL_CALL;

			aux = &env->insn_aux_data[i + delta];
			if (!bpf_map_ptr_unpriv(aux))
				continue;

			/* instead of changing every JIT dealing with tail_call
			 * emit two extra insns:
			 * if (index >= max_entries) goto out;
			 * index &= array->index_mask;
			 * to avoid out-of-bounds cpu speculation
			 */
			if (bpf_map_ptr_poisoned(aux)) {
				verbose(env, "tail_call abusing map_ptr\n");
				return -EINVAL;
			}

			map_ptr = BPF_MAP_PTR(aux->map_state);
			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
						  map_ptr->max_entries, 2);
			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
						    container_of(map_ptr,
								 struct bpf_array,
								 map)->index_mask);
			insn_buf[2] = *insn;
			cnt = 3;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
		 * handlers are currently limited to 64 bit only.
		 */
		if (ebpf_jit_enabled() && BITS_PER_LONG == 64 &&
		    insn->imm == BPF_FUNC_map_lookup_elem) {
			aux = &env->insn_aux_data[i + delta];
			if (bpf_map_ptr_poisoned(aux))
				goto patch_call_imm;

			map_ptr = BPF_MAP_PTR(aux->map_state);
			if (!map_ptr->ops->map_gen_lookup)
				goto patch_call_imm;

			cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
				verbose(env, "bpf verifier is misconfigured\n");
				return -EINVAL;
			}

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
						       cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;

			/* keep walking new program and skip insns we just inserted */
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
			continue;
		}

		if (insn->imm == BPF_FUNC_redirect_map) {
			/* Note, we cannot use prog directly as imm as subsequent
			 * rewrites would still change the prog pointer. The only
			 * stable address we can use is aux, which also works with
			 * prog clones during blinding.
			 */
			u64 addr = (unsigned long)prog->aux;
			struct bpf_insn r4_ld[] = {
				BPF_LD_IMM64(BPF_REG_4, addr),
				*insn,
			};
			cnt = ARRAY_SIZE(r4_ld);

			new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta    += cnt - 1;
			env->prog = prog = new_prog;
			insn      = new_prog->insnsi + i + delta;
		}

patch_call_imm:
		fn = env->ops->get_func_proto(insn->imm);
		/* all functions that have prototype and verifier allowed
		 * programs to call them, must be real in-kernel functions
		 */
		if (!fn->func) {
			verbose(env,
				"kernel subsystem misconfigured func %s#%d\n",
				func_id_name(insn->imm), insn->imm);
			return -EFAULT;
		}
		insn->imm = fn->func - __bpf_call_base;
	}

	return 0;
}
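/* Illustrative example (editor's addition, not in the original source):
 * for an unprivileged bpf_tail_call(ctx, &prog_array, idx), the two
 * inserted instructions act as
 *   if (idx >= prog_array->max_entries) goto out;
 *   idx &= prog_array->index_mask;
 * so even a mispredicted bounds check cannot speculatively index past
 * the (power-of-two sized) program array.
 */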
static void free_states(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state_list *sl, *sln;
	int i;

	if (!env->explored_states)
		return;

	for (i = 0; i < env->prog->len; i++) {
		sl = env->explored_states[i];

		if (sl)
			while (sl != STATE_LIST_MARK) {
				sln = sl->next;
				free_verifier_state(&sl->state, false);
				kfree(sl);
				sl = sln;
			}
	}

	kfree(env->explored_states);
}
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{
	struct bpf_verifier_env *env;
	struct bpf_verifer_log *log;
	int ret = -EINVAL;

	/* no program is valid */
	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
		return -EINVAL;

	/* 'struct bpf_verifier_env' can be global, but since it's not small,
	 * allocate/free it every time bpf_check() is called
	 */
	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
	if (!env)
		return -ENOMEM;
	log = &env->log;

	env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
				     (*prog)->len);
	ret = -ENOMEM;
	if (!env->insn_aux_data)
		goto err_free_env;
	env->prog = *prog;
	env->ops = bpf_verifier_ops[env->prog->type];

	/* grab the mutex to protect few globals used by verifier */
	mutex_lock(&bpf_verifier_lock);

	if (attr->log_level || attr->log_buf || attr->log_size) {
		/* user requested verbose verifier output
		 * and supplied buffer to store the verification trace
		 */
		log->level = attr->log_level;
		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
		log->len_total = attr->log_size;

		ret = -EINVAL;
		/* log attributes have to be sane */
		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
		    !log->level || !log->ubuf)
			goto err_unlock;
	}

	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		env->strict_alignment = true;

	if (env->prog->aux->offload) {
		ret = bpf_prog_offload_verifier_prep(env);
		if (ret)
			goto err_unlock;
	}

	ret = replace_map_fd_with_map_ptr(env);
	if (ret < 0)
		goto skip_full_check;

	env->explored_states = kcalloc(env->prog->len,
				       sizeof(struct bpf_verifier_state_list *),
				       GFP_USER);
	ret = -ENOMEM;
	if (!env->explored_states)
		goto skip_full_check;

	ret = check_cfg(env);
	if (ret < 0)
		goto skip_full_check;

	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);

	ret = do_check(env);
	if (env->cur_state) {
		free_verifier_state(env->cur_state, true);
		env->cur_state = NULL;
	}

skip_full_check:
	while (!pop_stack(env, NULL, NULL));
	free_states(env);

	if (ret == 0)
		sanitize_dead_code(env);

	if (ret == 0)
		/* program is valid, convert *(u32*)(ctx + off) accesses */
		ret = convert_ctx_accesses(env);

	if (ret == 0)
		ret = fixup_bpf_calls(env);

	if (log->level && bpf_verifier_log_full(log))
		ret = -ENOSPC;
	if (log->level && !log->ubuf) {
		ret = -EFAULT;
		goto err_release_maps;
	}

	if (ret == 0 && env->used_map_cnt) {
		/* if program passed verifier, update used_maps in bpf_prog_info */
		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
							  sizeof(env->used_maps[0]),
							  GFP_KERNEL);

		if (!env->prog->aux->used_maps) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_maps, env->used_maps,
		       sizeof(env->used_maps[0]) * env->used_map_cnt);
		env->prog->aux->used_map_cnt = env->used_map_cnt;

		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
		 * bpf_ld_imm64 instructions
		 */
		convert_pseudo_ld_imm64(env);
	}

err_release_maps:
	if (!env->prog->aux->used_maps)
		/* if we didn't copy map pointers into bpf_prog_info, release
		 * them now. Otherwise free_used_maps() will release them.
		 */
		release_maps(env);
	*prog = env->prog;
err_unlock:
	mutex_unlock(&bpf_verifier_lock);
	vfree(env->insn_aux_data);