1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 * Copyright (c) 2016 Facebook
3 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 */
14#include <uapi/linux/btf.h>
15#include <linux/kernel.h>
16#include <linux/types.h>
17#include <linux/slab.h>
18#include <linux/bpf.h>
19#include <linux/btf.h>
20#include <linux/bpf_verifier.h>
21#include <linux/filter.h>
22#include <net/netlink.h>
23#include <linux/file.h>
24#include <linux/vmalloc.h>
25#include <linux/stringify.h>
26#include <linux/bsearch.h>
27#include <linux/sort.h>
28#include <linux/perf_event.h>
29#include <linux/ctype.h>
30
31#include "disasm.h"
32
33static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
34#define BPF_PROG_TYPE(_id, _name) \
35 [_id] = & _name ## _verifier_ops,
36#define BPF_MAP_TYPE(_id, _ops)
37#include <linux/bpf_types.h>
38#undef BPF_PROG_TYPE
39#undef BPF_MAP_TYPE
40};
41
42/* bpf_check() is a static code analyzer that walks eBPF program
43 * instruction by instruction and updates register/stack state.
44 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
45 *
46 * The first pass is depth-first-search to check that the program is a DAG.
47 * It rejects the following programs:
48 * - larger than BPF_MAXINSNS insns
49 * - if loop is present (detected via back-edge)
50 * - unreachable insns exist (shouldn't be a forest. program = one function)
51 * - out of bounds or malformed jumps
52 * The second pass is all possible path descent from the 1st insn.
 53 * Since it's analyzing all paths through the program, the length of the
 54 * analysis is limited to 128k insns, which may be hit even if the total number
 55 * of insns is less than 4K, but there are too many branches that change stack/regs.
56 * Number of 'branches to be analyzed' is limited to 1k
57 *
58 * On entry to each instruction, each register has a type, and the instruction
59 * changes the types of the registers depending on instruction semantics.
60 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
61 * copied to R1.
62 *
63 * All registers are 64-bit.
64 * R0 - return register
65 * R1-R5 argument passing registers
66 * R6-R9 callee saved registers
67 * R10 - frame pointer read-only
68 *
69 * At the start of BPF program the register R1 contains a pointer to bpf_context
70 * and has type PTR_TO_CTX.
71 *
72 * Verifier tracks arithmetic operations on pointers in case:
73 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
74 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
75 * 1st insn copies R10 (which has FRAME_PTR) type into R1
76 * and 2nd arithmetic instruction is pattern matched to recognize
77 * that it wants to construct a pointer to some element within stack.
78 * So after 2nd insn, the register R1 has type PTR_TO_STACK
79 * (and -20 constant is saved for further stack bounds checking).
80 * Meaning that this reg is a pointer to stack plus known immediate constant.
81 *
82 * Most of the time the registers have SCALAR_VALUE type, which
83 * means the register has some value, but it's not a valid pointer.
84 * (like pointer plus pointer becomes SCALAR_VALUE type)
85 *
86 * When verifier sees load or store instructions the type of base register
87 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 88 * four pointer types recognized by the check_mem_access() function.
89 *
90 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
91 * and the range of [ptr, ptr + map's value_size) is accessible.
92 *
93 * registers used to pass values to function calls are checked against
94 * function argument constraints.
95 *
96 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
97 * It means that the register type passed to this function must be
98 * PTR_TO_STACK and it will be used inside the function as
99 * 'pointer to map element key'
100 *
101 * For example the argument constraints for bpf_map_lookup_elem():
102 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
103 * .arg1_type = ARG_CONST_MAP_PTR,
104 * .arg2_type = ARG_PTR_TO_MAP_KEY,
105 *
 106 * ret_type says that this function returns 'pointer to map elem value or null'.
 107 * The function expects the 1st argument to be a const pointer to 'struct bpf_map' and
 108 * the 2nd argument to be a pointer to stack, which will be used inside
109 * the helper function as a pointer to map element key.
110 *
111 * On the kernel side the helper function looks like:
112 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
113 * {
114 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
115 * void *key = (void *) (unsigned long) r2;
116 * void *value;
117 *
118 * here kernel can access 'key' and 'map' pointers safely, knowing that
119 * [key, key + map->key_size) bytes are valid and were initialized on
120 * the stack of eBPF program.
121 * }
122 *
123 * Corresponding eBPF program may look like:
124 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
125 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
126 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
127 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
128 * here verifier looks at prototype of map_lookup_elem() and sees:
129 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
130 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
131 *
132 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
133 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
134 * and were initialized prior to this call.
135 * If it's ok, then verifier allows this BPF_CALL insn and looks at
136 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 137 * R0->type = PTR_TO_MAP_VALUE_OR_NULL, which means the bpf_map_lookup_elem() function
 138 * returns either a pointer to the map value or NULL.
139 *
140 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
141 * insn, the register holding that pointer in the true branch changes state to
142 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
143 * branch. See check_cond_jmp_op().
144 *
145 * After the call R0 is set to return type of the function and registers R1-R5
146 * are set to NOT_INIT to indicate that they are no longer readable.
147 *
148 * The following reference types represent a potential reference to a kernel
149 * resource which, after first being allocated, must be checked and freed by
150 * the BPF program:
151 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
152 *
153 * When the verifier sees a helper call return a reference type, it allocates a
154 * pointer id for the reference and stores it in the current function state.
155 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
156 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
157 * passes through a NULL-check conditional. For the branch wherein the state is
158 * changed to CONST_IMM, the verifier releases the reference.
159 *
160 * For each helper function that allocates a reference, such as
161 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
162 * bpf_sk_release(). When a reference type passes into the release function,
163 * the verifier also releases the reference. If any unchecked or unreleased
164 * reference remains at the end of the program, the verifier rejects it.
165 */
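/* Editor's illustrative sketch (not from the original sources): the NULL-check
 * pattern described above, written with the same insn macros the example uses,
 * assuming a map whose key_size is 4 and whose value_size is at least 8;
 * 'map_fd' is a placeholder for the fd of an already created map.
 *
 *   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),          // R2 = frame pointer
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),         // R2 = fp - 4, PTR_TO_STACK
 *   BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),            // initialize the 4-byte key
 *   BPF_LD_MAP_FD(BPF_REG_1, map_fd),              // R1 = CONST_PTR_TO_MAP
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),         // R0 may be NULL: test it
 *   BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),          // here R0 is PTR_TO_MAP_VALUE
 *   BPF_MOV64_IMM(BPF_REG_0, 0),
 *   BPF_EXIT_INSN(),
 *
 * In the fall-through (non-NULL) branch the verifier converts R0 from
 * PTR_TO_MAP_VALUE_OR_NULL to PTR_TO_MAP_VALUE, so the 8-byte store is
 * accepted; in the taken branch the store is skipped and the program exits.
 */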
166
167/* verifier_state + insn_idx are pushed to stack when branch is encountered */
168struct bpf_verifier_stack_elem {
 169 /* verifier state is 'st'
170 * before processing instruction 'insn_idx'
171 * and after processing instruction 'prev_insn_idx'
172 */
173 struct bpf_verifier_state st;
174 int insn_idx;
175 int prev_insn_idx;
176 struct bpf_verifier_stack_elem *next;
177};
178
179#define BPF_COMPLEXITY_LIMIT_INSNS 131072
180#define BPF_COMPLEXITY_LIMIT_STACK 1024
181#define BPF_COMPLEXITY_LIMIT_STATES 64
182
183#define BPF_MAP_PTR_UNPRIV 1UL
184#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
185 POISON_POINTER_DELTA))
186#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
187
188static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
189{
190 return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
191}
192
193static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
194{
195 return aux->map_state & BPF_MAP_PTR_UNPRIV;
196}
197
198static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
199 const struct bpf_map *map, bool unpriv)
200{
201 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
202 unpriv |= bpf_map_ptr_unpriv(aux);
203 aux->map_state = (unsigned long)map |
204 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
205}
206
207struct bpf_call_arg_meta {
208 struct bpf_map *map_ptr;
209 bool raw_mode;
210 bool pkt_access;
211 int regno;
212 int access_size;
213 s64 msize_smax_value;
214 u64 msize_umax_value;
215 int ref_obj_id;
216 int func_id;
217};
218
219static DEFINE_MUTEX(bpf_verifier_lock);
220
221static const struct bpf_line_info *
222find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
223{
224 const struct bpf_line_info *linfo;
225 const struct bpf_prog *prog;
226 u32 i, nr_linfo;
227
228 prog = env->prog;
229 nr_linfo = prog->aux->nr_linfo;
230
231 if (!nr_linfo || insn_off >= prog->len)
232 return NULL;
233
234 linfo = prog->aux->linfo;
235 for (i = 1; i < nr_linfo; i++)
236 if (insn_off < linfo[i].insn_off)
237 break;
238
239 return &linfo[i - 1];
240}
241
242void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
243 va_list args)
244{
245 unsigned int n;
246
247 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
248
249 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
250 "verifier log line truncated - local buffer too short\n");
251
252 n = min(log->len_total - log->len_used - 1, n);
253 log->kbuf[n] = '\0';
254
255 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
256 log->len_used += n;
257 else
258 log->ubuf = NULL;
259}
260
261/* log_level controls verbosity level of eBPF verifier.
262 * bpf_verifier_log_write() is used to dump the verification trace to the log,
263 * so the user can figure out what's wrong with the program
264 */
265__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
266 const char *fmt, ...)
267{
268 va_list args;
269
270 if (!bpf_verifier_log_needed(&env->log))
271 return;
272
273 va_start(args, fmt);
274 bpf_verifier_vlog(&env->log, fmt, args);
275 va_end(args);
276}
277EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
278
279__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
280{
281 struct bpf_verifier_env *env = private_data;
282 va_list args;
283
284 if (!bpf_verifier_log_needed(&env->log))
285 return;
286
287 va_start(args, fmt);
288 bpf_verifier_vlog(&env->log, fmt, args);
289 va_end(args);
290}
291
292static const char *ltrim(const char *s)
293{
294 while (isspace(*s))
295 s++;
296
297 return s;
298}
299
300__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
301 u32 insn_off,
302 const char *prefix_fmt, ...)
303{
304 const struct bpf_line_info *linfo;
305
306 if (!bpf_verifier_log_needed(&env->log))
307 return;
308
309 linfo = find_linfo(env, insn_off);
310 if (!linfo || linfo == env->prev_linfo)
311 return;
312
313 if (prefix_fmt) {
314 va_list args;
315
316 va_start(args, prefix_fmt);
317 bpf_verifier_vlog(&env->log, prefix_fmt, args);
318 va_end(args);
319 }
320
321 verbose(env, "%s\n",
322 ltrim(btf_name_by_offset(env->prog->aux->btf,
323 linfo->line_off)));
324
325 env->prev_linfo = linfo;
326}
327
328static bool type_is_pkt_pointer(enum bpf_reg_type type)
329{
330 return type == PTR_TO_PACKET ||
331 type == PTR_TO_PACKET_META;
332}
333
334static bool type_is_sk_pointer(enum bpf_reg_type type)
335{
336 return type == PTR_TO_SOCKET ||
337 type == PTR_TO_SOCK_COMMON ||
338 type == PTR_TO_TCP_SOCK;
339}
340
341static bool reg_type_may_be_null(enum bpf_reg_type type)
342{
343 return type == PTR_TO_MAP_VALUE_OR_NULL ||
344 type == PTR_TO_SOCKET_OR_NULL ||
345 type == PTR_TO_SOCK_COMMON_OR_NULL ||
346 type == PTR_TO_TCP_SOCK_OR_NULL;
347}
348
349static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
350{
351 return reg->type == PTR_TO_MAP_VALUE &&
352 map_value_has_spin_lock(reg->map_ptr);
353}
354
355static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
356{
357 return type == ARG_PTR_TO_SOCK_COMMON;
358}
359
360/* Determine whether the function releases some resources allocated by another
361 * function call. The first reference type argument will be assumed to be
362 * released by release_reference().
363 */
364static bool is_release_function(enum bpf_func_id func_id)
365{
366 return func_id == BPF_FUNC_sk_release;
367}
368
369static bool is_acquire_function(enum bpf_func_id func_id)
370{
371 return func_id == BPF_FUNC_sk_lookup_tcp ||
372 func_id == BPF_FUNC_sk_lookup_udp;
373}
374
375static bool is_ptr_cast_function(enum bpf_func_id func_id)
376{
377 return func_id == BPF_FUNC_tcp_sock ||
378 func_id == BPF_FUNC_sk_fullsock;
379}
380
381/* string representation of 'enum bpf_reg_type' */
382static const char * const reg_type_str[] = {
383 [NOT_INIT] = "?",
384 [SCALAR_VALUE] = "inv",
385 [PTR_TO_CTX] = "ctx",
386 [CONST_PTR_TO_MAP] = "map_ptr",
387 [PTR_TO_MAP_VALUE] = "map_value",
388 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
389 [PTR_TO_STACK] = "fp",
390 [PTR_TO_PACKET] = "pkt",
391 [PTR_TO_PACKET_META] = "pkt_meta",
392 [PTR_TO_PACKET_END] = "pkt_end",
393 [PTR_TO_FLOW_KEYS] = "flow_keys",
394 [PTR_TO_SOCKET] = "sock",
395 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
396 [PTR_TO_SOCK_COMMON] = "sock_common",
397 [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
398 [PTR_TO_TCP_SOCK] = "tcp_sock",
399 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
400};
401
402static char slot_type_char[] = {
403 [STACK_INVALID] = '?',
404 [STACK_SPILL] = 'r',
405 [STACK_MISC] = 'm',
406 [STACK_ZERO] = '0',
407};
408
409static void print_liveness(struct bpf_verifier_env *env,
410 enum bpf_reg_liveness live)
411{
412 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
413 verbose(env, "_");
414 if (live & REG_LIVE_READ)
415 verbose(env, "r");
416 if (live & REG_LIVE_WRITTEN)
417 verbose(env, "w");
418 if (live & REG_LIVE_DONE)
419 verbose(env, "D");
420}
421
422static struct bpf_func_state *func(struct bpf_verifier_env *env,
423 const struct bpf_reg_state *reg)
424{
425 struct bpf_verifier_state *cur = env->cur_state;
426
427 return cur->frame[reg->frameno];
428}
429
430static void print_verifier_state(struct bpf_verifier_env *env,
431 const struct bpf_func_state *state)
432{
433 const struct bpf_reg_state *reg;
434 enum bpf_reg_type t;
435 int i;
436
437 if (state->frameno)
438 verbose(env, " frame%d:", state->frameno);
439 for (i = 0; i < MAX_BPF_REG; i++) {
440 reg = &state->regs[i];
441 t = reg->type;
442 if (t == NOT_INIT)
443 continue;
444 verbose(env, " R%d", i);
445 print_liveness(env, reg->live);
446 verbose(env, "=%s", reg_type_str[t]);
447 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
448 tnum_is_const(reg->var_off)) {
449 /* reg->off should be 0 for SCALAR_VALUE */
450 verbose(env, "%lld", reg->var_off.value + reg->off);
451 if (t == PTR_TO_STACK)
452 verbose(env, ",call_%d", func(env, reg)->callsite);
453 } else {
454 verbose(env, "(id=%d ref_obj_id=%d", reg->id,
455 reg->ref_obj_id);
456 if (t != SCALAR_VALUE)
457 verbose(env, ",off=%d", reg->off);
458 if (type_is_pkt_pointer(t))
459 verbose(env, ",r=%d", reg->range);
460 else if (t == CONST_PTR_TO_MAP ||
461 t == PTR_TO_MAP_VALUE ||
462 t == PTR_TO_MAP_VALUE_OR_NULL)
463 verbose(env, ",ks=%d,vs=%d",
464 reg->map_ptr->key_size,
465 reg->map_ptr->value_size);
466 if (tnum_is_const(reg->var_off)) {
467 /* Typically an immediate SCALAR_VALUE, but
468 * could be a pointer whose offset is too big
469 * for reg->off
470 */
471 verbose(env, ",imm=%llx", reg->var_off.value);
472 } else {
473 if (reg->smin_value != reg->umin_value &&
474 reg->smin_value != S64_MIN)
475 verbose(env, ",smin_value=%lld",
476 (long long)reg->smin_value);
477 if (reg->smax_value != reg->umax_value &&
478 reg->smax_value != S64_MAX)
479 verbose(env, ",smax_value=%lld",
480 (long long)reg->smax_value);
481 if (reg->umin_value != 0)
482 verbose(env, ",umin_value=%llu",
483 (unsigned long long)reg->umin_value);
484 if (reg->umax_value != U64_MAX)
485 verbose(env, ",umax_value=%llu",
486 (unsigned long long)reg->umax_value);
487 if (!tnum_is_unknown(reg->var_off)) {
488 char tn_buf[48];
489
490 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
491 verbose(env, ",var_off=%s", tn_buf);
492 }
493 }
494 verbose(env, ")");
495 }
496 }
497 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
498 char types_buf[BPF_REG_SIZE + 1];
499 bool valid = false;
500 int j;
501
502 for (j = 0; j < BPF_REG_SIZE; j++) {
503 if (state->stack[i].slot_type[j] != STACK_INVALID)
504 valid = true;
505 types_buf[j] = slot_type_char[
506 state->stack[i].slot_type[j]];
507 }
508 types_buf[BPF_REG_SIZE] = 0;
509 if (!valid)
510 continue;
511 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
512 print_liveness(env, state->stack[i].spilled_ptr.live);
513 if (state->stack[i].slot_type[0] == STACK_SPILL)
514 verbose(env, "=%s",
515 reg_type_str[state->stack[i].spilled_ptr.type]);
516 else
517 verbose(env, "=%s", types_buf);
518 }
519 if (state->acquired_refs && state->refs[0].id) {
520 verbose(env, " refs=%d", state->refs[0].id);
521 for (i = 1; i < state->acquired_refs; i++)
522 if (state->refs[i].id)
523 verbose(env, ",%d", state->refs[i].id);
524 }
525 verbose(env, "\n");
526}
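/* Editor's note (illustrative, not output captured from a real run): given the
 * format strings above, the state dumped at the start of a program would look
 * roughly like
 *
 *   R1=ctx(id=0 ref_obj_id=0,off=0,imm=0) R10=fp0,call_-1
 *
 * i.e. register name, any liveness marks ('_' plus r/w/D), the reg_type_str[]
 * name, and then either the constant value (for scalars and stack pointers)
 * or the parenthesized id/offset/bounds details, followed by one fp-N entry
 * per in-use stack slot.
 */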
527
528#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
529static int copy_##NAME##_state(struct bpf_func_state *dst, \
530 const struct bpf_func_state *src) \
531{ \
532 if (!src->FIELD) \
533 return 0; \
534 if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
535 /* internal bug, make state invalid to reject the program */ \
536 memset(dst, 0, sizeof(*dst)); \
537 return -EFAULT; \
538 } \
539 memcpy(dst->FIELD, src->FIELD, \
540 sizeof(*src->FIELD) * (src->COUNT / SIZE)); \
541 return 0; \
542}
543/* copy_reference_state() */
544COPY_STATE_FN(reference, acquired_refs, refs, 1)
545/* copy_stack_state() */
546COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
547#undef COPY_STATE_FN
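/* Editor's aid (mechanical macro expansion, not additional code): the first
 * invocation above generates
 *
 *   static int copy_reference_state(struct bpf_func_state *dst,
 *                                   const struct bpf_func_state *src)
 *   {
 *           if (!src->refs)
 *                   return 0;
 *           if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
 *                   memset(dst, 0, sizeof(*dst));
 *                   return -EFAULT;
 *           }
 *           memcpy(dst->refs, src->refs,
 *                  sizeof(*src->refs) * (src->acquired_refs / 1));
 *           return 0;
 *   }
 *
 * copy_stack_state() has the same shape with stack/allocated_stack and a
 * BPF_REG_SIZE divisor.
 */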
548
549#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
550static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
551 bool copy_old) \
552{ \
553 u32 old_size = state->COUNT; \
554 struct bpf_##NAME##_state *new_##FIELD; \
555 int slot = size / SIZE; \
556 \
557 if (size <= old_size || !size) { \
558 if (copy_old) \
559 return 0; \
560 state->COUNT = slot * SIZE; \
561 if (!size && old_size) { \
562 kfree(state->FIELD); \
563 state->FIELD = NULL; \
564 } \
565 return 0; \
566 } \
567 new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
568 GFP_KERNEL); \
569 if (!new_##FIELD) \
570 return -ENOMEM; \
571 if (copy_old) { \
572 if (state->FIELD) \
573 memcpy(new_##FIELD, state->FIELD, \
574 sizeof(*new_##FIELD) * (old_size / SIZE)); \
575 memset(new_##FIELD + old_size / SIZE, 0, \
576 sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
577 } \
578 state->COUNT = slot * SIZE; \
579 kfree(state->FIELD); \
580 state->FIELD = new_##FIELD; \
581 return 0; \
582}
583/* realloc_reference_state() */
584REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
585/* realloc_stack_state() */
586REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
587#undef REALLOC_STATE_FN
588
 589/* do_check() starts with a zero-sized stack in struct bpf_verifier_state to
 590 * make it consume a minimal amount of memory. Stack accesses from the program,
 591 * seen by check_stack_write(), call into realloc_func_state() to grow the stack size.
592 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
593 * which realloc_stack_state() copies over. It points to previous
594 * bpf_verifier_state which is never reallocated.
595 */
596static int realloc_func_state(struct bpf_func_state *state, int stack_size,
597 int refs_size, bool copy_old)
598{
599 int err = realloc_reference_state(state, refs_size, copy_old);
600 if (err)
601 return err;
602 return realloc_stack_state(state, stack_size, copy_old);
603}
604
605/* Acquire a pointer id from the env and update the state->refs to include
606 * this new pointer reference.
 607 * On success, returns a valid pointer id to associate with the register.
608 * On failure, returns a negative errno.
609 */
610static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
611{
612 struct bpf_func_state *state = cur_func(env);
613 int new_ofs = state->acquired_refs;
614 int id, err;
615
616 err = realloc_reference_state(state, state->acquired_refs + 1, true);
617 if (err)
618 return err;
619 id = ++env->id_gen;
620 state->refs[new_ofs].id = id;
621 state->refs[new_ofs].insn_idx = insn_idx;
622
623 return id;
624}
625
626/* release function corresponding to acquire_reference_state(). Idempotent. */
627static int release_reference_state(struct bpf_func_state *state, int ptr_id)
628{
629 int i, last_idx;
630
631 last_idx = state->acquired_refs - 1;
632 for (i = 0; i < state->acquired_refs; i++) {
633 if (state->refs[i].id == ptr_id) {
634 if (last_idx && i != last_idx)
635 memcpy(&state->refs[i], &state->refs[last_idx],
636 sizeof(*state->refs));
637 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
638 state->acquired_refs--;
639 return 0;
640 }
641 }
642 return -EINVAL;
643}
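/* Editor's sketch of how the pair above is used (illustrative): for a program
 * fragment such as
 *
 *   struct bpf_sock *sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple),
 *                                           BPF_F_CURRENT_NETNS, 0);
 *   if (sk)
 *           bpf_sk_release(sk);
 *
 * the verifier calls acquire_reference_state() while modelling the
 * bpf_sk_lookup_tcp() call and tags R0 with the returned id (its ref_obj_id);
 * when it later models bpf_sk_release() it calls release_reference_state()
 * with that id.  Any path that can reach BPF_EXIT with the id still present
 * in state->refs[] is rejected as an unreleased reference.
 */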
644
645static int transfer_reference_state(struct bpf_func_state *dst,
646 struct bpf_func_state *src)
647{
648 int err = realloc_reference_state(dst, src->acquired_refs, false);
649 if (err)
650 return err;
651 err = copy_reference_state(dst, src);
652 if (err)
653 return err;
654 return 0;
655}
656
657static void free_func_state(struct bpf_func_state *state)
658{
659 if (!state)
660 return;
661 kfree(state->refs);
662 kfree(state->stack);
663 kfree(state);
664}
665
666static void free_verifier_state(struct bpf_verifier_state *state,
667 bool free_self)
668{
669 int i;
670
671 for (i = 0; i <= state->curframe; i++) {
672 free_func_state(state->frame[i]);
673 state->frame[i] = NULL;
674 }
675 if (free_self)
676 kfree(state);
677}
678
679/* copy verifier state from src to dst growing dst stack space
680 * when necessary to accommodate larger src stack
681 */
682static int copy_func_state(struct bpf_func_state *dst,
683 const struct bpf_func_state *src)
684{
685 int err;
686
687 err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
688 false);
689 if (err)
690 return err;
691 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
692 err = copy_reference_state(dst, src);
693 if (err)
694 return err;
695 return copy_stack_state(dst, src);
696}
697
698static int copy_verifier_state(struct bpf_verifier_state *dst_state,
699 const struct bpf_verifier_state *src)
700{
701 struct bpf_func_state *dst;
702 int i, err;
703
 704	/* if dst has more stack frames than src, free them */
705 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
706 free_func_state(dst_state->frame[i]);
707 dst_state->frame[i] = NULL;
708 }
709 dst_state->speculative = src->speculative;
710 dst_state->curframe = src->curframe;
711 dst_state->active_spin_lock = src->active_spin_lock;
712 for (i = 0; i <= src->curframe; i++) {
713 dst = dst_state->frame[i];
714 if (!dst) {
715 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
716 if (!dst)
717 return -ENOMEM;
718 dst_state->frame[i] = dst;
719 }
720 err = copy_func_state(dst, src->frame[i]);
721 if (err)
722 return err;
723 }
724 return 0;
725}
726
727static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
728 int *insn_idx)
729{
730 struct bpf_verifier_state *cur = env->cur_state;
731 struct bpf_verifier_stack_elem *elem, *head = env->head;
732 int err;
733
734 if (env->head == NULL)
735 return -ENOENT;
736
737 if (cur) {
738 err = copy_verifier_state(cur, &head->st);
739 if (err)
740 return err;
741 }
742 if (insn_idx)
743 *insn_idx = head->insn_idx;
744 if (prev_insn_idx)
745 *prev_insn_idx = head->prev_insn_idx;
746 elem = head->next;
747 free_verifier_state(&head->st, false);
748 kfree(head);
749 env->head = elem;
750 env->stack_size--;
751 return 0;
752}
753
754static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
755 int insn_idx, int prev_insn_idx,
756 bool speculative)
757{
758 struct bpf_verifier_state *cur = env->cur_state;
759 struct bpf_verifier_stack_elem *elem;
760 int err;
761
762 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
763 if (!elem)
764 goto err;
765
766 elem->insn_idx = insn_idx;
767 elem->prev_insn_idx = prev_insn_idx;
768 elem->next = env->head;
769 env->head = elem;
770 env->stack_size++;
771 err = copy_verifier_state(&elem->st, cur);
772 if (err)
773 goto err;
774 elem->st.speculative |= speculative;
775 if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
776 verbose(env, "BPF program is too complex\n");
777 goto err;
778 }
779 return &elem->st;
780err:
781 free_verifier_state(env->cur_state, true);
782 env->cur_state = NULL;
783 /* pop all elements and return */
784 while (!pop_stack(env, NULL, NULL));
785 return NULL;
786}
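/* Editor's sketch (illustrative): on a conditional jump such as
 *
 *   BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 5),
 *
 * check_cond_jmp_op() uses push_stack() to queue the verifier state of the
 * branch it is not going to follow immediately, together with the insn_idx
 * to resume at; do_check() keeps walking the current branch and later calls
 * pop_stack() to restore a queued state, so every path to BPF_EXIT gets
 * analyzed, with at most BPF_COMPLEXITY_LIMIT_STACK branches pending.
 */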
787
788#define CALLER_SAVED_REGS 6
789static const int caller_saved[CALLER_SAVED_REGS] = {
790 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
791};
792
793static void __mark_reg_not_init(struct bpf_reg_state *reg);
794
795/* Mark the unknown part of a register (variable offset or scalar value) as
796 * known to have the value @imm.
797 */
798static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
799{
800 /* Clear id, off, and union(map_ptr, range) */
801 memset(((u8 *)reg) + sizeof(reg->type), 0,
802 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
803 reg->var_off = tnum_const(imm);
804 reg->smin_value = (s64)imm;
805 reg->smax_value = (s64)imm;
806 reg->umin_value = imm;
807 reg->umax_value = imm;
808}
809
810/* Mark the 'variable offset' part of a register as zero. This should be
811 * used only on registers holding a pointer type.
812 */
813static void __mark_reg_known_zero(struct bpf_reg_state *reg)
814{
815 __mark_reg_known(reg, 0);
816}
817
818static void __mark_reg_const_zero(struct bpf_reg_state *reg)
819{
820 __mark_reg_known(reg, 0);
821 reg->type = SCALAR_VALUE;
822}
823
824static void mark_reg_known_zero(struct bpf_verifier_env *env,
825 struct bpf_reg_state *regs, u32 regno)
826{
827 if (WARN_ON(regno >= MAX_BPF_REG)) {
828 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
829 /* Something bad happened, let's kill all regs */
830 for (regno = 0; regno < MAX_BPF_REG; regno++)
831 __mark_reg_not_init(regs + regno);
832 return;
833 }
834 __mark_reg_known_zero(regs + regno);
835}
836
837static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
838{
839 return type_is_pkt_pointer(reg->type);
840}
841
842static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
843{
844 return reg_is_pkt_pointer(reg) ||
845 reg->type == PTR_TO_PACKET_END;
846}
847
848/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
849static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
850 enum bpf_reg_type which)
851{
852 /* The register can already have a range from prior markings.
853 * This is fine as long as it hasn't been advanced from its
854 * origin.
855 */
856 return reg->type == which &&
857 reg->id == 0 &&
858 reg->off == 0 &&
859 tnum_equals_const(reg->var_off, 0);
860}
861
862/* Attempts to improve min/max values based on var_off information */
863static void __update_reg_bounds(struct bpf_reg_state *reg)
864{
865 /* min signed is max(sign bit) | min(other bits) */
866 reg->smin_value = max_t(s64, reg->smin_value,
867 reg->var_off.value | (reg->var_off.mask & S64_MIN));
868 /* max signed is min(sign bit) | max(other bits) */
869 reg->smax_value = min_t(s64, reg->smax_value,
870 reg->var_off.value | (reg->var_off.mask & S64_MAX));
871 reg->umin_value = max(reg->umin_value, reg->var_off.value);
872 reg->umax_value = min(reg->umax_value,
873 reg->var_off.value | reg->var_off.mask);
874}
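/* Editor's worked example: for var_off = (value 0x10, mask 0x0f), i.e. bit 4
 * known set and bits 0-3 unknown, the code above raises umin_value to at
 * least 0x10 (the known bits) and lowers umax_value to at most
 * 0x10 | 0x0f = 0x1f (all unknown bits set).  Since the sign bit is known
 * clear here, smin_value/smax_value receive the same 0x10..0x1f clamping.
 */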
875
876/* Uses signed min/max values to inform unsigned, and vice-versa */
877static void __reg_deduce_bounds(struct bpf_reg_state *reg)
878{
879 /* Learn sign from signed bounds.
880 * If we cannot cross the sign boundary, then signed and unsigned bounds
881 * are the same, so combine. This works even in the negative case, e.g.
882 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
883 */
884 if (reg->smin_value >= 0 || reg->smax_value < 0) {
885 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
886 reg->umin_value);
887 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
888 reg->umax_value);
889 return;
890 }
891 /* Learn sign from unsigned bounds. Signed bounds cross the sign
892 * boundary, so we must be careful.
893 */
894 if ((s64)reg->umax_value >= 0) {
895 /* Positive. We can't learn anything from the smin, but smax
896 * is positive, hence safe.
897 */
898 reg->smin_value = reg->umin_value;
899 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
900 reg->umax_value);
901 } else if ((s64)reg->umin_value < 0) {
902 /* Negative. We can't learn anything from the smax, but smin
903 * is negative, hence safe.
904 */
905 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
906 reg->umin_value);
907 reg->smax_value = reg->umax_value;
908 }
909}
910
911/* Attempts to improve var_off based on unsigned min/max information */
912static void __reg_bound_offset(struct bpf_reg_state *reg)
913{
914 reg->var_off = tnum_intersect(reg->var_off,
915 tnum_range(reg->umin_value,
916 reg->umax_value));
917}
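/* Editor's worked example: with umin_value = 16 and umax_value = 31,
 * tnum_range() yields (value 0x10, mask 0x0f), i.e. bit 4 known set and the
 * low four bits unknown, and tnum_intersect() folds that knowledge into
 * var_off, tightening it whenever the old var_off was less precise.
 */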
918
919/* Reset the min/max bounds of a register */
920static void __mark_reg_unbounded(struct bpf_reg_state *reg)
921{
922 reg->smin_value = S64_MIN;
923 reg->smax_value = S64_MAX;
924 reg->umin_value = 0;
925 reg->umax_value = U64_MAX;
926}
927
928/* Mark a register as having a completely unknown (scalar) value. */
929static void __mark_reg_unknown(struct bpf_reg_state *reg)
930{
931 /*
932 * Clear type, id, off, and union(map_ptr, range) and
933 * padding between 'type' and union
934 */
935 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
936 reg->type = SCALAR_VALUE;
937 reg->var_off = tnum_unknown;
938 reg->frameno = 0;
939 __mark_reg_unbounded(reg);
940}
941
942static void mark_reg_unknown(struct bpf_verifier_env *env,
943 struct bpf_reg_state *regs, u32 regno)
944{
945 if (WARN_ON(regno >= MAX_BPF_REG)) {
946 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
947 /* Something bad happened, let's kill all regs except FP */
948 for (regno = 0; regno < BPF_REG_FP; regno++)
949 __mark_reg_not_init(regs + regno);
950 return;
951 }
952 __mark_reg_unknown(regs + regno);
953}
954
955static void __mark_reg_not_init(struct bpf_reg_state *reg)
956{
957 __mark_reg_unknown(reg);
958 reg->type = NOT_INIT;
959}
960
961static void mark_reg_not_init(struct bpf_verifier_env *env,
962 struct bpf_reg_state *regs, u32 regno)
963{
964 if (WARN_ON(regno >= MAX_BPF_REG)) {
965 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
966 /* Something bad happened, let's kill all regs except FP */
967 for (regno = 0; regno < BPF_REG_FP; regno++)
968 __mark_reg_not_init(regs + regno);
969 return;
970 }
971 __mark_reg_not_init(regs + regno);
972}
973
974static void init_reg_state(struct bpf_verifier_env *env,
975 struct bpf_func_state *state)
976{
977 struct bpf_reg_state *regs = state->regs;
978 int i;
979
980 for (i = 0; i < MAX_BPF_REG; i++) {
981 mark_reg_not_init(env, regs, i);
982 regs[i].live = REG_LIVE_NONE;
983 regs[i].parent = NULL;
984 }
985
986 /* frame pointer */
987 regs[BPF_REG_FP].type = PTR_TO_STACK;
988 mark_reg_known_zero(env, regs, BPF_REG_FP);
989 regs[BPF_REG_FP].frameno = state->frameno;
990
991 /* 1st arg to a function */
992 regs[BPF_REG_1].type = PTR_TO_CTX;
993 mark_reg_known_zero(env, regs, BPF_REG_1);
994}
995
996#define BPF_MAIN_FUNC (-1)
997static void init_func_state(struct bpf_verifier_env *env,
998 struct bpf_func_state *state,
999 int callsite, int frameno, int subprogno)
1000{
1001 state->callsite = callsite;
1002 state->frameno = frameno;
1003 state->subprogno = subprogno;
1004 init_reg_state(env, state);
1005}
1006
1007enum reg_arg_type {
1008 SRC_OP, /* register is used as source operand */
1009 DST_OP, /* register is used as destination operand */
1010 DST_OP_NO_MARK /* same as above, check only, don't mark */
1011};
1012
1013static int cmp_subprogs(const void *a, const void *b)
1014{
1015 return ((struct bpf_subprog_info *)a)->start -
1016 ((struct bpf_subprog_info *)b)->start;
1017}
1018
1019static int find_subprog(struct bpf_verifier_env *env, int off)
1020{
1021 struct bpf_subprog_info *p;
1022
1023 p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1024 sizeof(env->subprog_info[0]), cmp_subprogs);
1025 if (!p)
1026 return -ENOENT;
1027 return p - env->subprog_info;
1028
1029}
1030
1031static int add_subprog(struct bpf_verifier_env *env, int off)
1032{
1033 int insn_cnt = env->prog->len;
1034 int ret;
1035
1036 if (off >= insn_cnt || off < 0) {
1037 verbose(env, "call to invalid destination\n");
1038 return -EINVAL;
1039 }
1040 ret = find_subprog(env, off);
1041 if (ret >= 0)
1042 return 0;
1043 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1044 verbose(env, "too many subprograms\n");
1045 return -E2BIG;
1046 }
1047 env->subprog_info[env->subprog_cnt++].start = off;
1048 sort(env->subprog_info, env->subprog_cnt,
1049 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1050 return 0;
1051}
1052
1053static int check_subprogs(struct bpf_verifier_env *env)
1054{
1055 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1056 struct bpf_subprog_info *subprog = env->subprog_info;
1057 struct bpf_insn *insn = env->prog->insnsi;
1058 int insn_cnt = env->prog->len;
1059
1060 /* Add entry function. */
1061 ret = add_subprog(env, 0);
1062 if (ret < 0)
1063 return ret;
1064
1065 /* determine subprog starts. The end is one before the next starts */
1066 for (i = 0; i < insn_cnt; i++) {
1067 if (insn[i].code != (BPF_JMP | BPF_CALL))
1068 continue;
1069 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1070 continue;
1071 if (!env->allow_ptr_leaks) {
1072 verbose(env, "function calls to other bpf functions are allowed for root only\n");
1073 return -EPERM;
1074 }
1075 ret = add_subprog(env, i + insn[i].imm + 1);
1076 if (ret < 0)
1077 return ret;
1078 }
1079
1080 /* Add a fake 'exit' subprog which could simplify subprog iteration
1081 * logic. 'subprog_cnt' should not be increased.
1082 */
1083 subprog[env->subprog_cnt].start = insn_cnt;
1084
1085 if (env->log.level > 1)
1086 for (i = 0; i < env->subprog_cnt; i++)
1087 verbose(env, "func#%d @%d\n", i, subprog[i].start);
1088
1089 /* now check that all jumps are within the same subprog */
1090 subprog_start = subprog[cur_subprog].start;
1091 subprog_end = subprog[cur_subprog + 1].start;
1092 for (i = 0; i < insn_cnt; i++) {
1093 u8 code = insn[i].code;
1094
1095 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
1096 goto next;
1097 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1098 goto next;
1099 off = i + insn[i].off + 1;
1100 if (off < subprog_start || off >= subprog_end) {
1101 verbose(env, "jump out of range from insn %d to %d\n", i, off);
1102 return -EINVAL;
1103 }
1104next:
1105 if (i == subprog_end - 1) {
1106 /* to avoid fall-through from one subprog into another
1107 * the last insn of the subprog should be either exit
1108 * or unconditional jump back
1109 */
1110 if (code != (BPF_JMP | BPF_EXIT) &&
1111 code != (BPF_JMP | BPF_JA)) {
1112 verbose(env, "last insn is not an exit or jmp\n");
1113 return -EINVAL;
1114 }
1115 subprog_start = subprog_end;
1116 cur_subprog++;
1117 if (cur_subprog < env->subprog_cnt)
1118 subprog_end = subprog[cur_subprog + 1].start;
1119 }
1120 }
1121 return 0;
1122}
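/* Editor's sketch of the layout this pass computes (illustrative).  For a
 * program like
 *
 *   0: r1 = 1
 *   1: call pc+2            // BPF_PSEUDO_CALL, imm = 2, target = insn 4
 *   2: r0 = 0
 *   3: exit
 *   4: r0 = r1              // start of the callee
 *   5: exit
 *
 * add_subprog() records starts {0, 4} and the fake exit subprog gets
 * start = 6, so subprog 0 covers insns [0, 4) and subprog 1 covers [4, 6);
 * any jump whose target crosses one of those boundaries is rejected with
 * "jump out of range".
 */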
1123
1124/* Parentage chain of this register (or stack slot) should take care of all
1125 * issues like callee-saved registers, stack slot allocation time, etc.
1126 */
1127static int mark_reg_read(struct bpf_verifier_env *env,
1128 const struct bpf_reg_state *state,
1129 struct bpf_reg_state *parent)
1130{
1131 bool writes = parent == state->parent; /* Observe write marks */
1132
1133 while (parent) {
1134 /* if read wasn't screened by an earlier write ... */
1135 if (writes && state->live & REG_LIVE_WRITTEN)
1136 break;
1137 if (parent->live & REG_LIVE_DONE) {
1138 verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1139 reg_type_str[parent->type],
1140 parent->var_off.value, parent->off);
1141 return -EFAULT;
1142 }
1143 /* ... then we depend on parent's value */
1144 parent->live |= REG_LIVE_READ;
1145 state = parent;
1146 parent = state->parent;
1147 writes = true;
1148 }
1149 return 0;
1150}
1151
1152static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1153 enum reg_arg_type t)
1154{
1155 struct bpf_verifier_state *vstate = env->cur_state;
1156 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1157 struct bpf_reg_state *regs = state->regs;
1158
1159 if (regno >= MAX_BPF_REG) {
1160 verbose(env, "R%d is invalid\n", regno);
1161 return -EINVAL;
1162 }
1163
1164 if (t == SRC_OP) {
1165 /* check whether register used as source operand can be read */
1166 if (regs[regno].type == NOT_INIT) {
1167 verbose(env, "R%d !read_ok\n", regno);
1168 return -EACCES;
1169 }
1170 /* We don't need to worry about FP liveness because it's read-only */
1171 if (regno != BPF_REG_FP)
1172 return mark_reg_read(env, &regs[regno],
1173 regs[regno].parent);
1174 } else {
1175 /* check whether register used as dest operand can be written to */
1176 if (regno == BPF_REG_FP) {
1177 verbose(env, "frame pointer is read only\n");
1178 return -EACCES;
1179 }
1180 regs[regno].live |= REG_LIVE_WRITTEN;
1181 if (t == DST_OP)
1182 mark_reg_unknown(env, regs, regno);
1183 }
1184 return 0;
1185}
1186
1187static bool is_spillable_regtype(enum bpf_reg_type type)
1188{
1189 switch (type) {
1190 case PTR_TO_MAP_VALUE:
1191 case PTR_TO_MAP_VALUE_OR_NULL:
1192 case PTR_TO_STACK:
1193 case PTR_TO_CTX:
1194 case PTR_TO_PACKET:
1195 case PTR_TO_PACKET_META:
1196 case PTR_TO_PACKET_END:
1197 case PTR_TO_FLOW_KEYS:
1198 case CONST_PTR_TO_MAP:
1199 case PTR_TO_SOCKET:
1200 case PTR_TO_SOCKET_OR_NULL:
1201 case PTR_TO_SOCK_COMMON:
1202 case PTR_TO_SOCK_COMMON_OR_NULL:
1203 case PTR_TO_TCP_SOCK:
1204 case PTR_TO_TCP_SOCK_OR_NULL:
1205 return true;
1206 default:
1207 return false;
1208 }
1209}
1210
1211/* Does this register contain a constant zero? */
1212static bool register_is_null(struct bpf_reg_state *reg)
1213{
1214 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1215}
1216
 1217/* check_stack_read/write functions track spill/fill of registers;
 1218 * stack boundary and alignment are checked in check_mem_access().
1219 */
1220static int check_stack_write(struct bpf_verifier_env *env,
1221 struct bpf_func_state *state, /* func where register points to */
1222 int off, int size, int value_regno, int insn_idx)
1223{
1224 struct bpf_func_state *cur; /* state of the current function */
1225 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
1226 enum bpf_reg_type type;
1227
1228 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
1229 state->acquired_refs, true);
1230 if (err)
1231 return err;
1232 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
1233 * so it's aligned access and [off, off + size) are within stack limits
1234 */
1235 if (!env->allow_ptr_leaks &&
1236 state->stack[spi].slot_type[0] == STACK_SPILL &&
1237 size != BPF_REG_SIZE) {
1238 verbose(env, "attempt to corrupt spilled pointer on stack\n");
1239 return -EACCES;
1240 }
1241
1242 cur = env->cur_state->frame[env->cur_state->curframe];
1243 if (value_regno >= 0 &&
1244 is_spillable_regtype((type = cur->regs[value_regno].type))) {
1245
1246 /* register containing pointer is being spilled into stack */
1247 if (size != BPF_REG_SIZE) {
1248 verbose(env, "invalid size of register spill\n");
1249 return -EACCES;
1250 }
1251
1252 if (state != cur && type == PTR_TO_STACK) {
1253 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1254 return -EINVAL;
1255 }
1256
1257 /* save register state */
1258 state->stack[spi].spilled_ptr = cur->regs[value_regno];
1259 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1260
1261 for (i = 0; i < BPF_REG_SIZE; i++) {
1262 if (state->stack[spi].slot_type[i] == STACK_MISC &&
1263 !env->allow_ptr_leaks) {
1264 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
1265 int soff = (-spi - 1) * BPF_REG_SIZE;
1266
 1267 /* detected reuse of an integer stack slot with a pointer,
 1268 * which means either llvm is reusing the stack slot or
 1269 * an attacker is trying to exploit CVE-2018-3639
 1270 * (speculative store bypass).
 1271 * Have to sanitize that slot with a preemptive
 1272 * store of zero.
1273 */
1274 if (*poff && *poff != soff) {
1275 /* disallow programs where single insn stores
1276 * into two different stack slots, since verifier
1277 * cannot sanitize them
1278 */
1279 verbose(env,
1280 "insn %d cannot access two stack slots fp%d and fp%d",
1281 insn_idx, *poff, soff);
1282 return -EINVAL;
1283 }
1284 *poff = soff;
1285 }
1286 state->stack[spi].slot_type[i] = STACK_SPILL;
1287 }
1288 } else {
1289 u8 type = STACK_MISC;
1290
1291 /* regular write of data into stack destroys any spilled ptr */
1292 state->stack[spi].spilled_ptr.type = NOT_INIT;
1293 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
1294 if (state->stack[spi].slot_type[0] == STACK_SPILL)
1295 for (i = 0; i < BPF_REG_SIZE; i++)
1296 state->stack[spi].slot_type[i] = STACK_MISC;
1297
 1298 /* only mark the slot as written if all 8 bytes were written;
 1299 * otherwise read propagation may incorrectly stop too soon
 1300 * when stack slots are partially written.
 1301 * This heuristic means that read propagation will be
 1302 * conservative, since it will add reg_live_read marks
 1303 * to stack slots all the way to the first state when a program
 1304 * writes+reads less than 8 bytes.
1305 */
1306 if (size == BPF_REG_SIZE)
1307 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1308
1309 /* when we zero initialize stack slots mark them as such */
1310 if (value_regno >= 0 &&
1311 register_is_null(&cur->regs[value_regno]))
1312 type = STACK_ZERO;
1313
1314 /* Mark slots affected by this stack write. */
1315 for (i = 0; i < size; i++)
1316 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
1317 type;
1318 }
1319 return 0;
1320}
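/* Editor's sketch (illustrative): an aligned 8-byte store of a pointer, e.g.
 *
 *   BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),   // *(u64 *)(fp - 8) = r1
 *
 * with R1 of type PTR_TO_CTX takes the is_spillable_regtype() path above:
 * spi = 0, spilled_ptr saves the full register state and all eight
 * slot_type[] bytes become STACK_SPILL.  A store of a scalar value instead
 * marks the written bytes STACK_MISC (or STACK_ZERO when the source register
 * is a known zero), and a narrower-than-8-byte store of a pointer is
 * rejected as an invalid register spill.
 */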
1321
1322static int check_stack_read(struct bpf_verifier_env *env,
1323 struct bpf_func_state *reg_state /* func where register points to */,
1324 int off, int size, int value_regno)
1325{
1326 struct bpf_verifier_state *vstate = env->cur_state;
1327 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1328 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1329 u8 *stype;
1330
1331 if (reg_state->allocated_stack <= slot) {
1332 verbose(env, "invalid read from stack off %d+0 size %d\n",
1333 off, size);
1334 return -EACCES;
1335 }
1336 stype = reg_state->stack[spi].slot_type;
1337
1338 if (stype[0] == STACK_SPILL) {
1339 if (size != BPF_REG_SIZE) {
1340 verbose(env, "invalid size of register spill\n");
1341 return -EACCES;
1342 }
1343 for (i = 1; i < BPF_REG_SIZE; i++) {
1344 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
1345 verbose(env, "corrupted spill memory\n");
1346 return -EACCES;
1347 }
1348 }
1349
1350 if (value_regno >= 0) {
1351 /* restore register state from stack */
1352 state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
1353 /* mark reg as written since spilled pointer state likely
1354 * has its liveness marks cleared by is_state_visited()
1355 * which resets stack/reg liveness for state transitions
1356 */
1357 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1358 }
1359 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1360 reg_state->stack[spi].spilled_ptr.parent);
1361 return 0;
1362 } else {
1363 int zeros = 0;
1364
1365 for (i = 0; i < size; i++) {
1366 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
1367 continue;
1368 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1369 zeros++;
1370 continue;
1371 }
1372 verbose(env, "invalid read from stack off %d+%d size %d\n",
1373 off, i, size);
1374 return -EACCES;
1375 }
1376 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1377 reg_state->stack[spi].spilled_ptr.parent);
1378 if (value_regno >= 0) {
1379 if (zeros == size) {
1380 /* any size read into register is zero extended,
1381 * so the whole register == const_zero
1382 */
1383 __mark_reg_const_zero(&state->regs[value_regno]);
1384 } else {
1385 /* have read misc data from the stack */
1386 mark_reg_unknown(env, state->regs, value_regno);
1387 }
1388 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1389 }
1390 return 0;
1391 }
1392}
1393
1394static int check_stack_access(struct bpf_verifier_env *env,
1395 const struct bpf_reg_state *reg,
1396 int off, int size)
1397{
1398 /* Stack accesses must be at a fixed offset, so that we
1399 * can determine what type of data were returned. See
1400 * check_stack_read().
1401 */
1402 if (!tnum_is_const(reg->var_off)) {
1403 char tn_buf[48];
1404
1405 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1406 verbose(env, "variable stack access var_off=%s off=%d size=%d",
1407 tn_buf, off, size);
1408 return -EACCES;
1409 }
1410
1411 if (off >= 0 || off < -MAX_BPF_STACK) {
1412 verbose(env, "invalid stack off=%d size=%d\n", off, size);
1413 return -EACCES;
1414 }
1415
1416 return 0;
1417}
1418
1419/* check read/write into map element returned by bpf_map_lookup_elem() */
1420static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
1421 int size, bool zero_size_allowed)
1422{
1423 struct bpf_reg_state *regs = cur_regs(env);
1424 struct bpf_map *map = regs[regno].map_ptr;
1425
1426 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1427 off + size > map->value_size) {
1428 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
1429 map->value_size, off, size);
1430 return -EACCES;
1431 }
1432 return 0;
1433}
1434
1435/* check read/write into a map element with possible variable offset */
1436static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1437 int off, int size, bool zero_size_allowed)
1438{
1439 struct bpf_verifier_state *vstate = env->cur_state;
1440 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1441 struct bpf_reg_state *reg = &state->regs[regno];
1442 int err;
1443
1444 /* We may have adjusted the register to this map value, so we
1445 * need to try adding each of min_value and max_value to off
1446 * to make sure our theoretical access will be safe.
1447 */
1448 if (env->log.level)
1449 print_verifier_state(env, state);
1450
1451 /* The minimum value is only important with signed
1452 * comparisons where we can't assume the floor of a
1453 * value is 0. If we are using signed variables for our
 1454 * indexes we need to make sure that whatever we use
1455 * will have a set floor within our range.
1456 */
1457 if (reg->smin_value < 0 &&
1458 (reg->smin_value == S64_MIN ||
1459 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
1460 reg->smin_value + off < 0)) {
1461 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1462 regno);
1463 return -EACCES;
1464 }
1465 err = __check_map_access(env, regno, reg->smin_value + off, size,
1466 zero_size_allowed);
1467 if (err) {
1468 verbose(env, "R%d min value is outside of the array range\n",
1469 regno);
1470 return err;
1471 }
1472
1473 /* If we haven't set a max value then we need to bail since we can't be
1474 * sure we won't do bad things.
1475 * If reg->umax_value + off could overflow, treat that as unbounded too.
1476 */
1477 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
1478 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
1479 regno);
1480 return -EACCES;
1481 }
1482 err = __check_map_access(env, regno, reg->umax_value + off, size,
1483 zero_size_allowed);
1484 if (err)
1485 verbose(env, "R%d max value is outside of the array range\n",
1486 regno);
1487
1488 if (map_value_has_spin_lock(reg->map_ptr)) {
1489 u32 lock = reg->map_ptr->spin_lock_off;
1490
1491 /* if any part of struct bpf_spin_lock can be touched by
1492 * load/store reject this program.
1493 * To check that [x1, x2) overlaps with [y1, y2)
1494 * it is sufficient to check x1 < y2 && y1 < x2.
1495 */
1496 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
1497 lock < reg->umax_value + off + size) {
1498 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
1499 return -EACCES;
1500 }
1501 }
1502 return err;
1503}
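/* Editor's worked example (illustrative): for a map with value_size = 64 and
 * a register with off = 0, smin_value = 0 and umax_value = 56, an 8-byte
 * access is checked at both extremes: __check_map_access() is called with
 * offset 0 and with offset 56; 56 + 8 <= 64, so both pass.  With
 * umax_value = 60 the second check sees 60 + 8 > 64 and the access is
 * rejected with "max value is outside of the array range".
 */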
1504
1505#define MAX_PACKET_OFF 0xffff
1506
1507static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
1508 const struct bpf_call_arg_meta *meta,
1509 enum bpf_access_type t)
1510{
1511 switch (env->prog->type) {
1512 /* Program types only with direct read access go here! */
1513 case BPF_PROG_TYPE_LWT_IN:
1514 case BPF_PROG_TYPE_LWT_OUT:
1515 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
1516 case BPF_PROG_TYPE_SK_REUSEPORT:
1517 case BPF_PROG_TYPE_FLOW_DISSECTOR:
1518 case BPF_PROG_TYPE_CGROUP_SKB:
1519 if (t == BPF_WRITE)
1520 return false;
1521 /* fallthrough */
1522
1523 /* Program types with direct read + write access go here! */
1524 case BPF_PROG_TYPE_SCHED_CLS:
1525 case BPF_PROG_TYPE_SCHED_ACT:
1526 case BPF_PROG_TYPE_XDP:
1527 case BPF_PROG_TYPE_LWT_XMIT:
1528 case BPF_PROG_TYPE_SK_SKB:
1529 case BPF_PROG_TYPE_SK_MSG:
1530 if (meta)
1531 return meta->pkt_access;
1532
1533 env->seen_direct_write = true;
1534 return true;
1535 default:
1536 return false;
1537 }
1538}
1539
1540static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
1541 int off, int size, bool zero_size_allowed)
1542{
1543 struct bpf_reg_state *regs = cur_regs(env);
1544 struct bpf_reg_state *reg = &regs[regno];
1545
1546 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1547 (u64)off + size > reg->range) {
1548 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
1549 off, size, regno, reg->id, reg->off, reg->range);
1550 return -EACCES;
1551 }
1552 return 0;
1553}
1554
1555static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
1556 int size, bool zero_size_allowed)
1557{
1558 struct bpf_reg_state *regs = cur_regs(env);
1559 struct bpf_reg_state *reg = &regs[regno];
1560 int err;
1561
1562 /* We may have added a variable offset to the packet pointer; but any
1563 * reg->range we have comes after that. We are only checking the fixed
1564 * offset.
1565 */
1566
1567 /* We don't allow negative numbers, because we aren't tracking enough
1568 * detail to prove they're safe.
1569 */
1570 if (reg->smin_value < 0) {
1571 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1572 regno);
1573 return -EACCES;
1574 }
1575 err = __check_packet_access(env, regno, off, size, zero_size_allowed);
1576 if (err) {
1577 verbose(env, "R%d offset is outside of the packet\n", regno);
1578 return err;
1579 }
1580
1581 /* __check_packet_access has made sure "off + size - 1" is within u16.
1582 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
 1583 * otherwise find_good_pkt_pointers would have refused to set the range info
 1584 * and __check_packet_access would have rejected this pkt access.
1585 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
1586 */
1587 env->prog->aux->max_pkt_offset =
1588 max_t(u32, env->prog->aux->max_pkt_offset,
1589 off + reg->umax_value + size - 1);
1590
1591 return err;
1592}
1593
1594/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
1595static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
1596 enum bpf_access_type t, enum bpf_reg_type *reg_type)
1597{
1598 struct bpf_insn_access_aux info = {
1599 .reg_type = *reg_type,
1600 };
1601
1602 if (env->ops->is_valid_access &&
1603 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
 1604 /* A non-zero info.ctx_field_size indicates that this field is a
1605 * candidate for later verifier transformation to load the whole
1606 * field and then apply a mask when accessed with a narrower
1607 * access than actual ctx access size. A zero info.ctx_field_size
1608 * will only allow for whole field access and rejects any other
1609 * type of narrower access.
1610 */
1611 *reg_type = info.reg_type;
1612
1613 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
1614 /* remember the offset of last byte accessed in ctx */
1615 if (env->prog->aux->max_ctx_offset < off + size)
1616 env->prog->aux->max_ctx_offset = off + size;
1617 return 0;
1618 }
1619
1620 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
1621 return -EACCES;
1622}
1623
1624static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
1625 int size)
1626{
1627 if (size < 0 || off < 0 ||
1628 (u64)off + size > sizeof(struct bpf_flow_keys)) {
1629 verbose(env, "invalid access to flow keys off=%d size=%d\n",
1630 off, size);
1631 return -EACCES;
1632 }
1633 return 0;
1634}
1635
1636static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
1637 u32 regno, int off, int size,
1638 enum bpf_access_type t)
1639{
1640 struct bpf_reg_state *regs = cur_regs(env);
1641 struct bpf_reg_state *reg = &regs[regno];
1642 struct bpf_insn_access_aux info = {};
1643 bool valid;
1644
1645 if (reg->smin_value < 0) {
1646 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1647 regno);
1648 return -EACCES;
1649 }
1650
1651 switch (reg->type) {
1652 case PTR_TO_SOCK_COMMON:
1653 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
1654 break;
1655 case PTR_TO_SOCKET:
1656 valid = bpf_sock_is_valid_access(off, size, t, &info);
1657 break;
1658 case PTR_TO_TCP_SOCK:
1659 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
1660 break;
1661 default:
1662 valid = false;
1663 }
1664
1665
1666 if (valid) {
1667 env->insn_aux_data[insn_idx].ctx_field_size =
1668 info.ctx_field_size;
1669 return 0;
1670 }
1671
1672 verbose(env, "R%d invalid %s access off=%d size=%d\n",
1673 regno, reg_type_str[reg->type], off, size);
1674
1675 return -EACCES;
1676}
1677
1678static bool __is_pointer_value(bool allow_ptr_leaks,
1679 const struct bpf_reg_state *reg)
1680{
1681 if (allow_ptr_leaks)
1682 return false;
1683
1684 return reg->type != SCALAR_VALUE;
1685}
1686
1687static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
1688{
1689 return cur_regs(env) + regno;
1690}
1691
1692static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1693{
1694 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
1695}
1696
1697static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1698{
1699 const struct bpf_reg_state *reg = reg_state(env, regno);
1700
1701 return reg->type == PTR_TO_CTX;
1702}
1703
1704static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
1705{
1706 const struct bpf_reg_state *reg = reg_state(env, regno);
1707
1708 return type_is_sk_pointer(reg->type);
1709}
1710
1711static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1712{
1713 const struct bpf_reg_state *reg = reg_state(env, regno);
1714
1715 return type_is_pkt_pointer(reg->type);
1716}
1717
1718static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
1719{
1720 const struct bpf_reg_state *reg = reg_state(env, regno);
1721
1722 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
1723 return reg->type == PTR_TO_FLOW_KEYS;
1724}
1725
1726static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1727 const struct bpf_reg_state *reg,
1728 int off, int size, bool strict)
1729{
1730 struct tnum reg_off;
1731 int ip_align;
1732
1733 /* Byte size accesses are always allowed. */
1734 if (!strict || size == 1)
1735 return 0;
1736
1737 /* For platforms that do not have a Kconfig enabling
1738 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1739 * NET_IP_ALIGN is universally set to '2'. And on platforms
1740 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1741 * to this code only in strict mode where we want to emulate
1742 * the NET_IP_ALIGN==2 checking. Therefore use an
1743 * unconditional IP align value of '2'.
1744 */
1745 ip_align = 2;
1746
1747 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1748 if (!tnum_is_aligned(reg_off, size)) {
1749 char tn_buf[48];
1750
1751 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1752 verbose(env,
1753 "misaligned packet access off %d+%s+%d+%d size %d\n",
1754 ip_align, tn_buf, reg->off, off, size);
1755 return -EACCES;
1756 }
1757
1758 return 0;
1759}
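
/* Illustrative example (all values hypothetical): with the unconditional
 * ip_align of 2, a 4-byte load at a fixed packet offset of 14 (reg->off == 0,
 * var_off == 0, off == 14) gives reg_off == 2 + 0 + 14 == 16, which is
 * 4-byte aligned and thus accepted even in strict mode. The same load at
 * off == 12 gives 14, which is not 4-byte aligned and is rejected with the
 * "misaligned packet access" message above.
 */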
1760
1761static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1762 const struct bpf_reg_state *reg,
1763 const char *pointer_desc,
1764 int off, int size, bool strict)
1765{
1766 struct tnum reg_off;
1767
1768 /* Byte size accesses are always allowed. */
1769 if (!strict || size == 1)
1770 return 0;
1771
1772 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1773 if (!tnum_is_aligned(reg_off, size)) {
1774 char tn_buf[48];
1775
1776 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1777 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
1778 pointer_desc, tn_buf, reg->off, off, size);
1779 return -EACCES;
1780 }
1781
1782 return 0;
1783}
1784
1785static int check_ptr_alignment(struct bpf_verifier_env *env,
1786 const struct bpf_reg_state *reg, int off,
1787 int size, bool strict_alignment_once)
1788{
1789 bool strict = env->strict_alignment || strict_alignment_once;
1790 const char *pointer_desc = "";
1791
1792 switch (reg->type) {
1793 case PTR_TO_PACKET:
1794 case PTR_TO_PACKET_META:
1795 /* Special case, because of NET_IP_ALIGN. Given metadata sits
1796 * right in front, treat it the very same way.
1797 */
1798 return check_pkt_ptr_alignment(env, reg, off, size, strict);
1799 case PTR_TO_FLOW_KEYS:
1800 pointer_desc = "flow keys ";
1801 break;
1802 case PTR_TO_MAP_VALUE:
1803 pointer_desc = "value ";
1804 break;
1805 case PTR_TO_CTX:
1806 pointer_desc = "context ";
1807 break;
1808 case PTR_TO_STACK:
1809 pointer_desc = "stack ";
1810 /* The stack spill tracking logic in check_stack_write()
1811 * and check_stack_read() relies on stack accesses being
1812 * aligned.
1813 */
1814 strict = true;
1815 break;
1816 case PTR_TO_SOCKET:
1817 pointer_desc = "sock ";
1818 break;
1819 case PTR_TO_SOCK_COMMON:
1820 pointer_desc = "sock_common ";
1821 break;
1822 case PTR_TO_TCP_SOCK:
1823 pointer_desc = "tcp_sock ";
1824 break;
1825 default:
1826 break;
1827 }
1828 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1829 strict);
1830}
1831
1832static int update_stack_depth(struct bpf_verifier_env *env,
1833 const struct bpf_func_state *func,
1834 int off)
1835{
1836 u16 stack = env->subprog_info[func->subprogno].stack_depth;
1837
1838 if (stack >= -off)
1839 return 0;
1840
1841 /* update known max for given subprogram */
1842 env->subprog_info[func->subprogno].stack_depth = -off;
1843 return 0;
1844}
1845
1846/* starting from the main bpf function, walk all instructions of the function
1847 * and recursively walk all callees that the given function can call.
1848 * Ignore jump and exit insns.
1849 * Since recursion is prevented by check_cfg() this algorithm
1850 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
1851 */
1852static int check_max_stack_depth(struct bpf_verifier_env *env)
1853{
1854 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
1855 struct bpf_subprog_info *subprog = env->subprog_info;
1856 struct bpf_insn *insn = env->prog->insnsi;
1857 int ret_insn[MAX_CALL_FRAMES];
1858 int ret_prog[MAX_CALL_FRAMES];
1859
1860process_func:
1861 /* round up to 32 bytes, since this is the granularity
1862 * of the interpreter stack size
1863 */
1864 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
1865 if (depth > MAX_BPF_STACK) {
1866 verbose(env, "combined stack size of %d calls is %d. Too large\n",
1867 frame + 1, depth);
1868 return -EACCES;
1869 }
1870continue_func:
1871 subprog_end = subprog[idx + 1].start;
1872 for (; i < subprog_end; i++) {
1873 if (insn[i].code != (BPF_JMP | BPF_CALL))
1874 continue;
1875 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1876 continue;
1877 /* remember insn and function to return to */
1878 ret_insn[frame] = i + 1;
1879 ret_prog[frame] = idx;
1880
1881 /* find the callee */
1882 i = i + insn[i].imm + 1;
1883 idx = find_subprog(env, i);
1884 if (idx < 0) {
1885 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1886 i);
1887 return -EFAULT;
1888 }
1889 frame++;
1890 if (frame >= MAX_CALL_FRAMES) {
1891 WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1892 return -EFAULT;
1893 }
1894 goto process_func;
1895 }
1896 /* end of for() loop means the last insn of the 'subprog'
1897 * was reached. Doesn't matter whether it was JA or EXIT
1898 */
1899 if (frame == 0)
1900 return 0;
1901 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
1902 frame--;
1903 i = ret_insn[frame];
1904 idx = ret_prog[frame];
1905 goto continue_func;
1906}
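
/* Illustrative example (stack sizes hypothetical): if the main function uses
 * 128 bytes of stack and calls subprog A (160 bytes), which in turn calls
 * subprog B (224 bytes), the depth along that chain is 128 + 160 + 224 == 512
 * == MAX_BPF_STACK and is still accepted, since only depth > MAX_BPF_STACK is
 * rejected. Growing any frame by another 32-byte unit pushes the sum to 544
 * and triggers the "combined stack size ... Too large" error above.
 */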
1907
1908#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1909static int get_callee_stack_depth(struct bpf_verifier_env *env,
1910 const struct bpf_insn *insn, int idx)
1911{
1912 int start = idx + insn->imm + 1, subprog;
1913
1914 subprog = find_subprog(env, start);
1915 if (subprog < 0) {
1916 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1917 start);
1918 return -EFAULT;
1919 }
1920 return env->subprog_info[subprog].stack_depth;
1921}
1922#endif
1923
1924static int check_ctx_reg(struct bpf_verifier_env *env,
1925 const struct bpf_reg_state *reg, int regno)
1926{
1927 /* Access to ctx or passing it to a helper is only allowed in
1928 * its original, unmodified form.
1929 */
1930
1931 if (reg->off) {
1932 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
1933 regno, reg->off);
1934 return -EACCES;
1935 }
1936
1937 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1938 char tn_buf[48];
1939
1940 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1941 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
1942 return -EACCES;
1943 }
1944
1945 return 0;
1946}
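
/* Illustrative (hypothetical) sequence rejected by the check above:
 *   BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),         r6 = ctx (PTR_TO_CTX, off 0)
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),        r6 += 8  (reg->off becomes 8)
 *   BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0)  load via modified ctx pointer
 * The load fails with "dereference of modified ctx ptr R6 off=8 disallowed";
 * the access has to be written as a load from the unmodified ctx register
 * with the displacement in the instruction's off field instead.
 */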
1947
1948/* truncate register to smaller size (in bytes)
1949 * must be called with size < BPF_REG_SIZE
1950 */
1951static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1952{
1953 u64 mask;
1954
1955 /* clear high bits in bit representation */
1956 reg->var_off = tnum_cast(reg->var_off, size);
1957
1958 /* fix arithmetic bounds */
1959 mask = ((u64)1 << (size * 8)) - 1;
1960 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1961 reg->umin_value &= mask;
1962 reg->umax_value &= mask;
1963 } else {
1964 reg->umin_value = 0;
1965 reg->umax_value = mask;
1966 }
1967 reg->smin_value = reg->umin_value;
1968 reg->smax_value = reg->umax_value;
1969}
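
/* Illustrative example (bounds hypothetical): truncating to size == 2 uses
 * mask == 0xffff. A register with umin == 0x10005 and umax == 0x100f0 has
 * identical bits above the mask (0x10000), so the bounds become [0x5, 0xf0].
 * If instead umin == 0xfff0 and umax == 0x10010, the high bits differ and the
 * bounds widen to [0, 0xffff]. Either way the signed bounds are then copied
 * from the (now non-negative) unsigned ones.
 */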
1970
1971/* check whether memory at (regno + off) is accessible for t = (read | write)
1972 * if t==write, value_regno is a register whose value is stored into memory
1973 * if t==read, value_regno is a register which will receive the value from memory
1974 * if t==write && value_regno==-1, some unknown value is stored into memory
1975 * if t==read && value_regno==-1, don't care what we read from memory
1976 */
1977static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1978 int off, int bpf_size, enum bpf_access_type t,
1979 int value_regno, bool strict_alignment_once)
1980{
1981 struct bpf_reg_state *regs = cur_regs(env);
1982 struct bpf_reg_state *reg = regs + regno;
1983 struct bpf_func_state *state;
1984 int size, err = 0;
1985
1986 size = bpf_size_to_bytes(bpf_size);
1987 if (size < 0)
1988 return size;
1989
1990 /* alignment checks will add in reg->off themselves */
1991 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
1992 if (err)
1993 return err;
1994
1995 /* for access checks, reg->off is just part of off */
1996 off += reg->off;
1997
1998 if (reg->type == PTR_TO_MAP_VALUE) {
1999 if (t == BPF_WRITE && value_regno >= 0 &&
2000 is_pointer_value(env, value_regno)) {
2001 verbose(env, "R%d leaks addr into map\n", value_regno);
2002 return -EACCES;
2003 }
2004
2005 err = check_map_access(env, regno, off, size, false);
2006 if (!err && t == BPF_READ && value_regno >= 0)
2007 mark_reg_unknown(env, regs, value_regno);
2008
2009 } else if (reg->type == PTR_TO_CTX) {
2010 enum bpf_reg_type reg_type = SCALAR_VALUE;
2011
2012 if (t == BPF_WRITE && value_regno >= 0 &&
2013 is_pointer_value(env, value_regno)) {
2014 verbose(env, "R%d leaks addr into ctx\n", value_regno);
2015 return -EACCES;
2016 }
2017
2018 err = check_ctx_reg(env, reg, regno);
2019 if (err < 0)
2020 return err;
2021
2022 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
2023 if (!err && t == BPF_READ && value_regno >= 0) {
2024 /* ctx access returns either a scalar, or a
2025 * PTR_TO_PACKET[_META,_END]. In the latter
2026 * case, we know the offset is zero.
2027 */
2028 if (reg_type == SCALAR_VALUE) {
2029 mark_reg_unknown(env, regs, value_regno);
2030 } else {
2031 mark_reg_known_zero(env, regs,
2032 value_regno);
2033 if (reg_type_may_be_null(reg_type))
2034 regs[value_regno].id = ++env->id_gen;
2035 }
2036 regs[value_regno].type = reg_type;
2037 }
2038
2039 } else if (reg->type == PTR_TO_STACK) {
2040 off += reg->var_off.value;
2041 err = check_stack_access(env, reg, off, size);
2042 if (err)
2043 return err;
2044
2045 state = func(env, reg);
2046 err = update_stack_depth(env, state, off);
2047 if (err)
2048 return err;
2049
2050 if (t == BPF_WRITE)
2051 err = check_stack_write(env, state, off, size,
2052 value_regno, insn_idx);
2053 else
2054 err = check_stack_read(env, state, off, size,
2055 value_regno);
2056 } else if (reg_is_pkt_pointer(reg)) {
2057 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
2058 verbose(env, "cannot write into packet\n");
2059 return -EACCES;
2060 }
2061 if (t == BPF_WRITE && value_regno >= 0 &&
2062 is_pointer_value(env, value_regno)) {
2063 verbose(env, "R%d leaks addr into packet\n",
2064 value_regno);
2065 return -EACCES;
2066 }
2067 err = check_packet_access(env, regno, off, size, false);
2068 if (!err && t == BPF_READ && value_regno >= 0)
2069 mark_reg_unknown(env, regs, value_regno);
2070 } else if (reg->type == PTR_TO_FLOW_KEYS) {
2071 if (t == BPF_WRITE && value_regno >= 0 &&
2072 is_pointer_value(env, value_regno)) {
2073 verbose(env, "R%d leaks addr into flow keys\n",
2074 value_regno);
2075 return -EACCES;
2076 }
2077
2078 err = check_flow_keys_access(env, off, size);
2079 if (!err && t == BPF_READ && value_regno >= 0)
2080 mark_reg_unknown(env, regs, value_regno);
2081 } else if (type_is_sk_pointer(reg->type)) {
2082 if (t == BPF_WRITE) {
2083 verbose(env, "R%d cannot write into %s\n",
2084 regno, reg_type_str[reg->type]);
2085 return -EACCES;
2086 }
2087 err = check_sock_access(env, insn_idx, regno, off, size, t);
2088 if (!err && value_regno >= 0)
2089 mark_reg_unknown(env, regs, value_regno);
2090 } else {
2091 verbose(env, "R%d invalid mem access '%s'\n", regno,
2092 reg_type_str[reg->type]);
2093 return -EACCES;
2094 }
2095
2096 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
2097 regs[value_regno].type == SCALAR_VALUE) {
2098 /* b/h/w load zero-extends, mark upper bits as known 0 */
2099 coerce_reg_to_size(&regs[value_regno], size);
2100 }
2101 return err;
2102}
2103
2104static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
2105{
2106 int err;
2107
2108 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
2109 insn->imm != 0) {
2110 verbose(env, "BPF_XADD uses reserved fields\n");
2111 return -EINVAL;
2112 }
2113
2114 /* check src1 operand */
2115 err = check_reg_arg(env, insn->src_reg, SRC_OP);
2116 if (err)
2117 return err;
2118
2119 /* check src2 operand */
2120 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
2121 if (err)
2122 return err;
2123
2124 if (is_pointer_value(env, insn->src_reg)) {
2125 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
2126 return -EACCES;
2127 }
2128
2129 if (is_ctx_reg(env, insn->dst_reg) ||
2130 is_pkt_reg(env, insn->dst_reg) ||
2131 is_flow_key_reg(env, insn->dst_reg) ||
2132 is_sk_reg(env, insn->dst_reg)) {
2133 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2134 insn->dst_reg,
2135 reg_type_str[reg_state(env, insn->dst_reg)->type]);
2136 return -EACCES;
2137 }
2138
2139 /* check whether atomic_add can read the memory */
2140 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
2141 BPF_SIZE(insn->code), BPF_READ, -1, true);
2142 if (err)
2143 return err;
2144
2145 /* check whether atomic_add can write into the same memory */
2146 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
2147 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
2148}
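
/* Illustrative (hypothetical) uses of BPF_XADD under the checks above:
 *   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),            initialize fp-8 first
 *   BPF_MOV64_IMM(BPF_REG_2, 1),
 *   BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_2, -8),  ok: stack memory
 * whereas the same atomic add with a ctx, packet, flow-keys or socket pointer
 * as the destination, e.g.
 *   BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, 0),    r1 is still ctx
 * is rejected with "BPF_XADD stores into R1 ... is not allowed".
 */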
2149
2150/* when register 'regno' is passed into a function that will read 'access_size'
2151 * bytes from that pointer, make sure that it's within the stack boundary
2152 * and all elements of the stack are initialized.
2153 * Unlike most pointer bounds-checking functions, this one doesn't take an
2154 * 'off' argument, so it has to add in reg->off itself.
2155 */
2156static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2157 int access_size, bool zero_size_allowed,
2158 struct bpf_call_arg_meta *meta)
2159{
2160 struct bpf_reg_state *reg = reg_state(env, regno);
2161 struct bpf_func_state *state = func(env, reg);
2162 int off, i, slot, spi;
2163
2164 if (reg->type != PTR_TO_STACK) {
2165 /* Allow zero-byte read from NULL, regardless of pointer type */
2166 if (zero_size_allowed && access_size == 0 &&
2167 register_is_null(reg))
2168 return 0;
2169
2170 verbose(env, "R%d type=%s expected=%s\n", regno,
2171 reg_type_str[reg->type],
2172 reg_type_str[PTR_TO_STACK]);
2173 return -EACCES;
2174 }
2175
2176 /* Only allow fixed-offset stack reads */
2177 if (!tnum_is_const(reg->var_off)) {
2178 char tn_buf[48];
2179
2180 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2181 verbose(env, "invalid variable stack read R%d var_off=%s\n",
2182 regno, tn_buf);
2183 return -EACCES;
2184 }
2185 off = reg->off + reg->var_off.value;
2186 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
2187 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
2188 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
2189 regno, off, access_size);
2190 return -EACCES;
2191 }
2192
2193 if (meta && meta->raw_mode) {
2194 meta->access_size = access_size;
2195 meta->regno = regno;
2196 return 0;
2197 }
2198
2199 for (i = 0; i < access_size; i++) {
2200 u8 *stype;
2201
2202 slot = -(off + i) - 1;
2203 spi = slot / BPF_REG_SIZE;
2204 if (state->allocated_stack <= slot)
2205 goto err;
2206 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2207 if (*stype == STACK_MISC)
2208 goto mark;
2209 if (*stype == STACK_ZERO) {
2210 /* helper can write anything into the stack */
2211 *stype = STACK_MISC;
2212 goto mark;
2213 }
2214err:
2215 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
2216 off, i, access_size);
2217 return -EACCES;
2218mark:
2219 /* reading any byte out of 8-byte 'spill_slot' will cause
2220 * the whole slot to be marked as 'read'
2221 */
2222 mark_reg_read(env, &state->stack[spi].spilled_ptr,
2223 state->stack[spi].spilled_ptr.parent);
2224 }
2225 return update_stack_depth(env, state, off);
2226}
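
/* Illustrative example (offsets hypothetical): a helper argument pointing at
 * fp-8 with access_size == 16 fails the bounds check above, since off == -8
 * and off + access_size == 8 > 0, i.e. the access would run past the top of
 * the frame. Pointing at fp-16 with access_size == 16 passes the bounds
 * check, and each of the 16 bytes must then be STACK_MISC or STACK_ZERO,
 * unless the helper is in raw mode and will overwrite the buffer itself.
 */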
2227
2228static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
2229 int access_size, bool zero_size_allowed,
2230 struct bpf_call_arg_meta *meta)
2231{
2232 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
2233
2234 switch (reg->type) {
2235 case PTR_TO_PACKET:
2236 case PTR_TO_PACKET_META:
2237 return check_packet_access(env, regno, reg->off, access_size,
2238 zero_size_allowed);
2239 case PTR_TO_MAP_VALUE:
2240 return check_map_access(env, regno, reg->off, access_size,
2241 zero_size_allowed);
2242 default: /* scalar_value|ptr_to_stack or invalid ptr */
2243 return check_stack_boundary(env, regno, access_size,
2244 zero_size_allowed, meta);
2245 }
2246}
2247
2248/* Implementation details:
2249 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
2250 * Two bpf_map_lookups (even with the same key) will have different reg->id.
2251 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
2252 * value_or_null->value transition, since the verifier only cares about
2253 * the range of access to valid map value pointer and doesn't care about actual
2254 * address of the map element.
2255 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
2256 * reg->id > 0 after value_or_null->value transition. By doing so
2257 * two bpf_map_lookups will be considered two different pointers that
2258 * point to different bpf_spin_locks.
2259 * The verifier allows taking only one bpf_spin_lock at a time to avoid
2260 * deadlocks.
2261 * Since only one bpf_spin_lock is allowed the checks are simpler than
2262 * reg_is_refcounted() logic. The verifier needs to remember only
2263 * one spin_lock instead of array of acquired_refs.
2264 * cur_state->active_spin_lock remembers which map value element got locked
2265 * and clears it after bpf_spin_unlock.
2266 */
2267static int process_spin_lock(struct bpf_verifier_env *env, int regno,
2268 bool is_lock)
2269{
2270 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
2271 struct bpf_verifier_state *cur = env->cur_state;
2272 bool is_const = tnum_is_const(reg->var_off);
2273 struct bpf_map *map = reg->map_ptr;
2274 u64 val = reg->var_off.value;
2275
2276 if (reg->type != PTR_TO_MAP_VALUE) {
2277 verbose(env, "R%d is not a pointer to map_value\n", regno);
2278 return -EINVAL;
2279 }
2280 if (!is_const) {
2281 verbose(env,
2282 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
2283 regno);
2284 return -EINVAL;
2285 }
2286 if (!map->btf) {
2287 verbose(env,
2288 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
2289 map->name);
2290 return -EINVAL;
2291 }
2292 if (!map_value_has_spin_lock(map)) {
2293 if (map->spin_lock_off == -E2BIG)
2294 verbose(env,
2295 "map '%s' has more than one 'struct bpf_spin_lock'\n",
2296 map->name);
2297 else if (map->spin_lock_off == -ENOENT)
2298 verbose(env,
2299 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
2300 map->name);
2301 else
2302 verbose(env,
2303 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
2304 map->name);
2305 return -EINVAL;
2306 }
2307 if (map->spin_lock_off != val + reg->off) {
2308 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
2309 val + reg->off);
2310 return -EINVAL;
2311 }
2312 if (is_lock) {
2313 if (cur->active_spin_lock) {
2314 verbose(env,
2315 "Locking two bpf_spin_locks are not allowed\n");
2316 return -EINVAL;
2317 }
2318 cur->active_spin_lock = reg->id;
2319 } else {
2320 if (!cur->active_spin_lock) {
2321 verbose(env, "bpf_spin_unlock without taking a lock\n");
2322 return -EINVAL;
2323 }
2324 if (cur->active_spin_lock != reg->id) {
2325 verbose(env, "bpf_spin_unlock of different lock\n");
2326 return -EINVAL;
2327 }
2328 cur->active_spin_lock = 0;
2329 }
2330 return 0;
2331}
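
/* Illustrative map value layout (hypothetical) that satisfies the checks
 * above, assuming the map was created with BTF describing this struct:
 *
 *   struct hypothetical_val {
 *           struct bpf_spin_lock lock;    offset 0 == map->spin_lock_off
 *           long counter;
 *   };
 *
 * bpf_spin_lock() must be given a PTR_TO_MAP_VALUE whose constant offset
 * (reg->var_off.value + reg->off) equals spin_lock_off; passing a pointer to
 * 'counter' instead trips the "doesn't point to 'struct bpf_spin_lock'" error.
 */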
2332
2333static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
2334{
2335 return type == ARG_PTR_TO_MEM ||
2336 type == ARG_PTR_TO_MEM_OR_NULL ||
2337 type == ARG_PTR_TO_UNINIT_MEM;
2338}
2339
2340static bool arg_type_is_mem_size(enum bpf_arg_type type)
2341{
2342 return type == ARG_CONST_SIZE ||
2343 type == ARG_CONST_SIZE_OR_ZERO;
2344}
2345
2346static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
2347 enum bpf_arg_type arg_type,
2348 struct bpf_call_arg_meta *meta)
2349{
2350 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
2351 enum bpf_reg_type expected_type, type = reg->type;
2352 int err = 0;
2353
2354 if (arg_type == ARG_DONTCARE)
2355 return 0;
2356
2357 err = check_reg_arg(env, regno, SRC_OP);
2358 if (err)
2359 return err;
2360
2361 if (arg_type == ARG_ANYTHING) {
2362 if (is_pointer_value(env, regno)) {
2363 verbose(env, "R%d leaks addr into helper function\n",
2364 regno);
2365 return -EACCES;
2366 }
2367 return 0;
2368 }
2369
2370 if (type_is_pkt_pointer(type) &&
2371 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
2372 verbose(env, "helper access to the packet is not allowed\n");
2373 return -EACCES;
2374 }
2375
2376 if (arg_type == ARG_PTR_TO_MAP_KEY ||
2377 arg_type == ARG_PTR_TO_MAP_VALUE ||
2378 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
2379 expected_type = PTR_TO_STACK;
2380 if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
2381 type != expected_type)
2382 goto err_type;
2383 } else if (arg_type == ARG_CONST_SIZE ||
2384 arg_type == ARG_CONST_SIZE_OR_ZERO) {
2385 expected_type = SCALAR_VALUE;
2386 if (type != expected_type)
2387 goto err_type;
2388 } else if (arg_type == ARG_CONST_MAP_PTR) {
2389 expected_type = CONST_PTR_TO_MAP;
2390 if (type != expected_type)
2391 goto err_type;
2392 } else if (arg_type == ARG_PTR_TO_CTX) {
2393 expected_type = PTR_TO_CTX;
2394 if (type != expected_type)
2395 goto err_type;
2396 err = check_ctx_reg(env, reg, regno);
2397 if (err < 0)
2398 return err;
2399 } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
2400 expected_type = PTR_TO_SOCK_COMMON;
2401 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
2402 if (!type_is_sk_pointer(type))
2403 goto err_type;
2404 if (reg->ref_obj_id) {
2405 if (meta->ref_obj_id) {
2406 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
2407 regno, reg->ref_obj_id,
2408 meta->ref_obj_id);
2409 return -EFAULT;
2410 }
2411 meta->ref_obj_id = reg->ref_obj_id;
2412 }
2413 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
2414 if (meta->func_id == BPF_FUNC_spin_lock) {
2415 if (process_spin_lock(env, regno, true))
2416 return -EACCES;
2417 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
2418 if (process_spin_lock(env, regno, false))
2419 return -EACCES;
2420 } else {
2421 verbose(env, "verifier internal error\n");
2422 return -EFAULT;
2423 }
2424 } else if (arg_type_is_mem_ptr(arg_type)) {
2425 expected_type = PTR_TO_STACK;
2426 /* One exception here. In case function allows for NULL to be
2427 * passed in as argument, it's a SCALAR_VALUE type. Final test
2428 * happens during stack boundary checking.
2429 */
2430 if (register_is_null(reg) &&
2431 arg_type == ARG_PTR_TO_MEM_OR_NULL)
2432 /* final test in check_stack_boundary() */;
2433 else if (!type_is_pkt_pointer(type) &&
2434 type != PTR_TO_MAP_VALUE &&
2435 type != expected_type)
2436 goto err_type;
2437 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
2438 } else {
2439 verbose(env, "unsupported arg_type %d\n", arg_type);
2440 return -EFAULT;
2441 }
2442
2443 if (arg_type == ARG_CONST_MAP_PTR) {
2444 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
2445 meta->map_ptr = reg->map_ptr;
2446 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
2447 /* bpf_map_xxx(..., map_ptr, ..., key) call:
2448 * check that [key, key + map->key_size) are within
2449 * stack limits and initialized
2450 */
2451 if (!meta->map_ptr) {
2452 /* in function declaration map_ptr must come before
2453 * map_key, so that it's verified and known before
2454 * we have to check map_key here. Otherwise it means
2455 * that the kernel subsystem misconfigured the verifier
2456 */
2457 verbose(env, "invalid map_ptr to access map->key\n");
2458 return -EACCES;
2459 }
2460 err = check_helper_mem_access(env, regno,
2461 meta->map_ptr->key_size, false,
2462 NULL);
2463 } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
2464 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
2465 /* bpf_map_xxx(..., map_ptr, ..., value) call:
2466 * check [value, value + map->value_size) validity
2467 */
2468 if (!meta->map_ptr) {
2469 /* kernel subsystem misconfigured verifier */
2470 verbose(env, "invalid map_ptr to access map->value\n");
2471 return -EACCES;
2472 }
2473 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
2474 err = check_helper_mem_access(env, regno,
2475 meta->map_ptr->value_size, false,
2476 meta);
2477 } else if (arg_type_is_mem_size(arg_type)) {
2478 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
2479
2480 /* remember the mem_size which may be used later
2481 * to refine return values.
2482 */
2483 meta->msize_smax_value = reg->smax_value;
2484 meta->msize_umax_value = reg->umax_value;
2485
2486 /* The register is SCALAR_VALUE; the access check
2487 * happens using its boundaries.
2488 */
2489 if (!tnum_is_const(reg->var_off))
2490 /* For unprivileged variable accesses, disable raw
2491 * mode so that the program is required to
2492 * initialize all the memory that the helper could
2493 * just partially fill up.
2494 */
2495 meta = NULL;
2496
2497 if (reg->smin_value < 0) {
2498 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
2499 regno);
2500 return -EACCES;
2501 }
2502
2503 if (reg->umin_value == 0) {
2504 err = check_helper_mem_access(env, regno - 1, 0,
2505 zero_size_allowed,
2506 meta);
2507 if (err)
2508 return err;
2509 }
2510
2511 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
2512 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
2513 regno);
2514 return -EACCES;
2515 }
2516 err = check_helper_mem_access(env, regno - 1,
2517 reg->umax_value,
2518 zero_size_allowed, meta);
2519 }
2520
2521 return err;
2522err_type:
2523 verbose(env, "R%d type=%s expected=%s\n", regno,
2524 reg_type_str[type], reg_type_str[expected_type]);
2525 return -EACCES;
2526}
2527
2528static int check_map_func_compatibility(struct bpf_verifier_env *env,
2529 struct bpf_map *map, int func_id)
2530{
2531 if (!map)
2532 return 0;
2533
2534 /* We need a two way check, first is from map perspective ... */
2535 switch (map->map_type) {
2536 case BPF_MAP_TYPE_PROG_ARRAY:
2537 if (func_id != BPF_FUNC_tail_call)
2538 goto error;
2539 break;
2540 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2541 if (func_id != BPF_FUNC_perf_event_read &&
2542 func_id != BPF_FUNC_perf_event_output &&
2543 func_id != BPF_FUNC_perf_event_read_value)
2544 goto error;
2545 break;
2546 case BPF_MAP_TYPE_STACK_TRACE:
2547 if (func_id != BPF_FUNC_get_stackid)
2548 goto error;
2549 break;
2550 case BPF_MAP_TYPE_CGROUP_ARRAY:
2551 if (func_id != BPF_FUNC_skb_under_cgroup &&
2552 func_id != BPF_FUNC_current_task_under_cgroup)
2553 goto error;
2554 break;
2555 case BPF_MAP_TYPE_CGROUP_STORAGE:
2556 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
2557 if (func_id != BPF_FUNC_get_local_storage)
2558 goto error;
2559 break;
2560 /* devmap returns a pointer to a live net_device ifindex that we cannot
2561 * allow to be modified from the bpf side. So do not allow element
2562 * lookups for now.
2563 */
2564 case BPF_MAP_TYPE_DEVMAP:
2565 if (func_id != BPF_FUNC_redirect_map)
2566 goto error;
2567 break;
2568 /* Restrict the bpf side of cpumap and xskmap, open them up when
2569 * use-cases appear.
2570 */
2571 case BPF_MAP_TYPE_CPUMAP:
2572 case BPF_MAP_TYPE_XSKMAP:
2573 if (func_id != BPF_FUNC_redirect_map)
2574 goto error;
2575 break;
2576 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
2577 case BPF_MAP_TYPE_HASH_OF_MAPS:
2578 if (func_id != BPF_FUNC_map_lookup_elem)
2579 goto error;
2580 break;
2581 case BPF_MAP_TYPE_SOCKMAP:
2582 if (func_id != BPF_FUNC_sk_redirect_map &&
2583 func_id != BPF_FUNC_sock_map_update &&
2584 func_id != BPF_FUNC_map_delete_elem &&
2585 func_id != BPF_FUNC_msg_redirect_map)
2586 goto error;
2587 break;
2588 case BPF_MAP_TYPE_SOCKHASH:
2589 if (func_id != BPF_FUNC_sk_redirect_hash &&
2590 func_id != BPF_FUNC_sock_hash_update &&
2591 func_id != BPF_FUNC_map_delete_elem &&
2592 func_id != BPF_FUNC_msg_redirect_hash)
2593 goto error;
2594 break;
2595 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
2596 if (func_id != BPF_FUNC_sk_select_reuseport)
2597 goto error;
2598 break;
2599 case BPF_MAP_TYPE_QUEUE:
2600 case BPF_MAP_TYPE_STACK:
2601 if (func_id != BPF_FUNC_map_peek_elem &&
2602 func_id != BPF_FUNC_map_pop_elem &&
2603 func_id != BPF_FUNC_map_push_elem)
2604 goto error;
2605 break;
2606 default:
2607 break;
2608 }
2609
2610 /* ... and second from the function itself. */
2611 switch (func_id) {
2612 case BPF_FUNC_tail_call:
2613 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2614 goto error;
2615 if (env->subprog_cnt > 1) {
2616 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2617 return -EINVAL;
2618 }
2619 break;
2620 case BPF_FUNC_perf_event_read:
2621 case BPF_FUNC_perf_event_output:
2622 case BPF_FUNC_perf_event_read_value:
2623 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2624 goto error;
2625 break;
2626 case BPF_FUNC_get_stackid:
2627 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2628 goto error;
2629 break;
2630 case BPF_FUNC_current_task_under_cgroup:
2631 case BPF_FUNC_skb_under_cgroup:
2632 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2633 goto error;
2634 break;
2635 case BPF_FUNC_redirect_map:
2636 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
2637 map->map_type != BPF_MAP_TYPE_CPUMAP &&
2638 map->map_type != BPF_MAP_TYPE_XSKMAP)
2639 goto error;
2640 break;
2641 case BPF_FUNC_sk_redirect_map:
2642 case BPF_FUNC_msg_redirect_map:
2643 case BPF_FUNC_sock_map_update:
2644 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2645 goto error;
2646 break;
2647 case BPF_FUNC_sk_redirect_hash:
2648 case BPF_FUNC_msg_redirect_hash:
2649 case BPF_FUNC_sock_hash_update:
2650 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
2651 goto error;
2652 break;
2653 case BPF_FUNC_get_local_storage:
2654 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
2655 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
2656 goto error;
2657 break;
2658 case BPF_FUNC_sk_select_reuseport:
2659 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
2660 goto error;
2661 break;
2662 case BPF_FUNC_map_peek_elem:
2663 case BPF_FUNC_map_pop_elem:
2664 case BPF_FUNC_map_push_elem:
2665 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
2666 map->map_type != BPF_MAP_TYPE_STACK)
2667 goto error;
2668 break;
2669 default:
2670 break;
2671 }
2672
2673 return 0;
2674error:
2675 verbose(env, "cannot pass map_type %d into func %s#%d\n",
2676 map->map_type, func_id_name(func_id), func_id);
2677 return -EINVAL;
2678}
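
/* Illustrative mismatches (hypothetical programs): calling bpf_tail_call()
 * with anything but a BPF_MAP_TYPE_PROG_ARRAY fails the second (func-side)
 * switch, while calling bpf_map_lookup_elem() on a BPF_MAP_TYPE_PROG_ARRAY
 * fails the first (map-side) switch; both report
 * "cannot pass map_type ... into func ...".
 */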
2679
2680static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
2681{
2682 int count = 0;
2683
2684 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
2685 count++;
2686 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
2687 count++;
2688 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
2689 count++;
2690 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
2691 count++;
2692 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
2693 count++;
2694
2695 /* We only support one arg being in raw mode at the moment,
2696 * which is sufficient for the helper functions we have
2697 * right now.
2698 */
2699 return count <= 1;
2700}
2701
2702static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
2703 enum bpf_arg_type arg_next)
2704{
2705 return (arg_type_is_mem_ptr(arg_curr) &&
2706 !arg_type_is_mem_size(arg_next)) ||
2707 (!arg_type_is_mem_ptr(arg_curr) &&
2708 arg_type_is_mem_size(arg_next));
2709}
2710
2711static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2712{
2713 /* bpf_xxx(..., buf, len) call will access 'len'
2714 * bytes from memory 'buf'. Both arg types need
2715 * to be paired, so make sure there's no buggy
2716 * helper function specification.
2717 */
2718 if (arg_type_is_mem_size(fn->arg1_type) ||
2719 arg_type_is_mem_ptr(fn->arg5_type) ||
2720 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
2721 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
2722 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
2723 check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
2724 return false;
2725
2726 return true;
2727}
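
/* Illustrative pairing: a helper proto along the lines of
 * bpf_probe_read(void *dst, u32 size, const void *src) declares arg1 as
 * ARG_PTR_TO_UNINIT_MEM and arg2 as a mem-size type (ARG_CONST_SIZE or
 * ARG_CONST_SIZE_OR_ZERO), which satisfies the pairing rule. A proto that put
 * a size type in arg1, or a mem pointer in arg5 with no room for its size,
 * would be rejected here as a buggy helper specification.
 */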
2728
2729static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
2730{
2731 int count = 0;
2732
2733 if (arg_type_may_be_refcounted(fn->arg1_type))
2734 count++;
2735 if (arg_type_may_be_refcounted(fn->arg2_type))
2736 count++;
2737 if (arg_type_may_be_refcounted(fn->arg3_type))
2738 count++;
2739 if (arg_type_may_be_refcounted(fn->arg4_type))
2740 count++;
2741 if (arg_type_may_be_refcounted(fn->arg5_type))
2742 count++;
2743
2744 /* A reference acquiring function cannot acquire
2745 * another refcounted ptr.
2746 */
2747 if (is_acquire_function(func_id) && count)
2748 return false;
2749
2750 /* We only support one arg being unreferenced at the moment,
2751 * which is sufficient for the helper functions we have right now.
2752 */
2753 return count <= 1;
2754}
2755
2756static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
2757{
2758 return check_raw_mode_ok(fn) &&
2759 check_arg_pair_ok(fn) &&
2760 check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
2761}
2762
2763/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2764 * are now invalid, so turn them into unknown SCALAR_VALUE.
2765 */
2766static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2767 struct bpf_func_state *state)
2768{
2769 struct bpf_reg_state *regs = state->regs, *reg;
2770 int i;
2771
2772 for (i = 0; i < MAX_BPF_REG; i++)
2773 if (reg_is_pkt_pointer_any(&regs[i]))
2774 mark_reg_unknown(env, regs, i);
2775
2776 bpf_for_each_spilled_reg(i, state, reg) {
2777 if (!reg)
2778 continue;
2779 if (reg_is_pkt_pointer_any(reg))
2780 __mark_reg_unknown(reg);
2781 }
2782}
2783
2784static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2785{
2786 struct bpf_verifier_state *vstate = env->cur_state;
2787 int i;
2788
2789 for (i = 0; i <= vstate->curframe; i++)
2790 __clear_all_pkt_pointers(env, vstate->frame[i]);
2791}
2792
2793static void release_reg_references(struct bpf_verifier_env *env,
2794 struct bpf_func_state *state,
2795 int ref_obj_id)
2796{
2797 struct bpf_reg_state *regs = state->regs, *reg;
2798 int i;
2799
2800 for (i = 0; i < MAX_BPF_REG; i++)
2801 if (regs[i].ref_obj_id == ref_obj_id)
2802 mark_reg_unknown(env, regs, i);
2803
2804 bpf_for_each_spilled_reg(i, state, reg) {
2805 if (!reg)
2806 continue;
2807 if (reg->ref_obj_id == ref_obj_id)
2808 __mark_reg_unknown(reg);
2809 }
2810}
2811
2812/* The pointer with the specified id has released its reference to kernel
2813 * resources. Identify all copies of the same pointer and clear the reference.
2814 */
2815static int release_reference(struct bpf_verifier_env *env,
2816 int ref_obj_id)
2817{
2818 struct bpf_verifier_state *vstate = env->cur_state;
2819 int err;
2820 int i;
2821
2822 err = release_reference_state(cur_func(env), ref_obj_id);
2823 if (err)
2824 return err;
2825
2826 for (i = 0; i <= vstate->curframe; i++)
2827 release_reg_references(env, vstate->frame[i], ref_obj_id);
2828
2829 return 0;
2830}
2831
2832static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2833 int *insn_idx)
2834{
2835 struct bpf_verifier_state *state = env->cur_state;
2836 struct bpf_func_state *caller, *callee;
2837 int i, err, subprog, target_insn;
2838
2839 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
2840 verbose(env, "the call stack of %d frames is too deep\n",
2841 state->curframe + 2);
2842 return -E2BIG;
2843 }
2844
2845 target_insn = *insn_idx + insn->imm;
2846 subprog = find_subprog(env, target_insn + 1);
2847 if (subprog < 0) {
2848 verbose(env, "verifier bug. No program starts at insn %d\n",
2849 target_insn + 1);
2850 return -EFAULT;
2851 }
2852
2853 caller = state->frame[state->curframe];
2854 if (state->frame[state->curframe + 1]) {
2855 verbose(env, "verifier bug. Frame %d already allocated\n",
2856 state->curframe + 1);
2857 return -EFAULT;
2858 }
2859
2860 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2861 if (!callee)
2862 return -ENOMEM;
2863 state->frame[state->curframe + 1] = callee;
2864
2865 /* callee cannot access r0, r6 - r9 for reading and has to write
2866 * into its own stack before reading from it.
2867 * callee can read/write into caller's stack
2868 */
2869 init_func_state(env, callee,
2870 /* remember the callsite, it will be used by bpf_exit */
2871 *insn_idx /* callsite */,
2872 state->curframe + 1 /* frameno within this callchain */,
2873 subprog /* subprog number within this prog */);
2874
2875 /* Transfer references to the callee */
2876 err = transfer_reference_state(callee, caller);
2877 if (err)
2878 return err;
2879
2880 /* copy r1 - r5 args that callee can access. The copy includes parent
2881 * pointers, which connects us up to the liveness chain
2882 */
2883 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2884 callee->regs[i] = caller->regs[i];
2885
2886 /* after the call registers r0 - r5 were scratched */
2887 for (i = 0; i < CALLER_SAVED_REGS; i++) {
2888 mark_reg_not_init(env, caller->regs, caller_saved[i]);
2889 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2890 }
2891
2892 /* only increment it after check_reg_arg() finished */
2893 state->curframe++;
2894
2895 /* and go analyze first insn of the callee */
2896 *insn_idx = target_insn;
2897
2898 if (env->log.level) {
2899 verbose(env, "caller:\n");
2900 print_verifier_state(env, caller);
2901 verbose(env, "callee:\n");
2902 print_verifier_state(env, callee);
2903 }
2904 return 0;
2905}
2906
2907static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2908{
2909 struct bpf_verifier_state *state = env->cur_state;
2910 struct bpf_func_state *caller, *callee;
2911 struct bpf_reg_state *r0;
2912 int err;
2913
2914 callee = state->frame[state->curframe];
2915 r0 = &callee->regs[BPF_REG_0];
2916 if (r0->type == PTR_TO_STACK) {
2917 /* technically it's ok to return caller's stack pointer
2918 * (or caller's caller's pointer) back to the caller,
2919 * since these pointers are valid. Only current stack
2920 * pointer will be invalid as soon as function exits,
2921 * but let's be conservative
2922 */
2923 verbose(env, "cannot return stack pointer to the caller\n");
2924 return -EINVAL;
2925 }
2926
2927 state->curframe--;
2928 caller = state->frame[state->curframe];
2929 /* return to the caller whatever r0 had in the callee */
2930 caller->regs[BPF_REG_0] = *r0;
2931
2932 /* Transfer references to the caller */
2933 err = transfer_reference_state(caller, callee);
2934 if (err)
2935 return err;
2936
2937 *insn_idx = callee->callsite + 1;
2938 if (env->log.level) {
2939 verbose(env, "returning from callee:\n");
2940 print_verifier_state(env, callee);
2941 verbose(env, "to caller at %d:\n", *insn_idx);
2942 print_verifier_state(env, caller);
2943 }
2944 /* clear everything in the callee */
2945 free_func_state(callee);
2946 state->frame[state->curframe + 1] = NULL;
2947 return 0;
2948}
2949
2950static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
2951 int func_id,
2952 struct bpf_call_arg_meta *meta)
2953{
2954 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
2955
2956 if (ret_type != RET_INTEGER ||
2957 (func_id != BPF_FUNC_get_stack &&
2958 func_id != BPF_FUNC_probe_read_str))
2959 return;
2960
2961 ret_reg->smax_value = meta->msize_smax_value;
2962 ret_reg->umax_value = meta->msize_umax_value;
2963 __reg_deduce_bounds(ret_reg);
2964 __reg_bound_offset(ret_reg);
2965}
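
/* Illustrative example (size hypothetical): for a call like
 * bpf_get_stack(ctx, buf, 64, flags), checking the size argument recorded
 * msize_smax_value == msize_umax_value == 64, so after the call R0's
 * smax_value/umax_value are clamped to 64 even though the helper's return
 * type is plain RET_INTEGER.
 */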
2966
2967static int
2968record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
2969 int func_id, int insn_idx)
2970{
2971 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
2972
2973 if (func_id != BPF_FUNC_tail_call &&
2974 func_id != BPF_FUNC_map_lookup_elem &&
2975 func_id != BPF_FUNC_map_update_elem &&
2976 func_id != BPF_FUNC_map_delete_elem &&
2977 func_id != BPF_FUNC_map_push_elem &&
2978 func_id != BPF_FUNC_map_pop_elem &&
2979 func_id != BPF_FUNC_map_peek_elem)
2980 return 0;
2981
2982 if (meta->map_ptr == NULL) {
2983 verbose(env, "kernel subsystem misconfigured verifier\n");
2984 return -EINVAL;
2985 }
2986
2987 if (!BPF_MAP_PTR(aux->map_state))
2988 bpf_map_ptr_store(aux, meta->map_ptr,
2989 meta->map_ptr->unpriv_array);
2990 else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
2991 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
2992 meta->map_ptr->unpriv_array);
2993 return 0;
2994}
2995
2996static int check_reference_leak(struct bpf_verifier_env *env)
2997{
2998 struct bpf_func_state *state = cur_func(env);
2999 int i;
3000
3001 for (i = 0; i < state->acquired_refs; i++) {
3002 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
3003 state->refs[i].id, state->refs[i].insn_idx);
3004 }
3005 return state->acquired_refs ? -EINVAL : 0;
3006}
3007
3008static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
3009{
3010 const struct bpf_func_proto *fn = NULL;
3011 struct bpf_reg_state *regs;
3012 struct bpf_call_arg_meta meta;
3013 bool changes_data;
3014 int i, err;
3015
3016 /* find function prototype */
3017 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
3018 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
3019 func_id);
3020 return -EINVAL;
3021 }
3022
3023 if (env->ops->get_func_proto)
3024 fn = env->ops->get_func_proto(func_id, env->prog);
3025 if (!fn) {
3026 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
3027 func_id);
3028 return -EINVAL;
3029 }
3030
3031 /* eBPF programs must be GPL compatible to use GPL-ed functions */
3032 if (!env->prog->gpl_compatible && fn->gpl_only) {
3033 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
3034 return -EINVAL;
3035 }
3036
3037 /* With LD_ABS/IND some JITs save/restore skb from r1. */
3038 changes_data = bpf_helper_changes_pkt_data(fn->func);
3039 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
3040 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
3041 func_id_name(func_id), func_id);
3042 return -EINVAL;
3043 }
3044
3045 memset(&meta, 0, sizeof(meta));
3046 meta.pkt_access = fn->pkt_access;
3047
3048 err = check_func_proto(fn, func_id);
3049 if (err) {
3050 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
3051 func_id_name(func_id), func_id);
3052 return err;
3053 }
3054
3055 meta.func_id = func_id;
3056 /* check args */
3057 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
3058 if (err)
3059 return err;
3060 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
3061 if (err)
3062 return err;
3063 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
3064 if (err)
3065 return err;
3066 err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
3067 if (err)
3068 return err;
3069 err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
3070 if (err)
3071 return err;
3072
3073 err = record_func_map(env, &meta, func_id, insn_idx);
3074 if (err)
3075 return err;
3076
3077 /* Mark slots with STACK_MISC in case of raw mode, stack offset
3078 * is inferred from register state.
3079 */
3080 for (i = 0; i < meta.access_size; i++) {
3081 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
3082 BPF_WRITE, -1, false);
3083 if (err)
3084 return err;
3085 }
3086
3087 if (func_id == BPF_FUNC_tail_call) {
3088 err = check_reference_leak(env);
3089 if (err) {
3090 verbose(env, "tail_call would lead to reference leak\n");
3091 return err;
3092 }
3093 } else if (is_release_function(func_id)) {
3094 err = release_reference(env, meta.ref_obj_id);
3095 if (err) {
3096 verbose(env, "func %s#%d reference has not been acquired before\n",
3097 func_id_name(func_id), func_id);
3098 return err;
3099 }
3100 }
3101
3102 regs = cur_regs(env);
3103
3104 /* check that the flags argument in get_local_storage(map, flags) is 0;
3105 * this is required because get_local_storage() can't return an error.
3106 */
3107 if (func_id == BPF_FUNC_get_local_storage &&
3108 !register_is_null(&regs[BPF_REG_2])) {
3109 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
3110 return -EINVAL;
3111 }
3112
3113 /* reset caller saved regs */
3114 for (i = 0; i < CALLER_SAVED_REGS; i++) {
3115 mark_reg_not_init(env, regs, caller_saved[i]);
3116 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3117 }
3118
3119 /* update return register (already marked as written above) */
3120 if (fn->ret_type == RET_INTEGER) {
3121 /* sets type to SCALAR_VALUE */
3122 mark_reg_unknown(env, regs, BPF_REG_0);
3123 } else if (fn->ret_type == RET_VOID) {
3124 regs[BPF_REG_0].type = NOT_INIT;
3125 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
3126 fn->ret_type == RET_PTR_TO_MAP_VALUE) {
3127 /* There is no offset yet applied, variable or fixed */
3128 mark_reg_known_zero(env, regs, BPF_REG_0);
3129 /* remember map_ptr, so that check_map_access()
3130 * can check 'value_size' boundary of memory access
3131 * to map element returned from bpf_map_lookup_elem()
3132 */
3133 if (meta.map_ptr == NULL) {
3134 verbose(env,
3135 "kernel subsystem misconfigured verifier\n");
3136 return -EINVAL;
3137 }
3138 regs[BPF_REG_0].map_ptr = meta.map_ptr;
3139 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
3140 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
3141 if (map_value_has_spin_lock(meta.map_ptr))
3142 regs[BPF_REG_0].id = ++env->id_gen;
3143 } else {
3144 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
3145 regs[BPF_REG_0].id = ++env->id_gen;
3146 }
3147 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
3148 mark_reg_known_zero(env, regs, BPF_REG_0);
3149 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
3150 if (is_acquire_function(func_id)) {
3151 int id = acquire_reference_state(env, insn_idx);
3152
3153 if (id < 0)
3154 return id;
3155 /* For mark_ptr_or_null_reg() */
3156 regs[BPF_REG_0].id = id;
3157 /* For release_reference() */
3158 regs[BPF_REG_0].ref_obj_id = id;
3159 } else {
3160 /* For mark_ptr_or_null_reg() */
3161 regs[BPF_REG_0].id = ++env->id_gen;
3162 }
3163 } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
3164 mark_reg_known_zero(env, regs, BPF_REG_0);
3165 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
3166 regs[BPF_REG_0].id = ++env->id_gen;
3167 } else {
3168 verbose(env, "unknown return type %d of func %s#%d\n",
3169 fn->ret_type, func_id_name(func_id), func_id);
3170 return -EINVAL;
3171 }
3172
3173 if (is_ptr_cast_function(func_id))
3174 /* For release_reference() */
3175 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
3176
3177 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
3178
3179 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
3180 if (err)
3181 return err;
3182
3183 if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
3184 const char *err_str;
3185
3186#ifdef CONFIG_PERF_EVENTS
3187 err = get_callchain_buffers(sysctl_perf_event_max_stack);
3188 err_str = "cannot get callchain buffer for func %s#%d\n";
3189#else
3190 err = -ENOTSUPP;
3191 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
3192#endif
3193 if (err) {
3194 verbose(env, err_str, func_id_name(func_id), func_id);
3195 return err;
3196 }
3197
3198 env->prog->has_callchain_buf = true;
3199 }
3200
3201 if (changes_data)
3202 clear_all_pkt_pointers(env);
3203 return 0;
3204}
3205
3206static bool signed_add_overflows(s64 a, s64 b)
3207{
3208 /* Do the add in u64, where overflow is well-defined */
3209 s64 res = (s64)((u64)a + (u64)b);
3210
3211 if (b < 0)
3212 return res > a;
3213 return res < a;
3214}
3215
3216static bool signed_sub_overflows(s64 a, s64 b)
3217{
3218 /* Do the sub in u64, where overflow is well-defined */
3219 s64 res = (s64)((u64)a - (u64)b);
3220
3221 if (b < 0)
3222 return res < a;
3223 return res > a;
3224}
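
/* Illustrative checks (values hypothetical): signed_add_overflows(S64_MAX, 1)
 * computes res == S64_MIN via the wrapping u64 add; since b > 0 and
 * res < a, overflow is reported. signed_sub_overflows(S64_MIN, 1) wraps to
 * res == S64_MAX; since b > 0 and res > a, it is likewise reported.
 */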
3225
3226static bool check_reg_sane_offset(struct bpf_verifier_env *env,
3227 const struct bpf_reg_state *reg,
3228 enum bpf_reg_type type)
3229{
3230 bool known = tnum_is_const(reg->var_off);
3231 s64 val = reg->var_off.value;
3232 s64 smin = reg->smin_value;
3233
3234 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
3235 verbose(env, "math between %s pointer and %lld is not allowed\n",
3236 reg_type_str[type], val);
3237 return false;
3238 }
3239
3240 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
3241 verbose(env, "%s pointer offset %d is not allowed\n",
3242 reg_type_str[type], reg->off);
3243 return false;
3244 }
3245
3246 if (smin == S64_MIN) {
3247 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
3248 reg_type_str[type]);
3249 return false;
3250 }
3251
3252 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
3253 verbose(env, "value %lld makes %s pointer be out of bounds\n",
3254 smin, reg_type_str[type]);
3255 return false;
3256 }
3257
3258 return true;
3259}
3260
3261static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
3262{
3263 return &env->insn_aux_data[env->insn_idx];
3264}
3265
3266static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
3267 u32 *ptr_limit, u8 opcode, bool off_is_neg)
3268{
3269 bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
3270 (opcode == BPF_SUB && !off_is_neg);
3271 u32 off;
3272
3273 switch (ptr_reg->type) {
3274 case PTR_TO_STACK:
3275 off = ptr_reg->off + ptr_reg->var_off.value;
3276 if (mask_to_left)
3277 *ptr_limit = MAX_BPF_STACK + off;
3278 else
3279 *ptr_limit = -off;
3280 return 0;
3281 case PTR_TO_MAP_VALUE:
3282 if (mask_to_left) {
3283 *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
3284 } else {
3285 off = ptr_reg->smin_value + ptr_reg->off;
3286 *ptr_limit = ptr_reg->map_ptr->value_size - off;
3287 }
3288 return 0;
3289 default:
3290 return -EINVAL;
3291 }
3292}
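
/* Illustrative limits (offsets hypothetical): for a PTR_TO_STACK with
 * off == -64 and a known-zero var_off, a BPF_ADD of a non-negative unknown
 * scalar gets *ptr_limit == 64 (the distance up to the frame pointer), while
 * a negative offset masks to the left with *ptr_limit == MAX_BPF_STACK - 64
 * == 448 (the distance down to the lowest valid stack slot).
 */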
3293
3294static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
3295 const struct bpf_insn *insn)
3296{
3297 return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
3298}
3299
3300static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
3301 u32 alu_state, u32 alu_limit)
3302{
3303 /* If we arrived here from different branches with different
3304 * state or limits to sanitize, then this won't work.
3305 */
3306 if (aux->alu_state &&
3307 (aux->alu_state != alu_state ||
3308 aux->alu_limit != alu_limit))
3309 return -EACCES;
3310
3311 /* Corresponding fixup done in fixup_bpf_calls(). */
3312 aux->alu_state = alu_state;
3313 aux->alu_limit = alu_limit;
3314 return 0;
3315}
3316
3317static int sanitize_val_alu(struct bpf_verifier_env *env,
3318 struct bpf_insn *insn)
3319{
3320 struct bpf_insn_aux_data *aux = cur_aux(env);
3321
3322 if (can_skip_alu_sanitation(env, insn))
3323 return 0;
3324
3325 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
3326}
3327
3328static int sanitize_ptr_alu(struct bpf_verifier_env *env,
3329 struct bpf_insn *insn,
3330 const struct bpf_reg_state *ptr_reg,
3331 struct bpf_reg_state *dst_reg,
3332 bool off_is_neg)
3333{
3334 struct bpf_verifier_state *vstate = env->cur_state;
3335 struct bpf_insn_aux_data *aux = cur_aux(env);
3336 bool ptr_is_dst_reg = ptr_reg == dst_reg;
3337 u8 opcode = BPF_OP(insn->code);
3338 u32 alu_state, alu_limit;
3339 struct bpf_reg_state tmp;
3340 bool ret;
3341
3342 if (can_skip_alu_sanitation(env, insn))
3343 return 0;
3344
3345 /* We already marked aux for masking from non-speculative
3346 * paths, thus we got here in the first place. We only care
3347 * to explore bad access from here.
3348 */
3349 if (vstate->speculative)
3350 goto do_sim;
3351
3352 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
3353 alu_state |= ptr_is_dst_reg ?
3354 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
3355
3356 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
3357 return 0;
3358 if (update_alu_sanitation_state(aux, alu_state, alu_limit))
3359 return -EACCES;
3360do_sim:
3361 /* Simulate and find potential out-of-bounds access under
3362 * speculative execution from truncation as a result of
3363 * masking when off was not within expected range. If off
3364 * sits in dst, then we temporarily need to move ptr there
3365 * to simulate dst (== 0) +/-= ptr. Needed, for example,
3366 * for cases where we use K-based arithmetic in one direction
3367 * and truncated reg-based in the other in order to explore
3368 * bad access.
3369 */
3370 if (!ptr_is_dst_reg) {
3371 tmp = *dst_reg;
3372 *dst_reg = *ptr_reg;
3373 }
3374 ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
3375 if (!ptr_is_dst_reg)
3376 *dst_reg = tmp;
3377 return !ret ? -EFAULT : 0;
3378}
3379
3380/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
3381 * Caller should also handle BPF_MOV case separately.
3382 * If we return -EACCES, caller may want to try again treating pointer as a
3383 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
3384 */
3385static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3386 struct bpf_insn *insn,
3387 const struct bpf_reg_state *ptr_reg,
3388 const struct bpf_reg_state *off_reg)
3389{
3390 struct bpf_verifier_state *vstate = env->cur_state;
3391 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3392 struct bpf_reg_state *regs = state->regs, *dst_reg;
3393 bool known = tnum_is_const(off_reg->var_off);
3394 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
3395 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
3396 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
3397 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
3398 u32 dst = insn->dst_reg, src = insn->src_reg;
3399 u8 opcode = BPF_OP(insn->code);
3400 int ret;
3401
3402 dst_reg = &regs[dst];
3403
3404 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
3405 smin_val > smax_val || umin_val > umax_val) {
3406 /* Taint dst register if offset had invalid bounds derived from
3407 * e.g. dead branches.
3408 */
3409 __mark_reg_unknown(dst_reg);
3410 return 0;
3411 }
3412
3413 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3414 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
3415 verbose(env,
3416 "R%d 32-bit pointer arithmetic prohibited\n",
3417 dst);
3418 return -EACCES;
3419 }
3420
3421 switch (ptr_reg->type) {
3422 case PTR_TO_MAP_VALUE_OR_NULL:
3423 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
3424 dst, reg_type_str[ptr_reg->type]);
3425 return -EACCES;
3426 case CONST_PTR_TO_MAP:
3427 case PTR_TO_PACKET_END:
3428 case PTR_TO_SOCKET:
3429 case PTR_TO_SOCKET_OR_NULL:
3430 case PTR_TO_SOCK_COMMON:
3431 case PTR_TO_SOCK_COMMON_OR_NULL:
3432 case PTR_TO_TCP_SOCK:
3433 case PTR_TO_TCP_SOCK_OR_NULL:
3434 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3435 dst, reg_type_str[ptr_reg->type]);
3436 return -EACCES;
3437 case PTR_TO_MAP_VALUE:
3438 if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
3439 verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
3440 off_reg == dst_reg ? dst : src);
3441 return -EACCES;
3442 }
3443 /* fall-through */
3444 default:
3445 break;
3446 }
3447
3448 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
3449 * The id may be overwritten later if we create a new variable offset.
3450 */
3451 dst_reg->type = ptr_reg->type;
3452 dst_reg->id = ptr_reg->id;
3453
3454 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
3455 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
3456 return -EINVAL;
3457
3458 switch (opcode) {
3459 case BPF_ADD:
3460 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3461 if (ret < 0) {
3462 verbose(env, "R%d tried to add from different maps or paths\n", dst);
3463 return ret;
3464 }
3465 /* We can take a fixed offset as long as it doesn't overflow
3466 * the s32 'off' field
3467 */
3468 if (known && (ptr_reg->off + smin_val ==
3469 (s64)(s32)(ptr_reg->off + smin_val))) {
3470 /* pointer += K. Accumulate it into fixed offset */
3471 dst_reg->smin_value = smin_ptr;
3472 dst_reg->smax_value = smax_ptr;
3473 dst_reg->umin_value = umin_ptr;
3474 dst_reg->umax_value = umax_ptr;
3475 dst_reg->var_off = ptr_reg->var_off;
3476 dst_reg->off = ptr_reg->off + smin_val;
3477 dst_reg->raw = ptr_reg->raw;
3478 break;
3479 }
3480 /* A new variable offset is created. Note that off_reg->off
3481 * == 0, since it's a scalar.
3482 * dst_reg gets the pointer type and since some positive
3483 * integer value was added to the pointer, give it a new 'id'
3484 * if it's a PTR_TO_PACKET.
3485 * This creates a new 'base' pointer, off_reg (variable) gets
3486 * added into the variable offset, and we copy the fixed offset
3487 * from ptr_reg.
3488 */
3489 if (signed_add_overflows(smin_ptr, smin_val) ||
3490 signed_add_overflows(smax_ptr, smax_val)) {
3491 dst_reg->smin_value = S64_MIN;
3492 dst_reg->smax_value = S64_MAX;
3493 } else {
3494 dst_reg->smin_value = smin_ptr + smin_val;
3495 dst_reg->smax_value = smax_ptr + smax_val;
3496 }
3497 if (umin_ptr + umin_val < umin_ptr ||
3498 umax_ptr + umax_val < umax_ptr) {
3499 dst_reg->umin_value = 0;
3500 dst_reg->umax_value = U64_MAX;
3501 } else {
3502 dst_reg->umin_value = umin_ptr + umin_val;
3503 dst_reg->umax_value = umax_ptr + umax_val;
3504 }
3505 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
3506 dst_reg->off = ptr_reg->off;
3507 dst_reg->raw = ptr_reg->raw;
3508 if (reg_is_pkt_pointer(ptr_reg)) {
3509 dst_reg->id = ++env->id_gen;
3510 /* something was added to pkt_ptr, set range to zero */
3511 dst_reg->raw = 0;
3512 }
3513 break;
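/* Worked example (illustrative numbers): for a PTR_TO_PACKET with off=0
 * plus a scalar known to lie in [0, 64], the code above keeps
 * dst_reg->off = 0, sets dst_reg's umin/umax (and smin/smax) to [0, 64]
 * via the overflow-checked additions, folds the scalar into var_off with
 * tnum_add(), assigns a fresh id and drops the previously verified
 * packet range, so a new bounds check against pkt_end is required before
 * any access through dst_reg.
 */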
3514 case BPF_SUB:
3515 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3516 if (ret < 0) {
3517 verbose(env, "R%d tried to sub from different maps or paths\n", dst);
3518 return ret;
3519 }
3520 if (dst_reg == off_reg) {
3521 /* scalar -= pointer. Creates an unknown scalar */
3522 verbose(env, "R%d tried to subtract pointer from scalar\n",
3523 dst);
3524 return -EACCES;
3525 }
3526 /* We don't allow subtraction from FP, because (according to
3527 * the test_verifier.c test "invalid fp arithmetic") JITs might
3528 * not be able to deal with it.
3529 */
3530 if (ptr_reg->type == PTR_TO_STACK) {
3531 verbose(env, "R%d subtraction from stack pointer prohibited\n",
3532 dst);
3533 return -EACCES;
3534 }
3535 if (known && (ptr_reg->off - smin_val ==
3536 (s64)(s32)(ptr_reg->off - smin_val))) {
3537 /* pointer -= K. Subtract it from fixed offset */
3538 dst_reg->smin_value = smin_ptr;
3539 dst_reg->smax_value = smax_ptr;
3540 dst_reg->umin_value = umin_ptr;
3541 dst_reg->umax_value = umax_ptr;
3542 dst_reg->var_off = ptr_reg->var_off;
3543 dst_reg->id = ptr_reg->id;
3544 dst_reg->off = ptr_reg->off - smin_val;
3545 dst_reg->raw = ptr_reg->raw;
3546 break;
3547 }
3548 /* A new variable offset is created. If the subtrahend is known
3549 * nonnegative, then any reg->range we had before is still good.
3550 */
3551 if (signed_sub_overflows(smin_ptr, smax_val) ||
3552 signed_sub_overflows(smax_ptr, smin_val)) {
3553 /* Overflow possible, we know nothing */
3554 dst_reg->smin_value = S64_MIN;
3555 dst_reg->smax_value = S64_MAX;
3556 } else {
3557 dst_reg->smin_value = smin_ptr - smax_val;
3558 dst_reg->smax_value = smax_ptr - smin_val;
3559 }
3560 if (umin_ptr < umax_val) {
3561 /* Overflow possible, we know nothing */
3562 dst_reg->umin_value = 0;
3563 dst_reg->umax_value = U64_MAX;
3564 } else {
3565 /* Cannot overflow (as long as bounds are consistent) */
3566 dst_reg->umin_value = umin_ptr - umax_val;
3567 dst_reg->umax_value = umax_ptr - umin_val;
3568 }
3569 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
3570 dst_reg->off = ptr_reg->off;
3571 dst_reg->raw = ptr_reg->raw;
3572 if (reg_is_pkt_pointer(ptr_reg)) {
3573 dst_reg->id = ++env->id_gen;
3574 /* if the scalar may be negative, pkt_ptr may have advanced; set range to zero */
3575 if (smin_val < 0)
3576 dst_reg->raw = 0;
3577 }
3578 break;
3579 case BPF_AND:
3580 case BPF_OR:
3581 case BPF_XOR:
3582 /* bitwise ops on pointers are troublesome, prohibit. */
3583 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
3584 dst, bpf_alu_string[opcode >> 4]);
3585 return -EACCES;
3586 default:
3587 /* other operators (e.g. MUL,LSH) produce non-pointer results */
3588 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
3589 dst, bpf_alu_string[opcode >> 4]);
3590 return -EACCES;
3591 }
3592
3593 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
3594 return -EINVAL;
3595
3596 __update_reg_bounds(dst_reg);
3597 __reg_deduce_bounds(dst_reg);
3598 __reg_bound_offset(dst_reg);
3599
3600 /* For unprivileged we require the resulting offset to be in bounds
3601 * in order to be able to sanitize the access later on.
3602 */
3603 if (!env->allow_ptr_leaks) {
3604 if (dst_reg->type == PTR_TO_MAP_VALUE &&
3605 check_map_access(env, dst, dst_reg->off, 1, false)) {
3606 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
3607 "prohibited for !root\n", dst);
3608 return -EACCES;
3609 } else if (dst_reg->type == PTR_TO_STACK &&
3610 check_stack_access(env, dst_reg, dst_reg->off +
3611 dst_reg->var_off.value, 1)) {
3612 verbose(env, "R%d stack pointer arithmetic goes out of range, "
3613 "prohibited for !root\n", dst);
3614 return -EACCES;
3615 }
3616 }
3617
3618 return 0;
3619}
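/* Illustrative (hypothetical) unprivileged case: with a 16 byte map
 * value, "r0 = map_value; r0 += 64" leaves dst_reg->off = 64, so the
 * check_map_access() call above fails and the !root error is emitted
 * even though no load or store through r0 has happened yet. A program
 * running with allow_ptr_leaks would only be rejected at the actual
 * memory access.
 */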
3620
3621/* WARNING: This function does calculations on 64-bit values, but the actual
3622 * execution may occur on 32-bit values. Therefore, things like bitshifts
3623 * need extra checks in the 32-bit case.
3624 */
3625static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3626 struct bpf_insn *insn,
3627 struct bpf_reg_state *dst_reg,
3628 struct bpf_reg_state src_reg)
3629{
3630 struct bpf_reg_state *regs = cur_regs(env);
3631 u8 opcode = BPF_OP(insn->code);
3632 bool src_known, dst_known;
3633 s64 smin_val, smax_val;
3634 u64 umin_val, umax_val;
3635 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
3636 u32 dst = insn->dst_reg;
3637 int ret;
3638
3639 if (insn_bitness == 32) {
3640 /* Relevant for 32-bit RSH: Information can propagate towards
3641 * LSB, so it isn't sufficient to only truncate the output to
3642 * 32 bits.
3643 */
3644 coerce_reg_to_size(dst_reg, 4);
3645 coerce_reg_to_size(&src_reg, 4);
3646 }
3647
3648 smin_val = src_reg.smin_value;
3649 smax_val = src_reg.smax_value;
3650 umin_val = src_reg.umin_value;
3651 umax_val = src_reg.umax_value;
3652 src_known = tnum_is_const(src_reg.var_off);
3653 dst_known = tnum_is_const(dst_reg->var_off);
3654
3655 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
3656 smin_val > smax_val || umin_val > umax_val) {
3657 /* Taint dst register if offset had invalid bounds derived from
3658 * e.g. dead branches.
3659 */
3660 __mark_reg_unknown(dst_reg);
3661 return 0;
3662 }
3663
3664 if (!src_known &&
3665 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
3666 __mark_reg_unknown(dst_reg);
3667 return 0;
3668 }
3669
3670 switch (opcode) {
3671 case BPF_ADD:
3672 ret = sanitize_val_alu(env, insn);
3673 if (ret < 0) {
3674 verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
3675 return ret;
3676 }
3677 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
3678 signed_add_overflows(dst_reg->smax_value, smax_val)) {
3679 dst_reg->smin_value = S64_MIN;
3680 dst_reg->smax_value = S64_MAX;
3681 } else {
3682 dst_reg->smin_value += smin_val;
3683 dst_reg->smax_value += smax_val;
3684 }
3685 if (dst_reg->umin_value + umin_val < umin_val ||
3686 dst_reg->umax_value + umax_val < umax_val) {
3687 dst_reg->umin_value = 0;
3688 dst_reg->umax_value = U64_MAX;
3689 } else {
3690 dst_reg->umin_value += umin_val;
3691 dst_reg->umax_value += umax_val;
3692 }
3693 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
3694 break;
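/* Worked example (illustrative): adding a scalar in [1, 2] to a scalar
 * in [0, U64_MAX - 1] trips the unsigned wrap test above
 * (dst_reg->umax_value + umax_val < umax_val), so the unsigned bounds
 * collapse to [0, U64_MAX]; with small operands such as [0, 10] + [1, 2]
 * both additions are exact and the result is tracked as [1, 12].
 */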
3695 case BPF_SUB:
3696 ret = sanitize_val_alu(env, insn);
3697 if (ret < 0) {
3698 verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
3699 return ret;
3700 }
3701 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
3702 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
3703 /* Overflow possible, we know nothing */
3704 dst_reg->smin_value = S64_MIN;
3705 dst_reg->smax_value = S64_MAX;
3706 } else {
3707 dst_reg->smin_value -= smax_val;
3708 dst_reg->smax_value -= smin_val;
3709 }
3710 if (dst_reg->umin_value < umax_val) {
3711 /* Overflow possible, we know nothing */
3712 dst_reg->umin_value = 0;
3713 dst_reg->umax_value = U64_MAX;
3714 } else {
3715 /* Cannot overflow (as long as bounds are consistent) */
3716 dst_reg->umin_value -= umax_val;
3717 dst_reg->umax_value -= umin_val;
3718 }
3719 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
3720 break;
3721 case BPF_MUL:
3722 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
3723 if (smin_val < 0 || dst_reg->smin_value < 0) {
3724 /* Ain't nobody got time to multiply that sign */
3725 __mark_reg_unbounded(dst_reg);
3726 __update_reg_bounds(dst_reg);
3727 break;
3728 }
3729 /* Both values are positive, so we can work with unsigned and
3730 * copy the result to signed (unless it exceeds S64_MAX).
3731 */
3732 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
3733 /* Potential overflow, we know nothing */
3734 __mark_reg_unbounded(dst_reg);
3735 /* (except what we can learn from the var_off) */
3736 __update_reg_bounds(dst_reg);
3737 break;
3738 }
3739 dst_reg->umin_value *= umin_val;
3740 dst_reg->umax_value *= umax_val;
3741 if (dst_reg->umax_value > S64_MAX) {
3742 /* Overflow possible, we know nothing */
3743 dst_reg->smin_value = S64_MIN;
3744 dst_reg->smax_value = S64_MAX;
3745 } else {
3746 dst_reg->smin_value = dst_reg->umin_value;
3747 dst_reg->smax_value = dst_reg->umax_value;
3748 }
3749 break;
3750 case BPF_AND:
3751 if (src_known && dst_known) {
3752 __mark_reg_known(dst_reg, dst_reg->var_off.value &
3753 src_reg.var_off.value);
3754 break;
3755 }
3756 /* We get our minimum from the var_off, since that's inherently
3757 * bitwise. Our maximum is the minimum of the operands' maxima.
3758 */
3759 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
3760 dst_reg->umin_value = dst_reg->var_off.value;
3761 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
3762 if (dst_reg->smin_value < 0 || smin_val < 0) {
3763 /* Lose signed bounds when ANDing negative numbers,
3764 * ain't nobody got time for that.
3765 */
3766 dst_reg->smin_value = S64_MIN;
3767 dst_reg->smax_value = S64_MAX;
3768 } else {
3769 /* ANDing two positives gives a positive, so safe to
3770 * cast result into s64.
3771 */
3772 dst_reg->smin_value = dst_reg->umin_value;
3773 dst_reg->smax_value = dst_reg->umax_value;
3774 }
3775 /* We may learn something more from the var_off */
3776 __update_reg_bounds(dst_reg);
3777 break;
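/* Worked example (illustrative): a dst in [0, 255] ANDed with an unknown
 * scalar whose umax_value is 0x70 ends up with
 * umax_value = min(255, 0x70) = 0x70, while umin_value falls back to the
 * bits known to be set in var_off (typically 0 here); since both
 * operands are known nonnegative, the signed bounds are copied straight
 * from the unsigned ones.
 */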
3778 case BPF_OR:
3779 if (src_known && dst_known) {
3780 __mark_reg_known(dst_reg, dst_reg->var_off.value |
3781 src_reg.var_off.value);
3782 break;
3783 }
3784 /* We get our maximum from the var_off, and our minimum is the
3785 * maximum of the operands' minima
3786 */
3787 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
3788 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
3789 dst_reg->umax_value = dst_reg->var_off.value |
3790 dst_reg->var_off.mask;
3791 if (dst_reg->smin_value < 0 || smin_val < 0) {
3792 /* Lose signed bounds when ORing negative numbers,
3793 * ain't nobody got time for that.
3794 */
3795 dst_reg->smin_value = S64_MIN;
3796 dst_reg->smax_value = S64_MAX;
3797 } else {
3798 /* ORing two positives gives a positive, so safe to
3799 * cast result into s64.
3800 */
3801 dst_reg->smin_value = dst_reg->umin_value;
3802 dst_reg->smax_value = dst_reg->umax_value;
3803 }
3804 /* We may learn something more from the var_off */
3805 __update_reg_bounds(dst_reg);
3806 break;
3807 case BPF_LSH:
3808 if (umax_val >= insn_bitness) {
3809 /* Shifts greater than 31 or 63 are undefined.
3810 * This includes shifts by a negative number.
3811 */
3812 mark_reg_unknown(env, regs, insn->dst_reg);
3813 break;
3814 }
3815 /* We lose all sign bit information (except what we can pick
3816 * up from var_off)
3817 */
3818 dst_reg->smin_value = S64_MIN;
3819 dst_reg->smax_value = S64_MAX;
3820 /* If we might shift our top bit out, then we know nothing */
3821 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
3822 dst_reg->umin_value = 0;
3823 dst_reg->umax_value = U64_MAX;
3824 } else {
3825 dst_reg->umin_value <<= umin_val;
3826 dst_reg->umax_value <<= umax_val;
3827 }
3828 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
3829 /* We may learn something more from the var_off */
3830 __update_reg_bounds(dst_reg);
3831 break;
3832 case BPF_RSH:
3833 if (umax_val >= insn_bitness) {
3834 /* Shifts greater than 31 or 63 are undefined.
3835 * This includes shifts by a negative number.
3836 */
3837 mark_reg_unknown(env, regs, insn->dst_reg);
3838 break;
3839 }
3840 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
3841 * be negative, then either:
3842 * 1) src_reg might be zero, so the sign bit of the result is
3843 * unknown, so we lose our signed bounds
3844 * 2) it's known negative, thus the unsigned bounds capture the
3845 * signed bounds
3846 * 3) the signed bounds cross zero, so they tell us nothing
3847 * about the result
3848 * If the value in dst_reg is known nonnegative, then again the
3849 * unsigned bounds capture the signed bounds.
3850 * Thus, in all cases it suffices to blow away our signed bounds
3851 * and rely on inferring new ones from the unsigned bounds and
3852 * var_off of the result.
3853 */
3854 dst_reg->smin_value = S64_MIN;
3855 dst_reg->smax_value = S64_MAX;
3856 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
3857 dst_reg->umin_value >>= umax_val;
3858 dst_reg->umax_value >>= umin_val;
3859 /* We may learn something more from the var_off */
3860 __update_reg_bounds(dst_reg);
3861 break;
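/* Worked example (illustrative): a 64-bit value in [0x100, 0x1ff]
 * right-shifted by a scalar in [4, 8] yields umin = 0x100 >> 8 = 1 and
 * umax = 0x1ff >> 4 = 0x1f; the signed bounds are discarded first, as
 * explained above, and re-derived from the unsigned bounds and var_off
 * by the __update_reg_bounds()/__reg_deduce_bounds() calls.
 */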
3862 case BPF_ARSH:
3863 if (umax_val >= insn_bitness) {
3864 /* Shifts greater than 31 or 63 are undefined.
3865 * This includes shifts by a negative number.
3866 */
3867 mark_reg_unknown(env, regs, insn->dst_reg);
3868 break;
3869 }
3870
3871 /* Upon reaching here, src_known is true and
3872 * umax_val is equal to umin_val.
3873 */
3874 dst_reg->smin_value >>= umin_val;
3875 dst_reg->smax_value >>= umin_val;
3876 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
3877
3878 /* blow away the dst_reg umin_value/umax_value and rely on
3879 * dst_reg var_off to refine the result.
3880 */
3881 dst_reg->umin_value = 0;
3882 dst_reg->umax_value = U64_MAX;
3883 __update_reg_bounds(dst_reg);
3884 break;
3885 default:
3886 mark_reg_unknown(env, regs, insn->dst_reg);
3887 break;
3888 }
3889
3890 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3891 /* 32-bit ALU ops are (32,32)->32 */
3892 coerce_reg_to_size(dst_reg, 4);
3893 }
3894
3895 __reg_deduce_bounds(dst_reg);
3896 __reg_bound_offset(dst_reg);
3897 return 0;
3898}
3899
3900/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
3901 * and var_off.
3902 */
3903static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
3904 struct bpf_insn *insn)
3905{
3906 struct bpf_verifier_state *vstate = env->cur_state;
3907 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3908 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
3909 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
3910 u8 opcode = BPF_OP(insn->code);
3911
3912 dst_reg = &regs[insn->dst_reg];
3913 src_reg = NULL;
3914 if (dst_reg->type != SCALAR_VALUE)
3915 ptr_reg = dst_reg;
3916 if (BPF_SRC(insn->code) == BPF_X) {
3917 src_reg = &regs[insn->src_reg];
3918 if (src_reg->type != SCALAR_VALUE) {
3919 if (dst_reg->type != SCALAR_VALUE) {
3920 /* Combining two pointers by any ALU op yields
3921 * an arbitrary scalar. Disallow all math except
3922 * pointer subtraction
3923 */
3924 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
3925 mark_reg_unknown(env, regs, insn->dst_reg);
3926 return 0;
3927 }
3928 verbose(env, "R%d pointer %s pointer prohibited\n",
3929 insn->dst_reg,
3930 bpf_alu_string[opcode >> 4]);
3931 return -EACCES;
3932 } else {
3933 /* scalar += pointer
3934 * This is legal, but we have to reverse our
3935 * src/dest handling in computing the range
3936 */
3937 return adjust_ptr_min_max_vals(env, insn,
3938 src_reg, dst_reg);
3939 }
3940 } else if (ptr_reg) {
3941 /* pointer += scalar */
3942 return adjust_ptr_min_max_vals(env, insn,
3943 dst_reg, src_reg);
3944 }
3945 } else {
3946 /* Pretend the src is a reg with a known value, since we only
3947 * need to be able to read from this state.
3948 */
3949 off_reg.type = SCALAR_VALUE;
3950 __mark_reg_known(&off_reg, insn->imm);
3951 src_reg = &off_reg;
3952 if (ptr_reg) /* pointer += K */
3953 return adjust_ptr_min_max_vals(env, insn,
3954 ptr_reg, src_reg);
3955 }
3956
3957 /* Got here implies adding two SCALAR_VALUEs */
3958 if (WARN_ON_ONCE(ptr_reg)) {
3959 print_verifier_state(env, state);
3960 verbose(env, "verifier internal error: unexpected ptr_reg\n");
3961 return -EINVAL;
3962 }
3963 if (WARN_ON(!src_reg)) {
3964 print_verifier_state(env, state);
3965 verbose(env, "verifier internal error: no src_reg\n");
3966 return -EINVAL;
3967 }
3968 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
3969}
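/* For illustration (hypothetical insns): "r1 = r10; r1 += r2" with r2 a
 * scalar is routed to adjust_ptr_min_max_vals() with ptr_reg = r1 and
 * off_reg = r2, while the reversed "r2 += r1" (scalar += pointer) goes
 * through the same helper with the operands swapped, so r2 inherits the
 * pointer type; combining two pointers with anything other than a
 * privileged BPF_SUB is rejected above.
 */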
3970
3971/* check validity of 32-bit and 64-bit arithmetic operations */
3972static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
3973{
3974 struct bpf_reg_state *regs = cur_regs(env);
3975 u8 opcode = BPF_OP(insn->code);
3976 int err;
3977
3978 if (opcode == BPF_END || opcode == BPF_NEG) {
3979 if (opcode == BPF_NEG) {
3980 if (BPF_SRC(insn->code) != 0 ||
3981 insn->src_reg != BPF_REG_0 ||
3982 insn->off != 0 || insn->imm != 0) {
3983 verbose(env, "BPF_NEG uses reserved fields\n");
3984 return -EINVAL;
3985 }
3986 } else {
3987 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
3988 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
3989 BPF_CLASS(insn->code) == BPF_ALU64) {
3990 verbose(env, "BPF_END uses reserved fields\n");
3991 return -EINVAL;
3992 }
3993 }
3994
3995 /* check src operand */
3996 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3997 if (err)
3998 return err;
3999
4000 if (is_pointer_value(env, insn->dst_reg)) {
4001 verbose(env, "R%d pointer arithmetic prohibited\n",
4002 insn->dst_reg);
4003 return -EACCES;
4004 }
4005
4006 /* check dest operand */
4007 err = check_reg_arg(env, insn->dst_reg, DST_OP);
4008 if (err)
4009 return err;
4010
4011 } else if (opcode == BPF_MOV) {
4012
4013 if (BPF_SRC(insn->code) == BPF_X) {
4014 if (insn->imm != 0 || insn->off != 0) {
4015 verbose(env, "BPF_MOV uses reserved fields\n");
4016 return -EINVAL;
4017 }
4018
4019 /* check src operand */
4020 err = check_reg_arg(env, insn->src_reg, SRC_OP);
4021 if (err)
4022 return err;
4023 } else {
4024 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
4025 verbose(env, "BPF_MOV uses reserved fields\n");
4026 return -EINVAL;
4027 }
4028 }
4029
4030 /* check dest operand, mark as required later */
4031 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
4032 if (err)
4033 return err;
4034
4035 if (BPF_SRC(insn->code) == BPF_X) {
4036 struct bpf_reg_state *src_reg = regs + insn->src_reg;
4037 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
4038
4039 if (BPF_CLASS(insn->code) == BPF_ALU64) {
4040 /* case: R1 = R2
4041 * copy register state to dest reg
4042 */
4043 *dst_reg = *src_reg;
4044 dst_reg->live |= REG_LIVE_WRITTEN;
4045 } else {
4046 /* R1 = (u32) R2 */
4047 if (is_pointer_value(env, insn->src_reg)) {
4048 verbose(env,
4049 "R%d partial copy of pointer\n",
4050 insn->src_reg);
4051 return -EACCES;
4052 } else if (src_reg->type == SCALAR_VALUE) {
4053 *dst_reg = *src_reg;
4054 dst_reg->live |= REG_LIVE_WRITTEN;
4055 } else {
4056 mark_reg_unknown(env, regs,
4057 insn->dst_reg);
4058 }
4059 coerce_reg_to_size(dst_reg, 4);
4060 }
4061 } else {
4062 /* case: R = imm
4063 * remember the value we stored into this reg
4064 */
4065 /* clear any state __mark_reg_known doesn't set */
4066 mark_reg_unknown(env, regs, insn->dst_reg);
4067 regs[insn->dst_reg].type = SCALAR_VALUE;
4068 if (BPF_CLASS(insn->code) == BPF_ALU64) {
4069 __mark_reg_known(regs + insn->dst_reg,
4070 insn->imm);
4071 } else {
4072 __mark_reg_known(regs + insn->dst_reg,
4073 (u32)insn->imm);
4074 }
4075 }
4076
4077 } else if (opcode > BPF_END) {
4078 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
4079 return -EINVAL;
4080
4081 } else { /* all other ALU ops: and, sub, xor, add, ... */
4082
4083 if (BPF_SRC(insn->code) == BPF_X) {
4084 if (insn->imm != 0 || insn->off != 0) {
4085 verbose(env, "BPF_ALU uses reserved fields\n");
4086 return -EINVAL;
4087 }
4088 /* check src1 operand */
4089 err = check_reg_arg(env, insn->src_reg, SRC_OP);
4090 if (err)
4091 return err;
4092 } else {
4093 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
4094 verbose(env, "BPF_ALU uses reserved fields\n");
4095 return -EINVAL;
4096 }
4097 }
4098
4099 /* check src2 operand */
4100 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4101 if (err)
4102 return err;
4103
4104 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
4105 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
4106 verbose(env, "div by zero\n");
4107 return -EINVAL;
4108 }
4109
4110 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
4111 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
4112 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
4113
4114 if (insn->imm < 0 || insn->imm >= size) {
4115 verbose(env, "invalid shift %d\n", insn->imm);
4116 return -EINVAL;
4117 }
4118 }
4119
4120 /* check dest operand */
4121 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
4122 if (err)
4123 return err;
4124
4125 return adjust_reg_min_max_vals(env, insn);
4126 }
4127
4128 return 0;
4129}
4130
4131static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
4132 struct bpf_reg_state *dst_reg,
4133 enum bpf_reg_type type,
4134 bool range_right_open)
4135{
4136 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4137 struct bpf_reg_state *regs = state->regs, *reg;
4138 u16 new_range;
4139 int i, j;
4140
4141 if (dst_reg->off < 0 ||
4142 (dst_reg->off == 0 && range_right_open))
4143 /* This doesn't give us any range */
4144 return;
4145
4146 if (dst_reg->umax_value > MAX_PACKET_OFF ||
4147 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
4148 /* Risk of overflow. For instance, ptr + (1<<63) may be less
4149 * than pkt_end, but that's because it's also less than pkt.
4150 */
4151 return;
4152
4153 new_range = dst_reg->off;
4154 if (range_right_open)
4155 new_range--;
4156
4157 /* Examples for register markings:
4158 *
4159 * pkt_data in dst register:
4160 *
4161 * r2 = r3;
4162 * r2 += 8;
4163 * if (r2 > pkt_end) goto <handle exception>
4164 * <access okay>
4165 *
4166 * r2 = r3;
4167 * r2 += 8;
4168 * if (r2 < pkt_end) goto <access okay>
4169 * <handle exception>
4170 *
4171 * Where:
4172 * r2 == dst_reg, pkt_end == src_reg
4173 * r2=pkt(id=n,off=8,r=0)
4174 * r3=pkt(id=n,off=0,r=0)
4175 *
4176 * pkt_data in src register:
4177 *
4178 * r2 = r3;
4179 * r2 += 8;
4180 * if (pkt_end >= r2) goto <access okay>
4181 * <handle exception>
4182 *
4183 * r2 = r3;
4184 * r2 += 8;
4185 * if (pkt_end <= r2) goto <handle exception>
4186 * <access okay>
4187 *
4188 * Where:
4189 * pkt_end == dst_reg, r2 == src_reg
4190 * r2=pkt(id=n,off=8,r=0)
4191 * r3=pkt(id=n,off=0,r=0)
4192 *
4193 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
4194 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
4195 * and [r3, r3 + 8-1) respectively is safe to access depending on
4196 * the check.
4197 */
4198
4199 /* If our ids match, then we must have the same max_value. And we
4200 * don't care about the other reg's fixed offset, since if it's too big
4201 * the range won't allow anything.
4202 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
4203 */
4204 for (i = 0; i < MAX_BPF_REG; i++)
4205 if (regs[i].type == type && regs[i].id == dst_reg->id)
4206 /* keep the maximum range already checked */
4207 regs[i].range = max(regs[i].range, new_range);
4208
4209 for (j = 0; j <= vstate->curframe; j++) {
4210 state = vstate->frame[j];
4211 bpf_for_each_spilled_reg(i, state, reg) {
4212 if (!reg)
4213 continue;
4214 if (reg->type == type && reg->id == dst_reg->id)
4215 reg->range = max(reg->range, new_range);
4216 }
4217 }
4218}
4219
4220/* compute branch direction of the expression "if (reg opcode val) goto target;"
4221 * and return:
4222 * 1 - branch will be taken and "goto target" will be executed
4223 * 0 - branch will not be taken and fall-through to next insn
4224 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register value range is [0,10]
4225 */
4226static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
4227 bool is_jmp32)
4228{
4229 struct bpf_reg_state reg_lo;
4230 s64 sval;
4231
4232 if (__is_pointer_value(false, reg))
4233 return -1;
4234
4235 if (is_jmp32) {
4236 reg_lo = *reg;
4237 reg = &reg_lo;
4238 /* For JMP32, only low 32 bits are compared, coerce_reg_to_size
4239 * could truncate high bits and update umin/umax according to
4240 * information of low bits.
4241 */
4242 coerce_reg_to_size(reg, 4);
4243 /* smin/smax need special handling. For example, after coerce,
4244 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
4245 * used as operand to JMP32. It is a negative number from s32's
4246 * point of view, while it is a positive number when seen as
4247 * s64. The smin/smax are kept as s64, therefore, when used with
4248 * JMP32, they need to be transformed into s32, then sign
4249 * extended back to s64.
4250 *
4251 * Also, smin/smax were copied from umin/umax. If umin/umax have
4252 * different sign bits, then the min/max relationship is not
4253 * maintained after casting into s32; in that case, set smin/smax
4254 * to the safest range.
4255 */
4256 if ((reg->umax_value ^ reg->umin_value) &
4257 (1ULL << 31)) {
4258 reg->smin_value = S32_MIN;
4259 reg->smax_value = S32_MAX;
4260 }
4261 reg->smin_value = (s64)(s32)reg->smin_value;
4262 reg->smax_value = (s64)(s32)reg->smax_value;
4263
4264 val = (u32)val;
4265 sval = (s64)(s32)val;
4266 } else {
4267 sval = (s64)val;
4268 }
4269
4270 switch (opcode) {
4271 case BPF_JEQ:
4272 if (tnum_is_const(reg->var_off))
4273 return !!tnum_equals_const(reg->var_off, val);
4274 break;
4275 case BPF_JNE:
4276 if (tnum_is_const(reg->var_off))
4277 return !tnum_equals_const(reg->var_off, val);
4278 break;
4279 case BPF_JSET:
4280 if ((~reg->var_off.mask & reg->var_off.value) & val)
4281 return 1;
4282 if (!((reg->var_off.mask | reg->var_off.value) & val))
4283 return 0;
4284 break;
4285 case BPF_JGT:
4286 if (reg->umin_value > val)
4287 return 1;
4288 else if (reg->umax_value <= val)
4289 return 0;
4290 break;
4291 case BPF_JSGT:
4292 if (reg->smin_value > sval)
4293 return 1;
4294 else if (reg->smax_value < sval)
4295 return 0;
4296 break;
4297 case BPF_JLT:
4298 if (reg->umax_value < val)
4299 return 1;
4300 else if (reg->umin_value >= val)
4301 return 0;
4302 break;
4303 case BPF_JSLT:
4304 if (reg->smax_value < sval)
4305 return 1;
4306 else if (reg->smin_value >= sval)
4307 return 0;
4308 break;
4309 case BPF_JGE:
4310 if (reg->umin_value >= val)
4311 return 1;
4312 else if (reg->umax_value < val)
4313 return 0;
4314 break;
4315 case BPF_JSGE:
4316 if (reg->smin_value >= sval)
4317 return 1;
4318 else if (reg->smax_value < sval)
4319 return 0;
4320 break;
4321 case BPF_JLE:
4322 if (reg->umax_value <= val)
4323 return 1;
4324 else if (reg->umin_value > val)
4325 return 0;
4326 break;
4327 case BPF_JSLE:
4328 if (reg->smax_value <= sval)
4329 return 1;
4330 else if (reg->smin_value > sval)
4331 return 0;
4332 break;
4333 }
4334
4335 return -1;
4336}
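/* Worked example (illustrative): for "if (r1 > 5) goto l" with r1 known
 * to be in [6, 20], the BPF_JGT case sees umin_value (6) > val (5) and
 * returns 1, so only the taken path is explored; with r1 in [0, 5] it
 * returns 0 and only the fall-through is explored; with r1 in [0, 10]
 * the outcome is unknown (-1) and both paths are analyzed.
 */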
4337
4338/* Generate min value of the high 32-bit from TNUM info. */
4339static u64 gen_hi_min(struct tnum var)
4340{
4341 return var.value & ~0xffffffffULL;
4342}
4343
4344/* Generate max value of the high 32-bit from TNUM info. */
4345static u64 gen_hi_max(struct tnum var)
4346{
4347 return (var.value | var.mask) & ~0xffffffffULL;
4348}
4349
4350 /* Return true if VAL is compared with an s64 sign-extended from an s32, and
4351 * both have the same signedness.
4352 */
4353static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
4354{
4355 return ((s32)sval >= 0 &&
4356 reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
4357 ((s32)sval < 0 &&
4358 reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
4359}
4360
4361/* Adjusts the register min/max values in the case that the dst_reg is the
4362 * variable register that we are working on, and src_reg is a constant or we're
4363 * simply doing a BPF_K check.
4364 * In JEQ/JNE cases we also adjust the var_off values.
4365 */
4366static void reg_set_min_max(struct bpf_reg_state *true_reg,
4367 struct bpf_reg_state *false_reg, u64 val,
4368 u8 opcode, bool is_jmp32)
4369{
4370 s64 sval;
4371
4372 /* If the dst_reg is a pointer, we can't learn anything about its
4373 * variable offset from the compare (unless src_reg were a pointer into
4374 * the same object, but we don't bother with that).
4375 * Since false_reg and true_reg have the same type by construction, we
4376 * only need to check one of them for pointerness.
4377 */
4378 if (__is_pointer_value(false, false_reg))
4379 return;
4380
4381 val = is_jmp32 ? (u32)val : val;
4382 sval = is_jmp32 ? (s64)(s32)val : (s64)val;
4383
4384 switch (opcode) {
4385 case BPF_JEQ:
4386 case BPF_JNE:
4387 {
4388 struct bpf_reg_state *reg =
4389 opcode == BPF_JEQ ? true_reg : false_reg;
4390
4391 /* For BPF_JEQ, if this is false we know nothing Jon Snow, but
4392 * if it is true we know the value for sure. Likewise for
4393 * BPF_JNE.
4394 */
4395 if (is_jmp32) {
4396 u64 old_v = reg->var_off.value;
4397 u64 hi_mask = ~0xffffffffULL;
4398
4399 reg->var_off.value = (old_v & hi_mask) | val;
4400 reg->var_off.mask &= hi_mask;
4401 } else {
4402 __mark_reg_known(reg, val);
4403 }
4404 break;
4405 }
4406 case BPF_JSET:
4407 false_reg->var_off = tnum_and(false_reg->var_off,
4408 tnum_const(~val));
4409 if (is_power_of_2(val))
4410 true_reg->var_off = tnum_or(true_reg->var_off,
4411 tnum_const(val));
4412 break;
4413 case BPF_JGE:
4414 case BPF_JGT:
4415 {
4416 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
4417 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
4418
4419 if (is_jmp32) {
4420 false_umax += gen_hi_max(false_reg->var_off);
4421 true_umin += gen_hi_min(true_reg->var_off);
4422 }
4423 false_reg->umax_value = min(false_reg->umax_value, false_umax);
4424 true_reg->umin_value = max(true_reg->umin_value, true_umin);
4425 break;
4426 }
4427 case BPF_JSGE:
4428 case BPF_JSGT:
4429 {
4430 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
4431 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
4432
4433 /* If the full s64 was not sign-extended from s32 then don't
4434 * deduce further info.
4435 */
4436 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4437 break;
4438 false_reg->smax_value = min(false_reg->smax_value, false_smax);
4439 true_reg->smin_value = max(true_reg->smin_value, true_smin);
4440 break;
4441 }
4442 case BPF_JLE:
4443 case BPF_JLT:
4444 {
4445 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
4446 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
4447
4448 if (is_jmp32) {
4449 false_umin += gen_hi_min(false_reg->var_off);
4450 true_umax += gen_hi_max(true_reg->var_off);
4451 }
4452 false_reg->umin_value = max(false_reg->umin_value, false_umin);
4453 true_reg->umax_value = min(true_reg->umax_value, true_umax);
4454 break;
4455 }
4456 case BPF_JSLE:
4457 case BPF_JSLT:
4458 {
4459 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
4460 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
4461
4462 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4463 break;
4464 false_reg->smin_value = max(false_reg->smin_value, false_smin);
4465 true_reg->smax_value = min(true_reg->smax_value, true_smax);
4466 break;
4467 }
4468 default:
4469 break;
4470 }
4471
4472 __reg_deduce_bounds(false_reg);
4473 __reg_deduce_bounds(true_reg);
4474 /* We might have learned some bits from the bounds. */
4475 __reg_bound_offset(false_reg);
4476 __reg_bound_offset(true_reg);
4477 /* Intersecting with the old var_off might have improved our bounds
4478 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4479 * then new var_off is (0; 0x7f...fc) which improves our umax.
4480 */
4481 __update_reg_bounds(false_reg);
4482 __update_reg_bounds(true_reg);
4483}
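/* Worked example (illustrative): for "if (r1 > 7) goto l" on an unknown
 * scalar r1, the BPF_JGT case above clamps the fall-through copy to
 * umax_value <= 7 and the branch-taken copy to umin_value >= 8; the
 * final __reg_deduce_bounds()/__reg_bound_offset() calls then propagate
 * whatever can be learned into the signed bounds and var_off.
 */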
4484
4485/* Same as above, but for the case that dst_reg holds a constant and src_reg is
4486 * the variable reg.
4487 */
4488static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
4489 struct bpf_reg_state *false_reg, u64 val,
4490 u8 opcode, bool is_jmp32)
4491{
4492 s64 sval;
4493
4494 if (__is_pointer_value(false, false_reg))
4495 return;
4496
4497 val = is_jmp32 ? (u32)val : val;
4498 sval = is_jmp32 ? (s64)(s32)val : (s64)val;
4499
4500 switch (opcode) {
4501 case BPF_JEQ:
4502 case BPF_JNE:
4503 {
4504 struct bpf_reg_state *reg =
4505 opcode == BPF_JEQ ? true_reg : false_reg;
4506
4507 if (is_jmp32) {
4508 u64 old_v = reg->var_off.value;
4509 u64 hi_mask = ~0xffffffffULL;
4510
4511 reg->var_off.value = (old_v & hi_mask) | val;
4512 reg->var_off.mask &= hi_mask;
4513 } else {
4514 __mark_reg_known(reg, val);
4515 }
4516 break;
4517 }
4518 case BPF_JSET:
4519 false_reg->var_off = tnum_and(false_reg->var_off,
4520 tnum_const(~val));
4521 if (is_power_of_2(val))
4522 true_reg->var_off = tnum_or(true_reg->var_off,
4523 tnum_const(val));
4524 break;
4525 case BPF_JGE:
4526 case BPF_JGT:
4527 {
4528 u64 false_umin = opcode == BPF_JGT ? val : val + 1;
4529 u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
4530
4531 if (is_jmp32) {
4532 false_umin += gen_hi_min(false_reg->var_off);
4533 true_umax += gen_hi_max(true_reg->var_off);
4534 }
4535 false_reg->umin_value = max(false_reg->umin_value, false_umin);
4536 true_reg->umax_value = min(true_reg->umax_value, true_umax);
4537 break;
4538 }
4539 case BPF_JSGE:
4540 case BPF_JSGT:
4541 {
4542 s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1;
4543 s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
4544
4545 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4546 break;
4547 false_reg->smin_value = max(false_reg->smin_value, false_smin);
4548 true_reg->smax_value = min(true_reg->smax_value, true_smax);
4549 break;
4550 }
4551 case BPF_JLE:
4552 case BPF_JLT:
4553 {
4554 u64 false_umax = opcode == BPF_JLT ? val : val - 1;
4555 u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
4556
4557 if (is_jmp32) {
4558 false_umax += gen_hi_max(false_reg->var_off);
4559 true_umin += gen_hi_min(true_reg->var_off);
4560 }
4561 false_reg->umax_value = min(false_reg->umax_value, false_umax);
4562 true_reg->umin_value = max(true_reg->umin_value, true_umin);
4563 break;
4564 }
4565 case BPF_JSLE:
4566 case BPF_JSLT:
4567 {
4568 s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1;
4569 s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
4570
4571 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4572 break;
4573 false_reg->smax_value = min(false_reg->smax_value, false_smax);
4574 true_reg->smin_value = max(true_reg->smin_value, true_smin);
4575 break;
4576 }
4577 default:
4578 break;
4579 }
4580
4581 __reg_deduce_bounds(false_reg);
4582 __reg_deduce_bounds(true_reg);
4583 /* We might have learned some bits from the bounds. */
4584 __reg_bound_offset(false_reg);
4585 __reg_bound_offset(true_reg);
4586 /* Intersecting with the old var_off might have improved our bounds
4587 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4588 * then new var_off is (0; 0x7f...fc) which improves our umax.
4589 */
4590 __update_reg_bounds(false_reg);
4591 __update_reg_bounds(true_reg);
4592}
4593
4594/* Regs are known to be equal, so intersect their min/max/var_off */
4595static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
4596 struct bpf_reg_state *dst_reg)
4597{
4598 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
4599 dst_reg->umin_value);
4600 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
4601 dst_reg->umax_value);
4602 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
4603 dst_reg->smin_value);
4604 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
4605 dst_reg->smax_value);
4606 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
4607 dst_reg->var_off);
4608 /* We might have learned new bounds from the var_off. */
4609 __update_reg_bounds(src_reg);
4610 __update_reg_bounds(dst_reg);
4611 /* We might have learned something about the sign bit. */
4612 __reg_deduce_bounds(src_reg);
4613 __reg_deduce_bounds(dst_reg);
4614 /* We might have learned some bits from the bounds. */
4615 __reg_bound_offset(src_reg);
4616 __reg_bound_offset(dst_reg);
4617 /* Intersecting with the old var_off might have improved our bounds
4618 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4619 * then new var_off is (0; 0x7f...fc) which improves our umax.
4620 */
4621 __update_reg_bounds(src_reg);
4622 __update_reg_bounds(dst_reg);
4623}
4624
4625static void reg_combine_min_max(struct bpf_reg_state *true_src,
4626 struct bpf_reg_state *true_dst,
4627 struct bpf_reg_state *false_src,
4628 struct bpf_reg_state *false_dst,
4629 u8 opcode)
4630{
4631 switch (opcode) {
4632 case BPF_JEQ:
4633 __reg_combine_min_max(true_src, true_dst);
4634 break;
4635 case BPF_JNE:
4636 __reg_combine_min_max(false_src, false_dst);
4637 break;
4638 }
4639}
4640
4641static void mark_ptr_or_null_reg(struct bpf_func_state *state,
4642 struct bpf_reg_state *reg, u32 id,
4643 bool is_null)
4644{
4645 if (reg_type_may_be_null(reg->type) && reg->id == id) {
4646 /* Old offset (both fixed and variable parts) should
4647 * have been known-zero, because we don't allow pointer
4648 * arithmetic on pointers that might be NULL.
4649 */
4650 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
4651 !tnum_equals_const(reg->var_off, 0) ||
4652 reg->off)) {
4653 __mark_reg_known_zero(reg);
4654 reg->off = 0;
4655 }
4656 if (is_null) {
4657 reg->type = SCALAR_VALUE;
4658 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
4659 if (reg->map_ptr->inner_map_meta) {
4660 reg->type = CONST_PTR_TO_MAP;
4661 reg->map_ptr = reg->map_ptr->inner_map_meta;
4662 } else {
4663 reg->type = PTR_TO_MAP_VALUE;
4664 }
4665 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
4666 reg->type = PTR_TO_SOCKET;
4667 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
4668 reg->type = PTR_TO_SOCK_COMMON;
4669 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
4670 reg->type = PTR_TO_TCP_SOCK;
4671 }
4672 if (is_null) {
4673 /* We don't need id and ref_obj_id from this point
4674 * onwards anymore, so reset them both to give state
4675 * pruning a chance to take effect.
4676 */
4677 reg->id = 0;
4678 reg->ref_obj_id = 0;
4679 } else if (!reg_may_point_to_spin_lock(reg)) {
4680 /* For not-NULL ptr, reg->ref_obj_id will be reset
4681 * in release_reg_references().
4682 *
4683 * reg->id is still needed for spin_lock ptrs; for any
4684 * other ptr type, reg->id can be reset.
4685 */
4686 reg->id = 0;
4687 }
4688 }
4689}
4690
4691/* The logic is similar to find_good_pkt_pointers(), both could eventually
4692 * be folded together at some point.
4693 */
4694static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
4695 bool is_null)
4696{
4697 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4698 struct bpf_reg_state *reg, *regs = state->regs;
4699 u32 ref_obj_id = regs[regno].ref_obj_id;
4700 u32 id = regs[regno].id;
4701 int i, j;
4702
4703 if (ref_obj_id && ref_obj_id == id && is_null)
4704 /* regs[regno] is in the " == NULL" branch.
4705 * No one could have freed the reference state before
4706 * doing the NULL check.
4707 */
4708 WARN_ON_ONCE(release_reference_state(state, id));
4709
4710 for (i = 0; i < MAX_BPF_REG; i++)
4711 mark_ptr_or_null_reg(state, &regs[i], id, is_null);
4712
4713 for (j = 0; j <= vstate->curframe; j++) {
4714 state = vstate->frame[j];
4715 bpf_for_each_spilled_reg(i, state, reg) {
4716 if (!reg)
4717 continue;
4718 mark_ptr_or_null_reg(state, reg, id, is_null);
4719 }
4720 }
4721}
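/* For illustration (hypothetical sequence): after
 *
 *   r0 = bpf_map_lookup_elem(map, &key)
 *   if (r0 == 0) goto err
 *
 * the err branch sees r0 (and every register sharing its id) converted
 * to SCALAR_VALUE, while the fall-through branch sees PTR_TO_MAP_VALUE,
 * which is what later permits direct loads and stores through r0.
 */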
4722
4723static bool try_match_pkt_pointers(const struct bpf_insn *insn,
4724 struct bpf_reg_state *dst_reg,
4725 struct bpf_reg_state *src_reg,
4726 struct bpf_verifier_state *this_branch,
4727 struct bpf_verifier_state *other_branch)
4728{
4729 if (BPF_SRC(insn->code) != BPF_X)
4730 return false;
4731
4732 /* Pointers are always 64-bit. */
4733 if (BPF_CLASS(insn->code) == BPF_JMP32)
4734 return false;
4735
4736 switch (BPF_OP(insn->code)) {
4737 case BPF_JGT:
4738 if ((dst_reg->type == PTR_TO_PACKET &&
4739 src_reg->type == PTR_TO_PACKET_END) ||
4740 (dst_reg->type == PTR_TO_PACKET_META &&
4741 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4742 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
4743 find_good_pkt_pointers(this_branch, dst_reg,
4744 dst_reg->type, false);
4745 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4746 src_reg->type == PTR_TO_PACKET) ||
4747 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4748 src_reg->type == PTR_TO_PACKET_META)) {
4749 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
4750 find_good_pkt_pointers(other_branch, src_reg,
4751 src_reg->type, true);
4752 } else {
4753 return false;
4754 }
4755 break;
4756 case BPF_JLT:
4757 if ((dst_reg->type == PTR_TO_PACKET &&
4758 src_reg->type == PTR_TO_PACKET_END) ||
4759 (dst_reg->type == PTR_TO_PACKET_META &&
4760 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4761 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
4762 find_good_pkt_pointers(other_branch, dst_reg,
4763 dst_reg->type, true);
4764 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4765 src_reg->type == PTR_TO_PACKET) ||
4766 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4767 src_reg->type == PTR_TO_PACKET_META)) {
4768 /* pkt_end < pkt_data', pkt_data < pkt_meta' */
4769 find_good_pkt_pointers(this_branch, src_reg,
4770 src_reg->type, false);
4771 } else {
4772 return false;
4773 }
4774 break;
4775 case BPF_JGE:
4776 if ((dst_reg->type == PTR_TO_PACKET &&
4777 src_reg->type == PTR_TO_PACKET_END) ||
4778 (dst_reg->type == PTR_TO_PACKET_META &&
4779 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4780 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
4781 find_good_pkt_pointers(this_branch, dst_reg,
4782 dst_reg->type, true);
4783 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4784 src_reg->type == PTR_TO_PACKET) ||
4785 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4786 src_reg->type == PTR_TO_PACKET_META)) {
4787 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
4788 find_good_pkt_pointers(other_branch, src_reg,
4789 src_reg->type, false);
4790 } else {
4791 return false;
4792 }
4793 break;
4794 case BPF_JLE:
4795 if ((dst_reg->type == PTR_TO_PACKET &&
4796 src_reg->type == PTR_TO_PACKET_END) ||
4797 (dst_reg->type == PTR_TO_PACKET_META &&
4798 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4799 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
4800 find_good_pkt_pointers(other_branch, dst_reg,
4801 dst_reg->type, false);
4802 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4803 src_reg->type == PTR_TO_PACKET) ||
4804 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4805 src_reg->type == PTR_TO_PACKET_META)) {
4806 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
4807 find_good_pkt_pointers(this_branch, src_reg,
4808 src_reg->type, true);
4809 } else {
4810 return false;
4811 }
4812 break;
4813 default:
4814 return false;
4815 }
4816
4817 return true;
4818}
4819
4820static int check_cond_jmp_op(struct bpf_verifier_env *env,
4821 struct bpf_insn *insn, int *insn_idx)
4822{
4823 struct bpf_verifier_state *this_branch = env->cur_state;
4824 struct bpf_verifier_state *other_branch;
4825 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
4826 struct bpf_reg_state *dst_reg, *other_branch_regs;
4827 u8 opcode = BPF_OP(insn->code);
4828 bool is_jmp32;
4829 int err;
4830
4831 /* Only conditional jumps are expected to reach here. */
4832 if (opcode == BPF_JA || opcode > BPF_JSLE) {
4833 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
4834 return -EINVAL;
4835 }
4836
4837 if (BPF_SRC(insn->code) == BPF_X) {
4838 if (insn->imm != 0) {
4839 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
4840 return -EINVAL;
4841 }
4842
4843 /* check src1 operand */
4844 err = check_reg_arg(env, insn->src_reg, SRC_OP);
4845 if (err)
4846 return err;
4847
4848 if (is_pointer_value(env, insn->src_reg)) {
4849 verbose(env, "R%d pointer comparison prohibited\n",
4850 insn->src_reg);
4851 return -EACCES;
4852 }
4853 } else {
4854 if (insn->src_reg != BPF_REG_0) {
4855 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
4856 return -EINVAL;
4857 }
4858 }
4859
4860 /* check src2 operand */
4861 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4862 if (err)
4863 return err;
4864
4865 dst_reg = &regs[insn->dst_reg];
4866 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
4867
4868 if (BPF_SRC(insn->code) == BPF_K) {
4869 int pred = is_branch_taken(dst_reg, insn->imm, opcode,
4870 is_jmp32);
4871
4872 if (pred == 1) {
4873 /* only follow the goto, ignore fall-through */
4874 *insn_idx += insn->off;
4875 return 0;
4876 } else if (pred == 0) {
4877 /* only follow fall-through branch, since
4878 * that's where the program will go
4879 */
4880 return 0;
4881 }
4882 }
4883
4884 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
4885 false);
4886 if (!other_branch)
4887 return -EFAULT;
4888 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
4889
4890 /* detect if we are comparing against a constant value so we can adjust
4891 * our min/max values for our dst register.
4892 * this is only legit if both are scalars (or pointers to the same
4893 * object, I suppose, but we don't support that right now), because
4894 * otherwise the different base pointers mean the offsets aren't
4895 * comparable.
4896 */
4897 if (BPF_SRC(insn->code) == BPF_X) {
4898 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
4899 struct bpf_reg_state lo_reg0 = *dst_reg;
4900 struct bpf_reg_state lo_reg1 = *src_reg;
4901 struct bpf_reg_state *src_lo, *dst_lo;
4902
4903 dst_lo = &lo_reg0;
4904 src_lo = &lo_reg1;
4905 coerce_reg_to_size(dst_lo, 4);
4906 coerce_reg_to_size(src_lo, 4);
4907
4908 if (dst_reg->type == SCALAR_VALUE &&
4909 src_reg->type == SCALAR_VALUE) {
4910 if (tnum_is_const(src_reg->var_off) ||
4911 (is_jmp32 && tnum_is_const(src_lo->var_off)))
4912 reg_set_min_max(&other_branch_regs[insn->dst_reg],
4913 dst_reg,
4914 is_jmp32
4915 ? src_lo->var_off.value
4916 : src_reg->var_off.value,
4917 opcode, is_jmp32);
4918 else if (tnum_is_const(dst_reg->var_off) ||
4919 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
4920 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
4921 src_reg,
4922 is_jmp32
4923 ? dst_lo->var_off.value
4924 : dst_reg->var_off.value,
4925 opcode, is_jmp32);
4926 else if (!is_jmp32 &&
4927 (opcode == BPF_JEQ || opcode == BPF_JNE))
4928 /* Comparing for equality, we can combine knowledge */
4929 reg_combine_min_max(&other_branch_regs[insn->src_reg],
4930 &other_branch_regs[insn->dst_reg],
4931 src_reg, dst_reg, opcode);
4932 }
4933 } else if (dst_reg->type == SCALAR_VALUE) {
4934 reg_set_min_max(&other_branch_regs[insn->dst_reg],
4935 dst_reg, insn->imm, opcode, is_jmp32);
4936 }
4937
4938 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
4939 * NOTE: the optimizations below are related to pointer comparisons,
4940 * which will never be JMP32.
4941 */
4942 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
4943 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
4944 reg_type_may_be_null(dst_reg->type)) {
4945 /* Mark all identical registers in each branch as either
4946 * safe or unknown depending on the R == 0 or R != 0 condition.
4947 */
4948 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
4949 opcode == BPF_JNE);
4950 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
4951 opcode == BPF_JEQ);
4952 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
4953 this_branch, other_branch) &&
4954 is_pointer_value(env, insn->dst_reg)) {
4955 verbose(env, "R%d pointer comparison prohibited\n",
4956 insn->dst_reg);
4957 return -EACCES;
4958 }
4959 if (env->log.level)
4960 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
4961 return 0;
4962}
4963
4964/* return the map pointer stored inside BPF_LD_IMM64 instruction */
4965static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
4966{
4967 u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
4968
4969 return (struct bpf_map *) (unsigned long) imm64;
4970}
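/* Worked example (illustrative values): BPF_LD_IMM64 spans two insns;
 * with insn[0].imm = 0x11223344 and insn[1].imm = 0x55667788 the helper
 * above reconstructs 0x5566778811223344, which for BPF_PSEUDO_MAP_FD
 * loads has already been rewritten by replace_map_fd_with_map_ptr() to
 * hold the kernel address of the bpf_map.
 */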
4971
4972/* verify BPF_LD_IMM64 instruction */
4973static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
4974{
4975 struct bpf_reg_state *regs = cur_regs(env);
4976 int err;
4977
4978 if (BPF_SIZE(insn->code) != BPF_DW) {
4979 verbose(env, "invalid BPF_LD_IMM insn\n");
4980 return -EINVAL;
4981 }
4982 if (insn->off != 0) {
4983 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
4984 return -EINVAL;
4985 }
4986
4987 err = check_reg_arg(env, insn->dst_reg, DST_OP);
4988 if (err)
4989 return err;
4990
4991 if (insn->src_reg == 0) {
4992 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
4993
4994 regs[insn->dst_reg].type = SCALAR_VALUE;
4995 __mark_reg_known(&regs[insn->dst_reg], imm);
4996 return 0;
4997 }
4998
4999 /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
5000 BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
5001
5002 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
5003 regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
5004 return 0;
5005}
5006
5007static bool may_access_skb(enum bpf_prog_type type)
5008{
5009 switch (type) {
5010 case BPF_PROG_TYPE_SOCKET_FILTER:
5011 case BPF_PROG_TYPE_SCHED_CLS:
5012 case BPF_PROG_TYPE_SCHED_ACT:
5013 return true;
5014 default:
5015 return false;
5016 }
5017}
5018
5019/* verify safety of LD_ABS|LD_IND instructions:
5020 * - they can only appear in the programs where ctx == skb
5021 * - since they are wrappers of function calls, they scratch R1-R5 registers,
5022 * preserve R6-R9, and store return value into R0
5023 *
5024 * Implicit input:
5025 * ctx == skb == R6 == CTX
5026 *
5027 * Explicit input:
5028 * SRC == any register
5029 * IMM == 32-bit immediate
5030 *
5031 * Output:
5032 * R0 - 8/16/32-bit skb data converted to cpu endianness
5033 */
5034static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
5035{
5036 struct bpf_reg_state *regs = cur_regs(env);
5037 u8 mode = BPF_MODE(insn->code);
5038 int i, err;
5039
5040 if (!may_access_skb(env->prog->type)) {
5041 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
5042 return -EINVAL;
5043 }
5044
5045 if (!env->ops->gen_ld_abs) {
5046 verbose(env, "bpf verifier is misconfigured\n");
5047 return -EINVAL;
5048 }
5049
5050 if (env->subprog_cnt > 1) {
5051 /* when a program has LD_ABS insns, JITs and the interpreter
5052 * assume that r1 == ctx == skb, which is not the case for
5053 * callees that can have arbitrary arguments. It's problematic
5054 * for the main prog as well, since JITs would need to analyze
5055 * all functions in order to make proper register save/restore
5056 * decisions in the main prog. Hence disallow LD_ABS with calls.
5057 */
5058 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
5059 return -EINVAL;
5060 }
5061
5062 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
5063 BPF_SIZE(insn->code) == BPF_DW ||
5064 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
5065 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
5066 return -EINVAL;
5067 }
5068
5069 /* check whether implicit source operand (register R6) is readable */
5070 err = check_reg_arg(env, BPF_REG_6, SRC_OP);
5071 if (err)
5072 return err;
5073
5074 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
5075 * gen_ld_abs() may terminate the program at runtime, leading to
5076 * reference leak.
5077 */
5078 err = check_reference_leak(env);
5079 if (err) {
5080 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
5081 return err;
5082 }
5083
5084 if (env->cur_state->active_spin_lock) {
5085 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
5086 return -EINVAL;
5087 }
5088
5089 if (regs[BPF_REG_6].type != PTR_TO_CTX) {
5090 verbose(env,
5091 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
5092 return -EINVAL;
5093 }
5094
5095 if (mode == BPF_IND) {
5096 /* check explicit source operand */
5097 err = check_reg_arg(env, insn->src_reg, SRC_OP);
5098 if (err)
5099 return err;
5100 }
5101
5102 /* reset caller saved regs to unreadable */
5103 for (i = 0; i < CALLER_SAVED_REGS; i++) {
5104 mark_reg_not_init(env, regs, caller_saved[i]);
5105 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5106 }
5107
5108 /* mark destination R0 register as readable, since it contains
5109 * the value fetched from the packet.
5110 * Already marked as written above.
5111 */
5112 mark_reg_unknown(env, regs, BPF_REG_0);
5113 return 0;
5114}
5115
5116static int check_return_code(struct bpf_verifier_env *env)
5117{
5118 struct bpf_reg_state *reg;
5119 struct tnum range = tnum_range(0, 1);
5120
5121 switch (env->prog->type) {
5122 case BPF_PROG_TYPE_CGROUP_SKB:
5123 case BPF_PROG_TYPE_CGROUP_SOCK:
5124 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
5125 case BPF_PROG_TYPE_SOCK_OPS:
5126 case BPF_PROG_TYPE_CGROUP_DEVICE:
5127 break;
5128 default:
5129 return 0;
5130 }
5131
5132 reg = cur_regs(env) + BPF_REG_0;
5133 if (reg->type != SCALAR_VALUE) {
5134 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
5135 reg_type_str[reg->type]);
5136 return -EINVAL;
5137 }
5138
5139 if (!tnum_in(range, reg->var_off)) {
5140 verbose(env, "At program exit the register R0 ");
5141 if (!tnum_is_unknown(reg->var_off)) {
5142 char tn_buf[48];
5143
5144 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5145 verbose(env, "has value %s", tn_buf);
5146 } else {
5147 verbose(env, "has unknown scalar value");
5148 }
5149 verbose(env, " should have been 0 or 1\n");
5150 return -EINVAL;
5151 }
5152 return 0;
5153}
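/* Illustrative example: a BPF_PROG_TYPE_CGROUP_SKB program ending in
 * "r0 = 2; exit" leaves R0 as the constant 2, which is not contained in
 * tnum_range(0, 1), so the "should have been 0 or 1" message above is
 * printed and the program is rejected; returning 0 or 1 passes.
 */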
5154
5155/* non-recursive DFS pseudo code
5156 * 1 procedure DFS-iterative(G,v):
5157 * 2 label v as discovered
5158 * 3 let S be a stack
5159 * 4 S.push(v)
5160 * 5 while S is not empty
5161 * 6 t <- S.pop()
5162 * 7 if t is what we're looking for:
5163 * 8 return t
5164 * 9 for all edges e in G.adjacentEdges(t) do
5165 * 10 if edge e is already labelled
5166 * 11 continue with the next edge
5167 * 12 w <- G.adjacentVertex(t,e)
5168 * 13 if vertex w is not discovered and not explored
5169 * 14 label e as tree-edge
5170 * 15 label w as discovered
5171 * 16 S.push(w)
5172 * 17 continue at 5
5173 * 18 else if vertex w is discovered
5174 * 19 label e as back-edge
5175 * 20 else
5176 * 21 // vertex w is explored
5177 * 22 label e as forward- or cross-edge
5178 * 23 label t as explored
5179 * 24 S.pop()
5180 *
5181 * convention:
5182 * 0x10 - discovered
5183 * 0x11 - discovered and fall-through edge labelled
5184 * 0x12 - discovered and fall-through and branch edges labelled
5185 * 0x20 - explored
5186 */
5187
5188enum {
5189 DISCOVERED = 0x10,
5190 EXPLORED = 0x20,
5191 FALLTHROUGH = 1,
5192 BRANCH = 2,
5193};
5194
5195#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
5196
5197static int *insn_stack; /* stack of insns to process */
5198static int cur_stack; /* current stack index */
5199static int *insn_state;
5200
5201/* t, w, e - match pseudo-code above:
5202 * t - index of current instruction
5203 * w - next instruction
5204 * e - edge
5205 */
5206static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
5207{
5208 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
5209 return 0;
5210
5211 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
5212 return 0;
5213
5214 if (w < 0 || w >= env->prog->len) {
5215 verbose_linfo(env, t, "%d: ", t);
5216 verbose(env, "jump out of range from insn %d to %d\n", t, w);
5217 return -EINVAL;
5218 }
5219
5220 if (e == BRANCH)
5221 /* mark branch target for state pruning */
5222 env->explored_states[w] = STATE_LIST_MARK;
5223
5224 if (insn_state[w] == 0) {
5225 /* tree-edge */
5226 insn_state[t] = DISCOVERED | e;
5227 insn_state[w] = DISCOVERED;
5228 if (cur_stack >= env->prog->len)
5229 return -E2BIG;
5230 insn_stack[cur_stack++] = w;
5231 return 1;
5232 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
5233 verbose_linfo(env, t, "%d: ", t);
5234 verbose_linfo(env, w, "%d: ", w);
5235 verbose(env, "back-edge from insn %d to %d\n", t, w);
5236 return -EINVAL;
5237 } else if (insn_state[w] == EXPLORED) {
5238 /* forward- or cross-edge */
5239 insn_state[t] = DISCOVERED | e;
5240 } else {
5241 verbose(env, "insn state internal bug\n");
5242 return -EFAULT;
5243 }
5244 return 0;
5245}
5246
5247/* non-recursive depth-first-search to detect loops in BPF program
5248 * loop == back-edge in directed graph
5249 */
5250static int check_cfg(struct bpf_verifier_env *env)
5251{
5252 struct bpf_insn *insns = env->prog->insnsi;
5253 int insn_cnt = env->prog->len;
5254 int ret = 0;
5255 int i, t;
5256
5257 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5258 if (!insn_state)
5259 return -ENOMEM;
5260
5261 insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5262 if (!insn_stack) {
5263 kfree(insn_state);
5264 return -ENOMEM;
5265 }
5266
5267 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
5268 insn_stack[0] = 0; /* 0 is the first instruction */
5269 cur_stack = 1;
5270
5271peek_stack:
5272 if (cur_stack == 0)
5273 goto check_state;
5274 t = insn_stack[cur_stack - 1];
5275
5276 if (BPF_CLASS(insns[t].code) == BPF_JMP ||
5277 BPF_CLASS(insns[t].code) == BPF_JMP32) {
5278 u8 opcode = BPF_OP(insns[t].code);
5279
5280 if (opcode == BPF_EXIT) {
5281 goto mark_explored;
5282 } else if (opcode == BPF_CALL) {
5283 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5284 if (ret == 1)
5285 goto peek_stack;
5286 else if (ret < 0)
5287 goto err_free;
5288 if (t + 1 < insn_cnt)
5289 env->explored_states[t + 1] = STATE_LIST_MARK;
5290 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
5291 env->explored_states[t] = STATE_LIST_MARK;
5292 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
5293 if (ret == 1)
5294 goto peek_stack;
5295 else if (ret < 0)
5296 goto err_free;
5297 }
5298 } else if (opcode == BPF_JA) {
5299 if (BPF_SRC(insns[t].code) != BPF_K) {
5300 ret = -EINVAL;
5301 goto err_free;
5302 }
5303 /* unconditional jump with single edge */
5304 ret = push_insn(t, t + insns[t].off + 1,
5305 FALLTHROUGH, env);
5306 if (ret == 1)
5307 goto peek_stack;
5308 else if (ret < 0)
5309 goto err_free;
5310 /* tell verifier to check for equivalent states
5311 * after every call and jump
5312 */
5313 if (t + 1 < insn_cnt)
5314 env->explored_states[t + 1] = STATE_LIST_MARK;
5315 } else {
5316 /* conditional jump with two edges */
5317 env->explored_states[t] = STATE_LIST_MARK;
5318 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5319 if (ret == 1)
5320 goto peek_stack;
5321 else if (ret < 0)
5322 goto err_free;
5323
5324 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
5325 if (ret == 1)
5326 goto peek_stack;
5327 else if (ret < 0)
5328 goto err_free;
5329 }
5330 } else {
5331 /* all other non-branch instructions with single
5332 * fall-through edge
5333 */
5334 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5335 if (ret == 1)
5336 goto peek_stack;
5337 else if (ret < 0)
5338 goto err_free;
5339 }
5340
5341mark_explored:
5342 insn_state[t] = EXPLORED;
5343 if (cur_stack-- <= 0) {
5344 verbose(env, "pop stack internal bug\n");
5345 ret = -EFAULT;
5346 goto err_free;
5347 }
5348 goto peek_stack;
5349
5350check_state:
5351 for (i = 0; i < insn_cnt; i++) {
5352 if (insn_state[i] != EXPLORED) {
5353 verbose(env, "unreachable insn %d\n", i);
5354 ret = -EINVAL;
5355 goto err_free;
5356 }
5357 }
5358 ret = 0; /* cfg looks good */
5359
5360err_free:
5361 kfree(insn_state);
5362 kfree(insn_stack);
5363 return ret;
5364}
5365
5366/* The minimum supported BTF func info size */
5367#define MIN_BPF_FUNCINFO_SIZE 8
5368#define MAX_FUNCINFO_REC_SIZE 252
5369
5370static int check_btf_func(struct bpf_verifier_env *env,
5371 const union bpf_attr *attr,
5372 union bpf_attr __user *uattr)
5373{
5374 u32 i, nfuncs, urec_size, min_size;
5375 u32 krec_size = sizeof(struct bpf_func_info);
5376 struct bpf_func_info *krecord;
5377 const struct btf_type *type;
5378 struct bpf_prog *prog;
5379 const struct btf *btf;
5380 void __user *urecord;
5381 u32 prev_offset = 0;
5382 int ret = 0;
5383
5384 nfuncs = attr->func_info_cnt;
5385 if (!nfuncs)
5386 return 0;
5387
5388 if (nfuncs != env->subprog_cnt) {
5389 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
5390 return -EINVAL;
5391 }
5392
5393 urec_size = attr->func_info_rec_size;
5394 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
5395 urec_size > MAX_FUNCINFO_REC_SIZE ||
5396 urec_size % sizeof(u32)) {
5397 verbose(env, "invalid func info rec size %u\n", urec_size);
5398 return -EINVAL;
5399 }
5400
5401 prog = env->prog;
5402 btf = prog->aux->btf;
5403
5404 urecord = u64_to_user_ptr(attr->func_info);
5405 min_size = min_t(u32, krec_size, urec_size);
5406
5407 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
5408 if (!krecord)
5409 return -ENOMEM;
5410
5411 for (i = 0; i < nfuncs; i++) {
5412 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
5413 if (ret) {
5414 if (ret == -E2BIG) {
5415 verbose(env, "nonzero trailing record in func info");
5416 /* set the size the kernel expects so the loader can zero
5417 * out the rest of the record.
5418 */
5419 if (put_user(min_size, &uattr->func_info_rec_size))
5420 ret = -EFAULT;
5421 }
5422 goto err_free;
5423 }
5424
5425 if (copy_from_user(&krecord[i], urecord, min_size)) {
5426 ret = -EFAULT;
5427 goto err_free;
5428 }
5429
5430 /* check insn_off */
5431 if (i == 0) {
5432 if (krecord[i].insn_off) {
5433 verbose(env,
5434 "nonzero insn_off %u for the first func info record",
5435 krecord[i].insn_off);
5436 ret = -EINVAL;
5437 goto err_free;
5438 }
5439 } else if (krecord[i].insn_off <= prev_offset) {
5440 verbose(env,
5441 "same or smaller insn offset (%u) than previous func info record (%u)",
5442 krecord[i].insn_off, prev_offset);
5443 ret = -EINVAL;
5444 goto err_free;
5445 }
5446
5447 if (env->subprog_info[i].start != krecord[i].insn_off) {
5448 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
5449 ret = -EINVAL;
5450 goto err_free;
5451 }
5452
5453 /* check type_id */
5454 type = btf_type_by_id(btf, krecord[i].type_id);
5455 if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
5456 verbose(env, "invalid type id %d in func info",
5457 krecord[i].type_id);
5458 ret = -EINVAL;
5459 goto err_free;
5460 }
5461
5462 prev_offset = krecord[i].insn_off;
5463 urecord += urec_size;
5464 }
5465
5466 prog->aux->func_info = krecord;
5467 prog->aux->func_info_cnt = nfuncs;
5468 return 0;
5469
5470err_free:
5471 kvfree(krecord);
5472 return ret;
5473}
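On the loader side, the records validated above are plain {insn_off, type_id} pairs passed through the prog-load attributes: one per subprog, offsets strictly increasing and matching the subprog starts, each type id naming a BTF_KIND_FUNC. A hedged user-space sketch follows; the offsets and type ids are made-up placeholders (real loaders such as libbpf derive them from the program's BTF), and the helper name is arbitrary.

#include <linux/bpf.h>

/* Hypothetical program: main starts at insn 0, one extra subprog starts
 * at insn 7, and the accompanying (not shown) BTF assigned FUNC type
 * ids 4 and 6 to them.
 */
static struct bpf_func_info finfo[] = {
	{ .insn_off = 0, .type_id = 4 },	/* record 0: insn_off must be 0 */
	{ .insn_off = 7, .type_id = 6 },	/* offsets strictly increasing  */
};

static void fill_func_info(union bpf_attr *attr, int btf_fd)
{
	attr->prog_btf_fd        = btf_fd;
	attr->func_info          = (__u64)(unsigned long)finfo;
	attr->func_info_cnt      = 2;
	attr->func_info_rec_size = sizeof(struct bpf_func_info);
}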
5474
5475static void adjust_btf_func(struct bpf_verifier_env *env)
5476{
5477 int i;
5478
5479 if (!env->prog->aux->func_info)
5480 return;
5481
5482 for (i = 0; i < env->subprog_cnt; i++)
5483 env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
5484}
5485
5486#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
5487 sizeof(((struct bpf_line_info *)(0))->line_col))
5488#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
5489
5490static int check_btf_line(struct bpf_verifier_env *env,
5491 const union bpf_attr *attr,
5492 union bpf_attr __user *uattr)
5493{
5494 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
5495 struct bpf_subprog_info *sub;
5496 struct bpf_line_info *linfo;
5497 struct bpf_prog *prog;
5498 const struct btf *btf;
5499 void __user *ulinfo;
5500 int err;
5501
5502 nr_linfo = attr->line_info_cnt;
5503 if (!nr_linfo)
5504 return 0;
5505
5506 rec_size = attr->line_info_rec_size;
5507 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
5508 rec_size > MAX_LINEINFO_REC_SIZE ||
5509 rec_size & (sizeof(u32) - 1))
5510 return -EINVAL;
5511
5512 /* Need to zero it in case userspace passes
5513 * in a smaller bpf_line_info object.
5514 */
5515 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
5516 GFP_KERNEL | __GFP_NOWARN);
5517 if (!linfo)
5518 return -ENOMEM;
5519
5520 prog = env->prog;
5521 btf = prog->aux->btf;
5522
5523 s = 0;
5524 sub = env->subprog_info;
5525 ulinfo = u64_to_user_ptr(attr->line_info);
5526 expected_size = sizeof(struct bpf_line_info);
5527 ncopy = min_t(u32, expected_size, rec_size);
5528 for (i = 0; i < nr_linfo; i++) {
5529 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
5530 if (err) {
5531 if (err == -E2BIG) {
5532 verbose(env, "nonzero trailing record in line_info");
5533 if (put_user(expected_size,
5534 &uattr->line_info_rec_size))
5535 err = -EFAULT;
5536 }
5537 goto err_free;
5538 }
5539
5540 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
5541 err = -EFAULT;
5542 goto err_free;
5543 }
5544
5545 /*
5546 * Check insn_off to ensure it is
5547 * 1) strictly increasing AND
5548 * 2) bounded by prog->len
5549 *
5550 * The linfo[0].insn_off == 0 check logically falls into
5551 * the later "missing bpf_line_info for func..." case
5552 * because the first linfo[0].insn_off must belong to the
5553 * first sub as well, and the first sub must have
5554 * subprog_info[0].start == 0.
5555 */
5556 if ((i && linfo[i].insn_off <= prev_offset) ||
5557 linfo[i].insn_off >= prog->len) {
5558 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
5559 i, linfo[i].insn_off, prev_offset,
5560 prog->len);
5561 err = -EINVAL;
5562 goto err_free;
5563 }
5564
5565 if (!prog->insnsi[linfo[i].insn_off].code) {
5566 verbose(env,
5567 "Invalid insn code at line_info[%u].insn_off\n",
5568 i);
5569 err = -EINVAL;
5570 goto err_free;
5571 }
5572
5573 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
5574 !btf_name_by_offset(btf, linfo[i].file_name_off)) {
5575 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
5576 err = -EINVAL;
5577 goto err_free;
5578 }
5579
5580 if (s != env->subprog_cnt) {
5581 if (linfo[i].insn_off == sub[s].start) {
5582 sub[s].linfo_idx = i;
5583 s++;
5584 } else if (sub[s].start < linfo[i].insn_off) {
5585 verbose(env, "missing bpf_line_info for func#%u\n", s);
5586 err = -EINVAL;
5587 goto err_free;
5588 }
5589 }
5590
5591 prev_offset = linfo[i].insn_off;
5592 ulinfo += rec_size;
5593 }
5594
5595 if (s != env->subprog_cnt) {
5596 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
5597 env->subprog_cnt - s, s);
5598 err = -EINVAL;
5599 goto err_free;
5600 }
5601
5602 prog->aux->linfo = linfo;
5603 prog->aux->nr_linfo = nr_linfo;
5604
5605 return 0;
5606
5607err_free:
5608 kvfree(linfo);
5609 return err;
5610}
5611
5612static int check_btf_info(struct bpf_verifier_env *env,
5613 const union bpf_attr *attr,
5614 union bpf_attr __user *uattr)
5615{
5616 struct btf *btf;
5617 int err;
5618
5619 if (!attr->func_info_cnt && !attr->line_info_cnt)
5620 return 0;
5621
5622 btf = btf_get_by_fd(attr->prog_btf_fd);
5623 if (IS_ERR(btf))
5624 return PTR_ERR(btf);
5625 env->prog->aux->btf = btf;
5626
5627 err = check_btf_func(env, attr, uattr);
5628 if (err)
5629 return err;
5630
5631 err = check_btf_line(env, attr, uattr);
5632 if (err)
5633 return err;
5634
5635 return 0;
5636}
5637
5638/* check %cur's range satisfies %old's */
5639static bool range_within(struct bpf_reg_state *old,
5640 struct bpf_reg_state *cur)
5641{
5642 return old->umin_value <= cur->umin_value &&
5643 old->umax_value >= cur->umax_value &&
5644 old->smin_value <= cur->smin_value &&
5645 old->smax_value >= cur->smax_value;
5646}
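A quick standalone illustration of this containment rule, with bpf_reg_state trimmed down to the four bounds the helper actually reads (the struct and function names below are illustrative, not kernel API): an explored register known to lie in [0, 100] is satisfied by a current register confined to [10, 50], but not by one that may reach 200.

#include <assert.h>

struct bounds { long long smin, smax; unsigned long long umin, umax; };

static int bounds_within(const struct bounds *old, const struct bounds *cur)
{
	return old->umin <= cur->umin && old->umax >= cur->umax &&
	       old->smin <= cur->smin && old->smax >= cur->smax;
}

int main(void)
{
	struct bounds old = { 0, 100, 0, 100 };
	struct bounds ok  = { 10, 50, 10, 50 };
	struct bounds bad = { 10, 200, 10, 200 };

	assert(bounds_within(&old, &ok));	/* cur is narrower: prunable   */
	assert(!bounds_within(&old, &bad));	/* cur may exceed old's bounds */
	return 0;
}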
5647
5648/* Maximum number of register states that can exist at once */
5649#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
5650struct idpair {
5651 u32 old;
5652 u32 cur;
5653};
5654
5655/* If in the old state two registers had the same id, then they need to have
5656 * the same id in the new state as well. But that id could be different from
5657 * the old state, so we need to track the mapping from old to new ids.
5658 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
5659 * regs with old id 5 must also have new id 9 for the new state to be safe. But
5660 * regs with a different old id could still have new id 9; we don't care about
5661 * that.
5662 * So we look through our idmap to see if this old id has been seen before. If
5663 * so, we require the new id to match; otherwise, we add the id pair to the map.
5664 */
5665static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
5666{
5667 unsigned int i;
5668
5669 for (i = 0; i < ID_MAP_SIZE; i++) {
5670 if (!idmap[i].old) {
5671 /* Reached an empty slot; haven't seen this id before */
5672 idmap[i].old = old_id;
5673 idmap[i].cur = cur_id;
5674 return true;
5675 }
5676 if (idmap[i].old == old_id)
5677 return idmap[i].cur == cur_id;
5678 }
5679 /* We ran out of idmap slots, which should be impossible */
5680 WARN_ON_ONCE(1);
5681 return false;
5682}
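The mapping rule reads more concretely with a few calls against a scratch map. Below is a standalone sketch mirroring check_ids() with a smaller, purely illustrative slot count; it is not kernel code.

#include <assert.h>
#include <string.h>

#define SLOTS 8
struct pair { unsigned int old, cur; };

static int ids_match(unsigned int old_id, unsigned int cur_id, struct pair *map)
{
	unsigned int i;

	for (i = 0; i < SLOTS; i++) {
		if (!map[i].old) {		/* first time we see old_id */
			map[i].old = old_id;
			map[i].cur = cur_id;
			return 1;
		}
		if (map[i].old == old_id)
			return map[i].cur == cur_id;
	}
	return 0;				/* out of slots: treat as unsafe */
}

int main(void)
{
	struct pair map[SLOTS];

	memset(map, 0, sizeof(map));
	assert(ids_match(5, 9, map));	/* records old 5 -> new 9        */
	assert(ids_match(7, 9, map));	/* different old id: don't care  */
	assert(ids_match(5, 9, map));	/* consistent with the first use */
	assert(!ids_match(5, 3, map));	/* old 5 already mapped to 9     */
	return 0;
}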
5683
5684static void clean_func_state(struct bpf_verifier_env *env,
5685 struct bpf_func_state *st)
5686{
5687 enum bpf_reg_liveness live;
5688 int i, j;
5689
5690 for (i = 0; i < BPF_REG_FP; i++) {
5691 live = st->regs[i].live;
5692 /* liveness must not touch this register anymore */
5693 st->regs[i].live |= REG_LIVE_DONE;
5694 if (!(live & REG_LIVE_READ))
5695 /* since the register is unused, clear its state
5696 * to make further comparison simpler
5697 */
5698 __mark_reg_not_init(&st->regs[i]);
5699 }
5700
5701 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
5702 live = st->stack[i].spilled_ptr.live;
5703 /* liveness must not touch this stack slot anymore */
5704 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
5705 if (!(live & REG_LIVE_READ)) {
5706 __mark_reg_not_init(&st->stack[i].spilled_ptr);
5707 for (j = 0; j < BPF_REG_SIZE; j++)
5708 st->stack[i].slot_type[j] = STACK_INVALID;
5709 }
5710 }
5711}
5712
5713static void clean_verifier_state(struct bpf_verifier_env *env,
5714 struct bpf_verifier_state *st)
5715{
5716 int i;
5717
5718 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
5719 /* all regs in this state in all frames were already marked */
5720 return;
5721
5722 for (i = 0; i <= st->curframe; i++)
5723 clean_func_state(env, st->frame[i]);
5724}
5725
5726/* the parentage chains form a tree.
5727 * the verifier states are added to state lists at given insn and
5728 * pushed into state stack for future exploration.
5729 * when the verifier reaches bpf_exit insn some of the verifier states
5730 * stored in the state lists have their final liveness state already,
5731 * but a lot of states will get revised from a liveness point of view when
5732 * the verifier explores other branches.
5733 * Example:
5734 * 1: r0 = 1
5735 * 2: if r1 == 100 goto pc+1
5736 * 3: r0 = 2
5737 * 4: exit
5738 * when the verifier reaches exit insn the register r0 in the state list of
5739 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
5740 * of insn 2 and goes exploring further. At insn 4 it will walk the
5741 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
5742 *
5743 * Since the verifier pushes the branch states as it sees them while exploring
5744 * the program, walking the branch instruction for the second
5745 * time means that all states below this branch were already explored and
5746 * their final liveness marks are already propagated.
5747 * Hence when the verifier completes the search of state list in is_state_visited()
5748 * we can call this clean_live_states() function to mark all liveness states
5749 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
5750 * will not be used.
5751 * This function also clears the registers and stack slots that were not READ
5752 * to simplify state merging.
5753 *
5754 * An important note here is that walking the same branch instruction in the callee
5755 * doesn't mean that the states are DONE. The verifier also has to compare
5756 * the callsites.
5757 */
5758static void clean_live_states(struct bpf_verifier_env *env, int insn,
5759 struct bpf_verifier_state *cur)
5760{
5761 struct bpf_verifier_state_list *sl;
5762 int i;
5763
5764 sl = env->explored_states[insn];
5765 if (!sl)
5766 return;
5767
5768 while (sl != STATE_LIST_MARK) {
5769 if (sl->state.curframe != cur->curframe)
5770 goto next;
5771 for (i = 0; i <= cur->curframe; i++)
5772 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
5773 goto next;
5774 clean_verifier_state(env, &sl->state);
5775next:
5776 sl = sl->next;
5777 }
5778}
5779
5780/* Returns true if (rold safe implies rcur safe) */
5781static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
5782 struct idpair *idmap)
5783{
5784 bool equal;
5785
5786 if (!(rold->live & REG_LIVE_READ))
5787 /* explored state didn't use this */
5788 return true;
5789
5790 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
5791
5792 if (rold->type == PTR_TO_STACK)
5793 /* two stack pointers are equal only if they're pointing to
5794 * the same stack frame, since fp-8 in foo != fp-8 in bar
5795 */
5796 return equal && rold->frameno == rcur->frameno;
5797
5798 if (equal)
5799 return true;
5800
5801 if (rold->type == NOT_INIT)
5802 /* explored state can't have used this */
5803 return true;
5804 if (rcur->type == NOT_INIT)
5805 return false;
5806 switch (rold->type) {
5807 case SCALAR_VALUE:
5808 if (rcur->type == SCALAR_VALUE) {
5809 /* new val must satisfy old val knowledge */
5810 return range_within(rold, rcur) &&
5811 tnum_in(rold->var_off, rcur->var_off);
5812 } else {
5813 /* We're trying to use a pointer in place of a scalar.
5814 * Even if the scalar was unbounded, this could lead to
5815 * pointer leaks because scalars are allowed to leak
5816 * while pointers are not. We could make this safe in
5817 * special cases if root is calling us, but it's
5818 * probably not worth the hassle.
5819 */
5820 return false;
5821 }
5822 case PTR_TO_MAP_VALUE:
5823 /* If the new min/max/var_off satisfy the old ones and
5824 * everything else matches, we are OK.
5825 * 'id' is not compared, since it's only used for maps with
5826 * bpf_spin_lock inside map element and in such cases if
5827 * the rest of the prog is valid for one map element then
5828 * it's valid for all map elements regardless of the key
5829 * used in bpf_map_lookup()
5830 */
5831 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
5832 range_within(rold, rcur) &&
5833 tnum_in(rold->var_off, rcur->var_off);
5834 case PTR_TO_MAP_VALUE_OR_NULL:
5835 /* a PTR_TO_MAP_VALUE could be safe to use as a
5836 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
5837 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
5838 * checked, doing so could have affected others with the same
5839 * id, and we can't check for that because we lost the id when
5840 * we converted to a PTR_TO_MAP_VALUE.
5841 */
5842 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
5843 return false;
5844 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
5845 return false;
5846 /* Check our ids match any regs they're supposed to */
5847 return check_ids(rold->id, rcur->id, idmap);
5848 case PTR_TO_PACKET_META:
5849 case PTR_TO_PACKET:
5850 if (rcur->type != rold->type)
5851 return false;
5852 /* We must have at least as much range as the old ptr
5853 * did, so that any accesses which were safe before are
5854 * still safe. This is true even if old range < old off,
5855 * since someone could have accessed through (ptr - k), or
5856 * even done ptr -= k in a register, to get a safe access.
5857 */
5858 if (rold->range > rcur->range)
5859 return false;
5860 /* If the offsets don't match, we can't trust our alignment;
5861 * nor can we be sure that we won't fall out of range.
5862 */
5863 if (rold->off != rcur->off)
5864 return false;
5865 /* id relations must be preserved */
5866 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
5867 return false;
5868 /* new val must satisfy old val knowledge */
5869 return range_within(rold, rcur) &&
5870 tnum_in(rold->var_off, rcur->var_off);
5871 case PTR_TO_CTX:
5872 case CONST_PTR_TO_MAP:
5873 case PTR_TO_PACKET_END:
5874 case PTR_TO_FLOW_KEYS:
5875 case PTR_TO_SOCKET:
5876 case PTR_TO_SOCKET_OR_NULL:
5877 case PTR_TO_SOCK_COMMON:
5878 case PTR_TO_SOCK_COMMON_OR_NULL:
5879 case PTR_TO_TCP_SOCK:
5880 case PTR_TO_TCP_SOCK_OR_NULL:
5881 /* Only valid matches are exact, which memcmp() above
5882 * would have accepted
5883 */
5884 default:
5885 /* Don't know what's going on, just say it's not safe */
5886 return false;
5887 }
5888
5889 /* Shouldn't get here; if we do, say it's not safe */
5890 WARN_ON_ONCE(1);
5891 return false;
5892}
5893
5894static bool stacksafe(struct bpf_func_state *old,
5895 struct bpf_func_state *cur,
5896 struct idpair *idmap)
5897{
5898 int i, spi;
5899
5900 /* walk slots of the explored stack and ignore any additional
5901 * slots in the current stack, since explored(safe) state
5902 * didn't use them
5903 */
5904 for (i = 0; i < old->allocated_stack; i++) {
5905 spi = i / BPF_REG_SIZE;
5906
5907 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
5908 i += BPF_REG_SIZE - 1;
5909 /* explored state didn't use this */
5910 continue;
5911 }
5912
5913 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
5914 continue;
5915
5916 /* explored stack has more populated slots than current stack
5917 * and these slots were used
5918 */
5919 if (i >= cur->allocated_stack)
5920 return false;
5921
5922 /* if old state was safe with misc data in the stack
5923 * it will be safe with zero-initialized stack.
5924 * The opposite is not true
5925 */
5926 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
5927 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
5928 continue;
5929 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
5930 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
5931 /* Ex: old explored (safe) state has STACK_SPILL in
5932 * this stack slot, but current has STACK_MISC ->
5933 * these verifier states are not equivalent;
5934 * return false to continue verification of this path
5935 */
5936 return false;
5937 if (i % BPF_REG_SIZE)
5938 continue;
5939 if (old->stack[spi].slot_type[0] != STACK_SPILL)
5940 continue;
5941 if (!regsafe(&old->stack[spi].spilled_ptr,
5942 &cur->stack[spi].spilled_ptr,
5943 idmap))
5944 /* when explored and current stack slot are both storing
5945 * spilled registers, check that the stored pointer types
5946 * are the same as well.
5947 * Ex: explored safe path could have stored
5948 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
5949 * but current path has stored:
5950 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
5951 * such verifier states are not equivalent.
5952 * return false to continue verification of this path
5953 */
5954 return false;
5955 }
5956 return true;
5957}
5958
5959static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
5960{
5961 if (old->acquired_refs != cur->acquired_refs)
5962 return false;
5963 return !memcmp(old->refs, cur->refs,
5964 sizeof(*old->refs) * old->acquired_refs);
5965}
5966
5967/* compare two verifier states
5968 *
5969 * all states stored in state_list are known to be valid, since
5970 * verifier reached 'bpf_exit' instruction through them
5971 *
5972 * this function is called when the verifier is exploring different branches of
5973 * execution popped from the state stack. If it sees an old state that has
5974 * more strict register state and more strict stack state then this execution
5975 * branch doesn't need to be explored further, since verifier already
5976 * concluded that more strict state leads to valid finish.
5977 *
5978 * Therefore two states are equivalent if register state is more conservative
5979 * and explored stack state is more conservative than the current one.
5980 * Example:
5981 * explored current
5982 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
5983 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
5984 *
5985 * In other words, if the current stack state (the one being explored) has more
5986 * valid slots than the old one that already passed validation, it means
5987 * the verifier can stop exploring and conclude that the current state is valid too
5988 *
5989 * Similarly with registers. If explored state has register type as invalid
5990 * whereas register type in current state is meaningful, it means that
5991 * the current state will reach 'bpf_exit' instruction safely
5992 */
5993static bool func_states_equal(struct bpf_func_state *old,
5994 struct bpf_func_state *cur)
5995{
5996 struct idpair *idmap;
5997 bool ret = false;
5998 int i;
5999
6000 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
6001 /* If we failed to allocate the idmap, just say it's not safe */
6002 if (!idmap)
6003 return false;
6004
6005 for (i = 0; i < MAX_BPF_REG; i++) {
6006 if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
6007 goto out_free;
6008 }
6009
6010 if (!stacksafe(old, cur, idmap))
6011 goto out_free;
6012
6013 if (!refsafe(old, cur))
6014 goto out_free;
6015 ret = true;
6016out_free:
6017 kfree(idmap);
6018 return ret;
6019}
6020
6021static bool states_equal(struct bpf_verifier_env *env,
6022 struct bpf_verifier_state *old,
6023 struct bpf_verifier_state *cur)
6024{
6025 int i;
6026
6027 if (old->curframe != cur->curframe)
6028 return false;
6029
6030 /* Verification state from speculative execution simulation
6031 * must never prune a non-speculative execution one.
6032 */
6033 if (old->speculative && !cur->speculative)
6034 return false;
6035
6036 if (old->active_spin_lock != cur->active_spin_lock)
6037 return false;
6038
6039 /* for states to be equal callsites have to be the same
6040 * and all frame states need to be equivalent
6041 */
6042 for (i = 0; i <= old->curframe; i++) {
6043 if (old->frame[i]->callsite != cur->frame[i]->callsite)
6044 return false;
6045 if (!func_states_equal(old->frame[i], cur->frame[i]))
6046 return false;
6047 }
6048 return true;
6049}
6050
6051/* A write screens off any subsequent reads; but write marks come from the
6052 * straight-line code between a state and its parent. When we arrive at an
6053 * equivalent state (jump target or such) we didn't arrive by the straight-line
6054 * code, so read marks in the state must propagate to the parent regardless
6055 * of the state's write marks. That's what 'parent == state->parent' comparison
6056 * in mark_reg_read() is for.
6057 */
6058static int propagate_liveness(struct bpf_verifier_env *env,
6059 const struct bpf_verifier_state *vstate,
6060 struct bpf_verifier_state *vparent)
6061{
6062 int i, frame, err = 0;
6063 struct bpf_func_state *state, *parent;
6064
6065 if (vparent->curframe != vstate->curframe) {
6066 WARN(1, "propagate_live: parent frame %d current frame %d\n",
6067 vparent->curframe, vstate->curframe);
6068 return -EFAULT;
6069 }
6070 /* Propagate read liveness of registers... */
6071 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
6072 /* We don't need to worry about FP liveness because it's read-only */
6073 for (i = 0; i < BPF_REG_FP; i++) {
6074 if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
6075 continue;
6076 if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
6077 err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
6078 &vparent->frame[vstate->curframe]->regs[i]);
6079 if (err)
6080 return err;
6081 }
6082 }
6083
6084 /* ... and stack slots */
6085 for (frame = 0; frame <= vstate->curframe; frame++) {
6086 state = vstate->frame[frame];
6087 parent = vparent->frame[frame];
6088 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
6089 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
6090 if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
6091 continue;
6092 if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
6093 mark_reg_read(env, &state->stack[i].spilled_ptr,
6094 &parent->stack[i].spilled_ptr);
6095 }
6096 }
6097 return err;
6098}
6099
6100static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
6101{
6102 struct bpf_verifier_state_list *new_sl;
6103 struct bpf_verifier_state_list *sl;
6104 struct bpf_verifier_state *cur = env->cur_state, *new;
6105 int i, j, err, states_cnt = 0;
6106
6107 sl = env->explored_states[insn_idx];
6108 if (!sl)
6109 /* this 'insn_idx' instruction wasn't marked, so we will not
6110 * be doing state search here
6111 */
6112 return 0;
6113
6114 clean_live_states(env, insn_idx, cur);
6115
6116 while (sl != STATE_LIST_MARK) {
6117 if (states_equal(env, &sl->state, cur)) {
6118 /* reached equivalent register/stack state,
6119 * prune the search.
6120 * Registers read by the continuation are read by us.
6121 * If we have any write marks in env->cur_state, they
6122 * will prevent corresponding reads in the continuation
6123 * from reaching our parent (an explored_state). Our
6124 * own state will get the read marks recorded, but
6125 * they'll be immediately forgotten as we're pruning
6126 * this state and will pop a new one.
6127 */
6128 err = propagate_liveness(env, &sl->state, cur);
6129 if (err)
6130 return err;
6131 return 1;
6132 }
6133 sl = sl->next;
6134 states_cnt++;
6135 }
6136
6137 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
6138 return 0;
6139
6140 /* there were no equivalent states, remember current one.
6141 * technically the current state is not proven to be safe yet,
6142 * but it will either reach the outermost bpf_exit (which means it's safe)
6143 * or it will be rejected. Since there are no loops, we won't be
6144 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
6145 * again on the way to bpf_exit
6146 */
6147 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
6148 if (!new_sl)
6149 return -ENOMEM;
6150
6151 /* add new state to the head of linked list */
6152 new = &new_sl->state;
6153 err = copy_verifier_state(new, cur);
6154 if (err) {
6155 free_verifier_state(new, false);
6156 kfree(new_sl);
6157 return err;
6158 }
6159 new_sl->next = env->explored_states[insn_idx];
6160 env->explored_states[insn_idx] = new_sl;
6161 /* connect new state to parentage chain. Current frame needs all
6162 * registers connected. Only r6 - r9 of the callers are alive (pushed
6163 * to the stack implicitly by JITs) so in callers' frames connect just
6164 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
6165 * the state of the call instruction (with WRITTEN set), and r0 comes
6166 * from callee with its full parentage chain, anyway.
6167 */
6168 for (j = 0; j <= cur->curframe; j++)
6169 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
6170 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
6171 /* clear write marks in current state: the writes we did are not writes
6172 * our child did, so they don't screen off its reads from us.
6173 * (There are no read marks in current state, because reads always mark
6174 * their parent and current state never has children yet. Only
6175 * explored_states can get read marks.)
6176 */
6177 for (i = 0; i < BPF_REG_FP; i++)
6178 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
6179
6180 /* all stack frames are accessible from callee, clear them all */
6181 for (j = 0; j <= cur->curframe; j++) {
6182 struct bpf_func_state *frame = cur->frame[j];
6183 struct bpf_func_state *newframe = new->frame[j];
6184
6185 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
6186 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
6187 frame->stack[i].spilled_ptr.parent =
6188 &newframe->stack[i].spilled_ptr;
6189 }
6190 }
6191 return 0;
6192}
6193
6194/* Return true if it's OK to have the same insn return a different type. */
6195static bool reg_type_mismatch_ok(enum bpf_reg_type type)
6196{
6197 switch (type) {
6198 case PTR_TO_CTX:
6199 case PTR_TO_SOCKET:
6200 case PTR_TO_SOCKET_OR_NULL:
6201 case PTR_TO_SOCK_COMMON:
6202 case PTR_TO_SOCK_COMMON_OR_NULL:
6203 case PTR_TO_TCP_SOCK:
6204 case PTR_TO_TCP_SOCK_OR_NULL:
6205 return false;
6206 default:
6207 return true;
6208 }
6209}
6210
6211/* If an instruction was previously used with particular pointer types, then we
6212 * need to be careful to avoid cases such as the one below, where it may be ok
6213 * for one branch to access the pointer, but not ok for the other branch:
6214 *
6215 * R1 = sock_ptr
6216 * goto X;
6217 * ...
6218 * R1 = some_other_valid_ptr;
6219 * goto X;
6220 * ...
6221 * R2 = *(u32 *)(R1 + 0);
6222 */
6223static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
6224{
6225 return src != prev && (!reg_type_mismatch_ok(src) ||
6226 !reg_type_mismatch_ok(prev));
6227}
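For concreteness, the pattern this check rejects can be written down with the instruction macros from the kernel's filter.h (also mirrored under tools/). This is only a shape sketch, not a complete loadable program (R2 would itself need to be initialized before the test): on one path R1 keeps its initial PTR_TO_CTX type, on the other it is overwritten with the frame pointer, and both paths funnel into the same load.

#include <linux/filter.h>

static struct bpf_insn same_insn_two_ptr_types[] = {
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),		/* skip the mov: R1 stays ctx   */
	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),		/* R1 becomes a stack pointer   */
	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),	/* same insn, two pointer types */
	BPF_EXIT_INSN(),
};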
6228
6229static int do_check(struct bpf_verifier_env *env)
6230{
6231 struct bpf_verifier_state *state;
6232 struct bpf_insn *insns = env->prog->insnsi;
6233 struct bpf_reg_state *regs;
6234 int insn_cnt = env->prog->len, i;
6235 int insn_processed = 0;
6236 bool do_print_state = false;
6237
6238 env->prev_linfo = NULL;
6239
6240 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
6241 if (!state)
6242 return -ENOMEM;
6243 state->curframe = 0;
6244 state->speculative = false;
6245 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
6246 if (!state->frame[0]) {
6247 kfree(state);
6248 return -ENOMEM;
6249 }
6250 env->cur_state = state;
6251 init_func_state(env, state->frame[0],
6252 BPF_MAIN_FUNC /* callsite */,
6253 0 /* frameno */,
6254 0 /* subprogno, zero == main subprog */);
6255
6256 for (;;) {
6257 struct bpf_insn *insn;
6258 u8 class;
6259 int err;
6260
6261 if (env->insn_idx >= insn_cnt) {
6262 verbose(env, "invalid insn idx %d insn_cnt %d\n",
6263 env->insn_idx, insn_cnt);
6264 return -EFAULT;
6265 }
6266
6267 insn = &insns[env->insn_idx];
6268 class = BPF_CLASS(insn->code);
6269
6270 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
6271 verbose(env,
6272 "BPF program is too large. Processed %d insn\n",
6273 insn_processed);
6274 return -E2BIG;
6275 }
6276
6277 err = is_state_visited(env, env->insn_idx);
6278 if (err < 0)
6279 return err;
6280 if (err == 1) {
6281 /* found equivalent state, can prune the search */
6282 if (env->log.level) {
6283 if (do_print_state)
6284 verbose(env, "\nfrom %d to %d%s: safe\n",
6285 env->prev_insn_idx, env->insn_idx,
6286 env->cur_state->speculative ?
6287 " (speculative execution)" : "");
6288 else
6289 verbose(env, "%d: safe\n", env->insn_idx);
6290 }
6291 goto process_bpf_exit;
6292 }
6293
6294 if (signal_pending(current))
6295 return -EAGAIN;
6296
6297 if (need_resched())
6298 cond_resched();
6299
6300 if (env->log.level > 1 || (env->log.level && do_print_state)) {
6301 if (env->log.level > 1)
6302 verbose(env, "%d:", env->insn_idx);
6303 else
6304 verbose(env, "\nfrom %d to %d%s:",
6305 env->prev_insn_idx, env->insn_idx,
6306 env->cur_state->speculative ?
6307 " (speculative execution)" : "");
6308 print_verifier_state(env, state->frame[state->curframe]);
6309 do_print_state = false;
6310 }
6311
6312 if (env->log.level) {
6313 const struct bpf_insn_cbs cbs = {
6314 .cb_print = verbose,
6315 .private_data = env,
6316 };
6317
6318 verbose_linfo(env, env->insn_idx, "; ");
6319 verbose(env, "%d: ", env->insn_idx);
6320 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
6321 }
6322
6323 if (bpf_prog_is_dev_bound(env->prog->aux)) {
6324 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
6325 env->prev_insn_idx);
6326 if (err)
6327 return err;
6328 }
6329
6330 regs = cur_regs(env);
6331 env->insn_aux_data[env->insn_idx].seen = true;
6332
6333 if (class == BPF_ALU || class == BPF_ALU64) {
6334 err = check_alu_op(env, insn);
6335 if (err)
6336 return err;
6337
6338 } else if (class == BPF_LDX) {
6339 enum bpf_reg_type *prev_src_type, src_reg_type;
6340
6341 /* check for reserved fields is already done */
6342
6343 /* check src operand */
6344 err = check_reg_arg(env, insn->src_reg, SRC_OP);
6345 if (err)
6346 return err;
6347
6348 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
6349 if (err)
6350 return err;
6351
6352 src_reg_type = regs[insn->src_reg].type;
6353
6354 /* check that memory (src_reg + off) is readable,
6355 * the state of dst_reg will be updated by this func
6356 */
6357 err = check_mem_access(env, env->insn_idx, insn->src_reg,
6358 insn->off, BPF_SIZE(insn->code),
6359 BPF_READ, insn->dst_reg, false);
6360 if (err)
6361 return err;
6362
6363 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
6364
6365 if (*prev_src_type == NOT_INIT) {
6366 /* saw a valid insn
6367 * dst_reg = *(u32 *)(src_reg + off)
6368 * save type to validate intersecting paths
6369 */
6370 *prev_src_type = src_reg_type;
6371
6372 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
6373 /* Abuser program is trying to use the same insn
6374 * dst_reg = *(u32*) (src_reg + off)
6375 * with different pointer types:
6376 * src_reg == ctx in one branch and
6377 * src_reg == stack|map in some other branch.
6378 * Reject it.
6379 */
6380 verbose(env, "same insn cannot be used with different pointers\n");
6381 return -EINVAL;
6382 }
6383
6384 } else if (class == BPF_STX) {
6385 enum bpf_reg_type *prev_dst_type, dst_reg_type;
6386
6387 if (BPF_MODE(insn->code) == BPF_XADD) {
6388 err = check_xadd(env, env->insn_idx, insn);
6389 if (err)
6390 return err;
6391 env->insn_idx++;
6392 continue;
6393 }
6394
6395 /* check src1 operand */
6396 err = check_reg_arg(env, insn->src_reg, SRC_OP);
6397 if (err)
6398 return err;
6399 /* check src2 operand */
6400 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6401 if (err)
6402 return err;
6403
6404 dst_reg_type = regs[insn->dst_reg].type;
6405
6406 /* check that memory (dst_reg + off) is writeable */
6407 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
6408 insn->off, BPF_SIZE(insn->code),
6409 BPF_WRITE, insn->src_reg, false);
6410 if (err)
6411 return err;
6412
6413 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
6414
6415 if (*prev_dst_type == NOT_INIT) {
6416 *prev_dst_type = dst_reg_type;
6417 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
6418 verbose(env, "same insn cannot be used with different pointers\n");
6419 return -EINVAL;
6420 }
6421
6422 } else if (class == BPF_ST) {
6423 if (BPF_MODE(insn->code) != BPF_MEM ||
6424 insn->src_reg != BPF_REG_0) {
6425 verbose(env, "BPF_ST uses reserved fields\n");
6426 return -EINVAL;
6427 }
6428 /* check src operand */
6429 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6430 if (err)
6431 return err;
6432
6433 if (is_ctx_reg(env, insn->dst_reg)) {
6434 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
6435 insn->dst_reg,
6436 reg_type_str[reg_state(env, insn->dst_reg)->type]);
6437 return -EACCES;
6438 }
6439
6440 /* check that memory (dst_reg + off) is writeable */
6441 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
6442 insn->off, BPF_SIZE(insn->code),
6443 BPF_WRITE, -1, false);
6444 if (err)
6445 return err;
6446
6447 } else if (class == BPF_JMP || class == BPF_JMP32) {
6448 u8 opcode = BPF_OP(insn->code);
6449
6450 if (opcode == BPF_CALL) {
6451 if (BPF_SRC(insn->code) != BPF_K ||
6452 insn->off != 0 ||
6453 (insn->src_reg != BPF_REG_0 &&
6454 insn->src_reg != BPF_PSEUDO_CALL) ||
6455 insn->dst_reg != BPF_REG_0 ||
6456 class == BPF_JMP32) {
6457 verbose(env, "BPF_CALL uses reserved fields\n");
6458 return -EINVAL;
6459 }
6460
6461 if (env->cur_state->active_spin_lock &&
6462 (insn->src_reg == BPF_PSEUDO_CALL ||
6463 insn->imm != BPF_FUNC_spin_unlock)) {
6464 verbose(env, "function calls are not allowed while holding a lock\n");
6465 return -EINVAL;
6466 }
6467 if (insn->src_reg == BPF_PSEUDO_CALL)
6468 err = check_func_call(env, insn, &env->insn_idx);
6469 else
6470 err = check_helper_call(env, insn->imm, env->insn_idx);
6471 if (err)
6472 return err;
6473
6474 } else if (opcode == BPF_JA) {
6475 if (BPF_SRC(insn->code) != BPF_K ||
6476 insn->imm != 0 ||
6477 insn->src_reg != BPF_REG_0 ||
6478 insn->dst_reg != BPF_REG_0 ||
6479 class == BPF_JMP32) {
6480 verbose(env, "BPF_JA uses reserved fields\n");
6481 return -EINVAL;
6482 }
6483
6484 env->insn_idx += insn->off + 1;
6485 continue;
6486
6487 } else if (opcode == BPF_EXIT) {
6488 if (BPF_SRC(insn->code) != BPF_K ||
6489 insn->imm != 0 ||
6490 insn->src_reg != BPF_REG_0 ||
6491 insn->dst_reg != BPF_REG_0 ||
6492 class == BPF_JMP32) {
6493 verbose(env, "BPF_EXIT uses reserved fields\n");
6494 return -EINVAL;
6495 }
6496
6497 if (env->cur_state->active_spin_lock) {
6498 verbose(env, "bpf_spin_unlock is missing\n");
6499 return -EINVAL;
6500 }
6501
6502 if (state->curframe) {
6503 /* exit from nested function */
6504 env->prev_insn_idx = env->insn_idx;
6505 err = prepare_func_exit(env, &env->insn_idx);
6506 if (err)
6507 return err;
6508 do_print_state = true;
6509 continue;
6510 }
6511
6512 err = check_reference_leak(env);
6513 if (err)
6514 return err;
6515
6516 /* eBPF calling convention is such that R0 is used
6517 * to return the value from the eBPF program.
6518 * Make sure that it's readable at the time
6519 * of bpf_exit, which means that the program wrote
6520 * something into it earlier
6521 */
6522 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
6523 if (err)
6524 return err;
6525
6526 if (is_pointer_value(env, BPF_REG_0)) {
6527 verbose(env, "R0 leaks addr as return value\n");
6528 return -EACCES;
6529 }
6530
6531 err = check_return_code(env);
6532 if (err)
6533 return err;
6534process_bpf_exit:
6535 err = pop_stack(env, &env->prev_insn_idx,
6536 &env->insn_idx);
6537 if (err < 0) {
6538 if (err != -ENOENT)
6539 return err;
6540 break;
6541 } else {
6542 do_print_state = true;
6543 continue;
6544 }
6545 } else {
6546 err = check_cond_jmp_op(env, insn, &env->insn_idx);
6547 if (err)
6548 return err;
6549 }
6550 } else if (class == BPF_LD) {
6551 u8 mode = BPF_MODE(insn->code);
6552
6553 if (mode == BPF_ABS || mode == BPF_IND) {
6554 err = check_ld_abs(env, insn);
6555 if (err)
6556 return err;
6557
6558 } else if (mode == BPF_IMM) {
6559 err = check_ld_imm(env, insn);
6560 if (err)
6561 return err;
6562
6563 env->insn_idx++;
6564 env->insn_aux_data[env->insn_idx].seen = true;
6565 } else {
6566 verbose(env, "invalid BPF_LD mode\n");
6567 return -EINVAL;
6568 }
6569 } else {
6570 verbose(env, "unknown insn class %d\n", class);
6571 return -EINVAL;
6572 }
6573
6574 env->insn_idx++;
6575 }
6576
6577 verbose(env, "processed %d insns (limit %d), stack depth ",
6578 insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
6579 for (i = 0; i < env->subprog_cnt; i++) {
6580 u32 depth = env->subprog_info[i].stack_depth;
6581
6582 verbose(env, "%d", depth);
6583 if (i + 1 < env->subprog_cnt)
6584 verbose(env, "+");
6585 }
6586 verbose(env, "\n");
6587 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
6588 return 0;
6589}
6590
6591static int check_map_prealloc(struct bpf_map *map)
6592{
6593 return (map->map_type != BPF_MAP_TYPE_HASH &&
6594 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
6595 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
6596 !(map->map_flags & BPF_F_NO_PREALLOC);
6597}
6598
6599static bool is_tracing_prog_type(enum bpf_prog_type type)
6600{
6601 switch (type) {
6602 case BPF_PROG_TYPE_KPROBE:
6603 case BPF_PROG_TYPE_TRACEPOINT:
6604 case BPF_PROG_TYPE_PERF_EVENT:
6605 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6606 return true;
6607 default:
6608 return false;
6609 }
6610}
6611
6612static int check_map_prog_compatibility(struct bpf_verifier_env *env,
6613 struct bpf_map *map,
6614 struct bpf_prog *prog)
6615
6616{
6617 /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
6618 * preallocated hash maps, since doing memory allocation
6619 * in overflow_handler can crash depending on where the NMI got
6620 * triggered.
6621 */
6622 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
6623 if (!check_map_prealloc(map)) {
6624 verbose(env, "perf_event programs can only use preallocated hash map\n");
6625 return -EINVAL;
6626 }
6627 if (map->inner_map_meta &&
6628 !check_map_prealloc(map->inner_map_meta)) {
6629 verbose(env, "perf_event programs can only use preallocated inner hash map\n");
6630 return -EINVAL;
6631 }
6632 }
6633
6634 if ((is_tracing_prog_type(prog->type) ||
6635 prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
6636 map_value_has_spin_lock(map)) {
6637 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
6638 return -EINVAL;
6639 }
6640
6641 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
6642 !bpf_offload_prog_map_match(prog, map)) {
6643 verbose(env, "offload device mismatch between prog and map\n");
6644 return -EINVAL;
6645 }
6646
6647 return 0;
6648}
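Seen from the map creator's side, the perf_event constraint simply means not passing BPF_F_NO_PREALLOC for hash maps such programs will use. A hedged user-space sketch using the raw bpf(2) syscall follows; the key/value sizes and entry count are arbitrary.

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Create a hash map that a BPF_PROG_TYPE_PERF_EVENT program may use:
 * plain BPF_MAP_TYPE_HASH with preallocation left enabled.  Setting
 * BPF_F_NO_PREALLOC in map_flags would make the check above fail with
 * "perf_event programs can only use preallocated hash map".
 */
static int create_prealloc_hash(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_HASH;
	attr.key_size    = sizeof(__u32);
	attr.value_size  = sizeof(__u64);
	attr.max_entries = 1024;
	attr.map_flags   = 0;		/* no BPF_F_NO_PREALLOC */

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}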
6649
6650static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
6651{
6652 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
6653 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
6654}
6655
6656/* look for pseudo eBPF instructions that access map FDs and
6657 * replace them with actual map pointers
6658 */
6659static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
6660{
6661 struct bpf_insn *insn = env->prog->insnsi;
6662 int insn_cnt = env->prog->len;
6663 int i, j, err;
6664
6665 err = bpf_prog_calc_tag(env->prog);
6666 if (err)
6667 return err;
6668
6669 for (i = 0; i < insn_cnt; i++, insn++) {
6670 if (BPF_CLASS(insn->code) == BPF_LDX &&
6671 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
6672 verbose(env, "BPF_LDX uses reserved fields\n");
6673 return -EINVAL;
6674 }
6675
6676 if (BPF_CLASS(insn->code) == BPF_STX &&
6677 ((BPF_MODE(insn->code) != BPF_MEM &&
6678 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
6679 verbose(env, "BPF_STX uses reserved fields\n");
6680 return -EINVAL;
6681 }
6682
6683 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
6684 struct bpf_map *map;
6685 struct fd f;
6686
6687 if (i == insn_cnt - 1 || insn[1].code != 0 ||
6688 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
6689 insn[1].off != 0) {
6690 verbose(env, "invalid bpf_ld_imm64 insn\n");
6691 return -EINVAL;
6692 }
6693
6694 if (insn->src_reg == 0)
6695 /* valid generic load 64-bit imm */
6696 goto next_insn;
6697
6698 if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
6699 insn[1].imm != 0) {
6700 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
6701 return -EINVAL;
6702 }
6703
6704 f = fdget(insn[0].imm);
6705 map = __bpf_map_get(f);
6706 if (IS_ERR(map)) {
6707 verbose(env, "fd %d is not pointing to valid bpf_map\n",
6708 insn[0].imm);
6709 return PTR_ERR(map);
6710 }
6711
6712 err = check_map_prog_compatibility(env, map, env->prog);
6713 if (err) {
6714 fdput(f);
6715 return err;
6716 }
6717
6718 /* store map pointer inside BPF_LD_IMM64 instruction */
6719 insn[0].imm = (u32) (unsigned long) map;
6720 insn[1].imm = ((u64) (unsigned long) map) >> 32;
6721
6722 /* check whether we recorded this map already */
6723 for (j = 0; j < env->used_map_cnt; j++)
6724 if (env->used_maps[j] == map) {
6725 fdput(f);
6726 goto next_insn;
6727 }
6728
6729 if (env->used_map_cnt >= MAX_USED_MAPS) {
6730 fdput(f);
6731 return -E2BIG;
6732 }
6733
6734 /* hold the map. If the program is rejected by the verifier,
6735 * the map will be released by release_maps(); otherwise it
6736 * will be used by the valid program until it's unloaded,
6737 * and all maps are released in free_used_maps()
6738 */
6739 map = bpf_map_inc(map, false);
6740 if (IS_ERR(map)) {
6741 fdput(f);
6742 return PTR_ERR(map);
6743 }
6744 env->used_maps[env->used_map_cnt++] = map;
6745
6746 if (bpf_map_is_cgroup_storage(map) &&
6747 bpf_cgroup_storage_assign(env->prog, map)) {
6748 verbose(env, "only one cgroup storage of each type is allowed\n");
6749 fdput(f);
6750 return -EBUSY;
6751 }
6752
6753 fdput(f);
6754next_insn:
6755 insn++;
6756 i++;
6757 continue;
6758 }
6759
6760 /* Basic sanity check before we invest more work here. */
6761 if (!bpf_opcode_in_insntable(insn->code)) {
6762 verbose(env, "unknown opcode %02x\n", insn->code);
6763 return -EINVAL;
6764 }
6765 }
6766
6767 /* now all pseudo BPF_LD_IMM64 instructions load valid
6768 * 'struct bpf_map *' into a register instead of a user-supplied map fd.
6769 * These pointers will be used later by the verifier to validate map access.
6770 */
6771 return 0;
6772}
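For reference, the loader side of this rewrite looks roughly like the sketch below (user space, hedged: the destination register and helper name are arbitrary). It emits the two-slot BPF_LD_IMM64 form the loop above recognizes: src_reg set to BPF_PSEUDO_MAP_FD, the map file descriptor in the first imm, and a second slot that must be all zero. After replace_map_fd_with_map_ptr() those two imm fields instead carry the low and high 32 bits of the kernel's struct bpf_map pointer.

#include <linux/bpf.h>

static void emit_ld_map_fd(struct bpf_insn insn[2], int map_fd)
{
	insn[0] = (struct bpf_insn) {
		.code    = BPF_LD | BPF_DW | BPF_IMM,
		.dst_reg = BPF_REG_1,
		.src_reg = BPF_PSEUDO_MAP_FD,	/* tells the verifier imm is a map fd */
		.imm     = map_fd,
	};
	insn[1] = (struct bpf_insn) { 0 };	/* second slot: must stay all zero */
}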
6773
6774/* drop refcnt of maps used by the rejected program */
6775static void release_maps(struct bpf_verifier_env *env)
6776{
6777 enum bpf_cgroup_storage_type stype;
6778 int i;
6779
6780 for_each_cgroup_storage_type(stype) {
6781 if (!env->prog->aux->cgroup_storage[stype])
6782 continue;
6783 bpf_cgroup_storage_release(env->prog,
6784 env->prog->aux->cgroup_storage[stype]);
6785 }
6786
6787 for (i = 0; i < env->used_map_cnt; i++)
6788 bpf_map_put(env->used_maps[i]);
6789}
6790
6791/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
6792static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
6793{
6794 struct bpf_insn *insn = env->prog->insnsi;
6795 int insn_cnt = env->prog->len;
6796 int i;
6797
6798 for (i = 0; i < insn_cnt; i++, insn++)
6799 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
6800 insn->src_reg = 0;
6801}
6802
6803 /* single env->prog->insnsi[off] instruction was replaced with the range
6804 * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
6805 * [0, off) and [off, end) to new locations, so the patched range stays zero
6806 */
6807static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
6808 u32 off, u32 cnt)
6809{
6810 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
6811 int i;
6812
6813 if (cnt == 1)
6814 return 0;
6815 new_data = vzalloc(array_size(prog_len,
6816 sizeof(struct bpf_insn_aux_data)));
6817 if (!new_data)
6818 return -ENOMEM;
6819 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
6820 memcpy(new_data + off + cnt - 1, old_data + off,
6821 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
6822 for (i = off; i < off + cnt - 1; i++)
6823 new_data[i].seen = true;
6824 env->insn_aux_data = new_data;
6825 vfree(old_data);
6826 return 0;
6827}
6828
6829static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
6830{
6831 int i;
6832
6833 if (len == 1)
6834 return;
6835 /* NOTE: fake 'exit' subprog should be updated as well. */
6836 for (i = 0; i <= env->subprog_cnt; i++) {
6837 if (env->subprog_info[i].start <= off)
6838 continue;
6839 env->subprog_info[i].start += len - 1;
6840 }
6841}
6842
6843static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
6844 const struct bpf_insn *patch, u32 len)
6845{
6846 struct bpf_prog *new_prog;
6847
6848 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
6849 if (!new_prog)
6850 return NULL;
6851 if (adjust_insn_aux_data(env, new_prog->len, off, len))
6852 return NULL;
6853 adjust_subprog_starts(env, off, len);
6854 return new_prog;
6855}
6856
6857static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
6858 u32 off, u32 cnt)
6859{
6860 int i, j;
6861
6862 /* find first prog starting at or after off (first to remove) */
6863 for (i = 0; i < env->subprog_cnt; i++)
6864 if (env->subprog_info[i].start >= off)
6865 break;
6866 /* find first prog starting at or after off + cnt (first to stay) */
6867 for (j = i; j < env->subprog_cnt; j++)
6868 if (env->subprog_info[j].start >= off + cnt)
6869 break;
6870 /* if j doesn't start exactly at off + cnt, we are just removing
6871 * the front of previous prog
6872 */
6873 if (env->subprog_info[j].start != off + cnt)
6874 j--;
6875
6876 if (j > i) {
6877 struct bpf_prog_aux *aux = env->prog->aux;
6878 int move;
6879
6880 /* move fake 'exit' subprog as well */
6881 move = env->subprog_cnt + 1 - j;
6882
6883 memmove(env->subprog_info + i,
6884 env->subprog_info + j,
6885 sizeof(*env->subprog_info) * move);
6886 env->subprog_cnt -= j - i;
6887
6888 /* remove func_info */
6889 if (aux->func_info) {
6890 move = aux->func_info_cnt - j;
6891
6892 memmove(aux->func_info + i,
6893 aux->func_info + j,
6894 sizeof(*aux->func_info) * move);
6895 aux->func_info_cnt -= j - i;
6896 /* func_info->insn_off is set after all code rewrites,
6897 * in adjust_btf_func() - no need to adjust
6898 */
6899 }
6900 } else {
6901 /* convert i from "first prog to remove" to "first to adjust" */
6902 if (env->subprog_info[i].start == off)
6903 i++;
6904 }
6905
6906 /* update fake 'exit' subprog as well */
6907 for (; i <= env->subprog_cnt; i++)
6908 env->subprog_info[i].start -= cnt;
6909
6910 return 0;
6911}
6912
6913static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
6914 u32 cnt)
6915{
6916 struct bpf_prog *prog = env->prog;
6917 u32 i, l_off, l_cnt, nr_linfo;
6918 struct bpf_line_info *linfo;
6919
6920 nr_linfo = prog->aux->nr_linfo;
6921 if (!nr_linfo)
6922 return 0;
6923
6924 linfo = prog->aux->linfo;
6925
6926 /* find first line info to remove, count lines to be removed */
6927 for (i = 0; i < nr_linfo; i++)
6928 if (linfo[i].insn_off >= off)
6929 break;
6930
6931 l_off = i;
6932 l_cnt = 0;
6933 for (; i < nr_linfo; i++)
6934 if (linfo[i].insn_off < off + cnt)
6935 l_cnt++;
6936 else
6937 break;
6938
6939 /* If the first live insn doesn't match the first live linfo, it needs to
6940 * "inherit" the last removed linfo. prog is already modified, so prog->len == off
6941 * means no live instructions after (tail of the program was removed).
6942 */
6943 if (prog->len != off && l_cnt &&
6944 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
6945 l_cnt--;
6946 linfo[--i].insn_off = off + cnt;
6947 }
6948
6949 /* remove the line info entries which refer to the removed instructions */
6950 if (l_cnt) {
6951 memmove(linfo + l_off, linfo + i,
6952 sizeof(*linfo) * (nr_linfo - i));
6953
6954 prog->aux->nr_linfo -= l_cnt;
6955 nr_linfo = prog->aux->nr_linfo;
6956 }
6957
6958 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
6959 for (i = l_off; i < nr_linfo; i++)
6960 linfo[i].insn_off -= cnt;
6961
6962 /* fix up all subprogs (incl. 'exit') which start >= off */
6963 for (i = 0; i <= env->subprog_cnt; i++)
6964 if (env->subprog_info[i].linfo_idx > l_off) {
6965 /* program may have started in the removed region but
6966 * may not be fully removed
6967 */
6968 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
6969 env->subprog_info[i].linfo_idx -= l_cnt;
6970 else
6971 env->subprog_info[i].linfo_idx = l_off;
6972 }
6973
6974 return 0;
6975}
6976
6977static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
6978{
6979 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
6980 unsigned int orig_prog_len = env->prog->len;
6981 int err;
6982
6983 if (bpf_prog_is_dev_bound(env->prog->aux))
6984 bpf_prog_offload_remove_insns(env, off, cnt);
6985
6986 err = bpf_remove_insns(env->prog, off, cnt);
6987 if (err)
6988 return err;
6989
6990 err = adjust_subprog_starts_after_remove(env, off, cnt);
6991 if (err)
6992 return err;
6993
6994 err = bpf_adj_linfo_after_remove(env, off, cnt);
6995 if (err)
6996 return err;
6997
6998 memmove(aux_data + off, aux_data + off + cnt,
6999 sizeof(*aux_data) * (orig_prog_len - off - cnt));
7000
7001 return 0;
7002}
7003
7004/* The verifier does more data flow analysis than llvm and will not
7005 * explore branches that are dead at run time. Malicious programs can
7006 * have dead code too. Therefore replace all dead at-run-time code
7007 * with 'ja -1'.
7008 *
7009 * Just NOPs are not optimal: e.g. if they sat at the end of the
7010 * program and, through another bug, we managed to jump there,
7011 * we'd execute beyond program memory. Returning an exception
7012 * code also wouldn't work since we can have subprogs where the dead
7013 * code could be located.
7014 */
7015static void sanitize_dead_code(struct bpf_verifier_env *env)
7016{
7017 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7018 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
7019 struct bpf_insn *insn = env->prog->insnsi;
7020 const int insn_cnt = env->prog->len;
7021 int i;
7022
7023 for (i = 0; i < insn_cnt; i++) {
7024 if (aux_data[i].seen)
7025 continue;
7026 memcpy(insn + i, &trap, sizeof(trap));
7027 }
7028}
7029
7030static bool insn_is_cond_jump(u8 code)
7031{
7032 u8 op;
7033
7034 if (BPF_CLASS(code) == BPF_JMP32)
7035 return true;
7036
7037 if (BPF_CLASS(code) != BPF_JMP)
7038 return false;
7039
7040 op = BPF_OP(code);
7041 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
7042}
7043
7044static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
7045{
7046 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7047 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
7048 struct bpf_insn *insn = env->prog->insnsi;
7049 const int insn_cnt = env->prog->len;
7050 int i;
7051
7052 for (i = 0; i < insn_cnt; i++, insn++) {
7053 if (!insn_is_cond_jump(insn->code))
7054 continue;
7055
7056 if (!aux_data[i + 1].seen)
7057 ja.off = insn->off;
7058 else if (!aux_data[i + 1 + insn->off].seen)
7059 ja.off = 0;
7060 else
7061 continue;
7062
7063 if (bpf_prog_is_dev_bound(env->prog->aux))
7064 bpf_prog_offload_replace_insn(env, i, &ja);
7065
7066 memcpy(insn, &ja, sizeof(ja));
7067 }
7068}
7069
7070static int opt_remove_dead_code(struct bpf_verifier_env *env)
7071{
7072 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7073 int insn_cnt = env->prog->len;
7074 int i, err;
7075
7076 for (i = 0; i < insn_cnt; i++) {
7077 int j;
7078
7079 j = 0;
7080 while (i + j < insn_cnt && !aux_data[i + j].seen)
7081 j++;
7082 if (!j)
7083 continue;
7084
7085 err = verifier_remove_insns(env, i, j);
7086 if (err)
7087 return err;
7088 insn_cnt = env->prog->len;
7089 }
7090
7091 return 0;
7092}
7093
7094static int opt_remove_nops(struct bpf_verifier_env *env)
7095{
7096 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
7097 struct bpf_insn *insn = env->prog->insnsi;
7098 int insn_cnt = env->prog->len;
7099 int i, err;
7100
7101 for (i = 0; i < insn_cnt; i++) {
7102 if (memcmp(&insn[i], &ja, sizeof(ja)))
7103 continue;
7104
7105 err = verifier_remove_insns(env, i, 1);
7106 if (err)
7107 return err;
7108 insn_cnt--;
7109 i--;
7110 }
7111
7112 return 0;
7113}
7114
7115/* convert load instructions that access fields of a context type into a
7116 * sequence of instructions that access fields of the underlying structure:
7117 * struct __sk_buff -> struct sk_buff
7118 * struct bpf_sock_ops -> struct sock
7119 */
7120static int convert_ctx_accesses(struct bpf_verifier_env *env)
7121{
7122 const struct bpf_verifier_ops *ops = env->ops;
7123 int i, cnt, size, ctx_field_size, delta = 0;
7124 const int insn_cnt = env->prog->len;
7125 struct bpf_insn insn_buf[16], *insn;
7126 u32 target_size, size_default, off;
7127 struct bpf_prog *new_prog;
7128 enum bpf_access_type type;
7129 bool is_narrower_load;
7130
7131 if (ops->gen_prologue || env->seen_direct_write) {
7132 if (!ops->gen_prologue) {
7133 verbose(env, "bpf verifier is misconfigured\n");
7134 return -EINVAL;
7135 }
7136 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
7137 env->prog);
7138 if (cnt >= ARRAY_SIZE(insn_buf)) {
7139 verbose(env, "bpf verifier is misconfigured\n");
7140 return -EINVAL;
7141 } else if (cnt) {
7142 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
7143 if (!new_prog)
7144 return -ENOMEM;
7145
7146 env->prog = new_prog;
7147 delta += cnt - 1;
7148 }
7149 }
7150
7151 if (bpf_prog_is_dev_bound(env->prog->aux))
7152 return 0;
7153
7154 insn = env->prog->insnsi + delta;
7155
7156 for (i = 0; i < insn_cnt; i++, insn++) {
7157 bpf_convert_ctx_access_t convert_ctx_access;
7158
7159 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
7160 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
7161 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
7162 insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
7163 type = BPF_READ;
7164 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
7165 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
7166 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
7167 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
7168 type = BPF_WRITE;
7169 else
7170 continue;
7171
7172 if (type == BPF_WRITE &&
7173 env->insn_aux_data[i + delta].sanitize_stack_off) {
7174 struct bpf_insn patch[] = {
7175 /* Sanitize suspicious stack slot with zero.
7176 * There are no memory dependencies for this store,
7177 * since it only uses the frame pointer and an
7178 * immediate constant of zero
7179 */
7180 BPF_ST_MEM(BPF_DW, BPF_REG_FP,
7181 env->insn_aux_data[i + delta].sanitize_stack_off,
7182 0),
7183 /* the original STX instruction will immediately
7184 * overwrite the same stack slot with the appropriate value
7185 */
7186 *insn,
7187 };
7188
7189 cnt = ARRAY_SIZE(patch);
7190 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
7191 if (!new_prog)
7192 return -ENOMEM;
7193
7194 delta += cnt - 1;
7195 env->prog = new_prog;
7196 insn = new_prog->insnsi + i + delta;
7197 continue;
7198 }
7199
7200 switch (env->insn_aux_data[i + delta].ptr_type) {
7201 case PTR_TO_CTX:
7202 if (!ops->convert_ctx_access)
7203 continue;
7204 convert_ctx_access = ops->convert_ctx_access;
7205 break;
7206 case PTR_TO_SOCKET:
7207 case PTR_TO_SOCK_COMMON:
7208 convert_ctx_access = bpf_sock_convert_ctx_access;
7209 break;
7210 case PTR_TO_TCP_SOCK:
7211 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
7212 break;
7213 default:
7214 continue;
7215 }
7216
7217 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
7218 size = BPF_LDST_BYTES(insn);
7219
7220 /* If the read access is a narrower load of the field,
7221 * convert it to a 4/8-byte load, to minimize program type
7222 * specific convert_ctx_access changes. If conversion is
7223 * successful, we will apply the proper mask to the result.
7224 */
7225 is_narrower_load = size < ctx_field_size;
7226 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
7227 off = insn->off;
7228 if (is_narrower_load) {
7229 u8 size_code;
7230
7231 if (type == BPF_WRITE) {
7232 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
7233 return -EINVAL;
7234 }
7235
7236 size_code = BPF_H;
7237 if (ctx_field_size == 4)
7238 size_code = BPF_W;
7239 else if (ctx_field_size == 8)
7240 size_code = BPF_DW;
7241
7242 insn->off = off & ~(size_default - 1);
7243 insn->code = BPF_LDX | BPF_MEM | size_code;
7244 }
7245
7246 target_size = 0;
7247 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
7248 &target_size);
7249 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
7250 (ctx_field_size && !target_size)) {
7251 verbose(env, "bpf verifier is misconfigured\n");
7252 return -EINVAL;
7253 }
7254
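/* The patched insn above loads the full 4/8-byte field. If the original
 * access was narrower, shift the wanted bytes down to bit 0 and mask the
 * register to 'size' bytes so it holds the same value a native narrow
 * load would have produced.
 */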
7255 if (is_narrower_load && size < target_size) {
7256 u8 shift = (off & (size_default - 1)) * 8;
7257
7258 if (ctx_field_size <= 4) {
7259 if (shift)
7260 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
7261 insn->dst_reg,
7262 shift);
7263 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
7264 (1 << size * 8) - 1);
7265 } else {
7266 if (shift)
7267 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
7268 insn->dst_reg,
7269 shift);
7270 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
7271 (1 << size * 8) - 1);
7272 }
7273 }
7274
7275 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7276 if (!new_prog)
7277 return -ENOMEM;
7278
7279 delta += cnt - 1;
7280
7281 /* keep walking new program and skip insns we just inserted */
7282 env->prog = new_prog;
7283 insn = new_prog->insnsi + i + delta;
7284 }
7285
7286 return 0;
7287}
7288
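/* Split the program at subprog boundaries, JIT each subprog separately,
 * and patch every BPF_PSEUDO_CALL with the offset of the callee's JITed
 * image relative to __bpf_call_base. On failure the insns are restored
 * so the program can still fall back to the interpreter; only -EFAULT is
 * treated as a hard reject by the caller.
 */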
7289static int jit_subprogs(struct bpf_verifier_env *env)
7290{
7291 struct bpf_prog *prog = env->prog, **func, *tmp;
7292 int i, j, subprog_start, subprog_end = 0, len, subprog;
7293 struct bpf_insn *insn;
7294 void *old_bpf_func;
7295 int err;
7296
7297 if (env->subprog_cnt <= 1)
7298 return 0;
7299
7300 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7301 if (insn->code != (BPF_JMP | BPF_CALL) ||
7302 insn->src_reg != BPF_PSEUDO_CALL)
7303 continue;
7304 /* Upon error here we cannot fall back to interpreter but
7305 * need a hard reject of the program. Thus -EFAULT is
7306 * propagated in any case.
7307 */
7308 subprog = find_subprog(env, i + insn->imm + 1);
7309 if (subprog < 0) {
7310 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
7311 i + insn->imm + 1);
7312 return -EFAULT;
7313 }
7314 /* temporarily remember subprog id inside insn instead of
7315 * aux_data, since next loop will split up all insns into funcs
7316 */
7317 insn->off = subprog;
7318 /* remember original imm in case JIT fails and fallback
7319 * to interpreter will be needed
7320 */
7321 env->insn_aux_data[i].call_imm = insn->imm;
7322 /* point imm to __bpf_call_base+1 from the JIT's point of view */
7323 insn->imm = 1;
7324 }
7325
7326 err = bpf_prog_alloc_jited_linfo(prog);
7327 if (err)
7328 goto out_undo_insn;
7329
7330 err = -ENOMEM;
7331 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
7332 if (!func)
7333 goto out_undo_insn;
7334
7335 for (i = 0; i < env->subprog_cnt; i++) {
7336 subprog_start = subprog_end;
7337 subprog_end = env->subprog_info[i + 1].start;
7338
7339 len = subprog_end - subprog_start;
7340 /* BPF_PROG_RUN doesn't call subprogs directly,
7341 * hence main prog stats include the runtime of subprogs.
7342 * subprogs don't have IDs and are not reachable via prog_get_next_id,
7343 * so func[i]->aux->stats will never be accessed and stays NULL.
7344 */
7345 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
7346 if (!func[i])
7347 goto out_free;
7348 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
7349 len * sizeof(struct bpf_insn));
7350 func[i]->type = prog->type;
7351 func[i]->len = len;
7352 if (bpf_prog_calc_tag(func[i]))
7353 goto out_free;
7354 func[i]->is_func = 1;
7355 func[i]->aux->func_idx = i;
7356 /* the btf and func_info will be freed only at prog->aux */
7357 func[i]->aux->btf = prog->aux->btf;
7358 func[i]->aux->func_info = prog->aux->func_info;
7359
7360 /* Use bpf_prog_F_tag to indicate functions in stack traces.
7361 * Long term we would need debug info to populate the names.
7362 */
7363 func[i]->aux->name[0] = 'F';
7364 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
7365 func[i]->jit_requested = 1;
7366 func[i]->aux->linfo = prog->aux->linfo;
7367 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
7368 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
7369 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
7370 func[i] = bpf_int_jit_compile(func[i]);
7371 if (!func[i]->jited) {
7372 err = -ENOTSUPP;
7373 goto out_free;
7374 }
7375 cond_resched();
7376 }
7377 /* at this point all bpf functions were successfully JITed
7378 * now populate all bpf_calls with correct addresses and
7379 * run last pass of JIT
7380 */
7381 for (i = 0; i < env->subprog_cnt; i++) {
7382 insn = func[i]->insnsi;
7383 for (j = 0; j < func[i]->len; j++, insn++) {
7384 if (insn->code != (BPF_JMP | BPF_CALL) ||
7385 insn->src_reg != BPF_PSEUDO_CALL)
7386 continue;
7387 subprog = insn->off;
7388 insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
7389 func[subprog]->bpf_func -
7390 __bpf_call_base;
7391 }
7392
7393 /* we use the aux data to keep a list of the start addresses
7394 * of the JITed images for each function in the program
7395 *
7396 * for some architectures, such as powerpc64, the imm field
7397 * might not be large enough to hold the offset of the start
7398 * address of the callee's JITed image from __bpf_call_base
7399 *
7400 * in such cases, we can lookup the start address of a callee
7401 * by using its subprog id, available from the off field of
7402 * the call instruction, as an index for this list
7403 */
7404 func[i]->aux->func = func;
7405 func[i]->aux->func_cnt = env->subprog_cnt;
7406 }
7407 for (i = 0; i < env->subprog_cnt; i++) {
7408 old_bpf_func = func[i]->bpf_func;
7409 tmp = bpf_int_jit_compile(func[i]);
7410 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
7411 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
7412 err = -ENOTSUPP;
7413 goto out_free;
7414 }
7415 cond_resched();
7416 }
7417
7418 /* finally lock the prog and JIT images for all functions and
7419 * populate kallsyms
7420 */
7421 for (i = 0; i < env->subprog_cnt; i++) {
7422 bpf_prog_lock_ro(func[i]);
7423 bpf_prog_kallsyms_add(func[i]);
7424 }
7425
7426 /* Last step: make the now unused interpreter insns from the main
7427 * prog consistent for later dump requests, so they look the same
7428 * as if they had only ever been interpreted.
7429 */
7430 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7431 if (insn->code != (BPF_JMP | BPF_CALL) ||
7432 insn->src_reg != BPF_PSEUDO_CALL)
7433 continue;
7434 insn->off = env->insn_aux_data[i].call_imm;
7435 subprog = find_subprog(env, i + insn->off + 1);
7436 insn->imm = subprog;
7437 }
7438
7439 prog->jited = 1;
7440 prog->bpf_func = func[0]->bpf_func;
7441 prog->aux->func = func;
7442 prog->aux->func_cnt = env->subprog_cnt;
7443 bpf_prog_free_unused_jited_linfo(prog);
7444 return 0;
7445out_free:
7446 for (i = 0; i < env->subprog_cnt; i++)
7447 if (func[i])
7448 bpf_jit_free(func[i]);
7449 kfree(func);
7450out_undo_insn:
7451 /* cleanup main prog to be interpreted */
7452 prog->jit_requested = 0;
7453 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7454 if (insn->code != (BPF_JMP | BPF_CALL) ||
7455 insn->src_reg != BPF_PSEUDO_CALL)
7456 continue;
7457 insn->off = 0;
7458 insn->imm = env->insn_aux_data[i].call_imm;
7459 }
7460 bpf_prog_free_jited_linfo(prog);
7461 return err;
7462}
7463
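/* Resolve bpf-to-bpf calls: try jit_subprogs() first. Without
 * CONFIG_BPF_JIT_ALWAYS_ON, a non-fatal JIT failure falls back to
 * patching each BPF_PSEUDO_CALL with the callee's stack depth so the
 * interpreter can execute the calls.
 */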
7464static int fixup_call_args(struct bpf_verifier_env *env)
7465{
7466#ifndef CONFIG_BPF_JIT_ALWAYS_ON
7467 struct bpf_prog *prog = env->prog;
7468 struct bpf_insn *insn = prog->insnsi;
7469 int i, depth;
7470#endif
7471 int err = 0;
7472
7473 if (env->prog->jit_requested &&
7474 !bpf_prog_is_dev_bound(env->prog->aux)) {
7475 err = jit_subprogs(env);
7476 if (err == 0)
7477 return 0;
7478 if (err == -EFAULT)
7479 return err;
7480 }
7481#ifndef CONFIG_BPF_JIT_ALWAYS_ON
7482 for (i = 0; i < prog->len; i++, insn++) {
7483 if (insn->code != (BPF_JMP | BPF_CALL) ||
7484 insn->src_reg != BPF_PSEUDO_CALL)
7485 continue;
7486 depth = get_callee_stack_depth(env, insn, i);
7487 if (depth < 0)
7488 return depth;
7489 bpf_patch_call_args(insn, depth);
7490 }
7491 err = 0;
7492#endif
7493 return err;
7494}
7495
7496/* fixup insn->imm field of bpf_call instructions
7497 * and inline eligible helpers as explicit sequence of BPF instructions
7498 *
7499 * this function is called after eBPF program passed verification
7500 */
7501static int fixup_bpf_calls(struct bpf_verifier_env *env)
7502{
7503 struct bpf_prog *prog = env->prog;
7504 struct bpf_insn *insn = prog->insnsi;
7505 const struct bpf_func_proto *fn;
7506 const int insn_cnt = prog->len;
7507 const struct bpf_map_ops *ops;
7508 struct bpf_insn_aux_data *aux;
7509 struct bpf_insn insn_buf[16];
7510 struct bpf_prog *new_prog;
7511 struct bpf_map *map_ptr;
7512 int i, cnt, delta = 0;
7513
7514 for (i = 0; i < insn_cnt; i++, insn++) {
7515 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
7516 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
7517 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
7518 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
7519 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
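/* Rewrite div/mod by a register so a zero divisor cannot trap:
 * BPF_DIV by 0 zeroes the destination and BPF_MOD by 0 leaves it
 * unchanged. The leading BPF_MOV32_REG truncates the divisor for the
 * 32-bit BPF_ALU case; for BPF_ALU64 the patchlet is applied one insn
 * later so that truncation is skipped.
 */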
7520 struct bpf_insn mask_and_div[] = {
7521 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
7522 /* Rx div 0 -> 0 */
7523 BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
7524 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
7525 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
7526 *insn,
7527 };
7528 struct bpf_insn mask_and_mod[] = {
7529 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
7530 /* Rx mod 0 -> Rx */
7531 BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
7532 *insn,
7533 };
7534 struct bpf_insn *patchlet;
7535
7536 if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
7537 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
7538 patchlet = mask_and_div + (is64 ? 1 : 0);
7539 cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
7540 } else {
7541 patchlet = mask_and_mod + (is64 ? 1 : 0);
7542 cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
7543 }
7544
7545 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
7546 if (!new_prog)
7547 return -ENOMEM;
7548
7549 delta += cnt - 1;
7550 env->prog = prog = new_prog;
7551 insn = new_prog->insnsi + i + delta;
7552 continue;
7553 }
7554
7555 if (BPF_CLASS(insn->code) == BPF_LD &&
7556 (BPF_MODE(insn->code) == BPF_ABS ||
7557 BPF_MODE(insn->code) == BPF_IND)) {
7558 cnt = env->ops->gen_ld_abs(insn, insn_buf);
7559 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
7560 verbose(env, "bpf verifier is misconfigured\n");
7561 return -EINVAL;
7562 }
7563
7564 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7565 if (!new_prog)
7566 return -ENOMEM;
7567
7568 delta += cnt - 1;
7569 env->prog = prog = new_prog;
7570 insn = new_prog->insnsi + i + delta;
7571 continue;
7572 }
7573
7574 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
7575 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
7576 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
7577 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
7578 struct bpf_insn insn_buf[16];
7579 struct bpf_insn *patch = &insn_buf[0];
7580 bool issrc, isneg;
7581 u32 off_reg;
7582
7583 aux = &env->insn_aux_data[i + delta];
7584 if (!aux->alu_state ||
7585 aux->alu_state == BPF_ALU_NON_POINTER)
7586 continue;
7587
7588 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
7589 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
7590 BPF_ALU_SANITIZE_SRC;
7591
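/* Mask pointer arithmetic against speculative out-of-bounds offsets:
 * the branch-free sequence below computes BPF_REG_AX such that ANDing
 * it with off_reg leaves an in-range offset (0 <= off_reg < alu_limit)
 * unchanged and forces an out-of-range offset to 0, so the result stays
 * bounded even under CPU misspeculation.
 */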
7592 off_reg = issrc ? insn->src_reg : insn->dst_reg;
7593 if (isneg)
7594 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
7595 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
7596 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
7597 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
7598 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
7599 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
7600 if (issrc) {
7601 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
7602 off_reg);
7603 insn->src_reg = BPF_REG_AX;
7604 } else {
7605 *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
7606 BPF_REG_AX);
7607 }
7608 if (isneg)
7609 insn->code = insn->code == code_add ?
7610 code_sub : code_add;
7611 *patch++ = *insn;
7612 if (issrc && isneg)
7613 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
7614 cnt = patch - insn_buf;
7615
7616 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7617 if (!new_prog)
7618 return -ENOMEM;
7619
7620 delta += cnt - 1;
7621 env->prog = prog = new_prog;
7622 insn = new_prog->insnsi + i + delta;
7623 continue;
7624 }
7625
7626 if (insn->code != (BPF_JMP | BPF_CALL))
7627 continue;
7628 if (insn->src_reg == BPF_PSEUDO_CALL)
7629 continue;
7630
7631 if (insn->imm == BPF_FUNC_get_route_realm)
7632 prog->dst_needed = 1;
7633 if (insn->imm == BPF_FUNC_get_prandom_u32)
7634 bpf_user_rnd_init_once();
7635 if (insn->imm == BPF_FUNC_override_return)
7636 prog->kprobe_override = 1;
7637 if (insn->imm == BPF_FUNC_tail_call) {
7638 /* If we tail call into other programs, we
7639 * cannot make any assumptions since they can
7640 * be replaced dynamically during runtime in
7641 * the program array.
7642 */
7643 prog->cb_access = 1;
7644 env->prog->aux->stack_depth = MAX_BPF_STACK;
7645 env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
7646
7647 /* mark bpf_tail_call as a different opcode to avoid
7648 * a conditional branch in the interpreter for every normal
7649 * call and to prevent accidental JITing by a JIT compiler
7650 * that doesn't support bpf_tail_call yet
7651 */
7652 insn->imm = 0;
7653 insn->code = BPF_JMP | BPF_TAIL_CALL;
7654
7655 aux = &env->insn_aux_data[i + delta];
7656 if (!bpf_map_ptr_unpriv(aux))
7657 continue;
7658
7659 /* instead of changing every JIT dealing with tail_call
7660 * emit two extra insns:
7661 * if (index >= max_entries) goto out;
7662 * index &= array->index_mask;
7663 * to avoid out-of-bounds cpu speculation
7664 */
7665 if (bpf_map_ptr_poisoned(aux)) {
7666 verbose(env, "tail_call abusing map_ptr\n");
7667 return -EINVAL;
7668 }
7669
7670 map_ptr = BPF_MAP_PTR(aux->map_state);
7671 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
7672 map_ptr->max_entries, 2);
7673 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
7674 container_of(map_ptr,
7675 struct bpf_array,
7676 map)->index_mask);
7677 insn_buf[2] = *insn;
7678 cnt = 3;
7679 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7680 if (!new_prog)
7681 return -ENOMEM;
7682
7683 delta += cnt - 1;
7684 env->prog = prog = new_prog;
7685 insn = new_prog->insnsi + i + delta;
7686 continue;
7687 }
7688
7689 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
7690 * and other inlining handlers are currently limited to 64 bit
7691 * only.
7692 */
7693 if (prog->jit_requested && BITS_PER_LONG == 64 &&
7694 (insn->imm == BPF_FUNC_map_lookup_elem ||
7695 insn->imm == BPF_FUNC_map_update_elem ||
7696 insn->imm == BPF_FUNC_map_delete_elem ||
7697 insn->imm == BPF_FUNC_map_push_elem ||
7698 insn->imm == BPF_FUNC_map_pop_elem ||
7699 insn->imm == BPF_FUNC_map_peek_elem)) {
7700 aux = &env->insn_aux_data[i + delta];
7701 if (bpf_map_ptr_poisoned(aux))
7702 goto patch_call_imm;
7703
7704 map_ptr = BPF_MAP_PTR(aux->map_state);
7705 ops = map_ptr->ops;
7706 if (insn->imm == BPF_FUNC_map_lookup_elem &&
7707 ops->map_gen_lookup) {
7708 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
7709 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
7710 verbose(env, "bpf verifier is misconfigured\n");
7711 return -EINVAL;
7712 }
7713
7714 new_prog = bpf_patch_insn_data(env, i + delta,
7715 insn_buf, cnt);
7716 if (!new_prog)
7717 return -ENOMEM;
7718
7719 delta += cnt - 1;
7720 env->prog = prog = new_prog;
7721 insn = new_prog->insnsi + i + delta;
7722 continue;
7723 }
7724
7725 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
7726 (void *(*)(struct bpf_map *map, void *key))NULL));
7727 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
7728 (int (*)(struct bpf_map *map, void *key))NULL));
7729 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
7730 (int (*)(struct bpf_map *map, void *key, void *value,
7731 u64 flags))NULL));
7732 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
7733 (int (*)(struct bpf_map *map, void *value,
7734 u64 flags))NULL));
7735 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
7736 (int (*)(struct bpf_map *map, void *value))NULL));
7737 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
7738 (int (*)(struct bpf_map *map, void *value))NULL));
7739
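/* For 64-bit JITed programs, bypass the generic helper and call the
 * map's ops callback directly; the BUILD_BUG_ON()s above keep the
 * callback signatures in sync with what this direct call expects.
 */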
7740 switch (insn->imm) {
7741 case BPF_FUNC_map_lookup_elem:
7742 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
7743 __bpf_call_base;
7744 continue;
7745 case BPF_FUNC_map_update_elem:
7746 insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
7747 __bpf_call_base;
7748 continue;
7749 case BPF_FUNC_map_delete_elem:
7750 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
7751 __bpf_call_base;
7752 continue;
7753 case BPF_FUNC_map_push_elem:
7754 insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
7755 __bpf_call_base;
7756 continue;
7757 case BPF_FUNC_map_pop_elem:
7758 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
7759 __bpf_call_base;
7760 continue;
7761 case BPF_FUNC_map_peek_elem:
7762 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
7763 __bpf_call_base;
7764 continue;
7765 }
7766
7767 goto patch_call_imm;
7768 }
7769
7770patch_call_imm:
7771 fn = env->ops->get_func_proto(insn->imm, env->prog);
7772 /* all functions that have a prototype and that the verifier
7773 * allowed programs to call must be real in-kernel functions
7774 */
7775 if (!fn->func) {
7776 verbose(env,
7777 "kernel subsystem misconfigured func %s#%d\n",
7778 func_id_name(insn->imm), insn->imm);
7779 return -EFAULT;
7780 }
7781 insn->imm = fn->func - __bpf_call_base;
7782 }
7783
7784 return 0;
7785}
7786
7787static void free_states(struct bpf_verifier_env *env)
7788{
7789 struct bpf_verifier_state_list *sl, *sln;
7790 int i;
7791
7792 if (!env->explored_states)
7793 return;
7794
7795 for (i = 0; i < env->prog->len; i++) {
7796 sl = env->explored_states[i];
7797
7798 if (sl)
7799 while (sl != STATE_LIST_MARK) {
7800 sln = sl->next;
7801 free_verifier_state(&sl->state, false);
7802 kfree(sl);
7803 sl = sln;
7804 }
7805 }
7806
7807 kfree(env->explored_states);
7808}
7809
7810int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
7811 union bpf_attr __user *uattr)
7812{
7813 struct bpf_verifier_env *env;
7814 struct bpf_verifier_log *log;
7815 int i, len, ret = -EINVAL;
7816 bool is_priv;
7817
7818 /* no program is valid */
7819 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
7820 return -EINVAL;
7821
7822 /* 'struct bpf_verifier_env' can be global, but since it's not small,
7823 * allocate/free it every time bpf_check() is called
7824 */
7825 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
7826 if (!env)
7827 return -ENOMEM;
7828 log = &env->log;
7829
7830 len = (*prog)->len;
7831 env->insn_aux_data =
7832 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
7833 ret = -ENOMEM;
7834 if (!env->insn_aux_data)
7835 goto err_free_env;
7836 for (i = 0; i < len; i++)
7837 env->insn_aux_data[i].orig_idx = i;
7838 env->prog = *prog;
7839 env->ops = bpf_verifier_ops[env->prog->type];
7840
7841 /* grab the mutex to protect a few globals used by the verifier */
7842 mutex_lock(&bpf_verifier_lock);
7843
7844 if (attr->log_level || attr->log_buf || attr->log_size) {
7845 /* user requested verbose verifier output
7846 * and supplied a buffer to store the verification trace
7847 */
7848 log->level = attr->log_level;
7849 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
7850 log->len_total = attr->log_size;
7851
7852 ret = -EINVAL;
7853 /* log attributes have to be sane */
7854 if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
7855 !log->level || !log->ubuf)
7856 goto err_unlock;
7857 }
7858
7859 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
7860 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
7861 env->strict_alignment = true;
7862 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
7863 env->strict_alignment = false;
7864
7865 is_priv = capable(CAP_SYS_ADMIN);
7866 env->allow_ptr_leaks = is_priv;
7867
7868 ret = replace_map_fd_with_map_ptr(env);
7869 if (ret < 0)
7870 goto skip_full_check;
7871
7872 if (bpf_prog_is_dev_bound(env->prog->aux)) {
7873 ret = bpf_prog_offload_verifier_prep(env->prog);
7874 if (ret)
7875 goto skip_full_check;
7876 }
7877
7878 env->explored_states = kcalloc(env->prog->len,
7879 sizeof(struct bpf_verifier_state_list *),
7880 GFP_USER);
7881 ret = -ENOMEM;
7882 if (!env->explored_states)
7883 goto skip_full_check;
7884
7885 ret = check_subprogs(env);
7886 if (ret < 0)
7887 goto skip_full_check;
7888
7889 ret = check_btf_info(env, attr, uattr);
7890 if (ret < 0)
7891 goto skip_full_check;
7892
7893 ret = check_cfg(env);
7894 if (ret < 0)
7895 goto skip_full_check;
7896
7897 ret = do_check(env);
7898 if (env->cur_state) {
7899 free_verifier_state(env->cur_state, true);
7900 env->cur_state = NULL;
7901 }
7902
7903 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
7904 ret = bpf_prog_offload_finalize(env);
7905
7906skip_full_check:
7907 while (!pop_stack(env, NULL, NULL));
7908 free_states(env);
7909
7910 if (ret == 0)
7911 ret = check_max_stack_depth(env);
7912
7913 /* instruction rewrites happen after this point */
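/* privileged programs get dead code hard-wired and removed outright;
 * for unprivileged ones the dead insns are only overwritten with the
 * 'ja -1' trap by sanitize_dead_code()
 */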
7914 if (is_priv) {
7915 if (ret == 0)
7916 opt_hard_wire_dead_code_branches(env);
7917 if (ret == 0)
7918 ret = opt_remove_dead_code(env);
7919 if (ret == 0)
7920 ret = opt_remove_nops(env);
7921 } else {
7922 if (ret == 0)
7923 sanitize_dead_code(env);
7924 }
7925
7926 if (ret == 0)
7927 /* program is valid, convert *(u32*)(ctx + off) accesses */
7928 ret = convert_ctx_accesses(env);
7929
7930 if (ret == 0)
7931 ret = fixup_bpf_calls(env);
7932
7933 if (ret == 0)
7934 ret = fixup_call_args(env);
7935
7936 if (log->level && bpf_verifier_log_full(log))
7937 ret = -ENOSPC;
7938 if (log->level && !log->ubuf) {
7939 ret = -EFAULT;
7940 goto err_release_maps;
7941 }
7942
7943 if (ret == 0 && env->used_map_cnt) {
7944 /* if program passed verifier, update used_maps in bpf_prog_info */
7945 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
7946 sizeof(env->used_maps[0]),
7947 GFP_KERNEL);
7948
7949 if (!env->prog->aux->used_maps) {
7950 ret = -ENOMEM;
7951 goto err_release_maps;
7952 }
7953
7954 memcpy(env->prog->aux->used_maps, env->used_maps,
7955 sizeof(env->used_maps[0]) * env->used_map_cnt);
7956 env->prog->aux->used_map_cnt = env->used_map_cnt;
7957
7958 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
7959 * bpf_ld_imm64 instructions
7960 */
7961 convert_pseudo_ld_imm64(env);
7962 }
7963
7964 if (ret == 0)
7965 adjust_btf_func(env);
7966
7967err_release_maps:
7968 if (!env->prog->aux->used_maps)
7969 /* if we didn't copy map pointers into bpf_prog_info, release
7970 * them now. Otherwise free_used_maps() will release them.
7971 */
7972 release_maps(env);
7973 *prog = env->prog;
7974err_unlock:
7975 mutex_unlock(&bpf_verifier_lock);
7976 vfree(env->insn_aux_data);
7977err_free_env:
7978 kfree(env);
7979 return ret;
7980}