/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <uapi/linux/btf.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either a pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
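
/* Illustrative sketch (not part of the original comment): the second pass
 * rejects, for example, a program that reads a register before it is ever
 * written, because check_reg_arg() sees the register in NOT_INIT state:
 *
 *    BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), // R2 was never written -> "R2 !read_ok"
 *    BPF_EXIT_INSN(),
 *
 * Similarly, an unprivileged program that tries to return a pointer (e.g. a
 * PTR_TO_STACK value in R0) is refused as a pointer leak; see
 * is_pointer_value() and the allow_ptr_leaks checks below.
 */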

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
};

#define BPF_COMPLEXITY_LIMIT_INSNS	131072
#define BPF_COMPLEXITY_LIMIT_STACK	1024

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_state = (unsigned long)map |
			 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	int regno;
	int access_size;
	s64 msize_smax_value;
	u64 msize_umax_value;
	int ptr_id;
};

static DEFINE_MUTEX(bpf_verifier_lock);

void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
		       va_list args)
{
	unsigned int n;

	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);

	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
		  "verifier log line truncated - local buffer too short\n");

	n = min(log->len_total - log->len_used - 1, n);
	log->kbuf[n] = '\0';

	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
		log->len_used += n;
	else
		log->ubuf = NULL;
}

/* log_level controls verbosity level of eBPF verifier.
 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 * so the user can figure out what's wrong with the program
 */
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);

__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}

static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_PACKET ||
	       type == PTR_TO_PACKET_META;
}

static bool reg_type_may_be_null(enum bpf_reg_type type)
{
	return type == PTR_TO_MAP_VALUE_OR_NULL ||
	       type == PTR_TO_SOCKET_OR_NULL;
}

static bool type_is_refcounted(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET;
}

static bool type_is_refcounted_or_null(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
}

static bool reg_is_refcounted(const struct bpf_reg_state *reg)
{
	return type_is_refcounted(reg->type);
}

static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
{
	return type_is_refcounted_or_null(reg->type);
}

static bool arg_type_is_refcounted(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_SOCKET;
}

/* Determine whether the function releases some resources allocated by another
 * function call. The first reference type argument will be assumed to be
 * released by release_reference().
 */
static bool is_release_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_sk_release;
}

/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
	[NOT_INIT]		= "?",
	[SCALAR_VALUE]		= "inv",
	[PTR_TO_CTX]		= "ctx",
	[CONST_PTR_TO_MAP]	= "map_ptr",
	[PTR_TO_MAP_VALUE]	= "map_value",
	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
	[PTR_TO_STACK]		= "fp",
	[PTR_TO_PACKET]		= "pkt",
	[PTR_TO_PACKET_META]	= "pkt_meta",
	[PTR_TO_PACKET_END]	= "pkt_end",
	[PTR_TO_FLOW_KEYS]	= "flow_keys",
	[PTR_TO_SOCKET]		= "sock",
	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
};

static char slot_type_char[] = {
	[STACK_INVALID]	= '?',
	[STACK_SPILL]	= 'r',
	[STACK_MISC]	= 'm',
	[STACK_ZERO]	= '0',
};

static void print_liveness(struct bpf_verifier_env *env,
			   enum bpf_reg_liveness live)
{
	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
		verbose(env, "_");
	if (live & REG_LIVE_READ)
		verbose(env, "r");
	if (live & REG_LIVE_WRITTEN)
		verbose(env, "w");
}

static struct bpf_func_state *func(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[reg->frameno];
}

static void print_verifier_state(struct bpf_verifier_env *env,
				 const struct bpf_func_state *state)
{
	const struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	if (state->frameno)
		verbose(env, " frame%d:", state->frameno);
	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		verbose(env, " R%d", i);
		print_liveness(env, reg->live);
		verbose(env, "=%s", reg_type_str[t]);
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* reg->off should be 0 for SCALAR_VALUE */
			verbose(env, "%lld", reg->var_off.value + reg->off);
			if (t == PTR_TO_STACK)
				verbose(env, ",call_%d", func(env, reg)->callsite);
		} else {
			verbose(env, "(id=%d", reg->id);
			if (t != SCALAR_VALUE)
				verbose(env, ",off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose(env, ",r=%d", reg->range);
			else if (t == CONST_PTR_TO_MAP ||
				 t == PTR_TO_MAP_VALUE ||
				 t == PTR_TO_MAP_VALUE_OR_NULL)
				verbose(env, ",ks=%d,vs=%d",
					reg->map_ptr->key_size,
					reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose(env, ",imm=%llx", reg->var_off.value);
			} else {
				if (reg->smin_value != reg->umin_value &&
				    reg->smin_value != S64_MIN)
					verbose(env, ",smin_value=%lld",
						(long long)reg->smin_value);
				if (reg->smax_value != reg->umax_value &&
				    reg->smax_value != S64_MAX)
					verbose(env, ",smax_value=%lld",
						(long long)reg->smax_value);
				if (reg->umin_value != 0)
					verbose(env, ",umin_value=%llu",
						(unsigned long long)reg->umin_value);
				if (reg->umax_value != U64_MAX)
					verbose(env, ",umax_value=%llu",
						(unsigned long long)reg->umax_value);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose(env, ",var_off=%s", tn_buf);
				}
			}
			verbose(env, ")");
		}
	}
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		char types_buf[BPF_REG_SIZE + 1];
		bool valid = false;
		int j;

		for (j = 0; j < BPF_REG_SIZE; j++) {
			if (state->stack[i].slot_type[j] != STACK_INVALID)
				valid = true;
			types_buf[j] = slot_type_char[
					state->stack[i].slot_type[j]];
		}
		types_buf[BPF_REG_SIZE] = 0;
		if (!valid)
			continue;
		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
		print_liveness(env, state->stack[i].spilled_ptr.live);
		if (state->stack[i].slot_type[0] == STACK_SPILL)
			verbose(env, "=%s",
				reg_type_str[state->stack[i].spilled_ptr.type]);
		else
			verbose(env, "=%s", types_buf);
	}
	if (state->acquired_refs && state->refs[0].id) {
		verbose(env, " refs=%d", state->refs[0].id);
		for (i = 1; i < state->acquired_refs; i++)
			if (state->refs[i].id)
				verbose(env, ",%d", state->refs[i].id);
	}
	verbose(env, "\n");
}

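/* For orientation (a sketch, not text from the original file): with the
 * helpers above, a freshly initialised main frame is printed roughly as
 *
 *    R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
 *
 * i.e. one " R<n>" entry per initialised register, an optional liveness
 * suffix from print_liveness(), and the reg_type_str[] name followed by
 * either a constant value or the (id=..,off=..,..) details.
 */
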
#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
static int copy_##NAME##_state(struct bpf_func_state *dst,		\
			       const struct bpf_func_state *src)	\
{									\
	if (!src->FIELD)						\
		return 0;						\
	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
		/* internal bug, make state invalid to reject the program */ \
		memset(dst, 0, sizeof(*dst));				\
		return -EFAULT;						\
	}								\
	memcpy(dst->FIELD, src->FIELD,					\
	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
	return 0;							\
}
/* copy_reference_state() */
COPY_STATE_FN(reference, acquired_refs, refs, 1)
/* copy_stack_state() */
COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef COPY_STATE_FN
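
/* A sketch of what the stack flavour above expands to, for readability
 * (the reference flavour is the same with refs/acquired_refs and SIZE 1):
 *
 * static int copy_stack_state(struct bpf_func_state *dst,
 *			       const struct bpf_func_state *src)
 * {
 *	if (!src->stack)
 *		return 0;
 *	if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
 *		memset(dst, 0, sizeof(*dst));
 *		return -EFAULT;
 *	}
 *	memcpy(dst->stack, src->stack,
 *	       sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
 *	return 0;
 * }
 */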

#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
				  bool copy_old)			\
{									\
	u32 old_size = state->COUNT;					\
	struct bpf_##NAME##_state *new_##FIELD;				\
	int slot = size / SIZE;						\
									\
	if (size <= old_size || !size) {				\
		if (copy_old)						\
			return 0;					\
		state->COUNT = slot * SIZE;				\
		if (!size && old_size) {				\
			kfree(state->FIELD);				\
			state->FIELD = NULL;				\
		}							\
		return 0;						\
	}								\
	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
				    GFP_KERNEL);			\
	if (!new_##FIELD)						\
		return -ENOMEM;						\
	if (copy_old) {							\
		if (state->FIELD)					\
			memcpy(new_##FIELD, state->FIELD,		\
			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
		memset(new_##FIELD + old_size / SIZE, 0,		\
		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
	}								\
	state->COUNT = slot * SIZE;					\
	kfree(state->FIELD);						\
	state->FIELD = new_##FIELD;					\
	return 0;							\
}
/* realloc_reference_state() */
REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
/* realloc_stack_state() */
REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef REALLOC_STATE_FN

/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 * make it consume minimal amount of memory. check_stack_write() access from
 * the program calls into realloc_func_state() to grow the stack size.
 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 * which realloc_stack_state() copies over. It points to previous
 * bpf_verifier_state which is never reallocated.
 */
static int realloc_func_state(struct bpf_func_state *state, int stack_size,
			      int refs_size, bool copy_old)
{
	int err = realloc_reference_state(state, refs_size, copy_old);
	if (err)
		return err;
	return realloc_stack_state(state, stack_size, copy_old);
}

/* Acquire a pointer id from the env and update the state->refs to include
 * this new pointer reference.
 * On success, returns a valid pointer id to associate with the register
 * On failure, returns a negative errno.
 */
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_func_state *state = cur_func(env);
	int new_ofs = state->acquired_refs;
	int id, err;

	err = realloc_reference_state(state, state->acquired_refs + 1, true);
	if (err)
		return err;
	id = ++env->id_gen;
	state->refs[new_ofs].id = id;
	state->refs[new_ofs].insn_idx = insn_idx;

	return id;
}

/* release function corresponding to acquire_reference_state(). Idempotent. */
static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
{
	int i, last_idx;

	if (!ptr_id)
		return -EFAULT;

	last_idx = state->acquired_refs - 1;
	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id == ptr_id) {
			if (last_idx && i != last_idx)
				memcpy(&state->refs[i], &state->refs[last_idx],
				       sizeof(*state->refs));
			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
			state->acquired_refs--;
			return 0;
		}
	}
	return -EFAULT;
}

/* variation on the above for cases where we expect that there must be an
 * outstanding reference for the specified ptr_id.
 */
static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
{
	struct bpf_func_state *state = cur_func(env);
	int err;

	err = __release_reference_state(state, ptr_id);
	if (WARN_ON_ONCE(err != 0))
		verbose(env, "verifier internal error: can't release reference\n");
	return err;
}

static int transfer_reference_state(struct bpf_func_state *dst,
				    struct bpf_func_state *src)
{
	int err = realloc_reference_state(dst, src->acquired_refs, false);
	if (err)
		return err;
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return 0;
}

static void free_func_state(struct bpf_func_state *state)
{
	if (!state)
		return;
	kfree(state->refs);
	kfree(state->stack);
	kfree(state);
}

static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	if (free_self)
		kfree(state);
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	int err;

	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
				 false);
	if (err)
		return err;
	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return copy_stack_state(dst, src);
}

static int copy_verifier_state(struct bpf_verifier_state *dst_state,
			       const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	/* if dst has more stack frames than src's frame, free them */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	dst_state->curframe = src->curframe;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}

static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}

static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		goto err;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
		verbose(env, "BPF program is too complex\n");
		goto err;
	}
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL));
	return NULL;
}

#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};

static void __mark_reg_not_init(struct bpf_reg_state *reg);

/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear id, off, and union(map_ptr, range) */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;
}
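
/* Worked example (illustrative, not in the original): __mark_reg_known(reg, 42)
 * leaves var_off as the constant tnum {.value = 42, .mask = 0} and sets
 * smin_value = smax_value = umin_value = umax_value = 42, i.e. the register is
 * known exactly; only the id/off/map_ptr part of the state is cleared.
 */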

/* Mark the 'variable offset' part of a register as zero. This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

static void __mark_reg_const_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
}

static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_known_zero(regs + regno);
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	return reg->type == which &&
	       reg->id == 0 &&
	       reg->off == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}

/* Attempts to improve min/max values based on var_off information */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);
}
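
/* Worked example (illustrative, not part of the original comments): if only
 * the low byte of a register is unknown, var_off = {.value = 0, .mask = 0xff},
 * so the lines above tighten umin_value to at least 0 (the known bits) and
 * umax_value to at most 0xff (known bits | unknown mask); the signed bounds
 * are tightened the same way, with the sign bit handled via S64_MIN/S64_MAX.
 */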

/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->smin_value >= 0 || reg->smax_value < 0) {
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
		return;
	}
	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s64)reg->umax_value >= 0) {
		/* Positive.  We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->smin_value = reg->umin_value;
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
	} else if ((s64)reg->umin_value < 0) {
		/* Negative.  We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value;
	}
}

/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_intersect(reg->var_off,
				      tnum_range(reg->umin_value,
						 reg->umax_value));
}

/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(struct bpf_reg_state *reg)
{
	/*
	 * Clear type, id, off, and union(map_ptr, range) and
	 * padding between 'type' and union
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	reg->var_off = tnum_unknown;
	reg->frameno = 0;
	__mark_reg_unbounded(reg);
}

static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_unknown(regs + regno);
}

static void __mark_reg_not_init(struct bpf_reg_state *reg)
{
	__mark_reg_unknown(reg);
	reg->type = NOT_INIT;
}

static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_not_init(regs + regno);
}

static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		mark_reg_not_init(env, regs, i);
		regs[i].live = REG_LIVE_NONE;
		regs[i].parent = NULL;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);
	regs[BPF_REG_FP].frameno = state->frameno;

	/* 1st arg to a function */
	regs[BPF_REG_1].type = PTR_TO_CTX;
	mark_reg_known_zero(env, regs, BPF_REG_1);
}

#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	state->frameno = frameno;
	state->subprogno = subprogno;
	init_reg_state(env, state);
}

enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};

static int cmp_subprogs(const void *a, const void *b)
{
	return ((struct bpf_subprog_info *)a)->start -
	       ((struct bpf_subprog_info *)b)->start;
}

static int find_subprog(struct bpf_verifier_env *env, int off)
{
	struct bpf_subprog_info *p;

	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
		    sizeof(env->subprog_info[0]), cmp_subprogs);
	if (!p)
		return -ENOENT;
	return p - env->subprog_info;

}

static int add_subprog(struct bpf_verifier_env *env, int off)
{
	int insn_cnt = env->prog->len;
	int ret;

	if (off >= insn_cnt || off < 0) {
		verbose(env, "call to invalid destination\n");
		return -EINVAL;
	}
	ret = find_subprog(env, off);
	if (ret >= 0)
		return 0;
	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
		verbose(env, "too many subprograms\n");
		return -E2BIG;
	}
	env->subprog_info[env->subprog_cnt++].start = off;
	sort(env->subprog_info, env->subprog_cnt,
	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
	return 0;
}

static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* Add entry function. */
	ret = add_subprog(env, 0);
	if (ret < 0)
		return ret;

	/* determine subprog starts. The end is one before the next starts */
	for (i = 0; i < insn_cnt; i++) {
		if (insn[i].code != (BPF_JMP | BPF_CALL))
			continue;
		if (insn[i].src_reg != BPF_PSEUDO_CALL)
			continue;
		if (!env->allow_ptr_leaks) {
			verbose(env, "function calls to other bpf functions are allowed for root only\n");
			return -EPERM;
		}
		ret = add_subprog(env, i + insn[i].imm + 1);
		if (ret < 0)
			return ret;
	}

	/* Add a fake 'exit' subprog which could simplify subprog iteration
	 * logic. 'subprog_cnt' should not be increased.
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level > 1)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

	/* now check that all jumps are within the same subprog */
	subprog_start = subprog[cur_subprog].start;
	subprog_end = subprog[cur_subprog + 1].start;
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		if (BPF_CLASS(code) != BPF_JMP)
			goto next;
		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
			goto next;
		off = i + insn[i].off + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			subprog_start = subprog_end;
			cur_subprog++;
			if (cur_subprog < env->subprog_cnt)
				subprog_end = subprog[cur_subprog + 1].start;
		}
	}
	return 0;
}
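
/* Illustrative sketch (not from the original file): with a single bpf-to-bpf
 * call such as
 *
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 0, 2),  // insn 3
 *
 * the callee starts at insn 3 + 2 + 1 = 6, so add_subprog() ends up recording
 * subprog starts {0, 6} plus the fake 'exit' subprog at insn_cnt, and the jump
 * check above then verifies that no jump crosses the boundary at insn 6.
 */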

/* Parentage chain of this register (or stack slot) should take care of all
 * issues like callee-saved registers, stack slot allocation time, etc.
 */
static int mark_reg_read(struct bpf_verifier_env *env,
			 const struct bpf_reg_state *state,
			 struct bpf_reg_state *parent)
{
	bool writes = parent == state->parent; /* Observe write marks */

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
		if (writes && state->live & REG_LIVE_WRITTEN)
			break;
		/* ... then we depend on parent's value */
		parent->live |= REG_LIVE_READ;
		state = parent;
		parent = state->parent;
		writes = true;
	}
	return 0;
}

static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum reg_arg_type t)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs;

	if (regno >= MAX_BPF_REG) {
		verbose(env, "R%d is invalid\n", regno);
		return -EINVAL;
	}

	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (regs[regno].type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		/* We don't need to worry about FP liveness because it's read-only */
		if (regno != BPF_REG_FP)
			return mark_reg_read(env, &regs[regno],
					     regs[regno].parent);
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		regs[regno].live |= REG_LIVE_WRITTEN;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}

static bool is_spillable_regtype(enum bpf_reg_type type)
{
	switch (type) {
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MAP_VALUE_OR_NULL:
	case PTR_TO_STACK:
	case PTR_TO_CTX:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
	case PTR_TO_FLOW_KEYS:
	case CONST_PTR_TO_MAP:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCKET_OR_NULL:
		return true;
	default:
		return false;
	}
}

/* Does this register contain a constant zero? */
static bool register_is_null(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
}

/* check_stack_read/write functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 */
static int check_stack_write(struct bpf_verifier_env *env,
			     struct bpf_func_state *state, /* func where register points to */
			     int off, int size, int value_regno, int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	enum bpf_reg_type type;

	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
				 state->acquired_refs, true);
	if (err)
		return err;
	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	if (!env->allow_ptr_leaks &&
	    state->stack[spi].slot_type[0] == STACK_SPILL &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	if (value_regno >= 0 &&
	    is_spillable_regtype((type = cur->regs[value_regno].type))) {

		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}

		if (state != cur && type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}

		/* save register state */
		state->stack[spi].spilled_ptr = cur->regs[value_regno];
		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

		for (i = 0; i < BPF_REG_SIZE; i++) {
			if (state->stack[spi].slot_type[i] == STACK_MISC &&
			    !env->allow_ptr_leaks) {
				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
				int soff = (-spi - 1) * BPF_REG_SIZE;

				/* detected reuse of integer stack slot with a pointer
				 * which means either llvm is reusing stack slot or
				 * an attacker is trying to exploit CVE-2018-3639
				 * (speculative store bypass)
				 * Have to sanitize that slot with preemptive
				 * store of zero.
				 */
				if (*poff && *poff != soff) {
					/* disallow programs where single insn stores
					 * into two different stack slots, since verifier
					 * cannot sanitize them
					 */
					verbose(env,
						"insn %d cannot access two stack slots fp%d and fp%d",
						insn_idx, *poff, soff);
					return -EINVAL;
				}
				*poff = soff;
			}
			state->stack[spi].slot_type[i] = STACK_SPILL;
		}
	} else {
		u8 type = STACK_MISC;

		/* regular write of data into stack destroys any spilled ptr */
		state->stack[spi].spilled_ptr.type = NOT_INIT;

		/* only mark the slot as written if all 8 bytes were written
		 * otherwise read propagation may incorrectly stop too soon
		 * when stack slots are partially written.
		 * This heuristic means that read propagation will be
		 * conservative, since it will add reg_live_read marks
		 * to stack slots all the way to first state when a program
		 * writes+reads less than 8 bytes
		 */
		if (size == BPF_REG_SIZE)
			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

		/* when we zero initialize stack slots mark them as such */
		if (value_regno >= 0 &&
		    register_is_null(&cur->regs[value_regno]))
			type = STACK_ZERO;

		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
				type;
	}
	return 0;
}
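
/* Illustrative note (not from the original file): a pointer spill is an
 * 8-byte store through the frame pointer, e.g.
 *
 *    BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
 *
 * with R1 holding e.g. PTR_TO_CTX: the slot at fp-8 becomes STACK_SPILL and
 * remembers the register state. A later narrower (non 8-byte) write to that
 * slot by an unprivileged program is rejected above as an attempt to corrupt
 * a spilled pointer.
 */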

static int check_stack_read(struct bpf_verifier_env *env,
			    struct bpf_func_state *reg_state /* func where register points to */,
			    int off, int size, int value_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	u8 *stype;

	if (reg_state->allocated_stack <= slot) {
		verbose(env, "invalid read from stack off %d+0 size %d\n",
			off, size);
		return -EACCES;
	}
	stype = reg_state->stack[spi].slot_type;

	if (stype[0] == STACK_SPILL) {
		if (size != BPF_REG_SIZE) {
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		for (i = 1; i < BPF_REG_SIZE; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
				verbose(env, "corrupted spill memory\n");
				return -EACCES;
			}
		}

		if (value_regno >= 0) {
			/* restore register state from stack */
			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
			/* mark reg as written since spilled pointer state likely
			 * has its liveness marks cleared by is_state_visited()
			 * which resets stack/reg liveness for state transitions
			 */
			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
		}
		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
			      reg_state->stack[spi].spilled_ptr.parent);
		return 0;
	} else {
		int zeros = 0;

		for (i = 0; i < size; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
				continue;
			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
				zeros++;
				continue;
			}
			verbose(env, "invalid read from stack off %d+%d size %d\n",
				off, i, size);
			return -EACCES;
		}
		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
			      reg_state->stack[spi].spilled_ptr.parent);
		if (value_regno >= 0) {
			if (zeros == size) {
				/* any size read into register is zero extended,
				 * so the whole register == const_zero
				 */
				__mark_reg_const_zero(&state->regs[value_regno]);
			} else {
				/* have read misc data from the stack */
				mark_reg_unknown(env, state->regs, value_regno);
			}
			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
		}
		return 0;
	}
}

/* check read/write into map element returned by bpf_map_lookup_elem() */
static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
			      int size, bool zero_size_allowed)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_map *map = regs[regno].map_ptr;

	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
	    off + size > map->value_size) {
		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}
	return 0;
}

/* check read/write into a map element with possible variable offset */
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
			    int off, int size, bool zero_size_allowed)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	int err;

	/* We may have adjusted the register to this map value, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 */
	if (env->log.level)
		print_verifier_state(env, state);
	/* The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0.  If we are using signed variables for our
	 * indexes we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_map_access(env, regno, reg->smin_value + off, size,
				 zero_size_allowed);
	if (err) {
		verbose(env, "R%d min value is outside of the array range\n",
			regno);
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg->umax_value + off could overflow, treat that as unbounded too.
	 */
	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
			regno);
		return -EACCES;
	}
	err = __check_map_access(env, regno, reg->umax_value + off, size,
				 zero_size_allowed);
	if (err)
		verbose(env, "R%d max value is outside of the array range\n",
			regno);
	return err;
}
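
/* Worked example (illustrative): for a map with value_size = 64 and a register
 * with smin_value = 0 and umax_value = 32, a 4-byte access at off = 60 passes
 * the minimum check (0 + 60 + 4 <= 64) but fails the maximum check
 * (32 + 60 + 4 > 64), so the verifier reports that the max value is outside
 * of the array range.
 */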
1384
969bf05e
AS
1385#define MAX_PACKET_OFF 0xffff
1386
58e2af8b 1387static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3a0af8fd
TG
1388 const struct bpf_call_arg_meta *meta,
1389 enum bpf_access_type t)
4acf6c0b 1390{
36bbef52 1391 switch (env->prog->type) {
5d66fa7d 1392 /* Program types only with direct read access go here! */
3a0af8fd
TG
1393 case BPF_PROG_TYPE_LWT_IN:
1394 case BPF_PROG_TYPE_LWT_OUT:
004d4b27 1395 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2dbb9b9e 1396 case BPF_PROG_TYPE_SK_REUSEPORT:
5d66fa7d 1397 case BPF_PROG_TYPE_FLOW_DISSECTOR:
d5563d36 1398 case BPF_PROG_TYPE_CGROUP_SKB:
3a0af8fd
TG
1399 if (t == BPF_WRITE)
1400 return false;
7e57fbb2 1401 /* fallthrough */
5d66fa7d
DB
1402
1403 /* Program types with direct read + write access go here! */
36bbef52
DB
1404 case BPF_PROG_TYPE_SCHED_CLS:
1405 case BPF_PROG_TYPE_SCHED_ACT:
4acf6c0b 1406 case BPF_PROG_TYPE_XDP:
3a0af8fd 1407 case BPF_PROG_TYPE_LWT_XMIT:
8a31db56 1408 case BPF_PROG_TYPE_SK_SKB:
4f738adb 1409 case BPF_PROG_TYPE_SK_MSG:
36bbef52
DB
1410 if (meta)
1411 return meta->pkt_access;
1412
1413 env->seen_direct_write = true;
4acf6c0b
BB
1414 return true;
1415 default:
1416 return false;
1417 }
1418}
1419
f1174f77 1420static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
9fd29c08 1421 int off, int size, bool zero_size_allowed)
969bf05e 1422{
638f5b90 1423 struct bpf_reg_state *regs = cur_regs(env);
58e2af8b 1424 struct bpf_reg_state *reg = &regs[regno];
969bf05e 1425
9fd29c08
YS
1426 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1427 (u64)off + size > reg->range) {
61bd5218 1428 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
d91b28ed 1429 off, size, regno, reg->id, reg->off, reg->range);
969bf05e
AS
1430 return -EACCES;
1431 }
1432 return 0;
1433}
1434
f1174f77 1435static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 1436 int size, bool zero_size_allowed)
f1174f77 1437{
638f5b90 1438 struct bpf_reg_state *regs = cur_regs(env);
f1174f77
EC
1439 struct bpf_reg_state *reg = &regs[regno];
1440 int err;
1441
1442 /* We may have added a variable offset to the packet pointer; but any
1443 * reg->range we have comes after that. We are only checking the fixed
1444 * offset.
1445 */
1446
1447 /* We don't allow negative numbers, because we aren't tracking enough
1448 * detail to prove they're safe.
1449 */
b03c9f9f 1450 if (reg->smin_value < 0) {
61bd5218 1451 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
f1174f77
EC
1452 regno);
1453 return -EACCES;
1454 }
9fd29c08 1455 err = __check_packet_access(env, regno, off, size, zero_size_allowed);
f1174f77 1456 if (err) {
61bd5218 1457 verbose(env, "R%d offset is outside of the packet\n", regno);
f1174f77
EC
1458 return err;
1459 }
e647815a
JW
1460
1461 /* __check_packet_access has made sure "off + size - 1" is within u16.
1462 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
1463 * otherwise find_good_pkt_pointers would have refused to set range info
1464 * that __check_packet_access would have rejected this pkt access.
1465 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
1466 */
1467 env->prog->aux->max_pkt_offset =
1468 max_t(u32, env->prog->aux->max_pkt_offset,
1469 off + reg->umax_value + size - 1);
1470
f1174f77
EC
1471 return err;
1472}
1473
1474/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
31fd8581 1475static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
19de99f7 1476 enum bpf_access_type t, enum bpf_reg_type *reg_type)
17a52670 1477{
f96da094
DB
1478 struct bpf_insn_access_aux info = {
1479 .reg_type = *reg_type,
1480 };
31fd8581 1481
4f9218aa 1482 if (env->ops->is_valid_access &&
5e43f899 1483 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
f96da094
DB
1484 /* A non zero info.ctx_field_size indicates that this field is a
1485 * candidate for later verifier transformation to load the whole
1486 * field and then apply a mask when accessed with a narrower
1487 * access than actual ctx access size. A zero info.ctx_field_size
1488 * will only allow for whole field access and rejects any other
1489 * type of narrower access.
31fd8581 1490 */
23994631 1491 *reg_type = info.reg_type;
31fd8581 1492
4f9218aa 1493 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
32bbe007
AS
1494 /* remember the offset of last byte accessed in ctx */
1495 if (env->prog->aux->max_ctx_offset < off + size)
1496 env->prog->aux->max_ctx_offset = off + size;
17a52670 1497 return 0;
32bbe007 1498 }
17a52670 1499
61bd5218 1500 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
17a52670
AS
1501 return -EACCES;
1502}
1503
d58e468b
PP
1504static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
1505 int size)
1506{
1507 if (size < 0 || off < 0 ||
1508 (u64)off + size > sizeof(struct bpf_flow_keys)) {
1509 verbose(env, "invalid access to flow keys off=%d size=%d\n",
1510 off, size);
1511 return -EACCES;
1512 }
1513 return 0;
1514}
1515
c64b7983
JS
1516static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
1517 int size, enum bpf_access_type t)
1518{
1519 struct bpf_reg_state *regs = cur_regs(env);
1520 struct bpf_reg_state *reg = &regs[regno];
1521 struct bpf_insn_access_aux info;
1522
1523 if (reg->smin_value < 0) {
1524 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1525 regno);
1526 return -EACCES;
1527 }
1528
1529 if (!bpf_sock_is_valid_access(off, size, t, &info)) {
1530 verbose(env, "invalid bpf_sock access off=%d size=%d\n",
1531 off, size);
1532 return -EACCES;
1533 }
1534
1535 return 0;
1536}
1537
4cabc5b1
DB
1538static bool __is_pointer_value(bool allow_ptr_leaks,
1539 const struct bpf_reg_state *reg)
1be7f75d 1540{
4cabc5b1 1541 if (allow_ptr_leaks)
1be7f75d
AS
1542 return false;
1543
f1174f77 1544 return reg->type != SCALAR_VALUE;
1be7f75d
AS
1545}
1546
2a159c6f
DB
1547static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
1548{
1549 return cur_regs(env) + regno;
1550}
1551
4cabc5b1
DB
1552static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1553{
2a159c6f 1554 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4cabc5b1
DB
1555}
1556
f37a8cb8
DB
1557static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1558{
2a159c6f 1559 const struct bpf_reg_state *reg = reg_state(env, regno);
f37a8cb8 1560
fd978bf7
JS
1561 return reg->type == PTR_TO_CTX ||
1562 reg->type == PTR_TO_SOCKET;
f37a8cb8
DB
1563}
1564
ca369602
DB
1565static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1566{
2a159c6f 1567 const struct bpf_reg_state *reg = reg_state(env, regno);
ca369602
DB
1568
1569 return type_is_pkt_pointer(reg->type);
1570}
1571
4b5defde
DB
1572static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
1573{
1574 const struct bpf_reg_state *reg = reg_state(env, regno);
1575
 1576 /* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
1577 return reg->type == PTR_TO_FLOW_KEYS;
1578}
1579
61bd5218
JK
1580static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1581 const struct bpf_reg_state *reg,
d1174416 1582 int off, int size, bool strict)
969bf05e 1583{
f1174f77 1584 struct tnum reg_off;
e07b98d9 1585 int ip_align;
d1174416
DM
1586
1587 /* Byte size accesses are always allowed. */
1588 if (!strict || size == 1)
1589 return 0;
1590
e4eda884
DM
1591 /* For platforms that do not have a Kconfig enabling
1592 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1593 * NET_IP_ALIGN is universally set to '2'. And on platforms
1594 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1595 * to this code only in strict mode where we want to emulate
1596 * the NET_IP_ALIGN==2 checking. Therefore use an
1597 * unconditional IP align value of '2'.
e07b98d9 1598 */
e4eda884 1599 ip_align = 2;
f1174f77
EC
1600
1601 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1602 if (!tnum_is_aligned(reg_off, size)) {
1603 char tn_buf[48];
1604
1605 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218
JK
1606 verbose(env,
1607 "misaligned packet access off %d+%s+%d+%d size %d\n",
f1174f77 1608 ip_align, tn_buf, reg->off, off, size);
969bf05e
AS
1609 return -EACCES;
1610 }
79adffcd 1611
969bf05e
AS
1612 return 0;
1613}
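/* An illustrative, user-space sketch of the tnum-based alignment test used
 * above; it is not part of verifier.c. The toy_tnum type mirrors the kernel's
 * (value, mask) encoding: mask bits are unknown, value holds the known bits.
 * toy_tnum_add() is modeled on the kernel's tnum algebra; an access of 'size'
 * bytes is aligned iff the low bits of the computed offset are known zero.
 */
#include <assert.h>
#include <stdint.h>

struct toy_tnum { uint64_t value; uint64_t mask; };

static struct toy_tnum toy_tnum_const(uint64_t v)
{
	return (struct toy_tnum){ .value = v, .mask = 0 };
}

/* propagate unknown bits through an addition */
static struct toy_tnum toy_tnum_add(struct toy_tnum a, struct toy_tnum b)
{
	uint64_t sm = a.mask + b.mask;
	uint64_t sv = a.value + b.value;
	uint64_t sigma = sm + sv;
	uint64_t chi = sigma ^ sv;
	uint64_t mu = chi | a.mask | b.mask;

	return (struct toy_tnum){ .value = sv & ~mu, .mask = mu };
}

static int toy_tnum_is_aligned(struct toy_tnum a, uint64_t size)
{
	return size ? !((a.value | a.mask) & (size - 1)) : 1;
}

int main(void)
{
	/* variable offset known to be a multiple of 4: low two bits are zero */
	struct toy_tnum var_off = { .value = 0, .mask = ~3ULL };

	/* ip_align(2) + reg->off(0) + off(2) == 4: a 4-byte load is aligned */
	assert(toy_tnum_is_aligned(toy_tnum_add(var_off, toy_tnum_const(4)), 4));
	/* the same register with a total constant offset of 3 is not */
	assert(!toy_tnum_is_aligned(toy_tnum_add(var_off, toy_tnum_const(3)), 4));
	return 0;
}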
1614
61bd5218
JK
1615static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1616 const struct bpf_reg_state *reg,
f1174f77
EC
1617 const char *pointer_desc,
1618 int off, int size, bool strict)
79adffcd 1619{
f1174f77
EC
1620 struct tnum reg_off;
1621
1622 /* Byte size accesses are always allowed. */
1623 if (!strict || size == 1)
1624 return 0;
1625
1626 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1627 if (!tnum_is_aligned(reg_off, size)) {
1628 char tn_buf[48];
1629
1630 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 1631 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
f1174f77 1632 pointer_desc, tn_buf, reg->off, off, size);
79adffcd
DB
1633 return -EACCES;
1634 }
1635
969bf05e
AS
1636 return 0;
1637}
1638
e07b98d9 1639static int check_ptr_alignment(struct bpf_verifier_env *env,
ca369602
DB
1640 const struct bpf_reg_state *reg, int off,
1641 int size, bool strict_alignment_once)
79adffcd 1642{
ca369602 1643 bool strict = env->strict_alignment || strict_alignment_once;
f1174f77 1644 const char *pointer_desc = "";
d1174416 1645
79adffcd
DB
1646 switch (reg->type) {
1647 case PTR_TO_PACKET:
de8f3a83
DB
1648 case PTR_TO_PACKET_META:
1649 /* Special case, because of NET_IP_ALIGN. Given metadata sits
1650 * right in front, treat it the very same way.
1651 */
61bd5218 1652 return check_pkt_ptr_alignment(env, reg, off, size, strict);
d58e468b
PP
1653 case PTR_TO_FLOW_KEYS:
1654 pointer_desc = "flow keys ";
1655 break;
f1174f77
EC
1656 case PTR_TO_MAP_VALUE:
1657 pointer_desc = "value ";
1658 break;
1659 case PTR_TO_CTX:
1660 pointer_desc = "context ";
1661 break;
1662 case PTR_TO_STACK:
1663 pointer_desc = "stack ";
a5ec6ae1
JH
1664 /* The stack spill tracking logic in check_stack_write()
1665 * and check_stack_read() relies on stack accesses being
1666 * aligned.
1667 */
1668 strict = true;
f1174f77 1669 break;
c64b7983
JS
1670 case PTR_TO_SOCKET:
1671 pointer_desc = "sock ";
1672 break;
79adffcd 1673 default:
f1174f77 1674 break;
79adffcd 1675 }
61bd5218
JK
1676 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1677 strict);
79adffcd
DB
1678}
1679
f4d7e40a
AS
1680static int update_stack_depth(struct bpf_verifier_env *env,
1681 const struct bpf_func_state *func,
1682 int off)
1683{
9c8105bd 1684 u16 stack = env->subprog_info[func->subprogno].stack_depth;
f4d7e40a
AS
1685
1686 if (stack >= -off)
1687 return 0;
1688
1689 /* update known max for given subprogram */
9c8105bd 1690 env->subprog_info[func->subprogno].stack_depth = -off;
70a87ffe
AS
1691 return 0;
1692}
f4d7e40a 1693
70a87ffe
AS
1694/* starting from the main bpf function, walk all instructions of the function
1695 * and recursively walk all callees that the given function can call.
1696 * Ignore jump and exit insns.
1697 * Since recursion is prevented by check_cfg() this algorithm
1698 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
1699 */
1700static int check_max_stack_depth(struct bpf_verifier_env *env)
1701{
9c8105bd
JW
1702 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
1703 struct bpf_subprog_info *subprog = env->subprog_info;
70a87ffe 1704 struct bpf_insn *insn = env->prog->insnsi;
70a87ffe
AS
1705 int ret_insn[MAX_CALL_FRAMES];
1706 int ret_prog[MAX_CALL_FRAMES];
f4d7e40a 1707
70a87ffe
AS
1708process_func:
1709 /* round up to 32-bytes, since this is granularity
1710 * of interpreter stack size
1711 */
9c8105bd 1712 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe 1713 if (depth > MAX_BPF_STACK) {
f4d7e40a 1714 verbose(env, "combined stack size of %d calls is %d. Too large\n",
70a87ffe 1715 frame + 1, depth);
f4d7e40a
AS
1716 return -EACCES;
1717 }
70a87ffe 1718continue_func:
4cb3d99c 1719 subprog_end = subprog[idx + 1].start;
70a87ffe
AS
1720 for (; i < subprog_end; i++) {
1721 if (insn[i].code != (BPF_JMP | BPF_CALL))
1722 continue;
1723 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1724 continue;
1725 /* remember insn and function to return to */
1726 ret_insn[frame] = i + 1;
9c8105bd 1727 ret_prog[frame] = idx;
70a87ffe
AS
1728
1729 /* find the callee */
1730 i = i + insn[i].imm + 1;
9c8105bd
JW
1731 idx = find_subprog(env, i);
1732 if (idx < 0) {
70a87ffe
AS
1733 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1734 i);
1735 return -EFAULT;
1736 }
70a87ffe
AS
1737 frame++;
1738 if (frame >= MAX_CALL_FRAMES) {
1739 WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1740 return -EFAULT;
1741 }
1742 goto process_func;
1743 }
1744 /* end of for() loop means the last insn of the 'subprog'
1745 * was reached. Doesn't matter whether it was JA or EXIT
1746 */
1747 if (frame == 0)
1748 return 0;
9c8105bd 1749 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe
AS
1750 frame--;
1751 i = ret_insn[frame];
9c8105bd 1752 idx = ret_prog[frame];
70a87ffe 1753 goto continue_func;
f4d7e40a
AS
1754}
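/* An illustrative, user-space sketch of the combined-stack-depth rule
 * enforced above; it is not part of verifier.c. As in the verifier, each
 * frame's stack usage is rounded up to 32 bytes and a call chain may use at
 * most MAX_BPF_STACK (512) bytes across MAX_CALL_FRAMES (8) frames. For
 * brevity the sketch recurses over a made-up call graph instead of using the
 * verifier's explicit callsite stack; the arithmetic is the same.
 */
#include <stdio.h>

#define TOY_MAX_BPF_STACK	512
#define TOY_MAX_CALL_FRAMES	8

static unsigned int round_up_32(unsigned int x)
{
	return (x + 31) & ~31u;
}

/* worst-case combined depth of 'prog' plus everything it can call */
static unsigned int combined_depth(const unsigned int *stack_depth,
				   const int callee[][2], int prog, int frame)
{
	unsigned int depth, worst = 0, sub;
	int i;

	if (frame >= TOY_MAX_CALL_FRAMES)
		return TOY_MAX_BPF_STACK + 1;	/* force rejection */
	depth = round_up_32(stack_depth[prog] ? stack_depth[prog] : 1);
	for (i = 0; i < 2; i++) {
		if (callee[prog][i] < 0)
			continue;
		sub = combined_depth(stack_depth, callee,
				     callee[prog][i], frame + 1);
		if (sub > worst)
			worst = sub;
	}
	return depth + worst;
}

int main(void)
{
	/* main (64B) calls subprog 1 (200B), which calls subprog 2 (180B) */
	unsigned int stack_depth[3] = { 64, 200, 180 };
	int callee[3][2] = { { 1, -1 }, { 2, -1 }, { -1, -1 } };
	unsigned int depth = combined_depth(stack_depth, callee, 0, 0);

	/* 96 + 224 + 192 == 512, which is exactly at the limit */
	printf("combined stack: %u bytes -> %s\n", depth,
	       depth > TOY_MAX_BPF_STACK ? "rejected" : "accepted");
	return 0;
}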
1755
19d28fbd 1756#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
1757static int get_callee_stack_depth(struct bpf_verifier_env *env,
1758 const struct bpf_insn *insn, int idx)
1759{
1760 int start = idx + insn->imm + 1, subprog;
1761
1762 subprog = find_subprog(env, start);
1763 if (subprog < 0) {
1764 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1765 start);
1766 return -EFAULT;
1767 }
9c8105bd 1768 return env->subprog_info[subprog].stack_depth;
1ea47e01 1769}
19d28fbd 1770#endif
1ea47e01 1771
58990d1f
DB
1772static int check_ctx_reg(struct bpf_verifier_env *env,
1773 const struct bpf_reg_state *reg, int regno)
1774{
1775 /* Access to ctx or passing it to a helper is only allowed in
1776 * its original, unmodified form.
1777 */
1778
1779 if (reg->off) {
1780 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
1781 regno, reg->off);
1782 return -EACCES;
1783 }
1784
1785 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1786 char tn_buf[48];
1787
1788 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1789 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
1790 return -EACCES;
1791 }
1792
1793 return 0;
1794}
1795
0c17d1d2
JH
1796/* truncate register to smaller size (in bytes)
1797 * must be called with size < BPF_REG_SIZE
1798 */
1799static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1800{
1801 u64 mask;
1802
1803 /* clear high bits in bit representation */
1804 reg->var_off = tnum_cast(reg->var_off, size);
1805
1806 /* fix arithmetic bounds */
1807 mask = ((u64)1 << (size * 8)) - 1;
1808 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1809 reg->umin_value &= mask;
1810 reg->umax_value &= mask;
1811 } else {
1812 reg->umin_value = 0;
1813 reg->umax_value = mask;
1814 }
1815 reg->smin_value = reg->umin_value;
1816 reg->smax_value = reg->umax_value;
1817}
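/* An illustrative, user-space sketch of the truncation rule above; it is not
 * part of verifier.c. Only the four scalar bounds are modeled here (var_off
 * is left out for brevity), and the toy_* names are made up for the example.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_bounds {
	int64_t  smin, smax;
	uint64_t umin, umax;
};

static void toy_coerce_to_size(struct toy_bounds *r, int size)
{
	uint64_t mask = ((uint64_t)1 << (size * 8)) - 1;

	/* if the discarded high bits agree, the low bits stay meaningful... */
	if ((r->umin & ~mask) == (r->umax & ~mask)) {
		r->umin &= mask;
		r->umax &= mask;
	} else {
		/* ...otherwise nothing is known about the truncated value */
		r->umin = 0;
		r->umax = mask;
	}
	r->smin = (int64_t)r->umin;
	r->smax = (int64_t)r->umax;
}

int main(void)
{
	struct toy_bounds a = { .umin = 0x105, .umax = 0x13a };	/* same high bits */
	struct toy_bounds b = { .umin = 0x0f0, .umax = 0x110 };	/* high bits differ */

	toy_coerce_to_size(&a, 1);	/* becomes [0x05, 0x3a]: low-byte bounds survive */
	toy_coerce_to_size(&b, 1);	/* collapses to [0x00, 0xff]: high bits disagreed */
	printf("a: [%llu, %llu]  b: [%llu, %llu]\n",
	       (unsigned long long)a.umin, (unsigned long long)a.umax,
	       (unsigned long long)b.umin, (unsigned long long)b.umax);
	return 0;
}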
1818
17a52670
AS
1819/* check whether memory at (regno + off) is accessible for t = (read | write)
 1820 * if t==write, value_regno is a register whose value is stored into memory
1821 * if t==read, value_regno is a register which will receive the value from memory
1822 * if t==write && value_regno==-1, some unknown value is stored into memory
1823 * if t==read && value_regno==-1, don't care what we read from memory
1824 */
ca369602
DB
1825static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1826 int off, int bpf_size, enum bpf_access_type t,
1827 int value_regno, bool strict_alignment_once)
17a52670 1828{
638f5b90
AS
1829 struct bpf_reg_state *regs = cur_regs(env);
1830 struct bpf_reg_state *reg = regs + regno;
f4d7e40a 1831 struct bpf_func_state *state;
17a52670
AS
1832 int size, err = 0;
1833
1834 size = bpf_size_to_bytes(bpf_size);
1835 if (size < 0)
1836 return size;
1837
f1174f77 1838 /* alignment checks will add in reg->off themselves */
ca369602 1839 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
969bf05e
AS
1840 if (err)
1841 return err;
17a52670 1842
f1174f77
EC
1843 /* for access checks, reg->off is just part of off */
1844 off += reg->off;
1845
1846 if (reg->type == PTR_TO_MAP_VALUE) {
1be7f75d
AS
1847 if (t == BPF_WRITE && value_regno >= 0 &&
1848 is_pointer_value(env, value_regno)) {
61bd5218 1849 verbose(env, "R%d leaks addr into map\n", value_regno);
1be7f75d
AS
1850 return -EACCES;
1851 }
48461135 1852
9fd29c08 1853 err = check_map_access(env, regno, off, size, false);
17a52670 1854 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 1855 mark_reg_unknown(env, regs, value_regno);
17a52670 1856
1a0dc1ac 1857 } else if (reg->type == PTR_TO_CTX) {
f1174f77 1858 enum bpf_reg_type reg_type = SCALAR_VALUE;
19de99f7 1859
1be7f75d
AS
1860 if (t == BPF_WRITE && value_regno >= 0 &&
1861 is_pointer_value(env, value_regno)) {
61bd5218 1862 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1be7f75d
AS
1863 return -EACCES;
1864 }
f1174f77 1865
58990d1f
DB
1866 err = check_ctx_reg(env, reg, regno);
1867 if (err < 0)
1868 return err;
1869
31fd8581 1870 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
969bf05e 1871 if (!err && t == BPF_READ && value_regno >= 0) {
f1174f77 1872 /* ctx access returns either a scalar, or a
de8f3a83
DB
1873 * PTR_TO_PACKET[_META,_END]. In the latter
1874 * case, we know the offset is zero.
f1174f77
EC
1875 */
1876 if (reg_type == SCALAR_VALUE)
638f5b90 1877 mark_reg_unknown(env, regs, value_regno);
f1174f77 1878 else
638f5b90 1879 mark_reg_known_zero(env, regs,
61bd5218 1880 value_regno);
638f5b90 1881 regs[value_regno].type = reg_type;
969bf05e 1882 }
17a52670 1883
f1174f77
EC
1884 } else if (reg->type == PTR_TO_STACK) {
1885 /* stack accesses must be at a fixed offset, so that we can
1886 * determine what type of data were returned.
1887 * See check_stack_read().
1888 */
1889 if (!tnum_is_const(reg->var_off)) {
1890 char tn_buf[48];
1891
1892 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 1893 verbose(env, "variable stack access var_off=%s off=%d size=%d",
f1174f77
EC
1894 tn_buf, off, size);
1895 return -EACCES;
1896 }
1897 off += reg->var_off.value;
17a52670 1898 if (off >= 0 || off < -MAX_BPF_STACK) {
61bd5218
JK
1899 verbose(env, "invalid stack off=%d size=%d\n", off,
1900 size);
17a52670
AS
1901 return -EACCES;
1902 }
8726679a 1903
f4d7e40a
AS
1904 state = func(env, reg);
1905 err = update_stack_depth(env, state, off);
1906 if (err)
1907 return err;
8726679a 1908
638f5b90 1909 if (t == BPF_WRITE)
61bd5218 1910 err = check_stack_write(env, state, off, size,
af86ca4e 1911 value_regno, insn_idx);
638f5b90 1912 else
61bd5218
JK
1913 err = check_stack_read(env, state, off, size,
1914 value_regno);
de8f3a83 1915 } else if (reg_is_pkt_pointer(reg)) {
3a0af8fd 1916 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
61bd5218 1917 verbose(env, "cannot write into packet\n");
969bf05e
AS
1918 return -EACCES;
1919 }
4acf6c0b
BB
1920 if (t == BPF_WRITE && value_regno >= 0 &&
1921 is_pointer_value(env, value_regno)) {
61bd5218
JK
1922 verbose(env, "R%d leaks addr into packet\n",
1923 value_regno);
4acf6c0b
BB
1924 return -EACCES;
1925 }
9fd29c08 1926 err = check_packet_access(env, regno, off, size, false);
969bf05e 1927 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 1928 mark_reg_unknown(env, regs, value_regno);
d58e468b
PP
1929 } else if (reg->type == PTR_TO_FLOW_KEYS) {
1930 if (t == BPF_WRITE && value_regno >= 0 &&
1931 is_pointer_value(env, value_regno)) {
1932 verbose(env, "R%d leaks addr into flow keys\n",
1933 value_regno);
1934 return -EACCES;
1935 }
1936
1937 err = check_flow_keys_access(env, off, size);
1938 if (!err && t == BPF_READ && value_regno >= 0)
1939 mark_reg_unknown(env, regs, value_regno);
c64b7983
JS
1940 } else if (reg->type == PTR_TO_SOCKET) {
1941 if (t == BPF_WRITE) {
1942 verbose(env, "cannot write into socket\n");
1943 return -EACCES;
1944 }
1945 err = check_sock_access(env, regno, off, size, t);
1946 if (!err && value_regno >= 0)
1947 mark_reg_unknown(env, regs, value_regno);
17a52670 1948 } else {
61bd5218
JK
1949 verbose(env, "R%d invalid mem access '%s'\n", regno,
1950 reg_type_str[reg->type]);
17a52670
AS
1951 return -EACCES;
1952 }
969bf05e 1953
f1174f77 1954 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
638f5b90 1955 regs[value_regno].type == SCALAR_VALUE) {
f1174f77 1956 /* b/h/w load zero-extends, mark upper bits as known 0 */
0c17d1d2 1957 coerce_reg_to_size(&regs[value_regno], size);
969bf05e 1958 }
17a52670
AS
1959 return err;
1960}
1961
31fd8581 1962static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
17a52670 1963{
17a52670
AS
1964 int err;
1965
1966 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
1967 insn->imm != 0) {
61bd5218 1968 verbose(env, "BPF_XADD uses reserved fields\n");
17a52670
AS
1969 return -EINVAL;
1970 }
1971
1972 /* check src1 operand */
dc503a8a 1973 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
1974 if (err)
1975 return err;
1976
1977 /* check src2 operand */
dc503a8a 1978 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
1979 if (err)
1980 return err;
1981
6bdf6abc 1982 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 1983 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6bdf6abc
DB
1984 return -EACCES;
1985 }
1986
ca369602 1987 if (is_ctx_reg(env, insn->dst_reg) ||
4b5defde
DB
1988 is_pkt_reg(env, insn->dst_reg) ||
1989 is_flow_key_reg(env, insn->dst_reg)) {
ca369602 1990 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2a159c6f
DB
1991 insn->dst_reg,
1992 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
1993 return -EACCES;
1994 }
1995
17a52670 1996 /* check whether atomic_add can read the memory */
31fd8581 1997 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 1998 BPF_SIZE(insn->code), BPF_READ, -1, true);
17a52670
AS
1999 if (err)
2000 return err;
2001
2002 /* check whether atomic_add can write into the same memory */
31fd8581 2003 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 2004 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
17a52670
AS
2005}
2006
2007/* when register 'regno' is passed into a function that will read 'access_size'
 2008 * bytes from that pointer, make sure that it's within the stack boundary
f1174f77
EC
 2009 * and all elements of the stack are initialized.
2010 * Unlike most pointer bounds-checking functions, this one doesn't take an
2011 * 'off' argument, so it has to add in reg->off itself.
17a52670 2012 */
58e2af8b 2013static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
435faee1
DB
2014 int access_size, bool zero_size_allowed,
2015 struct bpf_call_arg_meta *meta)
17a52670 2016{
2a159c6f 2017 struct bpf_reg_state *reg = reg_state(env, regno);
f4d7e40a 2018 struct bpf_func_state *state = func(env, reg);
638f5b90 2019 int off, i, slot, spi;
17a52670 2020
914cb781 2021 if (reg->type != PTR_TO_STACK) {
f1174f77 2022 /* Allow zero-byte read from NULL, regardless of pointer type */
8e2fe1d9 2023 if (zero_size_allowed && access_size == 0 &&
914cb781 2024 register_is_null(reg))
8e2fe1d9
DB
2025 return 0;
2026
61bd5218 2027 verbose(env, "R%d type=%s expected=%s\n", regno,
914cb781 2028 reg_type_str[reg->type],
8e2fe1d9 2029 reg_type_str[PTR_TO_STACK]);
17a52670 2030 return -EACCES;
8e2fe1d9 2031 }
17a52670 2032
f1174f77 2033 /* Only allow fixed-offset stack reads */
914cb781 2034 if (!tnum_is_const(reg->var_off)) {
f1174f77
EC
2035 char tn_buf[48];
2036
914cb781 2037 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 2038 verbose(env, "invalid variable stack read R%d var_off=%s\n",
f1174f77 2039 regno, tn_buf);
ea25f914 2040 return -EACCES;
f1174f77 2041 }
914cb781 2042 off = reg->off + reg->var_off.value;
17a52670 2043 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
9fd29c08 2044 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
61bd5218 2045 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
17a52670
AS
2046 regno, off, access_size);
2047 return -EACCES;
2048 }
2049
435faee1
DB
2050 if (meta && meta->raw_mode) {
2051 meta->access_size = access_size;
2052 meta->regno = regno;
2053 return 0;
2054 }
2055
17a52670 2056 for (i = 0; i < access_size; i++) {
cc2b14d5
AS
2057 u8 *stype;
2058
638f5b90
AS
2059 slot = -(off + i) - 1;
2060 spi = slot / BPF_REG_SIZE;
cc2b14d5
AS
2061 if (state->allocated_stack <= slot)
2062 goto err;
2063 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2064 if (*stype == STACK_MISC)
2065 goto mark;
2066 if (*stype == STACK_ZERO) {
2067 /* helper can write anything into the stack */
2068 *stype = STACK_MISC;
2069 goto mark;
17a52670 2070 }
cc2b14d5
AS
2071err:
2072 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
2073 off, i, access_size);
2074 return -EACCES;
2075mark:
2076 /* reading any byte out of 8-byte 'spill_slot' will cause
2077 * the whole slot to be marked as 'read'
2078 */
679c782d
EC
2079 mark_reg_read(env, &state->stack[spi].spilled_ptr,
2080 state->stack[spi].spilled_ptr.parent);
17a52670 2081 }
f4d7e40a 2082 return update_stack_depth(env, state, off);
17a52670
AS
2083}
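/* An illustrative, user-space sketch of the slot/spi mapping used in the
 * loop above; it is not part of verifier.c. Stack offsets are negative and
 * relative to the frame pointer: the byte at fp-1 is slot 0, and every
 * BPF_REG_SIZE (8) consecutive bytes share one spill-slot index (spi).
 */
#include <stdio.h>

#define TOY_BPF_REG_SIZE 8

int main(void)
{
	int off = -16;		/* helper argument points at fp-16 */
	int access_size = 8;	/* helper reads 8 bytes */
	int i;

	for (i = 0; i < access_size; i++) {
		int slot = -(off + i) - 1;
		int spi = slot / TOY_BPF_REG_SIZE;

		/* prints: fp-16 -> slot 15, spi 1 ... fp-9 -> slot 8, spi 1 */
		printf("byte fp%d -> slot %2d, spi %d\n", off + i, slot, spi);
	}
	return 0;
}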
2084
06c1c049
GB
2085static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
2086 int access_size, bool zero_size_allowed,
2087 struct bpf_call_arg_meta *meta)
2088{
638f5b90 2089 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
06c1c049 2090
f1174f77 2091 switch (reg->type) {
06c1c049 2092 case PTR_TO_PACKET:
de8f3a83 2093 case PTR_TO_PACKET_META:
9fd29c08
YS
2094 return check_packet_access(env, regno, reg->off, access_size,
2095 zero_size_allowed);
06c1c049 2096 case PTR_TO_MAP_VALUE:
9fd29c08
YS
2097 return check_map_access(env, regno, reg->off, access_size,
2098 zero_size_allowed);
f1174f77 2099 default: /* scalar_value|ptr_to_stack or invalid ptr */
06c1c049
GB
2100 return check_stack_boundary(env, regno, access_size,
2101 zero_size_allowed, meta);
2102 }
2103}
2104
90133415
DB
2105static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
2106{
2107 return type == ARG_PTR_TO_MEM ||
2108 type == ARG_PTR_TO_MEM_OR_NULL ||
2109 type == ARG_PTR_TO_UNINIT_MEM;
2110}
2111
2112static bool arg_type_is_mem_size(enum bpf_arg_type type)
2113{
2114 return type == ARG_CONST_SIZE ||
2115 type == ARG_CONST_SIZE_OR_ZERO;
2116}
2117
58e2af8b 2118static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
33ff9823
DB
2119 enum bpf_arg_type arg_type,
2120 struct bpf_call_arg_meta *meta)
17a52670 2121{
638f5b90 2122 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6841de8b 2123 enum bpf_reg_type expected_type, type = reg->type;
17a52670
AS
2124 int err = 0;
2125
80f1d68c 2126 if (arg_type == ARG_DONTCARE)
17a52670
AS
2127 return 0;
2128
dc503a8a
EC
2129 err = check_reg_arg(env, regno, SRC_OP);
2130 if (err)
2131 return err;
17a52670 2132
1be7f75d
AS
2133 if (arg_type == ARG_ANYTHING) {
2134 if (is_pointer_value(env, regno)) {
61bd5218
JK
2135 verbose(env, "R%d leaks addr into helper function\n",
2136 regno);
1be7f75d
AS
2137 return -EACCES;
2138 }
80f1d68c 2139 return 0;
1be7f75d 2140 }
80f1d68c 2141
de8f3a83 2142 if (type_is_pkt_pointer(type) &&
3a0af8fd 2143 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
61bd5218 2144 verbose(env, "helper access to the packet is not allowed\n");
6841de8b
AS
2145 return -EACCES;
2146 }
2147
8e2fe1d9 2148 if (arg_type == ARG_PTR_TO_MAP_KEY ||
2ea864c5
MV
2149 arg_type == ARG_PTR_TO_MAP_VALUE ||
2150 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670 2151 expected_type = PTR_TO_STACK;
d71962f3 2152 if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
de8f3a83 2153 type != expected_type)
6841de8b 2154 goto err_type;
39f19ebb
AS
2155 } else if (arg_type == ARG_CONST_SIZE ||
2156 arg_type == ARG_CONST_SIZE_OR_ZERO) {
f1174f77
EC
2157 expected_type = SCALAR_VALUE;
2158 if (type != expected_type)
6841de8b 2159 goto err_type;
17a52670
AS
2160 } else if (arg_type == ARG_CONST_MAP_PTR) {
2161 expected_type = CONST_PTR_TO_MAP;
6841de8b
AS
2162 if (type != expected_type)
2163 goto err_type;
608cd71a
AS
2164 } else if (arg_type == ARG_PTR_TO_CTX) {
2165 expected_type = PTR_TO_CTX;
6841de8b
AS
2166 if (type != expected_type)
2167 goto err_type;
58990d1f
DB
2168 err = check_ctx_reg(env, reg, regno);
2169 if (err < 0)
2170 return err;
c64b7983
JS
2171 } else if (arg_type == ARG_PTR_TO_SOCKET) {
2172 expected_type = PTR_TO_SOCKET;
2173 if (type != expected_type)
2174 goto err_type;
fd978bf7
JS
2175 if (meta->ptr_id || !reg->id) {
2176 verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
2177 meta->ptr_id, reg->id);
2178 return -EFAULT;
2179 }
2180 meta->ptr_id = reg->id;
90133415 2181 } else if (arg_type_is_mem_ptr(arg_type)) {
8e2fe1d9
DB
2182 expected_type = PTR_TO_STACK;
2183 /* One exception here. In case function allows for NULL to be
f1174f77 2184 * passed in as argument, it's a SCALAR_VALUE type. Final test
8e2fe1d9
DB
2185 * happens during stack boundary checking.
2186 */
914cb781 2187 if (register_is_null(reg) &&
db1ac496 2188 arg_type == ARG_PTR_TO_MEM_OR_NULL)
6841de8b 2189 /* final test in check_stack_boundary() */;
de8f3a83
DB
2190 else if (!type_is_pkt_pointer(type) &&
2191 type != PTR_TO_MAP_VALUE &&
f1174f77 2192 type != expected_type)
6841de8b 2193 goto err_type;
39f19ebb 2194 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
17a52670 2195 } else {
61bd5218 2196 verbose(env, "unsupported arg_type %d\n", arg_type);
17a52670
AS
2197 return -EFAULT;
2198 }
2199
17a52670
AS
2200 if (arg_type == ARG_CONST_MAP_PTR) {
2201 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
33ff9823 2202 meta->map_ptr = reg->map_ptr;
17a52670
AS
2203 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
2204 /* bpf_map_xxx(..., map_ptr, ..., key) call:
2205 * check that [key, key + map->key_size) are within
2206 * stack limits and initialized
2207 */
33ff9823 2208 if (!meta->map_ptr) {
17a52670
AS
2209 /* in function declaration map_ptr must come before
2210 * map_key, so that it's verified and known before
2211 * we have to check map_key here. Otherwise it means
2212 * that kernel subsystem misconfigured verifier
2213 */
61bd5218 2214 verbose(env, "invalid map_ptr to access map->key\n");
17a52670
AS
2215 return -EACCES;
2216 }
d71962f3
PC
2217 err = check_helper_mem_access(env, regno,
2218 meta->map_ptr->key_size, false,
2219 NULL);
2ea864c5
MV
2220 } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
2221 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670
AS
2222 /* bpf_map_xxx(..., map_ptr, ..., value) call:
2223 * check [value, value + map->value_size) validity
2224 */
33ff9823 2225 if (!meta->map_ptr) {
17a52670 2226 /* kernel subsystem misconfigured verifier */
61bd5218 2227 verbose(env, "invalid map_ptr to access map->value\n");
17a52670
AS
2228 return -EACCES;
2229 }
2ea864c5 2230 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
d71962f3
PC
2231 err = check_helper_mem_access(env, regno,
2232 meta->map_ptr->value_size, false,
2ea864c5 2233 meta);
90133415 2234 } else if (arg_type_is_mem_size(arg_type)) {
39f19ebb 2235 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
17a52670 2236
849fa506
YS
2237 /* remember the mem_size which may be used later
2238 * to refine return values.
2239 */
2240 meta->msize_smax_value = reg->smax_value;
2241 meta->msize_umax_value = reg->umax_value;
2242
f1174f77
EC
2243 /* The register is SCALAR_VALUE; the access check
2244 * happens using its boundaries.
06c1c049 2245 */
f1174f77 2246 if (!tnum_is_const(reg->var_off))
06c1c049
GB
2247 /* For unprivileged variable accesses, disable raw
2248 * mode so that the program is required to
2249 * initialize all the memory that the helper could
2250 * just partially fill up.
2251 */
2252 meta = NULL;
2253
b03c9f9f 2254 if (reg->smin_value < 0) {
61bd5218 2255 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
f1174f77
EC
2256 regno);
2257 return -EACCES;
2258 }
06c1c049 2259
b03c9f9f 2260 if (reg->umin_value == 0) {
f1174f77
EC
2261 err = check_helper_mem_access(env, regno - 1, 0,
2262 zero_size_allowed,
2263 meta);
06c1c049
GB
2264 if (err)
2265 return err;
06c1c049 2266 }
f1174f77 2267
b03c9f9f 2268 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
61bd5218 2269 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
f1174f77
EC
2270 regno);
2271 return -EACCES;
2272 }
2273 err = check_helper_mem_access(env, regno - 1,
b03c9f9f 2274 reg->umax_value,
f1174f77 2275 zero_size_allowed, meta);
17a52670
AS
2276 }
2277
2278 return err;
6841de8b 2279err_type:
61bd5218 2280 verbose(env, "R%d type=%s expected=%s\n", regno,
6841de8b
AS
2281 reg_type_str[type], reg_type_str[expected_type]);
2282 return -EACCES;
17a52670
AS
2283}
2284
61bd5218
JK
2285static int check_map_func_compatibility(struct bpf_verifier_env *env,
2286 struct bpf_map *map, int func_id)
35578d79 2287{
35578d79
KX
2288 if (!map)
2289 return 0;
2290
6aff67c8
AS
2291 /* We need a two way check, first is from map perspective ... */
2292 switch (map->map_type) {
2293 case BPF_MAP_TYPE_PROG_ARRAY:
2294 if (func_id != BPF_FUNC_tail_call)
2295 goto error;
2296 break;
2297 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2298 if (func_id != BPF_FUNC_perf_event_read &&
908432ca
YS
2299 func_id != BPF_FUNC_perf_event_output &&
2300 func_id != BPF_FUNC_perf_event_read_value)
6aff67c8
AS
2301 goto error;
2302 break;
2303 case BPF_MAP_TYPE_STACK_TRACE:
2304 if (func_id != BPF_FUNC_get_stackid)
2305 goto error;
2306 break;
4ed8ec52 2307 case BPF_MAP_TYPE_CGROUP_ARRAY:
60747ef4 2308 if (func_id != BPF_FUNC_skb_under_cgroup &&
60d20f91 2309 func_id != BPF_FUNC_current_task_under_cgroup)
4a482f34
MKL
2310 goto error;
2311 break;
cd339431 2312 case BPF_MAP_TYPE_CGROUP_STORAGE:
b741f163 2313 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
cd339431
RG
2314 if (func_id != BPF_FUNC_get_local_storage)
2315 goto error;
2316 break;
546ac1ff
JF
2317 /* devmap returns a pointer to a live net_device ifindex that we cannot
2318 * allow to be modified from bpf side. So do not allow lookup elements
2319 * for now.
2320 */
2321 case BPF_MAP_TYPE_DEVMAP:
2ddf71e2 2322 if (func_id != BPF_FUNC_redirect_map)
546ac1ff
JF
2323 goto error;
2324 break;
fbfc504a
BT
2325 /* Restrict bpf side of cpumap and xskmap, open when use-cases
2326 * appear.
2327 */
6710e112 2328 case BPF_MAP_TYPE_CPUMAP:
fbfc504a 2329 case BPF_MAP_TYPE_XSKMAP:
6710e112
JDB
2330 if (func_id != BPF_FUNC_redirect_map)
2331 goto error;
2332 break;
56f668df 2333 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
bcc6b1b7 2334 case BPF_MAP_TYPE_HASH_OF_MAPS:
56f668df
MKL
2335 if (func_id != BPF_FUNC_map_lookup_elem)
2336 goto error;
16a43625 2337 break;
174a79ff
JF
2338 case BPF_MAP_TYPE_SOCKMAP:
2339 if (func_id != BPF_FUNC_sk_redirect_map &&
2340 func_id != BPF_FUNC_sock_map_update &&
4f738adb
JF
2341 func_id != BPF_FUNC_map_delete_elem &&
2342 func_id != BPF_FUNC_msg_redirect_map)
174a79ff
JF
2343 goto error;
2344 break;
81110384
JF
2345 case BPF_MAP_TYPE_SOCKHASH:
2346 if (func_id != BPF_FUNC_sk_redirect_hash &&
2347 func_id != BPF_FUNC_sock_hash_update &&
2348 func_id != BPF_FUNC_map_delete_elem &&
2349 func_id != BPF_FUNC_msg_redirect_hash)
2350 goto error;
2351 break;
2dbb9b9e
MKL
2352 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
2353 if (func_id != BPF_FUNC_sk_select_reuseport)
2354 goto error;
2355 break;
f1a2e44a
MV
2356 case BPF_MAP_TYPE_QUEUE:
2357 case BPF_MAP_TYPE_STACK:
2358 if (func_id != BPF_FUNC_map_peek_elem &&
2359 func_id != BPF_FUNC_map_pop_elem &&
2360 func_id != BPF_FUNC_map_push_elem)
2361 goto error;
2362 break;
6aff67c8
AS
2363 default:
2364 break;
2365 }
2366
2367 /* ... and second from the function itself. */
2368 switch (func_id) {
2369 case BPF_FUNC_tail_call:
2370 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2371 goto error;
f910cefa 2372 if (env->subprog_cnt > 1) {
f4d7e40a
AS
2373 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2374 return -EINVAL;
2375 }
6aff67c8
AS
2376 break;
2377 case BPF_FUNC_perf_event_read:
2378 case BPF_FUNC_perf_event_output:
908432ca 2379 case BPF_FUNC_perf_event_read_value:
6aff67c8
AS
2380 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2381 goto error;
2382 break;
2383 case BPF_FUNC_get_stackid:
2384 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2385 goto error;
2386 break;
60d20f91 2387 case BPF_FUNC_current_task_under_cgroup:
747ea55e 2388 case BPF_FUNC_skb_under_cgroup:
4a482f34
MKL
2389 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2390 goto error;
2391 break;
97f91a7c 2392 case BPF_FUNC_redirect_map:
9c270af3 2393 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
fbfc504a
BT
2394 map->map_type != BPF_MAP_TYPE_CPUMAP &&
2395 map->map_type != BPF_MAP_TYPE_XSKMAP)
97f91a7c
JF
2396 goto error;
2397 break;
174a79ff 2398 case BPF_FUNC_sk_redirect_map:
4f738adb 2399 case BPF_FUNC_msg_redirect_map:
81110384 2400 case BPF_FUNC_sock_map_update:
174a79ff
JF
2401 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2402 goto error;
2403 break;
81110384
JF
2404 case BPF_FUNC_sk_redirect_hash:
2405 case BPF_FUNC_msg_redirect_hash:
2406 case BPF_FUNC_sock_hash_update:
2407 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
174a79ff
JF
2408 goto error;
2409 break;
cd339431 2410 case BPF_FUNC_get_local_storage:
b741f163
RG
2411 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
2412 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
cd339431
RG
2413 goto error;
2414 break;
2dbb9b9e
MKL
2415 case BPF_FUNC_sk_select_reuseport:
2416 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
2417 goto error;
2418 break;
f1a2e44a
MV
2419 case BPF_FUNC_map_peek_elem:
2420 case BPF_FUNC_map_pop_elem:
2421 case BPF_FUNC_map_push_elem:
2422 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
2423 map->map_type != BPF_MAP_TYPE_STACK)
2424 goto error;
2425 break;
6aff67c8
AS
2426 default:
2427 break;
35578d79
KX
2428 }
2429
2430 return 0;
6aff67c8 2431error:
61bd5218 2432 verbose(env, "cannot pass map_type %d into func %s#%d\n",
ebb676da 2433 map->map_type, func_id_name(func_id), func_id);
6aff67c8 2434 return -EINVAL;
35578d79
KX
2435}
2436
90133415 2437static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
435faee1
DB
2438{
2439 int count = 0;
2440
39f19ebb 2441 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2442 count++;
39f19ebb 2443 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2444 count++;
39f19ebb 2445 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2446 count++;
39f19ebb 2447 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2448 count++;
39f19ebb 2449 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
435faee1
DB
2450 count++;
2451
90133415
DB
2452 /* We only support one arg being in raw mode at the moment,
2453 * which is sufficient for the helper functions we have
2454 * right now.
2455 */
2456 return count <= 1;
2457}
2458
2459static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
2460 enum bpf_arg_type arg_next)
2461{
2462 return (arg_type_is_mem_ptr(arg_curr) &&
2463 !arg_type_is_mem_size(arg_next)) ||
2464 (!arg_type_is_mem_ptr(arg_curr) &&
2465 arg_type_is_mem_size(arg_next));
2466}
2467
2468static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2469{
2470 /* bpf_xxx(..., buf, len) call will access 'len'
2471 * bytes from memory 'buf'. Both arg types need
2472 * to be paired, so make sure there's no buggy
2473 * helper function specification.
2474 */
2475 if (arg_type_is_mem_size(fn->arg1_type) ||
2476 arg_type_is_mem_ptr(fn->arg5_type) ||
2477 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
2478 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
2479 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
2480 check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
2481 return false;
2482
2483 return true;
2484}
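/* An illustrative, user-space sketch of the pairing rule enforced above; it
 * is not part of verifier.c. The toy_* names are local stand-ins for the real
 * bpf_arg_type values; the point is only that every mem pointer argument must
 * be immediately followed by its size argument and vice versa.
 */
#include <assert.h>
#include <stdbool.h>

enum toy_arg { TOY_DONTCARE, TOY_MEM_PTR, TOY_MEM_SIZE };

static bool toy_pair_invalid(enum toy_arg cur, enum toy_arg next)
{
	return (cur == TOY_MEM_PTR && next != TOY_MEM_SIZE) ||
	       (cur != TOY_MEM_PTR && next == TOY_MEM_SIZE);
}

static bool toy_args_ok(const enum toy_arg a[5])
{
	int i;

	if (a[0] == TOY_MEM_SIZE || a[4] == TOY_MEM_PTR)
		return false;
	for (i = 0; i < 4; i++)
		if (toy_pair_invalid(a[i], a[i + 1]))
			return false;
	return true;
}

int main(void)
{
	/* bpf_xxx(something, buf, len, ...) style prototype: accepted */
	enum toy_arg good[5] = { TOY_DONTCARE, TOY_MEM_PTR, TOY_MEM_SIZE,
				 TOY_DONTCARE, TOY_DONTCARE };
	/* size argument without a preceding buffer: rejected */
	enum toy_arg bad[5] = { TOY_DONTCARE, TOY_DONTCARE, TOY_MEM_SIZE,
				TOY_DONTCARE, TOY_DONTCARE };

	assert(toy_args_ok(good));
	assert(!toy_args_ok(bad));
	return 0;
}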
2485
fd978bf7
JS
2486static bool check_refcount_ok(const struct bpf_func_proto *fn)
2487{
2488 int count = 0;
2489
2490 if (arg_type_is_refcounted(fn->arg1_type))
2491 count++;
2492 if (arg_type_is_refcounted(fn->arg2_type))
2493 count++;
2494 if (arg_type_is_refcounted(fn->arg3_type))
2495 count++;
2496 if (arg_type_is_refcounted(fn->arg4_type))
2497 count++;
2498 if (arg_type_is_refcounted(fn->arg5_type))
2499 count++;
2500
 2501 /* We only support one refcounted arg at the moment,
2502 * which is sufficient for the helper functions we have right now.
2503 */
2504 return count <= 1;
2505}
2506
90133415
DB
2507static int check_func_proto(const struct bpf_func_proto *fn)
2508{
2509 return check_raw_mode_ok(fn) &&
fd978bf7
JS
2510 check_arg_pair_ok(fn) &&
2511 check_refcount_ok(fn) ? 0 : -EINVAL;
435faee1
DB
2512}
2513
de8f3a83
DB
2514/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2515 * are now invalid, so turn them into unknown SCALAR_VALUE.
f1174f77 2516 */
f4d7e40a
AS
2517static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2518 struct bpf_func_state *state)
969bf05e 2519{
58e2af8b 2520 struct bpf_reg_state *regs = state->regs, *reg;
969bf05e
AS
2521 int i;
2522
2523 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 2524 if (reg_is_pkt_pointer_any(&regs[i]))
61bd5218 2525 mark_reg_unknown(env, regs, i);
969bf05e 2526
f3709f69
JS
2527 bpf_for_each_spilled_reg(i, state, reg) {
2528 if (!reg)
969bf05e 2529 continue;
de8f3a83
DB
2530 if (reg_is_pkt_pointer_any(reg))
2531 __mark_reg_unknown(reg);
969bf05e
AS
2532 }
2533}
2534
f4d7e40a
AS
2535static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2536{
2537 struct bpf_verifier_state *vstate = env->cur_state;
2538 int i;
2539
2540 for (i = 0; i <= vstate->curframe; i++)
2541 __clear_all_pkt_pointers(env, vstate->frame[i]);
2542}
2543
fd978bf7
JS
2544static void release_reg_references(struct bpf_verifier_env *env,
2545 struct bpf_func_state *state, int id)
2546{
2547 struct bpf_reg_state *regs = state->regs, *reg;
2548 int i;
2549
2550 for (i = 0; i < MAX_BPF_REG; i++)
2551 if (regs[i].id == id)
2552 mark_reg_unknown(env, regs, i);
2553
2554 bpf_for_each_spilled_reg(i, state, reg) {
2555 if (!reg)
2556 continue;
2557 if (reg_is_refcounted(reg) && reg->id == id)
2558 __mark_reg_unknown(reg);
2559 }
2560}
2561
2562/* The pointer with the specified id has released its reference to kernel
2563 * resources. Identify all copies of the same pointer and clear the reference.
2564 */
2565static int release_reference(struct bpf_verifier_env *env,
2566 struct bpf_call_arg_meta *meta)
2567{
2568 struct bpf_verifier_state *vstate = env->cur_state;
2569 int i;
2570
2571 for (i = 0; i <= vstate->curframe; i++)
2572 release_reg_references(env, vstate->frame[i], meta->ptr_id);
2573
2574 return release_reference_state(env, meta->ptr_id);
2575}
2576
f4d7e40a
AS
2577static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2578 int *insn_idx)
2579{
2580 struct bpf_verifier_state *state = env->cur_state;
2581 struct bpf_func_state *caller, *callee;
fd978bf7 2582 int i, err, subprog, target_insn;
f4d7e40a 2583
aada9ce6 2584 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
f4d7e40a 2585 verbose(env, "the call stack of %d frames is too deep\n",
aada9ce6 2586 state->curframe + 2);
f4d7e40a
AS
2587 return -E2BIG;
2588 }
2589
2590 target_insn = *insn_idx + insn->imm;
2591 subprog = find_subprog(env, target_insn + 1);
2592 if (subprog < 0) {
2593 verbose(env, "verifier bug. No program starts at insn %d\n",
2594 target_insn + 1);
2595 return -EFAULT;
2596 }
2597
2598 caller = state->frame[state->curframe];
2599 if (state->frame[state->curframe + 1]) {
2600 verbose(env, "verifier bug. Frame %d already allocated\n",
2601 state->curframe + 1);
2602 return -EFAULT;
2603 }
2604
2605 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2606 if (!callee)
2607 return -ENOMEM;
2608 state->frame[state->curframe + 1] = callee;
2609
2610 /* callee cannot access r0, r6 - r9 for reading and has to write
2611 * into its own stack before reading from it.
2612 * callee can read/write into caller's stack
2613 */
2614 init_func_state(env, callee,
2615 /* remember the callsite, it will be used by bpf_exit */
2616 *insn_idx /* callsite */,
2617 state->curframe + 1 /* frameno within this callchain */,
f910cefa 2618 subprog /* subprog number within this prog */);
f4d7e40a 2619
fd978bf7
JS
2620 /* Transfer references to the callee */
2621 err = transfer_reference_state(callee, caller);
2622 if (err)
2623 return err;
2624
679c782d
EC
2625 /* copy r1 - r5 args that callee can access. The copy includes parent
2626 * pointers, which connects us up to the liveness chain
2627 */
f4d7e40a
AS
2628 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2629 callee->regs[i] = caller->regs[i];
2630
679c782d 2631 /* after the call registers r0 - r5 were scratched */
f4d7e40a
AS
2632 for (i = 0; i < CALLER_SAVED_REGS; i++) {
2633 mark_reg_not_init(env, caller->regs, caller_saved[i]);
2634 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2635 }
2636
2637 /* only increment it after check_reg_arg() finished */
2638 state->curframe++;
2639
2640 /* and go analyze first insn of the callee */
2641 *insn_idx = target_insn;
2642
2643 if (env->log.level) {
2644 verbose(env, "caller:\n");
2645 print_verifier_state(env, caller);
2646 verbose(env, "callee:\n");
2647 print_verifier_state(env, callee);
2648 }
2649 return 0;
2650}
2651
2652static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2653{
2654 struct bpf_verifier_state *state = env->cur_state;
2655 struct bpf_func_state *caller, *callee;
2656 struct bpf_reg_state *r0;
fd978bf7 2657 int err;
f4d7e40a
AS
2658
2659 callee = state->frame[state->curframe];
2660 r0 = &callee->regs[BPF_REG_0];
2661 if (r0->type == PTR_TO_STACK) {
2662 /* technically it's ok to return caller's stack pointer
2663 * (or caller's caller's pointer) back to the caller,
 2664 * since these pointers are valid. Only the current stack
 2665 * pointer becomes invalid as soon as the function exits,
 2666 * but let's be conservative
2667 */
2668 verbose(env, "cannot return stack pointer to the caller\n");
2669 return -EINVAL;
2670 }
2671
2672 state->curframe--;
2673 caller = state->frame[state->curframe];
2674 /* return to the caller whatever r0 had in the callee */
2675 caller->regs[BPF_REG_0] = *r0;
2676
fd978bf7
JS
2677 /* Transfer references to the caller */
2678 err = transfer_reference_state(caller, callee);
2679 if (err)
2680 return err;
2681
f4d7e40a
AS
2682 *insn_idx = callee->callsite + 1;
2683 if (env->log.level) {
2684 verbose(env, "returning from callee:\n");
2685 print_verifier_state(env, callee);
2686 verbose(env, "to caller at %d:\n", *insn_idx);
2687 print_verifier_state(env, caller);
2688 }
2689 /* clear everything in the callee */
2690 free_func_state(callee);
2691 state->frame[state->curframe + 1] = NULL;
2692 return 0;
2693}
2694
849fa506
YS
2695static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
2696 int func_id,
2697 struct bpf_call_arg_meta *meta)
2698{
2699 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
2700
2701 if (ret_type != RET_INTEGER ||
2702 (func_id != BPF_FUNC_get_stack &&
2703 func_id != BPF_FUNC_probe_read_str))
2704 return;
2705
2706 ret_reg->smax_value = meta->msize_smax_value;
2707 ret_reg->umax_value = meta->msize_umax_value;
2708 __reg_deduce_bounds(ret_reg);
2709 __reg_bound_offset(ret_reg);
2710}
2711
c93552c4
DB
2712static int
2713record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
2714 int func_id, int insn_idx)
2715{
2716 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
2717
2718 if (func_id != BPF_FUNC_tail_call &&
09772d92
DB
2719 func_id != BPF_FUNC_map_lookup_elem &&
2720 func_id != BPF_FUNC_map_update_elem &&
f1a2e44a
MV
2721 func_id != BPF_FUNC_map_delete_elem &&
2722 func_id != BPF_FUNC_map_push_elem &&
2723 func_id != BPF_FUNC_map_pop_elem &&
2724 func_id != BPF_FUNC_map_peek_elem)
c93552c4 2725 return 0;
09772d92 2726
c93552c4
DB
2727 if (meta->map_ptr == NULL) {
2728 verbose(env, "kernel subsystem misconfigured verifier\n");
2729 return -EINVAL;
2730 }
2731
2732 if (!BPF_MAP_PTR(aux->map_state))
2733 bpf_map_ptr_store(aux, meta->map_ptr,
2734 meta->map_ptr->unpriv_array);
2735 else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
2736 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
2737 meta->map_ptr->unpriv_array);
2738 return 0;
2739}
2740
fd978bf7
JS
2741static int check_reference_leak(struct bpf_verifier_env *env)
2742{
2743 struct bpf_func_state *state = cur_func(env);
2744 int i;
2745
2746 for (i = 0; i < state->acquired_refs; i++) {
2747 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
2748 state->refs[i].id, state->refs[i].insn_idx);
2749 }
2750 return state->acquired_refs ? -EINVAL : 0;
2751}
2752
f4d7e40a 2753static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
17a52670 2754{
17a52670 2755 const struct bpf_func_proto *fn = NULL;
638f5b90 2756 struct bpf_reg_state *regs;
33ff9823 2757 struct bpf_call_arg_meta meta;
969bf05e 2758 bool changes_data;
17a52670
AS
2759 int i, err;
2760
2761 /* find function prototype */
2762 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
61bd5218
JK
2763 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
2764 func_id);
17a52670
AS
2765 return -EINVAL;
2766 }
2767
00176a34 2768 if (env->ops->get_func_proto)
5e43f899 2769 fn = env->ops->get_func_proto(func_id, env->prog);
17a52670 2770 if (!fn) {
61bd5218
JK
2771 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
2772 func_id);
17a52670
AS
2773 return -EINVAL;
2774 }
2775
2776 /* eBPF programs must be GPL compatible to use GPL-ed functions */
24701ece 2777 if (!env->prog->gpl_compatible && fn->gpl_only) {
3fe2867c 2778 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
17a52670
AS
2779 return -EINVAL;
2780 }
2781
04514d13 2782 /* With LD_ABS/IND some JITs save/restore skb from r1. */
17bedab2 2783 changes_data = bpf_helper_changes_pkt_data(fn->func);
04514d13
DB
2784 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
2785 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
2786 func_id_name(func_id), func_id);
2787 return -EINVAL;
2788 }
969bf05e 2789
33ff9823 2790 memset(&meta, 0, sizeof(meta));
36bbef52 2791 meta.pkt_access = fn->pkt_access;
33ff9823 2792
90133415 2793 err = check_func_proto(fn);
435faee1 2794 if (err) {
61bd5218 2795 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
ebb676da 2796 func_id_name(func_id), func_id);
435faee1
DB
2797 return err;
2798 }
2799
17a52670 2800 /* check args */
33ff9823 2801 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
17a52670
AS
2802 if (err)
2803 return err;
33ff9823 2804 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
17a52670
AS
2805 if (err)
2806 return err;
33ff9823 2807 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
17a52670
AS
2808 if (err)
2809 return err;
33ff9823 2810 err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
17a52670
AS
2811 if (err)
2812 return err;
33ff9823 2813 err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
17a52670
AS
2814 if (err)
2815 return err;
2816
c93552c4
DB
2817 err = record_func_map(env, &meta, func_id, insn_idx);
2818 if (err)
2819 return err;
2820
435faee1
DB
2821 /* Mark slots with STACK_MISC in case of raw mode, stack offset
2822 * is inferred from register state.
2823 */
2824 for (i = 0; i < meta.access_size; i++) {
ca369602
DB
2825 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
2826 BPF_WRITE, -1, false);
435faee1
DB
2827 if (err)
2828 return err;
2829 }
2830
fd978bf7
JS
2831 if (func_id == BPF_FUNC_tail_call) {
2832 err = check_reference_leak(env);
2833 if (err) {
2834 verbose(env, "tail_call would lead to reference leak\n");
2835 return err;
2836 }
2837 } else if (is_release_function(func_id)) {
2838 err = release_reference(env, &meta);
2839 if (err)
2840 return err;
2841 }
2842
638f5b90 2843 regs = cur_regs(env);
cd339431
RG
2844
2845 /* check that flags argument in get_local_storage(map, flags) is 0,
2846 * this is required because get_local_storage() can't return an error.
2847 */
2848 if (func_id == BPF_FUNC_get_local_storage &&
2849 !register_is_null(&regs[BPF_REG_2])) {
2850 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
2851 return -EINVAL;
2852 }
2853
17a52670 2854 /* reset caller saved regs */
dc503a8a 2855 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 2856 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
2857 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2858 }
17a52670 2859
dc503a8a 2860 /* update return register (already marked as written above) */
17a52670 2861 if (fn->ret_type == RET_INTEGER) {
f1174f77 2862 /* sets type to SCALAR_VALUE */
61bd5218 2863 mark_reg_unknown(env, regs, BPF_REG_0);
17a52670
AS
2864 } else if (fn->ret_type == RET_VOID) {
2865 regs[BPF_REG_0].type = NOT_INIT;
3e6a4b3e
RG
2866 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
2867 fn->ret_type == RET_PTR_TO_MAP_VALUE) {
f1174f77 2868 /* There is no offset yet applied, variable or fixed */
61bd5218 2869 mark_reg_known_zero(env, regs, BPF_REG_0);
17a52670
AS
2870 /* remember map_ptr, so that check_map_access()
2871 * can check 'value_size' boundary of memory access
2872 * to map element returned from bpf_map_lookup_elem()
2873 */
33ff9823 2874 if (meta.map_ptr == NULL) {
61bd5218
JK
2875 verbose(env,
2876 "kernel subsystem misconfigured verifier\n");
17a52670
AS
2877 return -EINVAL;
2878 }
33ff9823 2879 regs[BPF_REG_0].map_ptr = meta.map_ptr;
4d31f301
DB
2880 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
2881 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
2882 } else {
2883 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
2884 regs[BPF_REG_0].id = ++env->id_gen;
2885 }
c64b7983 2886 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
fd978bf7
JS
2887 int id = acquire_reference_state(env, insn_idx);
2888 if (id < 0)
2889 return id;
c64b7983
JS
2890 mark_reg_known_zero(env, regs, BPF_REG_0);
2891 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
fd978bf7 2892 regs[BPF_REG_0].id = id;
17a52670 2893 } else {
61bd5218 2894 verbose(env, "unknown return type %d of func %s#%d\n",
ebb676da 2895 fn->ret_type, func_id_name(func_id), func_id);
17a52670
AS
2896 return -EINVAL;
2897 }
04fd61ab 2898
849fa506
YS
2899 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
2900
61bd5218 2901 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
35578d79
KX
2902 if (err)
2903 return err;
04fd61ab 2904
c195651e
YS
2905 if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
2906 const char *err_str;
2907
2908#ifdef CONFIG_PERF_EVENTS
2909 err = get_callchain_buffers(sysctl_perf_event_max_stack);
2910 err_str = "cannot get callchain buffer for func %s#%d\n";
2911#else
2912 err = -ENOTSUPP;
2913 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
2914#endif
2915 if (err) {
2916 verbose(env, err_str, func_id_name(func_id), func_id);
2917 return err;
2918 }
2919
2920 env->prog->has_callchain_buf = true;
2921 }
2922
969bf05e
AS
2923 if (changes_data)
2924 clear_all_pkt_pointers(env);
2925 return 0;
2926}
2927
b03c9f9f
EC
2928static bool signed_add_overflows(s64 a, s64 b)
2929{
2930 /* Do the add in u64, where overflow is well-defined */
2931 s64 res = (s64)((u64)a + (u64)b);
2932
2933 if (b < 0)
2934 return res > a;
2935 return res < a;
2936}
2937
2938static bool signed_sub_overflows(s64 a, s64 b)
2939{
2940 /* Do the sub in u64, where overflow is well-defined */
2941 s64 res = (s64)((u64)a - (u64)b);
2942
2943 if (b < 0)
2944 return res < a;
2945 return res > a;
969bf05e
AS
2946}
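/* An illustrative, user-space check of the overflow predicates above; it is
 * not part of verifier.c. The trick is to do the addition in u64, where
 * wrap-around is well defined, and then compare the result against one
 * operand according to the sign of the other. (The u64-to-s64 cast is
 * implementation-defined in C but behaves as expected on two's-complement
 * targets, which is what the kernel assumes as well.)
 */
#include <assert.h>
#include <stdint.h>

static int toy_signed_add_overflows(int64_t a, int64_t b)
{
	int64_t res = (int64_t)((uint64_t)a + (uint64_t)b);

	if (b < 0)
		return res > a;
	return res < a;
}

int main(void)
{
	assert(toy_signed_add_overflows(INT64_MAX, 1));		/* wraps to INT64_MIN */
	assert(!toy_signed_add_overflows(INT64_MAX, -1));
	assert(toy_signed_add_overflows(INT64_MIN, -1));	/* wraps to INT64_MAX */
	assert(!toy_signed_add_overflows(0, INT64_MIN));
	return 0;
}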
2947
bb7f0f98
AS
2948static bool check_reg_sane_offset(struct bpf_verifier_env *env,
2949 const struct bpf_reg_state *reg,
2950 enum bpf_reg_type type)
2951{
2952 bool known = tnum_is_const(reg->var_off);
2953 s64 val = reg->var_off.value;
2954 s64 smin = reg->smin_value;
2955
2956 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
2957 verbose(env, "math between %s pointer and %lld is not allowed\n",
2958 reg_type_str[type], val);
2959 return false;
2960 }
2961
2962 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
2963 verbose(env, "%s pointer offset %d is not allowed\n",
2964 reg_type_str[type], reg->off);
2965 return false;
2966 }
2967
2968 if (smin == S64_MIN) {
2969 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
2970 reg_type_str[type]);
2971 return false;
2972 }
2973
2974 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
2975 verbose(env, "value %lld makes %s pointer be out of bounds\n",
2976 smin, reg_type_str[type]);
2977 return false;
2978 }
2979
2980 return true;
2981}
2982
f1174f77 2983/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
f1174f77
EC
2984 * Caller should also handle BPF_MOV case separately.
2985 * If we return -EACCES, caller may want to try again treating pointer as a
2986 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
2987 */
2988static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2989 struct bpf_insn *insn,
2990 const struct bpf_reg_state *ptr_reg,
2991 const struct bpf_reg_state *off_reg)
969bf05e 2992{
f4d7e40a
AS
2993 struct bpf_verifier_state *vstate = env->cur_state;
2994 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2995 struct bpf_reg_state *regs = state->regs, *dst_reg;
f1174f77 2996 bool known = tnum_is_const(off_reg->var_off);
b03c9f9f
EC
2997 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
2998 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
2999 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
3000 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
969bf05e 3001 u8 opcode = BPF_OP(insn->code);
f1174f77 3002 u32 dst = insn->dst_reg;
969bf05e 3003
f1174f77 3004 dst_reg = &regs[dst];
969bf05e 3005
6f16101e
DB
3006 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
3007 smin_val > smax_val || umin_val > umax_val) {
3008 /* Taint dst register if offset had invalid bounds derived from
3009 * e.g. dead branches.
3010 */
3011 __mark_reg_unknown(dst_reg);
3012 return 0;
f1174f77
EC
3013 }
3014
3015 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3016 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
82abbf8d
AS
3017 verbose(env,
3018 "R%d 32-bit pointer arithmetic prohibited\n",
3019 dst);
f1174f77 3020 return -EACCES;
969bf05e
AS
3021 }
3022
aad2eeaf
JS
3023 switch (ptr_reg->type) {
3024 case PTR_TO_MAP_VALUE_OR_NULL:
3025 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
3026 dst, reg_type_str[ptr_reg->type]);
f1174f77 3027 return -EACCES;
aad2eeaf
JS
3028 case CONST_PTR_TO_MAP:
3029 case PTR_TO_PACKET_END:
c64b7983
JS
3030 case PTR_TO_SOCKET:
3031 case PTR_TO_SOCKET_OR_NULL:
aad2eeaf
JS
3032 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3033 dst, reg_type_str[ptr_reg->type]);
f1174f77 3034 return -EACCES;
aad2eeaf
JS
3035 default:
3036 break;
f1174f77
EC
3037 }
3038
3039 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
3040 * The id may be overwritten later if we create a new variable offset.
969bf05e 3041 */
f1174f77
EC
3042 dst_reg->type = ptr_reg->type;
3043 dst_reg->id = ptr_reg->id;
969bf05e 3044
bb7f0f98
AS
3045 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
3046 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
3047 return -EINVAL;
3048
f1174f77
EC
3049 switch (opcode) {
3050 case BPF_ADD:
3051 /* We can take a fixed offset as long as it doesn't overflow
3052 * the s32 'off' field
969bf05e 3053 */
b03c9f9f
EC
3054 if (known && (ptr_reg->off + smin_val ==
3055 (s64)(s32)(ptr_reg->off + smin_val))) {
f1174f77 3056 /* pointer += K. Accumulate it into fixed offset */
b03c9f9f
EC
3057 dst_reg->smin_value = smin_ptr;
3058 dst_reg->smax_value = smax_ptr;
3059 dst_reg->umin_value = umin_ptr;
3060 dst_reg->umax_value = umax_ptr;
f1174f77 3061 dst_reg->var_off = ptr_reg->var_off;
b03c9f9f 3062 dst_reg->off = ptr_reg->off + smin_val;
0962590e 3063 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3064 break;
3065 }
f1174f77
EC
3066 /* A new variable offset is created. Note that off_reg->off
3067 * == 0, since it's a scalar.
3068 * dst_reg gets the pointer type and since some positive
3069 * integer value was added to the pointer, give it a new 'id'
3070 * if it's a PTR_TO_PACKET.
3071 * this creates a new 'base' pointer, off_reg (variable) gets
3072 * added into the variable offset, and we copy the fixed offset
3073 * from ptr_reg.
969bf05e 3074 */
b03c9f9f
EC
3075 if (signed_add_overflows(smin_ptr, smin_val) ||
3076 signed_add_overflows(smax_ptr, smax_val)) {
3077 dst_reg->smin_value = S64_MIN;
3078 dst_reg->smax_value = S64_MAX;
3079 } else {
3080 dst_reg->smin_value = smin_ptr + smin_val;
3081 dst_reg->smax_value = smax_ptr + smax_val;
3082 }
3083 if (umin_ptr + umin_val < umin_ptr ||
3084 umax_ptr + umax_val < umax_ptr) {
3085 dst_reg->umin_value = 0;
3086 dst_reg->umax_value = U64_MAX;
3087 } else {
3088 dst_reg->umin_value = umin_ptr + umin_val;
3089 dst_reg->umax_value = umax_ptr + umax_val;
3090 }
f1174f77
EC
3091 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
3092 dst_reg->off = ptr_reg->off;
0962590e 3093 dst_reg->raw = ptr_reg->raw;
de8f3a83 3094 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3095 dst_reg->id = ++env->id_gen;
3096 /* something was added to pkt_ptr, set range to zero */
0962590e 3097 dst_reg->raw = 0;
f1174f77
EC
3098 }
3099 break;
3100 case BPF_SUB:
3101 if (dst_reg == off_reg) {
3102 /* scalar -= pointer. Creates an unknown scalar */
82abbf8d
AS
3103 verbose(env, "R%d tried to subtract pointer from scalar\n",
3104 dst);
f1174f77
EC
3105 return -EACCES;
3106 }
3107 /* We don't allow subtraction from FP, because (according to
 3108 * test_verifier.c test "invalid fp arithmetic"), JITs might not
3109 * be able to deal with it.
969bf05e 3110 */
f1174f77 3111 if (ptr_reg->type == PTR_TO_STACK) {
82abbf8d
AS
3112 verbose(env, "R%d subtraction from stack pointer prohibited\n",
3113 dst);
f1174f77
EC
3114 return -EACCES;
3115 }
b03c9f9f
EC
3116 if (known && (ptr_reg->off - smin_val ==
3117 (s64)(s32)(ptr_reg->off - smin_val))) {
f1174f77 3118 /* pointer -= K. Subtract it from fixed offset */
b03c9f9f
EC
3119 dst_reg->smin_value = smin_ptr;
3120 dst_reg->smax_value = smax_ptr;
3121 dst_reg->umin_value = umin_ptr;
3122 dst_reg->umax_value = umax_ptr;
f1174f77
EC
3123 dst_reg->var_off = ptr_reg->var_off;
3124 dst_reg->id = ptr_reg->id;
b03c9f9f 3125 dst_reg->off = ptr_reg->off - smin_val;
0962590e 3126 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3127 break;
3128 }
f1174f77
EC
3129 /* A new variable offset is created. If the subtrahend is known
3130 * nonnegative, then any reg->range we had before is still good.
969bf05e 3131 */
b03c9f9f
EC
3132 if (signed_sub_overflows(smin_ptr, smax_val) ||
3133 signed_sub_overflows(smax_ptr, smin_val)) {
3134 /* Overflow possible, we know nothing */
3135 dst_reg->smin_value = S64_MIN;
3136 dst_reg->smax_value = S64_MAX;
3137 } else {
3138 dst_reg->smin_value = smin_ptr - smax_val;
3139 dst_reg->smax_value = smax_ptr - smin_val;
3140 }
3141 if (umin_ptr < umax_val) {
3142 /* Overflow possible, we know nothing */
3143 dst_reg->umin_value = 0;
3144 dst_reg->umax_value = U64_MAX;
3145 } else {
3146 /* Cannot overflow (as long as bounds are consistent) */
3147 dst_reg->umin_value = umin_ptr - umax_val;
3148 dst_reg->umax_value = umax_ptr - umin_val;
3149 }
f1174f77
EC
3150 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
3151 dst_reg->off = ptr_reg->off;
0962590e 3152 dst_reg->raw = ptr_reg->raw;
de8f3a83 3153 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3154 dst_reg->id = ++env->id_gen;
 3155 /* a possibly negative subtrahend may have advanced pkt_ptr, set range to zero */
b03c9f9f 3156 if (smin_val < 0)
0962590e 3157 dst_reg->raw = 0;
43188702 3158 }
f1174f77
EC
3159 break;
3160 case BPF_AND:
3161 case BPF_OR:
3162 case BPF_XOR:
82abbf8d
AS
3163 /* bitwise ops on pointers are troublesome, prohibit. */
3164 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
3165 dst, bpf_alu_string[opcode >> 4]);
f1174f77
EC
3166 return -EACCES;
3167 default:
3168 /* other operators (e.g. MUL,LSH) produce non-pointer results */
82abbf8d
AS
3169 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
3170 dst, bpf_alu_string[opcode >> 4]);
f1174f77 3171 return -EACCES;
43188702
JF
3172 }
3173
bb7f0f98
AS
3174 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
3175 return -EINVAL;
3176
b03c9f9f
EC
3177 __update_reg_bounds(dst_reg);
3178 __reg_deduce_bounds(dst_reg);
3179 __reg_bound_offset(dst_reg);
43188702
JF
3180 return 0;
3181}
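
Both the fixed-offset and variable-offset paths above lean on the signed_add_overflows()/signed_sub_overflows() helpers defined earlier in this file. A minimal userspace sketch of how such overflow tests can be written (illustrative only, not the kernel code itself):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* sketch: do the arithmetic in u64 (well-defined wraparound) and check
	 * whether the result moved in the "wrong" direction for the sign of b.
	 */
	static bool signed_add_overflows(int64_t a, int64_t b)
	{
		int64_t res = (int64_t)((uint64_t)a + (uint64_t)b);

		if (b < 0)
			return res > a;
		return res < a;
	}

	static bool signed_sub_overflows(int64_t a, int64_t b)
	{
		int64_t res = (int64_t)((uint64_t)a - (uint64_t)b);

		if (b < 0)
			return res < a;
		return res > a;
	}

	int main(void)
	{
		/* INT64_MAX + 1 and INT64_MIN - 1 both overflow */
		printf("%d %d\n", signed_add_overflows(INT64_MAX, 1),
		       signed_sub_overflows(INT64_MIN, 1));
		return 0;
	}

When either test fires, the verifier gives up on the signed bounds and widens them to [S64_MIN, S64_MAX], exactly as the BPF_ADD and BPF_SUB cases above do.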
3182
468f6eaf
JH
3183/* WARNING: This function does calculations on 64-bit values, but the actual
3184 * execution may occur on 32-bit values. Therefore, things like bitshifts
3185 * need extra checks in the 32-bit case.
3186 */
f1174f77
EC
3187static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3188 struct bpf_insn *insn,
3189 struct bpf_reg_state *dst_reg,
3190 struct bpf_reg_state src_reg)
969bf05e 3191{
638f5b90 3192 struct bpf_reg_state *regs = cur_regs(env);
48461135 3193 u8 opcode = BPF_OP(insn->code);
f1174f77 3194 bool src_known, dst_known;
b03c9f9f
EC
3195 s64 smin_val, smax_val;
3196 u64 umin_val, umax_val;
468f6eaf 3197 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
48461135 3198
b799207e
JH
3199 if (insn_bitness == 32) {
3200 /* Relevant for 32-bit RSH: Information can propagate towards
3201 * LSB, so it isn't sufficient to only truncate the output to
3202 * 32 bits.
3203 */
3204 coerce_reg_to_size(dst_reg, 4);
3205 coerce_reg_to_size(&src_reg, 4);
3206 }
3207
b03c9f9f
EC
3208 smin_val = src_reg.smin_value;
3209 smax_val = src_reg.smax_value;
3210 umin_val = src_reg.umin_value;
3211 umax_val = src_reg.umax_value;
f1174f77
EC
3212 src_known = tnum_is_const(src_reg.var_off);
3213 dst_known = tnum_is_const(dst_reg->var_off);
f23cc643 3214
6f16101e
DB
3215 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
3216 smin_val > smax_val || umin_val > umax_val) {
3217 /* Taint dst register if offset had invalid bounds derived from
3218 * e.g. dead branches.
3219 */
3220 __mark_reg_unknown(dst_reg);
3221 return 0;
3222 }
3223
bb7f0f98
AS
3224 if (!src_known &&
3225 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
3226 __mark_reg_unknown(dst_reg);
3227 return 0;
3228 }
3229
48461135
JB
3230 switch (opcode) {
3231 case BPF_ADD:
b03c9f9f
EC
3232 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
3233 signed_add_overflows(dst_reg->smax_value, smax_val)) {
3234 dst_reg->smin_value = S64_MIN;
3235 dst_reg->smax_value = S64_MAX;
3236 } else {
3237 dst_reg->smin_value += smin_val;
3238 dst_reg->smax_value += smax_val;
3239 }
3240 if (dst_reg->umin_value + umin_val < umin_val ||
3241 dst_reg->umax_value + umax_val < umax_val) {
3242 dst_reg->umin_value = 0;
3243 dst_reg->umax_value = U64_MAX;
3244 } else {
3245 dst_reg->umin_value += umin_val;
3246 dst_reg->umax_value += umax_val;
3247 }
f1174f77 3248 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
48461135
JB
3249 break;
3250 case BPF_SUB:
b03c9f9f
EC
3251 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
3252 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
3253 /* Overflow possible, we know nothing */
3254 dst_reg->smin_value = S64_MIN;
3255 dst_reg->smax_value = S64_MAX;
3256 } else {
3257 dst_reg->smin_value -= smax_val;
3258 dst_reg->smax_value -= smin_val;
3259 }
3260 if (dst_reg->umin_value < umax_val) {
3261 /* Overflow possible, we know nothing */
3262 dst_reg->umin_value = 0;
3263 dst_reg->umax_value = U64_MAX;
3264 } else {
3265 /* Cannot overflow (as long as bounds are consistent) */
3266 dst_reg->umin_value -= umax_val;
3267 dst_reg->umax_value -= umin_val;
3268 }
f1174f77 3269 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
48461135
JB
3270 break;
3271 case BPF_MUL:
b03c9f9f
EC
3272 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
3273 if (smin_val < 0 || dst_reg->smin_value < 0) {
f1174f77 3274 /* Ain't nobody got time to multiply that sign */
b03c9f9f
EC
3275 __mark_reg_unbounded(dst_reg);
3276 __update_reg_bounds(dst_reg);
f1174f77
EC
3277 break;
3278 }
b03c9f9f
EC
3279 /* Both values are positive, so we can work with unsigned and
3280 * copy the result to signed (unless it exceeds S64_MAX).
f1174f77 3281 */
b03c9f9f
EC
3282 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
3283 /* Potential overflow, we know nothing */
3284 __mark_reg_unbounded(dst_reg);
3285 /* (except what we can learn from the var_off) */
3286 __update_reg_bounds(dst_reg);
3287 break;
3288 }
3289 dst_reg->umin_value *= umin_val;
3290 dst_reg->umax_value *= umax_val;
3291 if (dst_reg->umax_value > S64_MAX) {
3292 /* Overflow possible, we know nothing */
3293 dst_reg->smin_value = S64_MIN;
3294 dst_reg->smax_value = S64_MAX;
3295 } else {
3296 dst_reg->smin_value = dst_reg->umin_value;
3297 dst_reg->smax_value = dst_reg->umax_value;
3298 }
48461135
JB
3299 break;
3300 case BPF_AND:
f1174f77 3301 if (src_known && dst_known) {
b03c9f9f
EC
3302 __mark_reg_known(dst_reg, dst_reg->var_off.value &
3303 src_reg.var_off.value);
f1174f77
EC
3304 break;
3305 }
b03c9f9f
EC
3306 /* We get our minimum from the var_off, since that's inherently
3307 * bitwise. Our maximum is the minimum of the operands' maxima.
f23cc643 3308 */
f1174f77 3309 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3310 dst_reg->umin_value = dst_reg->var_off.value;
3311 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
3312 if (dst_reg->smin_value < 0 || smin_val < 0) {
3313 /* Lose signed bounds when ANDing negative numbers,
3314 * ain't nobody got time for that.
3315 */
3316 dst_reg->smin_value = S64_MIN;
3317 dst_reg->smax_value = S64_MAX;
3318 } else {
3319 /* ANDing two positives gives a positive, so safe to
3320 * cast result into s64.
3321 */
3322 dst_reg->smin_value = dst_reg->umin_value;
3323 dst_reg->smax_value = dst_reg->umax_value;
3324 }
3325 /* We may learn something more from the var_off */
3326 __update_reg_bounds(dst_reg);
f1174f77
EC
3327 break;
3328 case BPF_OR:
3329 if (src_known && dst_known) {
b03c9f9f
EC
3330 __mark_reg_known(dst_reg, dst_reg->var_off.value |
3331 src_reg.var_off.value);
f1174f77
EC
3332 break;
3333 }
b03c9f9f
EC
3334 /* We get our maximum from the var_off, and our minimum is the
3335 * maximum of the operands' minima
f1174f77
EC
3336 */
3337 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3338 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
3339 dst_reg->umax_value = dst_reg->var_off.value |
3340 dst_reg->var_off.mask;
3341 if (dst_reg->smin_value < 0 || smin_val < 0) {
3342 /* Lose signed bounds when ORing negative numbers,
3343 * ain't nobody got time for that.
3344 */
3345 dst_reg->smin_value = S64_MIN;
3346 dst_reg->smax_value = S64_MAX;
f1174f77 3347 } else {
b03c9f9f
EC
3348 /* ORing two positives gives a positive, so safe to
3349 * cast result into s64.
3350 */
3351 dst_reg->smin_value = dst_reg->umin_value;
3352 dst_reg->smax_value = dst_reg->umax_value;
f1174f77 3353 }
b03c9f9f
EC
3354 /* We may learn something more from the var_off */
3355 __update_reg_bounds(dst_reg);
48461135
JB
3356 break;
3357 case BPF_LSH:
468f6eaf
JH
3358 if (umax_val >= insn_bitness) {
3359 /* Shifts greater than 31 or 63 are undefined.
3360 * This includes shifts by a negative number.
b03c9f9f 3361 */
61bd5218 3362 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3363 break;
3364 }
b03c9f9f
EC
3365 /* We lose all sign bit information (except what we can pick
3366 * up from var_off)
48461135 3367 */
b03c9f9f
EC
3368 dst_reg->smin_value = S64_MIN;
3369 dst_reg->smax_value = S64_MAX;
3370 /* If we might shift our top bit out, then we know nothing */
3371 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
3372 dst_reg->umin_value = 0;
3373 dst_reg->umax_value = U64_MAX;
d1174416 3374 } else {
b03c9f9f
EC
3375 dst_reg->umin_value <<= umin_val;
3376 dst_reg->umax_value <<= umax_val;
d1174416 3377 }
afbe1a5b 3378 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3379 /* We may learn something more from the var_off */
3380 __update_reg_bounds(dst_reg);
48461135
JB
3381 break;
3382 case BPF_RSH:
468f6eaf
JH
3383 if (umax_val >= insn_bitness) {
3384 /* Shifts greater than 31 or 63 are undefined.
3385 * This includes shifts by a negative number.
b03c9f9f 3386 */
61bd5218 3387 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3388 break;
3389 }
4374f256
EC
3390 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
3391 * be negative, then either:
3392 * 1) src_reg might be zero, so the sign bit of the result is
3393 * unknown, so we lose our signed bounds
3394 * 2) it's known negative, thus the unsigned bounds capture the
3395 * signed bounds
3396 * 3) the signed bounds cross zero, so they tell us nothing
3397 * about the result
3398 * If the value in dst_reg is known nonnegative, then again the
 3399 * unsigned bounds capture the signed bounds
3400 * Thus, in all cases it suffices to blow away our signed bounds
3401 * and rely on inferring new ones from the unsigned bounds and
3402 * var_off of the result.
3403 */
3404 dst_reg->smin_value = S64_MIN;
3405 dst_reg->smax_value = S64_MAX;
afbe1a5b 3406 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3407 dst_reg->umin_value >>= umax_val;
3408 dst_reg->umax_value >>= umin_val;
3409 /* We may learn something more from the var_off */
3410 __update_reg_bounds(dst_reg);
48461135 3411 break;
9cbe1f5a
YS
3412 case BPF_ARSH:
3413 if (umax_val >= insn_bitness) {
3414 /* Shifts greater than 31 or 63 are undefined.
3415 * This includes shifts by a negative number.
3416 */
3417 mark_reg_unknown(env, regs, insn->dst_reg);
3418 break;
3419 }
3420
3421 /* Upon reaching here, src_known is true and
3422 * umax_val is equal to umin_val.
3423 */
3424 dst_reg->smin_value >>= umin_val;
3425 dst_reg->smax_value >>= umin_val;
3426 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
3427
3428 /* blow away the dst_reg umin_value/umax_value and rely on
3429 * dst_reg var_off to refine the result.
3430 */
3431 dst_reg->umin_value = 0;
3432 dst_reg->umax_value = U64_MAX;
3433 __update_reg_bounds(dst_reg);
3434 break;
48461135 3435 default:
61bd5218 3436 mark_reg_unknown(env, regs, insn->dst_reg);
48461135
JB
3437 break;
3438 }
3439
468f6eaf
JH
3440 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3441 /* 32-bit ALU ops are (32,32)->32 */
3442 coerce_reg_to_size(dst_reg, 4);
468f6eaf
JH
3443 }
3444
b03c9f9f
EC
3445 __reg_deduce_bounds(dst_reg);
3446 __reg_bound_offset(dst_reg);
f1174f77
EC
3447 return 0;
3448}
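
The var_off updates above go through the tracked-number (tnum) helpers, e.g. tnum_add() for BPF_ADD. A self-contained sketch of the (value, mask) representation and of addition, modeled on kernel/bpf/tnum.c (illustrative only):

	#include <stdint.h>
	#include <stdio.h>

	/* a tracked number: bits set in .mask are unknown, the rest equal .value */
	struct tnum {
		uint64_t value;
		uint64_t mask;
	};

	/* sketch of tnum addition: carries out of unknown bits make more bits unknown */
	static struct tnum tnum_add(struct tnum a, struct tnum b)
	{
		uint64_t sm = a.mask + b.mask;        /* worst-case carries from unknown bits */
		uint64_t sv = a.value + b.value;      /* known parts added */
		uint64_t sigma = sm + sv;
		uint64_t chi = sigma ^ sv;            /* bits a carry could still flip */
		uint64_t mu = chi | a.mask | b.mask;  /* all possibly-unknown result bits */

		return (struct tnum){ .value = sv & ~mu, .mask = mu };
	}

	int main(void)
	{
		/* 0x10 plus "either 0 or 4": low bits stay known, bit 2 becomes unknown */
		struct tnum r = tnum_add((struct tnum){0x10, 0}, (struct tnum){0, 0x4});

		printf("value=%#llx mask=%#llx\n",
		       (unsigned long long)r.value, (unsigned long long)r.mask);
		return 0;
	}

The min/max bounds and the var_off are maintained in parallel; __update_reg_bounds() and __reg_bound_offset() then let each representation tighten the other.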
3449
3450/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
3451 * and var_off.
3452 */
3453static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
3454 struct bpf_insn *insn)
3455{
f4d7e40a
AS
3456 struct bpf_verifier_state *vstate = env->cur_state;
3457 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3458 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
f1174f77
EC
3459 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
3460 u8 opcode = BPF_OP(insn->code);
f1174f77
EC
3461
3462 dst_reg = &regs[insn->dst_reg];
f1174f77
EC
3463 src_reg = NULL;
3464 if (dst_reg->type != SCALAR_VALUE)
3465 ptr_reg = dst_reg;
3466 if (BPF_SRC(insn->code) == BPF_X) {
3467 src_reg = &regs[insn->src_reg];
f1174f77
EC
3468 if (src_reg->type != SCALAR_VALUE) {
3469 if (dst_reg->type != SCALAR_VALUE) {
3470 /* Combining two pointers by any ALU op yields
82abbf8d
AS
3471 * an arbitrary scalar. Disallow all math except
3472 * pointer subtraction
f1174f77 3473 */
dd066823 3474 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
82abbf8d
AS
3475 mark_reg_unknown(env, regs, insn->dst_reg);
3476 return 0;
f1174f77 3477 }
82abbf8d
AS
3478 verbose(env, "R%d pointer %s pointer prohibited\n",
3479 insn->dst_reg,
3480 bpf_alu_string[opcode >> 4]);
3481 return -EACCES;
f1174f77
EC
3482 } else {
3483 /* scalar += pointer
3484 * This is legal, but we have to reverse our
3485 * src/dest handling in computing the range
3486 */
82abbf8d
AS
3487 return adjust_ptr_min_max_vals(env, insn,
3488 src_reg, dst_reg);
f1174f77
EC
3489 }
3490 } else if (ptr_reg) {
3491 /* pointer += scalar */
82abbf8d
AS
3492 return adjust_ptr_min_max_vals(env, insn,
3493 dst_reg, src_reg);
f1174f77
EC
3494 }
3495 } else {
3496 /* Pretend the src is a reg with a known value, since we only
3497 * need to be able to read from this state.
3498 */
3499 off_reg.type = SCALAR_VALUE;
b03c9f9f 3500 __mark_reg_known(&off_reg, insn->imm);
f1174f77 3501 src_reg = &off_reg;
82abbf8d
AS
3502 if (ptr_reg) /* pointer += K */
3503 return adjust_ptr_min_max_vals(env, insn,
3504 ptr_reg, src_reg);
f1174f77
EC
3505 }
3506
3507 /* Got here implies adding two SCALAR_VALUEs */
3508 if (WARN_ON_ONCE(ptr_reg)) {
f4d7e40a 3509 print_verifier_state(env, state);
61bd5218 3510 verbose(env, "verifier internal error: unexpected ptr_reg\n");
f1174f77
EC
3511 return -EINVAL;
3512 }
3513 if (WARN_ON(!src_reg)) {
f4d7e40a 3514 print_verifier_state(env, state);
61bd5218 3515 verbose(env, "verifier internal error: no src_reg\n");
f1174f77
EC
3516 return -EINVAL;
3517 }
3518 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
48461135
JB
3519}
3520
17a52670 3521/* check validity of 32-bit and 64-bit arithmetic operations */
58e2af8b 3522static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 3523{
638f5b90 3524 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
3525 u8 opcode = BPF_OP(insn->code);
3526 int err;
3527
3528 if (opcode == BPF_END || opcode == BPF_NEG) {
3529 if (opcode == BPF_NEG) {
3530 if (BPF_SRC(insn->code) != 0 ||
3531 insn->src_reg != BPF_REG_0 ||
3532 insn->off != 0 || insn->imm != 0) {
61bd5218 3533 verbose(env, "BPF_NEG uses reserved fields\n");
17a52670
AS
3534 return -EINVAL;
3535 }
3536 } else {
3537 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
e67b8a68
EC
3538 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
3539 BPF_CLASS(insn->code) == BPF_ALU64) {
61bd5218 3540 verbose(env, "BPF_END uses reserved fields\n");
17a52670
AS
3541 return -EINVAL;
3542 }
3543 }
3544
3545 /* check src operand */
dc503a8a 3546 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
3547 if (err)
3548 return err;
3549
1be7f75d 3550 if (is_pointer_value(env, insn->dst_reg)) {
61bd5218 3551 verbose(env, "R%d pointer arithmetic prohibited\n",
1be7f75d
AS
3552 insn->dst_reg);
3553 return -EACCES;
3554 }
3555
17a52670 3556 /* check dest operand */
dc503a8a 3557 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
3558 if (err)
3559 return err;
3560
3561 } else if (opcode == BPF_MOV) {
3562
3563 if (BPF_SRC(insn->code) == BPF_X) {
3564 if (insn->imm != 0 || insn->off != 0) {
61bd5218 3565 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
3566 return -EINVAL;
3567 }
3568
3569 /* check src operand */
dc503a8a 3570 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
3571 if (err)
3572 return err;
3573 } else {
3574 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 3575 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
3576 return -EINVAL;
3577 }
3578 }
3579
fbeb1603
AF
3580 /* check dest operand, mark as required later */
3581 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
3582 if (err)
3583 return err;
3584
3585 if (BPF_SRC(insn->code) == BPF_X) {
3586 if (BPF_CLASS(insn->code) == BPF_ALU64) {
3587 /* case: R1 = R2
3588 * copy register state to dest reg
3589 */
3590 regs[insn->dst_reg] = regs[insn->src_reg];
8fe2d6cc 3591 regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
17a52670 3592 } else {
f1174f77 3593 /* R1 = (u32) R2 */
1be7f75d 3594 if (is_pointer_value(env, insn->src_reg)) {
61bd5218
JK
3595 verbose(env,
3596 "R%d partial copy of pointer\n",
1be7f75d
AS
3597 insn->src_reg);
3598 return -EACCES;
3599 }
61bd5218 3600 mark_reg_unknown(env, regs, insn->dst_reg);
0c17d1d2 3601 coerce_reg_to_size(&regs[insn->dst_reg], 4);
17a52670
AS
3602 }
3603 } else {
3604 /* case: R = imm
3605 * remember the value we stored into this reg
3606 */
fbeb1603
AF
3607 /* clear any state __mark_reg_known doesn't set */
3608 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77 3609 regs[insn->dst_reg].type = SCALAR_VALUE;
95a762e2
JH
3610 if (BPF_CLASS(insn->code) == BPF_ALU64) {
3611 __mark_reg_known(regs + insn->dst_reg,
3612 insn->imm);
3613 } else {
3614 __mark_reg_known(regs + insn->dst_reg,
3615 (u32)insn->imm);
3616 }
17a52670
AS
3617 }
3618
3619 } else if (opcode > BPF_END) {
61bd5218 3620 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17a52670
AS
3621 return -EINVAL;
3622
3623 } else { /* all other ALU ops: and, sub, xor, add, ... */
3624
17a52670
AS
3625 if (BPF_SRC(insn->code) == BPF_X) {
3626 if (insn->imm != 0 || insn->off != 0) {
61bd5218 3627 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
3628 return -EINVAL;
3629 }
3630 /* check src1 operand */
dc503a8a 3631 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
3632 if (err)
3633 return err;
3634 } else {
3635 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 3636 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
3637 return -EINVAL;
3638 }
3639 }
3640
3641 /* check src2 operand */
dc503a8a 3642 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
3643 if (err)
3644 return err;
3645
3646 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
3647 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
61bd5218 3648 verbose(env, "div by zero\n");
17a52670
AS
3649 return -EINVAL;
3650 }
3651
229394e8
RV
3652 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
3653 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
3654 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
3655
3656 if (insn->imm < 0 || insn->imm >= size) {
61bd5218 3657 verbose(env, "invalid shift %d\n", insn->imm);
229394e8
RV
3658 return -EINVAL;
3659 }
3660 }
3661
1a0dc1ac 3662 /* check dest operand */
dc503a8a 3663 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
1a0dc1ac
AS
3664 if (err)
3665 return err;
3666
f1174f77 3667 return adjust_reg_min_max_vals(env, insn);
17a52670
AS
3668 }
3669
3670 return 0;
3671}
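
To make the reserved-field, shift and division checks above concrete, here are a few instructions and the verdict check_alu_op() reaches for each. The array assumes the instruction-building macros from include/linux/filter.h, in the style used by tools/testing/selftests/bpf/test_verifier.c (illustrative, not part of verifier.c):

	#include <linux/filter.h>

	static struct bpf_insn alu_examples[] = {
		BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 63),	/* ok: 0 <= imm < 64 for ALU64 shifts */
		BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 64),	/* rejected: "invalid shift 64" */
		BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 32),	/* rejected: "invalid shift 32" (32-bit ALU) */
		BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, 0),	/* rejected: "div by zero" */
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),	/* ok: copies full register state (here FP) */
	};

Instructions that pass these syntactic checks fall through to adjust_reg_min_max_vals(), which dispatches to the pointer or scalar bounds logic above.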
3672
f4d7e40a 3673static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
de8f3a83 3674 struct bpf_reg_state *dst_reg,
f8ddadc4 3675 enum bpf_reg_type type,
fb2a311a 3676 bool range_right_open)
969bf05e 3677{
f4d7e40a 3678 struct bpf_func_state *state = vstate->frame[vstate->curframe];
58e2af8b 3679 struct bpf_reg_state *regs = state->regs, *reg;
fb2a311a 3680 u16 new_range;
f4d7e40a 3681 int i, j;
2d2be8ca 3682
fb2a311a
DB
3683 if (dst_reg->off < 0 ||
3684 (dst_reg->off == 0 && range_right_open))
f1174f77
EC
3685 /* This doesn't give us any range */
3686 return;
3687
b03c9f9f
EC
3688 if (dst_reg->umax_value > MAX_PACKET_OFF ||
3689 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
f1174f77
EC
3690 /* Risk of overflow. For instance, ptr + (1<<63) may be less
3691 * than pkt_end, but that's because it's also less than pkt.
3692 */
3693 return;
3694
fb2a311a
DB
3695 new_range = dst_reg->off;
3696 if (range_right_open)
3697 new_range--;
3698
3699 /* Examples for register markings:
2d2be8ca 3700 *
fb2a311a 3701 * pkt_data in dst register:
2d2be8ca
DB
3702 *
3703 * r2 = r3;
3704 * r2 += 8;
3705 * if (r2 > pkt_end) goto <handle exception>
3706 * <access okay>
3707 *
b4e432f1
DB
3708 * r2 = r3;
3709 * r2 += 8;
3710 * if (r2 < pkt_end) goto <access okay>
3711 * <handle exception>
3712 *
2d2be8ca
DB
3713 * Where:
3714 * r2 == dst_reg, pkt_end == src_reg
3715 * r2=pkt(id=n,off=8,r=0)
3716 * r3=pkt(id=n,off=0,r=0)
3717 *
fb2a311a 3718 * pkt_data in src register:
2d2be8ca
DB
3719 *
3720 * r2 = r3;
3721 * r2 += 8;
3722 * if (pkt_end >= r2) goto <access okay>
3723 * <handle exception>
3724 *
b4e432f1
DB
3725 * r2 = r3;
3726 * r2 += 8;
3727 * if (pkt_end <= r2) goto <handle exception>
3728 * <access okay>
3729 *
2d2be8ca
DB
3730 * Where:
3731 * pkt_end == dst_reg, r2 == src_reg
3732 * r2=pkt(id=n,off=8,r=0)
3733 * r3=pkt(id=n,off=0,r=0)
3734 *
3735 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
fb2a311a
DB
3736 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
3737 * and [r3, r3 + 8-1) respectively is safe to access depending on
3738 * the check.
969bf05e 3739 */
2d2be8ca 3740
f1174f77
EC
3741 /* If our ids match, then we must have the same max_value. And we
3742 * don't care about the other reg's fixed offset, since if it's too big
3743 * the range won't allow anything.
3744 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
3745 */
969bf05e 3746 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 3747 if (regs[i].type == type && regs[i].id == dst_reg->id)
b1977682 3748 /* keep the maximum range already checked */
fb2a311a 3749 regs[i].range = max(regs[i].range, new_range);
969bf05e 3750
f4d7e40a
AS
3751 for (j = 0; j <= vstate->curframe; j++) {
3752 state = vstate->frame[j];
f3709f69
JS
3753 bpf_for_each_spilled_reg(i, state, reg) {
3754 if (!reg)
f4d7e40a 3755 continue;
f4d7e40a
AS
3756 if (reg->type == type && reg->id == dst_reg->id)
3757 reg->range = max(reg->range, new_range);
3758 }
969bf05e
AS
3759 }
3760}
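
The register markings described in find_good_pkt_pointers() are what let the usual packet bounds-check idiom pass verification. A minimal XDP sketch, assuming libbpf's bpf_helpers.h (illustrative; the program and section names are hypothetical):

	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp")
	int xdp_parse_eth(struct xdp_md *ctx)
	{
		void *data     = (void *)(long)ctx->data;	/* r3 = pkt(off=0, r=0) */
		void *data_end = (void *)(long)ctx->data_end;	/* pkt_end */
		struct ethhdr *eth = data;

		/* r2 = r3; r2 += 14; if (r2 > pkt_end) goto drop;
		 * after this branch the verifier sets r3's range to 14,
		 * so the load below is accepted.
		 */
		if ((void *)(eth + 1) > data_end)
			return XDP_DROP;

		return eth->h_proto ? XDP_PASS : XDP_DROP;
	}

	char _license[] SEC("license") = "GPL";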
3761
48461135
JB
3762/* Adjusts the register min/max values in the case that the dst_reg is the
3763 * variable register that we are working on, and src_reg is a constant or we're
3764 * simply doing a BPF_K check.
f1174f77 3765 * In JEQ/JNE cases we also adjust the var_off values.
48461135
JB
3766 */
3767static void reg_set_min_max(struct bpf_reg_state *true_reg,
3768 struct bpf_reg_state *false_reg, u64 val,
3769 u8 opcode)
3770{
f1174f77
EC
3771 /* If the dst_reg is a pointer, we can't learn anything about its
3772 * variable offset from the compare (unless src_reg were a pointer into
 3773 * the same object, but we don't bother with that).
3774 * Since false_reg and true_reg have the same type by construction, we
3775 * only need to check one of them for pointerness.
3776 */
3777 if (__is_pointer_value(false, false_reg))
3778 return;
4cabc5b1 3779
48461135
JB
3780 switch (opcode) {
3781 case BPF_JEQ:
3782 /* If this is false then we know nothing Jon Snow, but if it is
3783 * true then we know for sure.
3784 */
b03c9f9f 3785 __mark_reg_known(true_reg, val);
48461135
JB
3786 break;
3787 case BPF_JNE:
3788 /* If this is true we know nothing Jon Snow, but if it is false
3789 * we know the value for sure;
3790 */
b03c9f9f 3791 __mark_reg_known(false_reg, val);
48461135
JB
3792 break;
3793 case BPF_JGT:
b03c9f9f
EC
3794 false_reg->umax_value = min(false_reg->umax_value, val);
3795 true_reg->umin_value = max(true_reg->umin_value, val + 1);
3796 break;
48461135 3797 case BPF_JSGT:
b03c9f9f
EC
3798 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3799 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
48461135 3800 break;
b4e432f1
DB
3801 case BPF_JLT:
3802 false_reg->umin_value = max(false_reg->umin_value, val);
3803 true_reg->umax_value = min(true_reg->umax_value, val - 1);
3804 break;
3805 case BPF_JSLT:
3806 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3807 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3808 break;
48461135 3809 case BPF_JGE:
b03c9f9f
EC
3810 false_reg->umax_value = min(false_reg->umax_value, val - 1);
3811 true_reg->umin_value = max(true_reg->umin_value, val);
3812 break;
48461135 3813 case BPF_JSGE:
b03c9f9f
EC
3814 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3815 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
48461135 3816 break;
b4e432f1
DB
3817 case BPF_JLE:
3818 false_reg->umin_value = max(false_reg->umin_value, val + 1);
3819 true_reg->umax_value = min(true_reg->umax_value, val);
3820 break;
3821 case BPF_JSLE:
3822 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3823 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3824 break;
48461135
JB
3825 default:
3826 break;
3827 }
3828
b03c9f9f
EC
3829 __reg_deduce_bounds(false_reg);
3830 __reg_deduce_bounds(true_reg);
3831 /* We might have learned some bits from the bounds. */
3832 __reg_bound_offset(false_reg);
3833 __reg_bound_offset(true_reg);
3834 /* Intersecting with the old var_off might have improved our bounds
3835 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3836 * then new var_off is (0; 0x7f...fc) which improves our umax.
3837 */
3838 __update_reg_bounds(false_reg);
3839 __update_reg_bounds(true_reg);
48461135
JB
3840}
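
A worked example of the tightening above: a scalar known to be in [0, 255] is tested with "if R > 100". The sketch below mirrors just the unsigned part of the BPF_JGT arm (hypothetical struct, same min/max logic as the switch above):

	#include <stdint.h>
	#include <stdio.h>

	struct bounds { uint64_t umin, umax; };	/* hypothetical, unsigned part only */

	/* mirror of the BPF_JGT arm: false branch gets reg <= val, true branch reg > val */
	static void set_min_max_jgt(struct bounds *false_b, struct bounds *true_b, uint64_t val)
	{
		if (false_b->umax > val)
			false_b->umax = val;
		if (true_b->umin < val + 1)
			true_b->umin = val + 1;
	}

	int main(void)
	{
		struct bounds f = { 0, 255 }, t = { 0, 255 };

		set_min_max_jgt(&f, &t, 100);
		/* false branch: [0, 100], true branch: [101, 255] */
		printf("false=[%llu,%llu] true=[%llu,%llu]\n",
		       (unsigned long long)f.umin, (unsigned long long)f.umax,
		       (unsigned long long)t.umin, (unsigned long long)t.umax);
		return 0;
	}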
3841
f1174f77
EC
3842/* Same as above, but for the case that dst_reg holds a constant and src_reg is
3843 * the variable reg.
48461135
JB
3844 */
3845static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3846 struct bpf_reg_state *false_reg, u64 val,
3847 u8 opcode)
3848{
f1174f77
EC
3849 if (__is_pointer_value(false, false_reg))
3850 return;
4cabc5b1 3851
48461135
JB
3852 switch (opcode) {
3853 case BPF_JEQ:
3854 /* If this is false then we know nothing Jon Snow, but if it is
3855 * true then we know for sure.
3856 */
b03c9f9f 3857 __mark_reg_known(true_reg, val);
48461135
JB
3858 break;
3859 case BPF_JNE:
3860 /* If this is true we know nothing Jon Snow, but if it is false
3861 * we know the value for sure;
3862 */
b03c9f9f 3863 __mark_reg_known(false_reg, val);
48461135
JB
3864 break;
3865 case BPF_JGT:
b03c9f9f
EC
3866 true_reg->umax_value = min(true_reg->umax_value, val - 1);
3867 false_reg->umin_value = max(false_reg->umin_value, val);
3868 break;
48461135 3869 case BPF_JSGT:
b03c9f9f
EC
3870 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3871 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
48461135 3872 break;
b4e432f1
DB
3873 case BPF_JLT:
3874 true_reg->umin_value = max(true_reg->umin_value, val + 1);
3875 false_reg->umax_value = min(false_reg->umax_value, val);
3876 break;
3877 case BPF_JSLT:
3878 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3879 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3880 break;
48461135 3881 case BPF_JGE:
b03c9f9f
EC
3882 true_reg->umax_value = min(true_reg->umax_value, val);
3883 false_reg->umin_value = max(false_reg->umin_value, val + 1);
3884 break;
48461135 3885 case BPF_JSGE:
b03c9f9f
EC
3886 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3887 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
48461135 3888 break;
b4e432f1
DB
3889 case BPF_JLE:
3890 true_reg->umin_value = max(true_reg->umin_value, val);
3891 false_reg->umax_value = min(false_reg->umax_value, val - 1);
3892 break;
3893 case BPF_JSLE:
3894 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3895 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3896 break;
48461135
JB
3897 default:
3898 break;
3899 }
3900
b03c9f9f
EC
3901 __reg_deduce_bounds(false_reg);
3902 __reg_deduce_bounds(true_reg);
3903 /* We might have learned some bits from the bounds. */
3904 __reg_bound_offset(false_reg);
3905 __reg_bound_offset(true_reg);
3906 /* Intersecting with the old var_off might have improved our bounds
3907 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3908 * then new var_off is (0; 0x7f...fc) which improves our umax.
3909 */
3910 __update_reg_bounds(false_reg);
3911 __update_reg_bounds(true_reg);
f1174f77
EC
3912}
3913
3914/* Regs are known to be equal, so intersect their min/max/var_off */
3915static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
3916 struct bpf_reg_state *dst_reg)
3917{
b03c9f9f
EC
3918 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
3919 dst_reg->umin_value);
3920 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
3921 dst_reg->umax_value);
3922 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
3923 dst_reg->smin_value);
3924 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
3925 dst_reg->smax_value);
f1174f77
EC
3926 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
3927 dst_reg->var_off);
b03c9f9f
EC
3928 /* We might have learned new bounds from the var_off. */
3929 __update_reg_bounds(src_reg);
3930 __update_reg_bounds(dst_reg);
3931 /* We might have learned something about the sign bit. */
3932 __reg_deduce_bounds(src_reg);
3933 __reg_deduce_bounds(dst_reg);
3934 /* We might have learned some bits from the bounds. */
3935 __reg_bound_offset(src_reg);
3936 __reg_bound_offset(dst_reg);
3937 /* Intersecting with the old var_off might have improved our bounds
3938 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3939 * then new var_off is (0; 0x7f...fc) which improves our umax.
3940 */
3941 __update_reg_bounds(src_reg);
3942 __update_reg_bounds(dst_reg);
f1174f77
EC
3943}
3944
3945static void reg_combine_min_max(struct bpf_reg_state *true_src,
3946 struct bpf_reg_state *true_dst,
3947 struct bpf_reg_state *false_src,
3948 struct bpf_reg_state *false_dst,
3949 u8 opcode)
3950{
3951 switch (opcode) {
3952 case BPF_JEQ:
3953 __reg_combine_min_max(true_src, true_dst);
3954 break;
3955 case BPF_JNE:
3956 __reg_combine_min_max(false_src, false_dst);
b03c9f9f 3957 break;
4cabc5b1 3958 }
48461135
JB
3959}
3960
fd978bf7
JS
3961static void mark_ptr_or_null_reg(struct bpf_func_state *state,
3962 struct bpf_reg_state *reg, u32 id,
840b9615 3963 bool is_null)
57a09bf0 3964{
840b9615 3965 if (reg_type_may_be_null(reg->type) && reg->id == id) {
f1174f77
EC
3966 /* Old offset (both fixed and variable parts) should
3967 * have been known-zero, because we don't allow pointer
3968 * arithmetic on pointers that might be NULL.
3969 */
b03c9f9f
EC
3970 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
3971 !tnum_equals_const(reg->var_off, 0) ||
f1174f77 3972 reg->off)) {
b03c9f9f
EC
3973 __mark_reg_known_zero(reg);
3974 reg->off = 0;
f1174f77
EC
3975 }
3976 if (is_null) {
3977 reg->type = SCALAR_VALUE;
840b9615
JS
3978 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
3979 if (reg->map_ptr->inner_map_meta) {
3980 reg->type = CONST_PTR_TO_MAP;
3981 reg->map_ptr = reg->map_ptr->inner_map_meta;
3982 } else {
3983 reg->type = PTR_TO_MAP_VALUE;
3984 }
c64b7983
JS
3985 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
3986 reg->type = PTR_TO_SOCKET;
56f668df 3987 }
fd978bf7
JS
3988 if (is_null || !reg_is_refcounted(reg)) {
3989 /* We don't need id from this point onwards anymore,
3990 * thus we should better reset it, so that state
3991 * pruning has chances to take effect.
3992 */
3993 reg->id = 0;
56f668df 3994 }
57a09bf0
TG
3995 }
3996}
3997
3998/* The logic is similar to find_good_pkt_pointers(), both could eventually
3999 * be folded together at some point.
4000 */
840b9615
JS
4001static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
4002 bool is_null)
57a09bf0 4003{
f4d7e40a 4004 struct bpf_func_state *state = vstate->frame[vstate->curframe];
f3709f69 4005 struct bpf_reg_state *reg, *regs = state->regs;
a08dd0da 4006 u32 id = regs[regno].id;
f4d7e40a 4007 int i, j;
57a09bf0 4008
fd978bf7
JS
4009 if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
4010 __release_reference_state(state, id);
4011
57a09bf0 4012 for (i = 0; i < MAX_BPF_REG; i++)
fd978bf7 4013 mark_ptr_or_null_reg(state, &regs[i], id, is_null);
57a09bf0 4014
f4d7e40a
AS
4015 for (j = 0; j <= vstate->curframe; j++) {
4016 state = vstate->frame[j];
f3709f69
JS
4017 bpf_for_each_spilled_reg(i, state, reg) {
4018 if (!reg)
f4d7e40a 4019 continue;
fd978bf7 4020 mark_ptr_or_null_reg(state, reg, id, is_null);
f4d7e40a 4021 }
57a09bf0
TG
4022 }
4023}
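
This NULL tracking is what makes the standard map-lookup pattern verify: the PTR_TO_MAP_VALUE_OR_NULL returned by bpf_map_lookup_elem() must be tested before it is dereferenced. A minimal sketch assuming libbpf's BTF map-definition macros (map and section names are hypothetical):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1);
		__type(key, __u32);
		__type(value, __u64);
	} counters SEC(".maps");

	SEC("tracepoint/syscalls/sys_enter_getpid")
	int count_getpid(void *ctx)
	{
		__u32 key = 0;
		__u64 *val = bpf_map_lookup_elem(&counters, &key);	/* PTR_TO_MAP_VALUE_OR_NULL */

		if (!val)		/* JEQ 0: in this branch the reg becomes SCALAR_VALUE */
			return 0;
		__sync_fetch_and_add(val, 1);	/* here it is PTR_TO_MAP_VALUE, so the access is allowed */
		return 0;
	}

	char _license[] SEC("license") = "GPL";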
4024
5beca081
DB
4025static bool try_match_pkt_pointers(const struct bpf_insn *insn,
4026 struct bpf_reg_state *dst_reg,
4027 struct bpf_reg_state *src_reg,
4028 struct bpf_verifier_state *this_branch,
4029 struct bpf_verifier_state *other_branch)
4030{
4031 if (BPF_SRC(insn->code) != BPF_X)
4032 return false;
4033
4034 switch (BPF_OP(insn->code)) {
4035 case BPF_JGT:
4036 if ((dst_reg->type == PTR_TO_PACKET &&
4037 src_reg->type == PTR_TO_PACKET_END) ||
4038 (dst_reg->type == PTR_TO_PACKET_META &&
4039 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4040 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
4041 find_good_pkt_pointers(this_branch, dst_reg,
4042 dst_reg->type, false);
4043 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4044 src_reg->type == PTR_TO_PACKET) ||
4045 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4046 src_reg->type == PTR_TO_PACKET_META)) {
4047 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
4048 find_good_pkt_pointers(other_branch, src_reg,
4049 src_reg->type, true);
4050 } else {
4051 return false;
4052 }
4053 break;
4054 case BPF_JLT:
4055 if ((dst_reg->type == PTR_TO_PACKET &&
4056 src_reg->type == PTR_TO_PACKET_END) ||
4057 (dst_reg->type == PTR_TO_PACKET_META &&
4058 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4059 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
4060 find_good_pkt_pointers(other_branch, dst_reg,
4061 dst_reg->type, true);
4062 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4063 src_reg->type == PTR_TO_PACKET) ||
4064 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4065 src_reg->type == PTR_TO_PACKET_META)) {
 4067 /* pkt_end < pkt_data', pkt_data < pkt_meta' */
4067 find_good_pkt_pointers(this_branch, src_reg,
4068 src_reg->type, false);
4069 } else {
4070 return false;
4071 }
4072 break;
4073 case BPF_JGE:
4074 if ((dst_reg->type == PTR_TO_PACKET &&
4075 src_reg->type == PTR_TO_PACKET_END) ||
4076 (dst_reg->type == PTR_TO_PACKET_META &&
4077 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4078 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
4079 find_good_pkt_pointers(this_branch, dst_reg,
4080 dst_reg->type, true);
4081 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4082 src_reg->type == PTR_TO_PACKET) ||
4083 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4084 src_reg->type == PTR_TO_PACKET_META)) {
4085 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
4086 find_good_pkt_pointers(other_branch, src_reg,
4087 src_reg->type, false);
4088 } else {
4089 return false;
4090 }
4091 break;
4092 case BPF_JLE:
4093 if ((dst_reg->type == PTR_TO_PACKET &&
4094 src_reg->type == PTR_TO_PACKET_END) ||
4095 (dst_reg->type == PTR_TO_PACKET_META &&
4096 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4097 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
4098 find_good_pkt_pointers(other_branch, dst_reg,
4099 dst_reg->type, false);
4100 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4101 src_reg->type == PTR_TO_PACKET) ||
4102 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4103 src_reg->type == PTR_TO_PACKET_META)) {
4104 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
4105 find_good_pkt_pointers(this_branch, src_reg,
4106 src_reg->type, true);
4107 } else {
4108 return false;
4109 }
4110 break;
4111 default:
4112 return false;
4113 }
4114
4115 return true;
4116}
4117
58e2af8b 4118static int check_cond_jmp_op(struct bpf_verifier_env *env,
17a52670
AS
4119 struct bpf_insn *insn, int *insn_idx)
4120{
f4d7e40a
AS
4121 struct bpf_verifier_state *this_branch = env->cur_state;
4122 struct bpf_verifier_state *other_branch;
4123 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
4124 struct bpf_reg_state *dst_reg, *other_branch_regs;
17a52670
AS
4125 u8 opcode = BPF_OP(insn->code);
4126 int err;
4127
b4e432f1 4128 if (opcode > BPF_JSLE) {
61bd5218 4129 verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
17a52670
AS
4130 return -EINVAL;
4131 }
4132
4133 if (BPF_SRC(insn->code) == BPF_X) {
4134 if (insn->imm != 0) {
61bd5218 4135 verbose(env, "BPF_JMP uses reserved fields\n");
17a52670
AS
4136 return -EINVAL;
4137 }
4138
4139 /* check src1 operand */
dc503a8a 4140 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4141 if (err)
4142 return err;
1be7f75d
AS
4143
4144 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 4145 verbose(env, "R%d pointer comparison prohibited\n",
1be7f75d
AS
4146 insn->src_reg);
4147 return -EACCES;
4148 }
17a52670
AS
4149 } else {
4150 if (insn->src_reg != BPF_REG_0) {
61bd5218 4151 verbose(env, "BPF_JMP uses reserved fields\n");
17a52670
AS
4152 return -EINVAL;
4153 }
4154 }
4155
4156 /* check src2 operand */
dc503a8a 4157 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
4158 if (err)
4159 return err;
4160
1a0dc1ac
AS
4161 dst_reg = &regs[insn->dst_reg];
4162
17a52670
AS
 4163 /* detect if R == imm where R holds a known constant, so only one branch is feasible */
4164 if (BPF_SRC(insn->code) == BPF_K &&
4165 (opcode == BPF_JEQ || opcode == BPF_JNE) &&
f1174f77 4166 dst_reg->type == SCALAR_VALUE &&
3bf15921
AS
4167 tnum_is_const(dst_reg->var_off)) {
4168 if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
4169 (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
17a52670
AS
4170 /* if (imm == imm) goto pc+off;
4171 * only follow the goto, ignore fall-through
4172 */
4173 *insn_idx += insn->off;
4174 return 0;
4175 } else {
4176 /* if (imm != imm) goto pc+off;
4177 * only follow fall-through branch, since
4178 * that's where the program will go
4179 */
4180 return 0;
4181 }
4182 }
4183
4184 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
4185 if (!other_branch)
4186 return -EFAULT;
f4d7e40a 4187 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
17a52670 4188
48461135
JB
4189 /* detect if we are comparing against a constant value so we can adjust
4190 * our min/max values for our dst register.
f1174f77
EC
4191 * this is only legit if both are scalars (or pointers to the same
4192 * object, I suppose, but we don't support that right now), because
4193 * otherwise the different base pointers mean the offsets aren't
4194 * comparable.
48461135
JB
4195 */
4196 if (BPF_SRC(insn->code) == BPF_X) {
f1174f77
EC
4197 if (dst_reg->type == SCALAR_VALUE &&
4198 regs[insn->src_reg].type == SCALAR_VALUE) {
4199 if (tnum_is_const(regs[insn->src_reg].var_off))
f4d7e40a 4200 reg_set_min_max(&other_branch_regs[insn->dst_reg],
f1174f77
EC
4201 dst_reg, regs[insn->src_reg].var_off.value,
4202 opcode);
4203 else if (tnum_is_const(dst_reg->var_off))
f4d7e40a 4204 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
f1174f77
EC
4205 &regs[insn->src_reg],
4206 dst_reg->var_off.value, opcode);
4207 else if (opcode == BPF_JEQ || opcode == BPF_JNE)
4208 /* Comparing for equality, we can combine knowledge */
f4d7e40a
AS
4209 reg_combine_min_max(&other_branch_regs[insn->src_reg],
4210 &other_branch_regs[insn->dst_reg],
f1174f77
EC
4211 &regs[insn->src_reg],
4212 &regs[insn->dst_reg], opcode);
4213 }
4214 } else if (dst_reg->type == SCALAR_VALUE) {
f4d7e40a 4215 reg_set_min_max(&other_branch_regs[insn->dst_reg],
48461135
JB
4216 dst_reg, insn->imm, opcode);
4217 }
4218
58e2af8b 4219 /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
17a52670 4220 if (BPF_SRC(insn->code) == BPF_K &&
1a0dc1ac 4221 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
840b9615
JS
4222 reg_type_may_be_null(dst_reg->type)) {
4223 /* Mark all identical registers in each branch as either
57a09bf0
TG
4224 * safe or unknown depending R == 0 or R != 0 conditional.
4225 */
840b9615
JS
4226 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
4227 opcode == BPF_JNE);
4228 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
4229 opcode == BPF_JEQ);
5beca081
DB
4230 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
4231 this_branch, other_branch) &&
4232 is_pointer_value(env, insn->dst_reg)) {
61bd5218
JK
4233 verbose(env, "R%d pointer comparison prohibited\n",
4234 insn->dst_reg);
1be7f75d 4235 return -EACCES;
17a52670 4236 }
61bd5218 4237 if (env->log.level)
f4d7e40a 4238 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
17a52670
AS
4239 return 0;
4240}
4241
0246e64d
AS
4242/* return the map pointer stored inside BPF_LD_IMM64 instruction */
4243static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
4244{
4245 u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
4246
4247 return (struct bpf_map *) (unsigned long) imm64;
4248}
4249
17a52670 4250/* verify BPF_LD_IMM64 instruction */
58e2af8b 4251static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 4252{
638f5b90 4253 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
4254 int err;
4255
4256 if (BPF_SIZE(insn->code) != BPF_DW) {
61bd5218 4257 verbose(env, "invalid BPF_LD_IMM insn\n");
17a52670
AS
4258 return -EINVAL;
4259 }
4260 if (insn->off != 0) {
61bd5218 4261 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
17a52670
AS
4262 return -EINVAL;
4263 }
4264
dc503a8a 4265 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
4266 if (err)
4267 return err;
4268
6b173873 4269 if (insn->src_reg == 0) {
6b173873
JK
4270 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
4271
f1174f77 4272 regs[insn->dst_reg].type = SCALAR_VALUE;
b03c9f9f 4273 __mark_reg_known(&regs[insn->dst_reg], imm);
17a52670 4274 return 0;
6b173873 4275 }
17a52670
AS
4276
4277 /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
4278 BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
4279
4280 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
4281 regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
4282 return 0;
4283}
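
The two source forms handled above can be written side by side with the raw instruction macros from include/linux/filter.h; the fd value below stands in for a hypothetical, already-created map file descriptor (illustrative only):

	#include <linux/filter.h>

	static struct bpf_insn ld_imm64_examples[] = {
		/* src_reg == 0: dst becomes a known SCALAR_VALUE */
		BPF_LD_IMM64(BPF_REG_0, 0x123456789abcdefULL),
		/* src_reg == BPF_PSEUDO_MAP_FD: after replace_map_fd_with_map_ptr()
		 * has patched in the pointer, check_ld_imm() marks dst CONST_PTR_TO_MAP
		 */
		BPF_LD_MAP_FD(BPF_REG_1, 3 /* hypothetical map fd */),
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};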
4284
96be4325
DB
4285static bool may_access_skb(enum bpf_prog_type type)
4286{
4287 switch (type) {
4288 case BPF_PROG_TYPE_SOCKET_FILTER:
4289 case BPF_PROG_TYPE_SCHED_CLS:
94caee8c 4290 case BPF_PROG_TYPE_SCHED_ACT:
96be4325
DB
4291 return true;
4292 default:
4293 return false;
4294 }
4295}
4296
ddd872bc
AS
4297/* verify safety of LD_ABS|LD_IND instructions:
4298 * - they can only appear in the programs where ctx == skb
4299 * - since they are wrappers of function calls, they scratch R1-R5 registers,
4300 * preserve R6-R9, and store return value into R0
4301 *
4302 * Implicit input:
4303 * ctx == skb == R6 == CTX
4304 *
4305 * Explicit input:
4306 * SRC == any register
4307 * IMM == 32-bit immediate
4308 *
4309 * Output:
4310 * R0 - 8/16/32-bit skb data converted to cpu endianness
4311 */
58e2af8b 4312static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
ddd872bc 4313{
638f5b90 4314 struct bpf_reg_state *regs = cur_regs(env);
ddd872bc 4315 u8 mode = BPF_MODE(insn->code);
ddd872bc
AS
4316 int i, err;
4317
24701ece 4318 if (!may_access_skb(env->prog->type)) {
61bd5218 4319 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
ddd872bc
AS
4320 return -EINVAL;
4321 }
4322
e0cea7ce
DB
4323 if (!env->ops->gen_ld_abs) {
4324 verbose(env, "bpf verifier is misconfigured\n");
4325 return -EINVAL;
4326 }
4327
f910cefa 4328 if (env->subprog_cnt > 1) {
f4d7e40a
AS
 4329 /* when a program has LD_ABS insns, JITs and the interpreter assume
 4330 * that r1 == ctx == skb, which is not the case for callees, which
 4331 * can take arbitrary arguments. It's problematic for the main prog
 4332 * as well, since JITs would need to analyze all functions in order
 4333 * to make proper register save/restore decisions in the main prog.
 4334 * Hence disallow mixing LD_ABS with bpf-to-bpf calls.
4335 */
4336 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
4337 return -EINVAL;
4338 }
4339
ddd872bc 4340 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
d82bccc6 4341 BPF_SIZE(insn->code) == BPF_DW ||
ddd872bc 4342 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
61bd5218 4343 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
ddd872bc
AS
4344 return -EINVAL;
4345 }
4346
4347 /* check whether implicit source operand (register R6) is readable */
dc503a8a 4348 err = check_reg_arg(env, BPF_REG_6, SRC_OP);
ddd872bc
AS
4349 if (err)
4350 return err;
4351
fd978bf7
JS
4352 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
4353 * gen_ld_abs() may terminate the program at runtime, leading to
4354 * reference leak.
4355 */
4356 err = check_reference_leak(env);
4357 if (err) {
4358 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
4359 return err;
4360 }
4361
ddd872bc 4362 if (regs[BPF_REG_6].type != PTR_TO_CTX) {
61bd5218
JK
4363 verbose(env,
4364 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
ddd872bc
AS
4365 return -EINVAL;
4366 }
4367
4368 if (mode == BPF_IND) {
4369 /* check explicit source operand */
dc503a8a 4370 err = check_reg_arg(env, insn->src_reg, SRC_OP);
ddd872bc
AS
4371 if (err)
4372 return err;
4373 }
4374
4375 /* reset caller saved regs to unreadable */
dc503a8a 4376 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 4377 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
4378 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
4379 }
ddd872bc
AS
4380
4381 /* mark destination R0 register as readable, since it contains
dc503a8a
EC
4382 * the value fetched from the packet.
4383 * Already marked as written above.
ddd872bc 4384 */
61bd5218 4385 mark_reg_unknown(env, regs, BPF_REG_0);
ddd872bc
AS
4386 return 0;
4387}
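
For reference, the implicit-R6 convention described above looks like this as raw instructions for a socket filter, again assuming the macros from include/linux/filter.h (illustrative sketch):

	#include <linux/filter.h>

	static struct bpf_insn ld_abs_filter[] = {
		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),		/* implicit input: R6 = ctx (skb) */
		BPF_LD_ABS(BPF_H, 12),				/* R0 = 16-bit load at skb offset 12 (EtherType) */
		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x0800, 2),	/* not IPv4? skip to drop */
		BPF_MOV64_IMM(BPF_REG_0, 0xffff),		/* accept: keep up to 0xffff bytes */
		BPF_EXIT_INSN(),
		BPF_MOV64_IMM(BPF_REG_0, 0),			/* drop */
		BPF_EXIT_INSN(),
	};

If the first MOV were omitted, R6 would not be PTR_TO_CTX and check_ld_abs() would reject the program with "at the time of BPF_LD_ABS|IND R6 != pointer to skb".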
4388
390ee7e2
AS
4389static int check_return_code(struct bpf_verifier_env *env)
4390{
4391 struct bpf_reg_state *reg;
4392 struct tnum range = tnum_range(0, 1);
4393
4394 switch (env->prog->type) {
4395 case BPF_PROG_TYPE_CGROUP_SKB:
4396 case BPF_PROG_TYPE_CGROUP_SOCK:
4fbac77d 4397 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
390ee7e2 4398 case BPF_PROG_TYPE_SOCK_OPS:
ebc614f6 4399 case BPF_PROG_TYPE_CGROUP_DEVICE:
390ee7e2
AS
4400 break;
4401 default:
4402 return 0;
4403 }
4404
638f5b90 4405 reg = cur_regs(env) + BPF_REG_0;
390ee7e2 4406 if (reg->type != SCALAR_VALUE) {
61bd5218 4407 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
390ee7e2
AS
4408 reg_type_str[reg->type]);
4409 return -EINVAL;
4410 }
4411
4412 if (!tnum_in(range, reg->var_off)) {
61bd5218 4413 verbose(env, "At program exit the register R0 ");
390ee7e2
AS
4414 if (!tnum_is_unknown(reg->var_off)) {
4415 char tn_buf[48];
4416
4417 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 4418 verbose(env, "has value %s", tn_buf);
390ee7e2 4419 } else {
61bd5218 4420 verbose(env, "has unknown scalar value");
390ee7e2 4421 }
61bd5218 4422 verbose(env, " should have been 0 or 1\n");
390ee7e2
AS
4423 return -EINVAL;
4424 }
4425 return 0;
4426}
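
For the program types in the switch above, the exit value must provably lie in [0, 1]. A minimal cgroup skb program that passes the check, assuming libbpf's bpf_helpers.h (section name per libbpf conventions; illustrative):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("cgroup_skb/ingress")
	int allow_all(struct __sk_buff *skb)
	{
		/* R0 is the constant 1 here, inside tnum_range(0, 1).
		 * "return 2" (or returning skb->len) would be rejected with
		 * "At program exit the register R0 ... should have been 0 or 1".
		 */
		return 1;
	}

	char _license[] SEC("license") = "GPL";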
4427
475fb78f
AS
4428/* non-recursive DFS pseudo code
4429 * 1 procedure DFS-iterative(G,v):
4430 * 2 label v as discovered
4431 * 3 let S be a stack
4432 * 4 S.push(v)
4433 * 5 while S is not empty
4434 * 6 t <- S.pop()
4435 * 7 if t is what we're looking for:
4436 * 8 return t
4437 * 9 for all edges e in G.adjacentEdges(t) do
4438 * 10 if edge e is already labelled
4439 * 11 continue with the next edge
4440 * 12 w <- G.adjacentVertex(t,e)
4441 * 13 if vertex w is not discovered and not explored
4442 * 14 label e as tree-edge
4443 * 15 label w as discovered
4444 * 16 S.push(w)
4445 * 17 continue at 5
4446 * 18 else if vertex w is discovered
4447 * 19 label e as back-edge
4448 * 20 else
4449 * 21 // vertex w is explored
4450 * 22 label e as forward- or cross-edge
4451 * 23 label t as explored
4452 * 24 S.pop()
4453 *
4454 * convention:
4455 * 0x10 - discovered
4456 * 0x11 - discovered and fall-through edge labelled
4457 * 0x12 - discovered and fall-through and branch edges labelled
4458 * 0x20 - explored
4459 */
4460
4461enum {
4462 DISCOVERED = 0x10,
4463 EXPLORED = 0x20,
4464 FALLTHROUGH = 1,
4465 BRANCH = 2,
4466};
4467
58e2af8b 4468#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
f1bca824 4469
475fb78f
AS
4470static int *insn_stack; /* stack of insns to process */
4471static int cur_stack; /* current stack index */
4472static int *insn_state;
4473
4474/* t, w, e - match pseudo-code above:
4475 * t - index of current instruction
4476 * w - next instruction
4477 * e - edge
4478 */
58e2af8b 4479static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
475fb78f
AS
4480{
4481 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
4482 return 0;
4483
4484 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
4485 return 0;
4486
4487 if (w < 0 || w >= env->prog->len) {
61bd5218 4488 verbose(env, "jump out of range from insn %d to %d\n", t, w);
475fb78f
AS
4489 return -EINVAL;
4490 }
4491
f1bca824
AS
4492 if (e == BRANCH)
4493 /* mark branch target for state pruning */
4494 env->explored_states[w] = STATE_LIST_MARK;
4495
475fb78f
AS
4496 if (insn_state[w] == 0) {
4497 /* tree-edge */
4498 insn_state[t] = DISCOVERED | e;
4499 insn_state[w] = DISCOVERED;
4500 if (cur_stack >= env->prog->len)
4501 return -E2BIG;
4502 insn_stack[cur_stack++] = w;
4503 return 1;
4504 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
61bd5218 4505 verbose(env, "back-edge from insn %d to %d\n", t, w);
475fb78f
AS
4506 return -EINVAL;
4507 } else if (insn_state[w] == EXPLORED) {
4508 /* forward- or cross-edge */
4509 insn_state[t] = DISCOVERED | e;
4510 } else {
61bd5218 4511 verbose(env, "insn state internal bug\n");
475fb78f
AS
4512 return -EFAULT;
4513 }
4514 return 0;
4515}
4516
4517/* non-recursive depth-first-search to detect loops in BPF program
4518 * loop == back-edge in directed graph
4519 */
58e2af8b 4520static int check_cfg(struct bpf_verifier_env *env)
475fb78f
AS
4521{
4522 struct bpf_insn *insns = env->prog->insnsi;
4523 int insn_cnt = env->prog->len;
4524 int ret = 0;
4525 int i, t;
4526
cc8b0b92
AS
4527 ret = check_subprogs(env);
4528 if (ret < 0)
4529 return ret;
4530
475fb78f
AS
4531 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
4532 if (!insn_state)
4533 return -ENOMEM;
4534
4535 insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
4536 if (!insn_stack) {
4537 kfree(insn_state);
4538 return -ENOMEM;
4539 }
4540
4541 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
4542 insn_stack[0] = 0; /* 0 is the first instruction */
4543 cur_stack = 1;
4544
4545peek_stack:
4546 if (cur_stack == 0)
4547 goto check_state;
4548 t = insn_stack[cur_stack - 1];
4549
4550 if (BPF_CLASS(insns[t].code) == BPF_JMP) {
4551 u8 opcode = BPF_OP(insns[t].code);
4552
4553 if (opcode == BPF_EXIT) {
4554 goto mark_explored;
4555 } else if (opcode == BPF_CALL) {
4556 ret = push_insn(t, t + 1, FALLTHROUGH, env);
4557 if (ret == 1)
4558 goto peek_stack;
4559 else if (ret < 0)
4560 goto err_free;
07016151
DB
4561 if (t + 1 < insn_cnt)
4562 env->explored_states[t + 1] = STATE_LIST_MARK;
cc8b0b92
AS
4563 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
4564 env->explored_states[t] = STATE_LIST_MARK;
4565 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
4566 if (ret == 1)
4567 goto peek_stack;
4568 else if (ret < 0)
4569 goto err_free;
4570 }
475fb78f
AS
4571 } else if (opcode == BPF_JA) {
4572 if (BPF_SRC(insns[t].code) != BPF_K) {
4573 ret = -EINVAL;
4574 goto err_free;
4575 }
4576 /* unconditional jump with single edge */
4577 ret = push_insn(t, t + insns[t].off + 1,
4578 FALLTHROUGH, env);
4579 if (ret == 1)
4580 goto peek_stack;
4581 else if (ret < 0)
4582 goto err_free;
f1bca824
AS
4583 /* tell verifier to check for equivalent states
4584 * after every call and jump
4585 */
c3de6317
AS
4586 if (t + 1 < insn_cnt)
4587 env->explored_states[t + 1] = STATE_LIST_MARK;
475fb78f
AS
4588 } else {
4589 /* conditional jump with two edges */
3c2ce60b 4590 env->explored_states[t] = STATE_LIST_MARK;
475fb78f
AS
4591 ret = push_insn(t, t + 1, FALLTHROUGH, env);
4592 if (ret == 1)
4593 goto peek_stack;
4594 else if (ret < 0)
4595 goto err_free;
4596
4597 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
4598 if (ret == 1)
4599 goto peek_stack;
4600 else if (ret < 0)
4601 goto err_free;
4602 }
4603 } else {
4604 /* all other non-branch instructions with single
4605 * fall-through edge
4606 */
4607 ret = push_insn(t, t + 1, FALLTHROUGH, env);
4608 if (ret == 1)
4609 goto peek_stack;
4610 else if (ret < 0)
4611 goto err_free;
4612 }
4613
4614mark_explored:
4615 insn_state[t] = EXPLORED;
4616 if (cur_stack-- <= 0) {
61bd5218 4617 verbose(env, "pop stack internal bug\n");
475fb78f
AS
4618 ret = -EFAULT;
4619 goto err_free;
4620 }
4621 goto peek_stack;
4622
4623check_state:
4624 for (i = 0; i < insn_cnt; i++) {
4625 if (insn_state[i] != EXPLORED) {
61bd5218 4626 verbose(env, "unreachable insn %d\n", i);
475fb78f
AS
4627 ret = -EINVAL;
4628 goto err_free;
4629 }
4630 }
4631 ret = 0; /* cfg looks good */
4632
4633err_free:
4634 kfree(insn_state);
4635 kfree(insn_stack);
4636 return ret;
4637}
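
An instruction sequence that trips the back-edge detection in push_insn(): the unconditional jump targets an already-DISCOVERED instruction, so check_cfg() rejects the program instead of exploring a loop. The macros are from include/linux/filter.h (illustrative sketch):

	#include <linux/filter.h>

	static struct bpf_insn looping_prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),	/* insn 0 */
		BPF_JMP_A(-2),			/* insn 1: target = 1 + (-2) + 1 == insn 0 */
		BPF_EXIT_INSN(),		/* insn 2 */
	};
	/* expected verifier log: "back-edge from insn 1 to 0" */

By the time insn 1 is processed, insn 0 is already marked DISCOVERED | FALLTHROUGH, so push_insn() takes the back-edge branch and returns -EINVAL.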
4638
838e9690
YS
4639/* The minimum supported BTF func info size */
4640#define MIN_BPF_FUNCINFO_SIZE 8
4641#define MAX_FUNCINFO_REC_SIZE 252
4642
4643static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env,
4644 union bpf_attr *attr, union bpf_attr __user *uattr)
4645{
4646 u32 i, nfuncs, urec_size, min_size, prev_offset;
4647 u32 krec_size = sizeof(struct bpf_func_info);
ba64e7d8 4648 struct bpf_func_info *krecord = NULL;
838e9690
YS
4649 const struct btf_type *type;
4650 void __user *urecord;
4651 struct btf *btf;
4652 int ret = 0;
4653
4654 nfuncs = attr->func_info_cnt;
4655 if (!nfuncs)
4656 return 0;
4657
4658 if (nfuncs != env->subprog_cnt) {
4659 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
4660 return -EINVAL;
4661 }
4662
4663 urec_size = attr->func_info_rec_size;
4664 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
4665 urec_size > MAX_FUNCINFO_REC_SIZE ||
4666 urec_size % sizeof(u32)) {
4667 verbose(env, "invalid func info rec size %u\n", urec_size);
4668 return -EINVAL;
4669 }
4670
4671 btf = btf_get_by_fd(attr->prog_btf_fd);
4672 if (IS_ERR(btf)) {
4673 verbose(env, "unable to get btf from fd\n");
4674 return PTR_ERR(btf);
4675 }
4676
4677 urecord = u64_to_user_ptr(attr->func_info);
4678 min_size = min_t(u32, krec_size, urec_size);
4679
ba64e7d8
YS
4680 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
4681 if (!krecord) {
4682 ret = -ENOMEM;
4683 goto free_btf;
4684 }
4685
838e9690
YS
4686 for (i = 0; i < nfuncs; i++) {
4687 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
4688 if (ret) {
4689 if (ret == -E2BIG) {
4690 verbose(env, "nonzero tailing record in func info");
4691 /* set the size kernel expects so loader can zero
4692 * out the rest of the record.
4693 */
4694 if (put_user(min_size, &uattr->func_info_rec_size))
4695 ret = -EFAULT;
4696 }
4697 goto free_btf;
4698 }
4699
ba64e7d8 4700 if (copy_from_user(&krecord[i], urecord, min_size)) {
838e9690
YS
4701 ret = -EFAULT;
4702 goto free_btf;
4703 }
4704
d30d42e0 4705 /* check insn_off */
838e9690 4706 if (i == 0) {
d30d42e0 4707 if (krecord[i].insn_off) {
838e9690 4708 verbose(env,
d30d42e0
MKL
4709 "nonzero insn_off %u for the first func info record",
4710 krecord[i].insn_off);
838e9690
YS
4711 ret = -EINVAL;
4712 goto free_btf;
4713 }
d30d42e0 4714 } else if (krecord[i].insn_off <= prev_offset) {
838e9690
YS
4715 verbose(env,
4716 "same or smaller insn offset (%u) than previous func info record (%u)",
d30d42e0 4717 krecord[i].insn_off, prev_offset);
838e9690
YS
4718 ret = -EINVAL;
4719 goto free_btf;
4720 }
4721
d30d42e0 4722 if (env->subprog_info[i].start != krecord[i].insn_off) {
838e9690
YS
4723 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
4724 ret = -EINVAL;
4725 goto free_btf;
4726 }
4727
4728 /* check type_id */
ba64e7d8 4729 type = btf_type_by_id(btf, krecord[i].type_id);
838e9690
YS
4730 if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
4731 verbose(env, "invalid type id %d in func info",
ba64e7d8 4732 krecord[i].type_id);
838e9690
YS
4733 ret = -EINVAL;
4734 goto free_btf;
4735 }
4736
d30d42e0 4737 prev_offset = krecord[i].insn_off;
838e9690
YS
4738 urecord += urec_size;
4739 }
4740
4741 prog->aux->btf = btf;
ba64e7d8
YS
4742 prog->aux->func_info = krecord;
4743 prog->aux->func_info_cnt = nfuncs;
838e9690
YS
4744 return 0;
4745
4746free_btf:
4747 btf_put(btf);
ba64e7d8 4748 kvfree(krecord);
838e9690
YS
4749 return ret;
4750}
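/* Illustrative example (hypothetical BTF type ids): for a program with two
 * subprogs starting at insns 0 and 12, a func_info array accepted by
 * check_btf_func() could look like
 *	{ .insn_off = 0,  .type_id = 4 },	<- BTF_KIND_FUNC of main
 *	{ .insn_off = 12, .type_id = 6 },	<- BTF_KIND_FUNC of the subprog
 * i.e. one record per subprog, sorted by insn_off, the first one at 0 and
 * each insn_off equal to the corresponding subprog start.
 */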
4751
ba64e7d8
YS
4752static void adjust_btf_func(struct bpf_verifier_env *env)
4753{
4754 int i;
4755
4756 if (!env->prog->aux->func_info)
4757 return;
4758
4759 for (i = 0; i < env->subprog_cnt; i++)
d30d42e0 4760 env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
ba64e7d8
YS
4761}
4762
f1174f77
EC
4763/* check %cur's range satisfies %old's */
4764static bool range_within(struct bpf_reg_state *old,
4765 struct bpf_reg_state *cur)
4766{
b03c9f9f
EC
4767 return old->umin_value <= cur->umin_value &&
4768 old->umax_value >= cur->umax_value &&
4769 old->smin_value <= cur->smin_value &&
4770 old->smax_value >= cur->smax_value;
f1174f77
EC
4771}
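/* For example, an old (explored) register known to be in [2, 8] is satisfied
 * by a current register known to be in [3, 5], but not by one in [0, 5]:
 * the current range must be fully contained in the old one, in both the
 * signed and the unsigned view.
 */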
4772
4773/* Maximum number of register states that can exist at once */
4774#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
4775struct idpair {
4776 u32 old;
4777 u32 cur;
4778};
4779
4780/* If in the old state two registers had the same id, then they need to have
4781 * the same id in the new state as well. But that id could be different from
4782 * the old state, so we need to track the mapping from old to new ids.
4783 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
4784 * regs with old id 5 must also have new id 9 for the new state to be safe. But
4785 * regs with a different old id could still have new id 9, we don't care about
4786 * that.
4787 * So we look through our idmap to see if this old id has been seen before. If
4788 * so, we require the new id to match; otherwise, we add the id pair to the map.
969bf05e 4789 */
f1174f77 4790static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
969bf05e 4791{
f1174f77 4792 unsigned int i;
969bf05e 4793
f1174f77
EC
4794 for (i = 0; i < ID_MAP_SIZE; i++) {
4795 if (!idmap[i].old) {
4796 /* Reached an empty slot; haven't seen this id before */
4797 idmap[i].old = old_id;
4798 idmap[i].cur = cur_id;
4799 return true;
4800 }
4801 if (idmap[i].old == old_id)
4802 return idmap[i].cur == cur_id;
4803 }
4804 /* We ran out of idmap slots, which should be impossible */
4805 WARN_ON_ONCE(1);
4806 return false;
4807}
4808
4809/* Returns true if (rold safe implies rcur safe) */
1b688a19
EC
4810static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
4811 struct idpair *idmap)
f1174f77 4812{
f4d7e40a
AS
4813 bool equal;
4814
dc503a8a
EC
4815 if (!(rold->live & REG_LIVE_READ))
4816 /* explored state didn't use this */
4817 return true;
4818
679c782d 4819 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
f4d7e40a
AS
4820
4821 if (rold->type == PTR_TO_STACK)
4822 /* two stack pointers are equal only if they're pointing to
4823 * the same stack frame, since fp-8 in foo != fp-8 in bar
4824 */
4825 return equal && rold->frameno == rcur->frameno;
4826
4827 if (equal)
969bf05e
AS
4828 return true;
4829
f1174f77
EC
4830 if (rold->type == NOT_INIT)
4831 /* explored state can't have used this */
969bf05e 4832 return true;
f1174f77
EC
4833 if (rcur->type == NOT_INIT)
4834 return false;
4835 switch (rold->type) {
4836 case SCALAR_VALUE:
4837 if (rcur->type == SCALAR_VALUE) {
4838 /* new val must satisfy old val knowledge */
4839 return range_within(rold, rcur) &&
4840 tnum_in(rold->var_off, rcur->var_off);
4841 } else {
179d1c56
JH
4842 /* We're trying to use a pointer in place of a scalar.
4843 * Even if the scalar was unbounded, this could lead to
4844 * pointer leaks because scalars are allowed to leak
4845 * while pointers are not. We could make this safe in
4846 * special cases if root is calling us, but it's
4847 * probably not worth the hassle.
f1174f77 4848 */
179d1c56 4849 return false;
f1174f77
EC
4850 }
4851 case PTR_TO_MAP_VALUE:
1b688a19
EC
4852 /* If the new min/max/var_off satisfy the old ones and
4853 * everything else matches, we are OK.
4854 * We don't care about the 'id' value, because nothing
4855 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
4856 */
4857 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
4858 range_within(rold, rcur) &&
4859 tnum_in(rold->var_off, rcur->var_off);
f1174f77
EC
4860 case PTR_TO_MAP_VALUE_OR_NULL:
4861 /* a PTR_TO_MAP_VALUE could be safe to use as a
4862 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
4863 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
4864 * checked, doing so could have affected others with the same
4865 * id, and we can't check for that because we lost the id when
4866 * we converted to a PTR_TO_MAP_VALUE.
4867 */
4868 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
4869 return false;
4870 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
4871 return false;
4872 /* Check our ids match any regs they're supposed to */
4873 return check_ids(rold->id, rcur->id, idmap);
de8f3a83 4874 case PTR_TO_PACKET_META:
f1174f77 4875 case PTR_TO_PACKET:
de8f3a83 4876 if (rcur->type != rold->type)
f1174f77
EC
4877 return false;
4878 /* We must have at least as much range as the old ptr
4879 * did, so that any accesses which were safe before are
4880 * still safe. This is true even if old range < old off,
4881 * since someone could have accessed through (ptr - k), or
4882 * even done ptr -= k in a register, to get a safe access.
4883 */
4884 if (rold->range > rcur->range)
4885 return false;
4886 /* If the offsets don't match, we can't trust our alignment;
4887 * nor can we be sure that we won't fall out of range.
4888 */
4889 if (rold->off != rcur->off)
4890 return false;
4891 /* id relations must be preserved */
4892 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
4893 return false;
4894 /* new val must satisfy old val knowledge */
4895 return range_within(rold, rcur) &&
4896 tnum_in(rold->var_off, rcur->var_off);
4897 case PTR_TO_CTX:
4898 case CONST_PTR_TO_MAP:
f1174f77 4899 case PTR_TO_PACKET_END:
d58e468b 4900 case PTR_TO_FLOW_KEYS:
c64b7983
JS
4901 case PTR_TO_SOCKET:
4902 case PTR_TO_SOCKET_OR_NULL:
f1174f77
EC
4903 /* Only valid matches are exact, which memcmp() above
4904 * would have accepted
4905 */
4906 default:
4907 /* Don't know what's going on, just say it's not safe */
4908 return false;
4909 }
969bf05e 4910
f1174f77
EC
4911 /* Shouldn't get here; if we do, say it's not safe */
4912 WARN_ON_ONCE(1);
969bf05e
AS
4913 return false;
4914}
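/* Example for the packet-pointer case above: an old PTR_TO_PACKET with
 * range 16 is satisfied by a current one with range 32 at the same offset,
 * since a larger verified range only allows more accesses; a current range
 * of 8 is rejected, because loads proven safe against the old range could
 * then reach beyond the newly verified bound.
 */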
4915
f4d7e40a
AS
4916static bool stacksafe(struct bpf_func_state *old,
4917 struct bpf_func_state *cur,
638f5b90
AS
4918 struct idpair *idmap)
4919{
4920 int i, spi;
4921
4922 /* if the explored stack has more populated slots than the current stack,
4923 * such stacks are not equivalent
4924 */
4925 if (old->allocated_stack > cur->allocated_stack)
4926 return false;
4927
4928 /* walk slots of the explored stack and ignore any additional
4929 * slots in the current stack, since explored(safe) state
4930 * didn't use them
4931 */
4932 for (i = 0; i < old->allocated_stack; i++) {
4933 spi = i / BPF_REG_SIZE;
4934
cc2b14d5
AS
4935 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
4936 /* explored state didn't use this */
fd05e57b 4937 continue;
cc2b14d5 4938
638f5b90
AS
4939 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
4940 continue;
cc2b14d5
AS
4941 /* if old state was safe with misc data in the stack
4942 * it will be safe with zero-initialized stack.
4943 * The opposite is not true
4944 */
4945 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
4946 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
4947 continue;
638f5b90
AS
4948 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
4949 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
4950 /* Ex: old explored (safe) state has STACK_SPILL in
4951 * this stack slot, but current has STACK_MISC ->
4952 * these verifier states are not equivalent,
4953 * return false to continue verification of this path
4954 */
4955 return false;
4956 if (i % BPF_REG_SIZE)
4957 continue;
4958 if (old->stack[spi].slot_type[0] != STACK_SPILL)
4959 continue;
4960 if (!regsafe(&old->stack[spi].spilled_ptr,
4961 &cur->stack[spi].spilled_ptr,
4962 idmap))
4963 /* when explored and current stack slot are both storing
4964 * spilled registers, check that stored pointers types
4965 * are the same as well.
4966 * Ex: explored safe path could have stored
4967 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
4968 * but current path has stored:
4969 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
4970 * such verifier states are not equivalent.
4971 * return false to continue verification of this path
4972 */
4973 return false;
4974 }
4975 return true;
4976}
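/* Example: an old (explored) STACK_MISC slot is also satisfied by a current
 * STACK_ZERO slot, since zero-initialized data is a safe special case of
 * unknown data; an old STACK_SPILL slot paired with a current STACK_MISC
 * slot makes the states differ and keeps this path under verification.
 */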
4977
fd978bf7
JS
4978static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
4979{
4980 if (old->acquired_refs != cur->acquired_refs)
4981 return false;
4982 return !memcmp(old->refs, cur->refs,
4983 sizeof(*old->refs) * old->acquired_refs);
4984}
4985
f1bca824
AS
4986/* compare two verifier states
4987 *
4988 * all states stored in state_list are known to be valid, since
4989 * verifier reached 'bpf_exit' instruction through them
4990 *
4991 * this function is called when the verifier explores different branches of
4992 * execution popped from the state stack. If it sees an old state that has
4993 * a more strict register state and a more strict stack state, then this
4994 * execution branch doesn't need to be explored further, since the verifier
4995 * already concluded that the more strict state leads to a valid finish.
4996 *
4997 * Therefore two states are equivalent if register state is more conservative
4998 * and explored stack state is more conservative than the current one.
4999 * Example:
5000 * explored current
5001 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
5002 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
5003 *
5004 * In other words, if the current stack state (the one being explored) has
5005 * more valid slots than the old one that already passed validation, it means
5006 * the verifier can stop exploring and conclude that the current state is valid too
5007 *
5008 * Similarly with registers. If the explored state has a register type as invalid
5009 * whereas the register type in the current state is meaningful, it means that
5010 * the current state will reach 'bpf_exit' instruction safely
5011 */
f4d7e40a
AS
5012static bool func_states_equal(struct bpf_func_state *old,
5013 struct bpf_func_state *cur)
f1bca824 5014{
f1174f77
EC
5015 struct idpair *idmap;
5016 bool ret = false;
f1bca824
AS
5017 int i;
5018
f1174f77
EC
5019 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
5020 /* If we failed to allocate the idmap, just say it's not safe */
5021 if (!idmap)
1a0dc1ac 5022 return false;
f1174f77
EC
5023
5024 for (i = 0; i < MAX_BPF_REG; i++) {
1b688a19 5025 if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
f1174f77 5026 goto out_free;
f1bca824
AS
5027 }
5028
638f5b90
AS
5029 if (!stacksafe(old, cur, idmap))
5030 goto out_free;
fd978bf7
JS
5031
5032 if (!refsafe(old, cur))
5033 goto out_free;
f1174f77
EC
5034 ret = true;
5035out_free:
5036 kfree(idmap);
5037 return ret;
f1bca824
AS
5038}
5039
f4d7e40a
AS
5040static bool states_equal(struct bpf_verifier_env *env,
5041 struct bpf_verifier_state *old,
5042 struct bpf_verifier_state *cur)
5043{
5044 int i;
5045
5046 if (old->curframe != cur->curframe)
5047 return false;
5048
5049 /* for states to be equal callsites have to be the same
5050 * and all frame states need to be equivalent
5051 */
5052 for (i = 0; i <= old->curframe; i++) {
5053 if (old->frame[i]->callsite != cur->frame[i]->callsite)
5054 return false;
5055 if (!func_states_equal(old->frame[i], cur->frame[i]))
5056 return false;
5057 }
5058 return true;
5059}
5060
8e9cd9ce 5061/* A write screens off any subsequent reads; but write marks come from the
f4d7e40a
AS
5062 * straight-line code between a state and its parent. When we arrive at an
5063 * equivalent state (jump target or such) we didn't arrive by the straight-line
5064 * code, so read marks in the state must propagate to the parent regardless
5065 * of the state's write marks. That's what 'parent == state->parent' comparison
679c782d 5066 * in mark_reg_read() is for.
8e9cd9ce 5067 */
f4d7e40a
AS
5068static int propagate_liveness(struct bpf_verifier_env *env,
5069 const struct bpf_verifier_state *vstate,
5070 struct bpf_verifier_state *vparent)
dc503a8a 5071{
f4d7e40a
AS
5072 int i, frame, err = 0;
5073 struct bpf_func_state *state, *parent;
dc503a8a 5074
f4d7e40a
AS
5075 if (vparent->curframe != vstate->curframe) {
5076 WARN(1, "propagate_live: parent frame %d current frame %d\n",
5077 vparent->curframe, vstate->curframe);
5078 return -EFAULT;
5079 }
dc503a8a
EC
5080 /* Propagate read liveness of registers... */
5081 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
5082 /* We don't need to worry about FP liveness because it's read-only */
5083 for (i = 0; i < BPF_REG_FP; i++) {
f4d7e40a 5084 if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
63f45f84 5085 continue;
f4d7e40a 5086 if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
679c782d
EC
5087 err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
5088 &vparent->frame[vstate->curframe]->regs[i]);
f4d7e40a
AS
5089 if (err)
5090 return err;
dc503a8a
EC
5091 }
5092 }
f4d7e40a 5093
dc503a8a 5094 /* ... and stack slots */
f4d7e40a
AS
5095 for (frame = 0; frame <= vstate->curframe; frame++) {
5096 state = vstate->frame[frame];
5097 parent = vparent->frame[frame];
5098 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
5099 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
f4d7e40a
AS
5100 if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
5101 continue;
5102 if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
679c782d
EC
5103 mark_reg_read(env, &state->stack[i].spilled_ptr,
5104 &parent->stack[i].spilled_ptr);
dc503a8a
EC
5105 }
5106 }
f4d7e40a 5107 return err;
dc503a8a
EC
5108}
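/* Example of why the propagation matters: if the already-explored
 * continuation read R6, the state being pruned (and, via mark_reg_read(),
 * its parents) must also get a read mark for R6.  Otherwise the liveness
 * information upstream would treat R6 as unused and a later, unsafe state
 * could be pruned against it.
 */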
5109
58e2af8b 5110static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
f1bca824 5111{
58e2af8b
JK
5112 struct bpf_verifier_state_list *new_sl;
5113 struct bpf_verifier_state_list *sl;
679c782d 5114 struct bpf_verifier_state *cur = env->cur_state, *new;
f4d7e40a 5115 int i, j, err;
f1bca824
AS
5116
5117 sl = env->explored_states[insn_idx];
5118 if (!sl)
5119 /* this 'insn_idx' instruction wasn't marked, so we will not
5120 * be doing state search here
5121 */
5122 return 0;
5123
5124 while (sl != STATE_LIST_MARK) {
638f5b90 5125 if (states_equal(env, &sl->state, cur)) {
f1bca824 5126 /* reached equivalent register/stack state,
dc503a8a
EC
5127 * prune the search.
5128 * Registers read by the continuation are read by us.
8e9cd9ce
EC
5129 * If we have any write marks in env->cur_state, they
5130 * will prevent corresponding reads in the continuation
5131 * from reaching our parent (an explored_state). Our
5132 * own state will get the read marks recorded, but
5133 * they'll be immediately forgotten as we're pruning
5134 * this state and will pop a new one.
f1bca824 5135 */
f4d7e40a
AS
5136 err = propagate_liveness(env, &sl->state, cur);
5137 if (err)
5138 return err;
f1bca824 5139 return 1;
dc503a8a 5140 }
f1bca824
AS
5141 sl = sl->next;
5142 }
5143
5144 /* there were no equivalent states, remember current one.
5145 * technically the current state is not proven to be safe yet,
f4d7e40a
AS
5146 * but it will either reach the outermost bpf_exit (which means it's safe)
5147 * or it will be rejected. Since there are no loops, we won't be
5148 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
5149 * again on the way to bpf_exit
f1bca824 5150 */
638f5b90 5151 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
f1bca824
AS
5152 if (!new_sl)
5153 return -ENOMEM;
5154
5155 /* add new state to the head of linked list */
679c782d
EC
5156 new = &new_sl->state;
5157 err = copy_verifier_state(new, cur);
1969db47 5158 if (err) {
679c782d 5159 free_verifier_state(new, false);
1969db47
AS
5160 kfree(new_sl);
5161 return err;
5162 }
f1bca824
AS
5163 new_sl->next = env->explored_states[insn_idx];
5164 env->explored_states[insn_idx] = new_sl;
dc503a8a 5165 /* connect new state to parentage chain */
679c782d
EC
5166 for (i = 0; i < BPF_REG_FP; i++)
5167 cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i];
8e9cd9ce
EC
5168 /* clear write marks in current state: the writes we did are not writes
5169 * our child did, so they don't screen off its reads from us.
5170 * (There are no read marks in current state, because reads always mark
5171 * their parent and current state never has children yet. Only
5172 * explored_states can get read marks.)
5173 */
dc503a8a 5174 for (i = 0; i < BPF_REG_FP; i++)
f4d7e40a
AS
5175 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
5176
5177 /* all stack frames are accessible from callee, clear them all */
5178 for (j = 0; j <= cur->curframe; j++) {
5179 struct bpf_func_state *frame = cur->frame[j];
679c782d 5180 struct bpf_func_state *newframe = new->frame[j];
f4d7e40a 5181
679c782d 5182 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
cc2b14d5 5183 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
679c782d
EC
5184 frame->stack[i].spilled_ptr.parent =
5185 &newframe->stack[i].spilled_ptr;
5186 }
f4d7e40a 5187 }
f1bca824
AS
5188 return 0;
5189}
5190
c64b7983
JS
5191/* Return true if it's OK to have the same insn return a different type. */
5192static bool reg_type_mismatch_ok(enum bpf_reg_type type)
5193{
5194 switch (type) {
5195 case PTR_TO_CTX:
5196 case PTR_TO_SOCKET:
5197 case PTR_TO_SOCKET_OR_NULL:
5198 return false;
5199 default:
5200 return true;
5201 }
5202}
5203
5204/* If an instruction was previously used with particular pointer types, then we
5205 * need to be careful to avoid cases such as the below, where it may be ok
5206 * for one branch accessing the pointer, but not ok for the other branch:
5207 *
5208 * R1 = sock_ptr
5209 * goto X;
5210 * ...
5211 * R1 = some_other_valid_ptr;
5212 * goto X;
5213 * ...
5214 * R2 = *(u32 *)(R1 + 0);
5215 */
5216static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
5217{
5218 return src != prev && (!reg_type_mismatch_ok(src) ||
5219 !reg_type_mismatch_ok(prev));
5220}
5221
58e2af8b 5222static int do_check(struct bpf_verifier_env *env)
17a52670 5223{
638f5b90 5224 struct bpf_verifier_state *state;
17a52670 5225 struct bpf_insn *insns = env->prog->insnsi;
638f5b90 5226 struct bpf_reg_state *regs;
f4d7e40a 5227 int insn_cnt = env->prog->len, i;
17a52670
AS
5228 int insn_idx, prev_insn_idx = 0;
5229 int insn_processed = 0;
5230 bool do_print_state = false;
5231
638f5b90
AS
5232 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
5233 if (!state)
5234 return -ENOMEM;
f4d7e40a 5235 state->curframe = 0;
f4d7e40a
AS
5236 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
5237 if (!state->frame[0]) {
5238 kfree(state);
5239 return -ENOMEM;
5240 }
5241 env->cur_state = state;
5242 init_func_state(env, state->frame[0],
5243 BPF_MAIN_FUNC /* callsite */,
5244 0 /* frameno */,
5245 0 /* subprogno, zero == main subprog */);
17a52670
AS
5246 insn_idx = 0;
5247 for (;;) {
5248 struct bpf_insn *insn;
5249 u8 class;
5250 int err;
5251
5252 if (insn_idx >= insn_cnt) {
61bd5218 5253 verbose(env, "invalid insn idx %d insn_cnt %d\n",
17a52670
AS
5254 insn_idx, insn_cnt);
5255 return -EFAULT;
5256 }
5257
5258 insn = &insns[insn_idx];
5259 class = BPF_CLASS(insn->code);
5260
07016151 5261 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
61bd5218
JK
5262 verbose(env,
5263 "BPF program is too large. Processed %d insn\n",
17a52670
AS
5264 insn_processed);
5265 return -E2BIG;
5266 }
5267
f1bca824
AS
5268 err = is_state_visited(env, insn_idx);
5269 if (err < 0)
5270 return err;
5271 if (err == 1) {
5272 /* found equivalent state, can prune the search */
61bd5218 5273 if (env->log.level) {
f1bca824 5274 if (do_print_state)
61bd5218 5275 verbose(env, "\nfrom %d to %d: safe\n",
f1bca824
AS
5276 prev_insn_idx, insn_idx);
5277 else
61bd5218 5278 verbose(env, "%d: safe\n", insn_idx);
f1bca824
AS
5279 }
5280 goto process_bpf_exit;
5281 }
5282
3c2ce60b
DB
5283 if (need_resched())
5284 cond_resched();
5285
61bd5218
JK
5286 if (env->log.level > 1 || (env->log.level && do_print_state)) {
5287 if (env->log.level > 1)
5288 verbose(env, "%d:", insn_idx);
c5fc9692 5289 else
61bd5218 5290 verbose(env, "\nfrom %d to %d:",
c5fc9692 5291 prev_insn_idx, insn_idx);
f4d7e40a 5292 print_verifier_state(env, state->frame[state->curframe]);
17a52670
AS
5293 do_print_state = false;
5294 }
5295
61bd5218 5296 if (env->log.level) {
7105e828
DB
5297 const struct bpf_insn_cbs cbs = {
5298 .cb_print = verbose,
abe08840 5299 .private_data = env,
7105e828
DB
5300 };
5301
61bd5218 5302 verbose(env, "%d: ", insn_idx);
abe08840 5303 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
17a52670
AS
5304 }
5305
cae1927c
JK
5306 if (bpf_prog_is_dev_bound(env->prog->aux)) {
5307 err = bpf_prog_offload_verify_insn(env, insn_idx,
5308 prev_insn_idx);
5309 if (err)
5310 return err;
5311 }
13a27dfc 5312
638f5b90 5313 regs = cur_regs(env);
c131187d 5314 env->insn_aux_data[insn_idx].seen = true;
fd978bf7 5315
17a52670 5316 if (class == BPF_ALU || class == BPF_ALU64) {
1be7f75d 5317 err = check_alu_op(env, insn);
17a52670
AS
5318 if (err)
5319 return err;
5320
5321 } else if (class == BPF_LDX) {
3df126f3 5322 enum bpf_reg_type *prev_src_type, src_reg_type;
9bac3d6d
AS
5323
5324 /* check for reserved fields is already done */
5325
17a52670 5326 /* check src operand */
dc503a8a 5327 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
5328 if (err)
5329 return err;
5330
dc503a8a 5331 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
5332 if (err)
5333 return err;
5334
725f9dcd
AS
5335 src_reg_type = regs[insn->src_reg].type;
5336
17a52670
AS
5337 /* check that memory (src_reg + off) is readable,
5338 * the state of dst_reg will be updated by this func
5339 */
31fd8581 5340 err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
17a52670 5341 BPF_SIZE(insn->code), BPF_READ,
ca369602 5342 insn->dst_reg, false);
17a52670
AS
5343 if (err)
5344 return err;
5345
3df126f3
JK
5346 prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
5347
5348 if (*prev_src_type == NOT_INIT) {
9bac3d6d
AS
5349 /* saw a valid insn
5350 * dst_reg = *(u32 *)(src_reg + off)
3df126f3 5351 * save type to validate intersecting paths
9bac3d6d 5352 */
3df126f3 5353 *prev_src_type = src_reg_type;
9bac3d6d 5354
c64b7983 5355 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9bac3d6d
AS
5356 /* A buggy or malicious program is trying to use the same insn
5357 * dst_reg = *(u32*) (src_reg + off)
5358 * with different pointer types:
5359 * src_reg == ctx in one branch and
5360 * src_reg == stack|map in some other branch.
5361 * Reject it.
5362 */
61bd5218 5363 verbose(env, "same insn cannot be used with different pointers\n");
9bac3d6d
AS
5364 return -EINVAL;
5365 }
5366
17a52670 5367 } else if (class == BPF_STX) {
3df126f3 5368 enum bpf_reg_type *prev_dst_type, dst_reg_type;
d691f9e8 5369
17a52670 5370 if (BPF_MODE(insn->code) == BPF_XADD) {
31fd8581 5371 err = check_xadd(env, insn_idx, insn);
17a52670
AS
5372 if (err)
5373 return err;
5374 insn_idx++;
5375 continue;
5376 }
5377
17a52670 5378 /* check src1 operand */
dc503a8a 5379 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
5380 if (err)
5381 return err;
5382 /* check src2 operand */
dc503a8a 5383 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
5384 if (err)
5385 return err;
5386
d691f9e8
AS
5387 dst_reg_type = regs[insn->dst_reg].type;
5388
17a52670 5389 /* check that memory (dst_reg + off) is writeable */
31fd8581 5390 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
17a52670 5391 BPF_SIZE(insn->code), BPF_WRITE,
ca369602 5392 insn->src_reg, false);
17a52670
AS
5393 if (err)
5394 return err;
5395
3df126f3
JK
5396 prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
5397
5398 if (*prev_dst_type == NOT_INIT) {
5399 *prev_dst_type = dst_reg_type;
c64b7983 5400 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
61bd5218 5401 verbose(env, "same insn cannot be used with different pointers\n");
d691f9e8
AS
5402 return -EINVAL;
5403 }
5404
17a52670
AS
5405 } else if (class == BPF_ST) {
5406 if (BPF_MODE(insn->code) != BPF_MEM ||
5407 insn->src_reg != BPF_REG_0) {
61bd5218 5408 verbose(env, "BPF_ST uses reserved fields\n");
17a52670
AS
5409 return -EINVAL;
5410 }
5411 /* check src operand */
dc503a8a 5412 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
5413 if (err)
5414 return err;
5415
f37a8cb8 5416 if (is_ctx_reg(env, insn->dst_reg)) {
9d2be44a 5417 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
2a159c6f
DB
5418 insn->dst_reg,
5419 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
5420 return -EACCES;
5421 }
5422
17a52670 5423 /* check that memory (dst_reg + off) is writeable */
31fd8581 5424 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
17a52670 5425 BPF_SIZE(insn->code), BPF_WRITE,
ca369602 5426 -1, false);
17a52670
AS
5427 if (err)
5428 return err;
5429
5430 } else if (class == BPF_JMP) {
5431 u8 opcode = BPF_OP(insn->code);
5432
5433 if (opcode == BPF_CALL) {
5434 if (BPF_SRC(insn->code) != BPF_K ||
5435 insn->off != 0 ||
f4d7e40a
AS
5436 (insn->src_reg != BPF_REG_0 &&
5437 insn->src_reg != BPF_PSEUDO_CALL) ||
17a52670 5438 insn->dst_reg != BPF_REG_0) {
61bd5218 5439 verbose(env, "BPF_CALL uses reserved fields\n");
17a52670
AS
5440 return -EINVAL;
5441 }
5442
f4d7e40a
AS
5443 if (insn->src_reg == BPF_PSEUDO_CALL)
5444 err = check_func_call(env, insn, &insn_idx);
5445 else
5446 err = check_helper_call(env, insn->imm, insn_idx);
17a52670
AS
5447 if (err)
5448 return err;
5449
5450 } else if (opcode == BPF_JA) {
5451 if (BPF_SRC(insn->code) != BPF_K ||
5452 insn->imm != 0 ||
5453 insn->src_reg != BPF_REG_0 ||
5454 insn->dst_reg != BPF_REG_0) {
61bd5218 5455 verbose(env, "BPF_JA uses reserved fields\n");
17a52670
AS
5456 return -EINVAL;
5457 }
5458
5459 insn_idx += insn->off + 1;
5460 continue;
5461
5462 } else if (opcode == BPF_EXIT) {
5463 if (BPF_SRC(insn->code) != BPF_K ||
5464 insn->imm != 0 ||
5465 insn->src_reg != BPF_REG_0 ||
5466 insn->dst_reg != BPF_REG_0) {
61bd5218 5467 verbose(env, "BPF_EXIT uses reserved fields\n");
17a52670
AS
5468 return -EINVAL;
5469 }
5470
f4d7e40a
AS
5471 if (state->curframe) {
5472 /* exit from nested function */
5473 prev_insn_idx = insn_idx;
5474 err = prepare_func_exit(env, &insn_idx);
5475 if (err)
5476 return err;
5477 do_print_state = true;
5478 continue;
5479 }
5480
fd978bf7
JS
5481 err = check_reference_leak(env);
5482 if (err)
5483 return err;
5484
17a52670
AS
5485 /* eBPF calling convention is such that R0 is used
5486 * to return the value from eBPF program.
5487 * Make sure that it's readable at this time
5488 * of bpf_exit, which means that program wrote
5489 * something into it earlier
5490 */
dc503a8a 5491 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
17a52670
AS
5492 if (err)
5493 return err;
5494
1be7f75d 5495 if (is_pointer_value(env, BPF_REG_0)) {
61bd5218 5496 verbose(env, "R0 leaks addr as return value\n");
1be7f75d
AS
5497 return -EACCES;
5498 }
5499
390ee7e2
AS
5500 err = check_return_code(env);
5501 if (err)
5502 return err;
f1bca824 5503process_bpf_exit:
638f5b90
AS
5504 err = pop_stack(env, &prev_insn_idx, &insn_idx);
5505 if (err < 0) {
5506 if (err != -ENOENT)
5507 return err;
17a52670
AS
5508 break;
5509 } else {
5510 do_print_state = true;
5511 continue;
5512 }
5513 } else {
5514 err = check_cond_jmp_op(env, insn, &insn_idx);
5515 if (err)
5516 return err;
5517 }
5518 } else if (class == BPF_LD) {
5519 u8 mode = BPF_MODE(insn->code);
5520
5521 if (mode == BPF_ABS || mode == BPF_IND) {
ddd872bc
AS
5522 err = check_ld_abs(env, insn);
5523 if (err)
5524 return err;
5525
17a52670
AS
5526 } else if (mode == BPF_IMM) {
5527 err = check_ld_imm(env, insn);
5528 if (err)
5529 return err;
5530
5531 insn_idx++;
c131187d 5532 env->insn_aux_data[insn_idx].seen = true;
17a52670 5533 } else {
61bd5218 5534 verbose(env, "invalid BPF_LD mode\n");
17a52670
AS
5535 return -EINVAL;
5536 }
5537 } else {
61bd5218 5538 verbose(env, "unknown insn class %d\n", class);
17a52670
AS
5539 return -EINVAL;
5540 }
5541
5542 insn_idx++;
5543 }
5544
4bd95f4b
DB
5545 verbose(env, "processed %d insns (limit %d), stack depth ",
5546 insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
f910cefa 5547 for (i = 0; i < env->subprog_cnt; i++) {
9c8105bd 5548 u32 depth = env->subprog_info[i].stack_depth;
f4d7e40a
AS
5549
5550 verbose(env, "%d", depth);
f910cefa 5551 if (i + 1 < env->subprog_cnt)
f4d7e40a
AS
5552 verbose(env, "+");
5553 }
5554 verbose(env, "\n");
9c8105bd 5555 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
17a52670
AS
5556 return 0;
5557}
5558
56f668df
MKL
5559static int check_map_prealloc(struct bpf_map *map)
5560{
5561 return (map->map_type != BPF_MAP_TYPE_HASH &&
bcc6b1b7
MKL
5562 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
5563 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
56f668df
MKL
5564 !(map->map_flags & BPF_F_NO_PREALLOC);
5565}
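/* check_map_prealloc() above returns true when the map is either not a
 * hash-type map at all or is a hash map created without BPF_F_NO_PREALLOC,
 * i.e. one whose elements are preallocated.
 */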
5566
61bd5218
JK
5567static int check_map_prog_compatibility(struct bpf_verifier_env *env,
5568 struct bpf_map *map,
fdc15d38
AS
5569 struct bpf_prog *prog)
5570
5571{
56f668df
MKL
5572 /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
5573 * preallocated hash maps, since doing memory allocation
5574 * in overflow_handler can crash depending on where nmi got
5575 * triggered.
5576 */
5577 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
5578 if (!check_map_prealloc(map)) {
61bd5218 5579 verbose(env, "perf_event programs can only use preallocated hash map\n");
56f668df
MKL
5580 return -EINVAL;
5581 }
5582 if (map->inner_map_meta &&
5583 !check_map_prealloc(map->inner_map_meta)) {
61bd5218 5584 verbose(env, "perf_event programs can only use preallocated inner hash map\n");
56f668df
MKL
5585 return -EINVAL;
5586 }
fdc15d38 5587 }
a3884572
JK
5588
5589 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
09728266 5590 !bpf_offload_prog_map_match(prog, map)) {
a3884572
JK
5591 verbose(env, "offload device mismatch between prog and map\n");
5592 return -EINVAL;
5593 }
5594
fdc15d38
AS
5595 return 0;
5596}
5597
b741f163
RG
5598static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
5599{
5600 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
5601 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
5602}
5603
0246e64d
AS
5604/* look for pseudo eBPF instructions that access map FDs and
5605 * replace them with actual map pointers
5606 */
58e2af8b 5607static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
0246e64d
AS
5608{
5609 struct bpf_insn *insn = env->prog->insnsi;
5610 int insn_cnt = env->prog->len;
fdc15d38 5611 int i, j, err;
0246e64d 5612
f1f7714e 5613 err = bpf_prog_calc_tag(env->prog);
aafe6ae9
DB
5614 if (err)
5615 return err;
5616
0246e64d 5617 for (i = 0; i < insn_cnt; i++, insn++) {
9bac3d6d 5618 if (BPF_CLASS(insn->code) == BPF_LDX &&
d691f9e8 5619 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
61bd5218 5620 verbose(env, "BPF_LDX uses reserved fields\n");
9bac3d6d
AS
5621 return -EINVAL;
5622 }
5623
d691f9e8
AS
5624 if (BPF_CLASS(insn->code) == BPF_STX &&
5625 ((BPF_MODE(insn->code) != BPF_MEM &&
5626 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
61bd5218 5627 verbose(env, "BPF_STX uses reserved fields\n");
d691f9e8
AS
5628 return -EINVAL;
5629 }
5630
0246e64d
AS
5631 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
5632 struct bpf_map *map;
5633 struct fd f;
5634
5635 if (i == insn_cnt - 1 || insn[1].code != 0 ||
5636 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
5637 insn[1].off != 0) {
61bd5218 5638 verbose(env, "invalid bpf_ld_imm64 insn\n");
0246e64d
AS
5639 return -EINVAL;
5640 }
5641
5642 if (insn->src_reg == 0)
5643 /* valid generic load 64-bit imm */
5644 goto next_insn;
5645
5646 if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
61bd5218
JK
5647 verbose(env,
5648 "unrecognized bpf_ld_imm64 insn\n");
0246e64d
AS
5649 return -EINVAL;
5650 }
5651
5652 f = fdget(insn->imm);
c2101297 5653 map = __bpf_map_get(f);
0246e64d 5654 if (IS_ERR(map)) {
61bd5218 5655 verbose(env, "fd %d is not pointing to valid bpf_map\n",
0246e64d 5656 insn->imm);
0246e64d
AS
5657 return PTR_ERR(map);
5658 }
5659
61bd5218 5660 err = check_map_prog_compatibility(env, map, env->prog);
fdc15d38
AS
5661 if (err) {
5662 fdput(f);
5663 return err;
5664 }
5665
0246e64d
AS
5666 /* store map pointer inside BPF_LD_IMM64 instruction */
5667 insn[0].imm = (u32) (unsigned long) map;
5668 insn[1].imm = ((u64) (unsigned long) map) >> 32;
5669
5670 /* check whether we recorded this map already */
5671 for (j = 0; j < env->used_map_cnt; j++)
5672 if (env->used_maps[j] == map) {
5673 fdput(f);
5674 goto next_insn;
5675 }
5676
5677 if (env->used_map_cnt >= MAX_USED_MAPS) {
5678 fdput(f);
5679 return -E2BIG;
5680 }
5681
0246e64d
AS
5682 /* hold the map. If the program is rejected by verifier,
5683 * the map will be released by release_maps() or it
5684 * will be used by the valid program until it's unloaded
ab7f5bf0 5685 * and all maps are released in free_used_maps()
0246e64d 5686 */
92117d84
AS
5687 map = bpf_map_inc(map, false);
5688 if (IS_ERR(map)) {
5689 fdput(f);
5690 return PTR_ERR(map);
5691 }
5692 env->used_maps[env->used_map_cnt++] = map;
5693
b741f163 5694 if (bpf_map_is_cgroup_storage(map) &&
de9cbbaa 5695 bpf_cgroup_storage_assign(env->prog, map)) {
b741f163 5696 verbose(env, "only one cgroup storage of each type is allowed\n");
de9cbbaa
RG
5697 fdput(f);
5698 return -EBUSY;
5699 }
5700
0246e64d
AS
5701 fdput(f);
5702next_insn:
5703 insn++;
5704 i++;
5e581dad
DB
5705 continue;
5706 }
5707
5708 /* Basic sanity check before we invest more work here. */
5709 if (!bpf_opcode_in_insntable(insn->code)) {
5710 verbose(env, "unknown opcode %02x\n", insn->code);
5711 return -EINVAL;
0246e64d
AS
5712 }
5713 }
5714
5715 /* now all pseudo BPF_LD_IMM64 instructions load valid
5716 * 'struct bpf_map *' into a register instead of user map_fd.
5717 * These pointers will be used later by verifier to validate map access.
5718 */
5719 return 0;
5720}
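/* Illustrative before/after for the rewrite above: a loader emits
 *	BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_FD, map_fd)
 * and after this pass the same two-insn slot carries the kernel address of
 * the struct bpf_map, split across insn[0].imm (low 32 bits) and
 * insn[1].imm (high 32 bits), with a reference held in env->used_maps.
 */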
5721
5722/* drop refcnt of maps used by the rejected program */
58e2af8b 5723static void release_maps(struct bpf_verifier_env *env)
0246e64d 5724{
8bad74f9 5725 enum bpf_cgroup_storage_type stype;
0246e64d
AS
5726 int i;
5727
8bad74f9
RG
5728 for_each_cgroup_storage_type(stype) {
5729 if (!env->prog->aux->cgroup_storage[stype])
5730 continue;
de9cbbaa 5731 bpf_cgroup_storage_release(env->prog,
8bad74f9
RG
5732 env->prog->aux->cgroup_storage[stype]);
5733 }
de9cbbaa 5734
0246e64d
AS
5735 for (i = 0; i < env->used_map_cnt; i++)
5736 bpf_map_put(env->used_maps[i]);
5737}
5738
5739/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
58e2af8b 5740static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
0246e64d
AS
5741{
5742 struct bpf_insn *insn = env->prog->insnsi;
5743 int insn_cnt = env->prog->len;
5744 int i;
5745
5746 for (i = 0; i < insn_cnt; i++, insn++)
5747 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
5748 insn->src_reg = 0;
5749}
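/* Clearing src_reg drops the BPF_PSEUDO_MAP_FD marker, so the interpreter
 * and JITs see an ordinary 64-bit immediate load; the map pointer itself
 * was already patched in by replace_map_fd_with_map_ptr().
 */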
5750
8041902d
AS
5751/* single env->prog->insni[off] instruction was replaced with the range
5752 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
5753 * [0, off) and [off, end) to new locations, so the patched range stays zero
5754 */
5755static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
5756 u32 off, u32 cnt)
5757{
5758 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
c131187d 5759 int i;
8041902d
AS
5760
5761 if (cnt == 1)
5762 return 0;
fad953ce
KC
5763 new_data = vzalloc(array_size(prog_len,
5764 sizeof(struct bpf_insn_aux_data)));
8041902d
AS
5765 if (!new_data)
5766 return -ENOMEM;
5767 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
5768 memcpy(new_data + off + cnt - 1, old_data + off,
5769 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
c131187d
AS
5770 for (i = off; i < off + cnt - 1; i++)
5771 new_data[i].seen = true;
8041902d
AS
5772 env->insn_aux_data = new_data;
5773 vfree(old_data);
5774 return 0;
5775}
5776
cc8b0b92
AS
5777static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
5778{
5779 int i;
5780
5781 if (len == 1)
5782 return;
4cb3d99c
JW
5783 /* NOTE: fake 'exit' subprog should be updated as well. */
5784 for (i = 0; i <= env->subprog_cnt; i++) {
afd59424 5785 if (env->subprog_info[i].start <= off)
cc8b0b92 5786 continue;
9c8105bd 5787 env->subprog_info[i].start += len - 1;
cc8b0b92
AS
5788 }
5789}
5790
8041902d
AS
5791static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
5792 const struct bpf_insn *patch, u32 len)
5793{
5794 struct bpf_prog *new_prog;
5795
5796 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
5797 if (!new_prog)
5798 return NULL;
5799 if (adjust_insn_aux_data(env, new_prog->len, off, len))
5800 return NULL;
cc8b0b92 5801 adjust_subprog_starts(env, off, len);
8041902d
AS
5802 return new_prog;
5803}
5804
2a5418a1
DB
5805/* The verifier does more data flow analysis than llvm and will not
5806 * explore branches that are dead at run time. Malicious programs can
5807 * have dead code too. Therefore replace all dead at-run-time code
5808 * with 'ja -1'.
5809 *
5810 * Plain nops would not be optimal, e.g. if they sat at the end of the
5811 * program and through another bug we managed to jump there, we would
5812 * execute beyond program memory. Returning exception
5813 * code also wouldn't work since we can have subprogs where the dead
5814 * code could be located.
c131187d
AS
5815 */
5816static void sanitize_dead_code(struct bpf_verifier_env *env)
5817{
5818 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
2a5418a1 5819 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
c131187d
AS
5820 struct bpf_insn *insn = env->prog->insnsi;
5821 const int insn_cnt = env->prog->len;
5822 int i;
5823
5824 for (i = 0; i < insn_cnt; i++) {
5825 if (aux_data[i].seen)
5826 continue;
2a5418a1 5827 memcpy(insn + i, &trap, sizeof(trap));
c131187d
AS
5828 }
5829}
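/* Example: only insns marked 'seen' by do_check() are reachable, so for
 *	0: r0 = 0
 *	1: exit
 *	2: r0 = 1	(never visited)
 * insn 2 is overwritten with the 'ja -1' trap built above.
 */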
5830
c64b7983
JS
5831/* convert load instructions that access fields of a context type into a
5832 * sequence of instructions that access fields of the underlying structure:
5833 * struct __sk_buff -> struct sk_buff
5834 * struct bpf_sock_ops -> struct sock
9bac3d6d 5835 */
58e2af8b 5836static int convert_ctx_accesses(struct bpf_verifier_env *env)
9bac3d6d 5837{
00176a34 5838 const struct bpf_verifier_ops *ops = env->ops;
f96da094 5839 int i, cnt, size, ctx_field_size, delta = 0;
3df126f3 5840 const int insn_cnt = env->prog->len;
36bbef52 5841 struct bpf_insn insn_buf[16], *insn;
46f53a65 5842 u32 target_size, size_default, off;
9bac3d6d 5843 struct bpf_prog *new_prog;
d691f9e8 5844 enum bpf_access_type type;
f96da094 5845 bool is_narrower_load;
9bac3d6d 5846
b09928b9
DB
5847 if (ops->gen_prologue || env->seen_direct_write) {
5848 if (!ops->gen_prologue) {
5849 verbose(env, "bpf verifier is misconfigured\n");
5850 return -EINVAL;
5851 }
36bbef52
DB
5852 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
5853 env->prog);
5854 if (cnt >= ARRAY_SIZE(insn_buf)) {
61bd5218 5855 verbose(env, "bpf verifier is misconfigured\n");
36bbef52
DB
5856 return -EINVAL;
5857 } else if (cnt) {
8041902d 5858 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
36bbef52
DB
5859 if (!new_prog)
5860 return -ENOMEM;
8041902d 5861
36bbef52 5862 env->prog = new_prog;
3df126f3 5863 delta += cnt - 1;
36bbef52
DB
5864 }
5865 }
5866
c64b7983 5867 if (bpf_prog_is_dev_bound(env->prog->aux))
9bac3d6d
AS
5868 return 0;
5869
3df126f3 5870 insn = env->prog->insnsi + delta;
36bbef52 5871
9bac3d6d 5872 for (i = 0; i < insn_cnt; i++, insn++) {
c64b7983
JS
5873 bpf_convert_ctx_access_t convert_ctx_access;
5874
62c7989b
DB
5875 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
5876 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
5877 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
ea2e7ce5 5878 insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
d691f9e8 5879 type = BPF_READ;
62c7989b
DB
5880 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
5881 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
5882 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
ea2e7ce5 5883 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
d691f9e8
AS
5884 type = BPF_WRITE;
5885 else
9bac3d6d
AS
5886 continue;
5887
af86ca4e
AS
5888 if (type == BPF_WRITE &&
5889 env->insn_aux_data[i + delta].sanitize_stack_off) {
5890 struct bpf_insn patch[] = {
5891 /* Sanitize suspicious stack slot with zero.
5892 * There are no memory dependencies for this store,
5893 * since it's only using frame pointer and immediate
5894 * constant of zero
5895 */
5896 BPF_ST_MEM(BPF_DW, BPF_REG_FP,
5897 env->insn_aux_data[i + delta].sanitize_stack_off,
5898 0),
5899 /* the original STX instruction will immediately
5900 * overwrite the same stack slot with appropriate value
5901 */
5902 *insn,
5903 };
5904
5905 cnt = ARRAY_SIZE(patch);
5906 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
5907 if (!new_prog)
5908 return -ENOMEM;
5909
5910 delta += cnt - 1;
5911 env->prog = new_prog;
5912 insn = new_prog->insnsi + i + delta;
5913 continue;
5914 }
5915
c64b7983
JS
5916 switch (env->insn_aux_data[i + delta].ptr_type) {
5917 case PTR_TO_CTX:
5918 if (!ops->convert_ctx_access)
5919 continue;
5920 convert_ctx_access = ops->convert_ctx_access;
5921 break;
5922 case PTR_TO_SOCKET:
5923 convert_ctx_access = bpf_sock_convert_ctx_access;
5924 break;
5925 default:
9bac3d6d 5926 continue;
c64b7983 5927 }
9bac3d6d 5928
31fd8581 5929 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
f96da094 5930 size = BPF_LDST_BYTES(insn);
31fd8581
YS
5931
5932 /* If the read access is a narrower load of the field,
5933 * convert to a 4/8-byte load, to minimize program type specific
5934 * convert_ctx_access changes. If conversion is successful,
5935 * we will apply proper mask to the result.
5936 */
f96da094 5937 is_narrower_load = size < ctx_field_size;
46f53a65
AI
5938 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
5939 off = insn->off;
31fd8581 5940 if (is_narrower_load) {
f96da094
DB
5941 u8 size_code;
5942
5943 if (type == BPF_WRITE) {
61bd5218 5944 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
f96da094
DB
5945 return -EINVAL;
5946 }
31fd8581 5947
f96da094 5948 size_code = BPF_H;
31fd8581
YS
5949 if (ctx_field_size == 4)
5950 size_code = BPF_W;
5951 else if (ctx_field_size == 8)
5952 size_code = BPF_DW;
f96da094 5953
bc23105c 5954 insn->off = off & ~(size_default - 1);
31fd8581
YS
5955 insn->code = BPF_LDX | BPF_MEM | size_code;
5956 }
f96da094
DB
5957
5958 target_size = 0;
c64b7983
JS
5959 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
5960 &target_size);
f96da094
DB
5961 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
5962 (ctx_field_size && !target_size)) {
61bd5218 5963 verbose(env, "bpf verifier is misconfigured\n");
9bac3d6d
AS
5964 return -EINVAL;
5965 }
f96da094
DB
5966
5967 if (is_narrower_load && size < target_size) {
46f53a65
AI
5968 u8 shift = (off & (size_default - 1)) * 8;
5969
5970 if (ctx_field_size <= 4) {
5971 if (shift)
5972 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
5973 insn->dst_reg,
5974 shift);
31fd8581 5975 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
f96da094 5976 (1 << size * 8) - 1);
46f53a65
AI
5977 } else {
5978 if (shift)
5979 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
5980 insn->dst_reg,
5981 shift);
31fd8581 5982 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
f96da094 5983 (1 << size * 8) - 1);
46f53a65 5984 }
31fd8581 5985 }
9bac3d6d 5986
8041902d 5987 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9bac3d6d
AS
5988 if (!new_prog)
5989 return -ENOMEM;
5990
3df126f3 5991 delta += cnt - 1;
9bac3d6d
AS
5992
5993 /* keep walking new program and skip insns we just inserted */
5994 env->prog = new_prog;
3df126f3 5995 insn = new_prog->insnsi + i + delta;
9bac3d6d
AS
5996 }
5997
5998 return 0;
5999}
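/* Example of the narrow-load handling above: a 1-byte read of a 4-byte
 * context field is first widened to a BPF_W load of the aligned field,
 * the program-type specific convert_ctx_access() rewrites that load
 * against the underlying kernel structure, and a right shift (skipped when
 * the byte sits at bit offset 0) plus an AND with (1 << 8) - 1 then
 * extract the byte the program originally asked for.
 */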
6000
1c2a088a
AS
6001static int jit_subprogs(struct bpf_verifier_env *env)
6002{
6003 struct bpf_prog *prog = env->prog, **func, *tmp;
6004 int i, j, subprog_start, subprog_end = 0, len, subprog;
7105e828 6005 struct bpf_insn *insn;
1c2a088a
AS
6006 void *old_bpf_func;
6007 int err = -ENOMEM;
6008
f910cefa 6009 if (env->subprog_cnt <= 1)
1c2a088a
AS
6010 return 0;
6011
7105e828 6012 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1c2a088a
AS
6013 if (insn->code != (BPF_JMP | BPF_CALL) ||
6014 insn->src_reg != BPF_PSEUDO_CALL)
6015 continue;
c7a89784
DB
6016 /* Upon error here we cannot fall back to interpreter but
6017 * need a hard reject of the program. Thus -EFAULT is
6018 * propagated in any case.
6019 */
1c2a088a
AS
6020 subprog = find_subprog(env, i + insn->imm + 1);
6021 if (subprog < 0) {
6022 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6023 i + insn->imm + 1);
6024 return -EFAULT;
6025 }
6026 /* temporarily remember subprog id inside insn instead of
6027 * aux_data, since next loop will split up all insns into funcs
6028 */
f910cefa 6029 insn->off = subprog;
1c2a088a
AS
6030 /* remember original imm in case JIT fails and fallback
6031 * to interpreter will be needed
6032 */
6033 env->insn_aux_data[i].call_imm = insn->imm;
6034 /* point imm to __bpf_call_base+1 from JITs point of view */
6035 insn->imm = 1;
6036 }
6037
6396bb22 6038 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
1c2a088a 6039 if (!func)
c7a89784 6040 goto out_undo_insn;
1c2a088a 6041
f910cefa 6042 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a 6043 subprog_start = subprog_end;
4cb3d99c 6044 subprog_end = env->subprog_info[i + 1].start;
1c2a088a
AS
6045
6046 len = subprog_end - subprog_start;
6047 func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
6048 if (!func[i])
6049 goto out_free;
6050 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
6051 len * sizeof(struct bpf_insn));
4f74d809 6052 func[i]->type = prog->type;
1c2a088a 6053 func[i]->len = len;
4f74d809
DB
6054 if (bpf_prog_calc_tag(func[i]))
6055 goto out_free;
1c2a088a 6056 func[i]->is_func = 1;
ba64e7d8
YS
6057 func[i]->aux->func_idx = i;
6058 /* the btf and func_info will be freed only at prog->aux */
6059 func[i]->aux->btf = prog->aux->btf;
6060 func[i]->aux->func_info = prog->aux->func_info;
6061
1c2a088a
AS
6062 /* Use bpf_prog_F_tag to indicate functions in stack traces.
6063 * Long term would need debug info to populate names
6064 */
6065 func[i]->aux->name[0] = 'F';
9c8105bd 6066 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1c2a088a
AS
6067 func[i]->jit_requested = 1;
6068 func[i] = bpf_int_jit_compile(func[i]);
6069 if (!func[i]->jited) {
6070 err = -ENOTSUPP;
6071 goto out_free;
6072 }
6073 cond_resched();
6074 }
6075 /* at this point all bpf functions were successfully JITed
6076 * now populate all bpf_calls with correct addresses and
6077 * run last pass of JIT
6078 */
f910cefa 6079 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
6080 insn = func[i]->insnsi;
6081 for (j = 0; j < func[i]->len; j++, insn++) {
6082 if (insn->code != (BPF_JMP | BPF_CALL) ||
6083 insn->src_reg != BPF_PSEUDO_CALL)
6084 continue;
6085 subprog = insn->off;
1c2a088a
AS
6086 insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
6087 func[subprog]->bpf_func -
6088 __bpf_call_base;
6089 }
2162fed4
SD
6090
6091 /* we use the aux data to keep a list of the start addresses
6092 * of the JITed images for each function in the program
6093 *
6094 * for some architectures, such as powerpc64, the imm field
6095 * might not be large enough to hold the offset of the start
6096 * address of the callee's JITed image from __bpf_call_base
6097 *
6098 * in such cases, we can lookup the start address of a callee
6099 * by using its subprog id, available from the off field of
6100 * the call instruction, as an index for this list
6101 */
6102 func[i]->aux->func = func;
6103 func[i]->aux->func_cnt = env->subprog_cnt;
1c2a088a 6104 }
f910cefa 6105 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
6106 old_bpf_func = func[i]->bpf_func;
6107 tmp = bpf_int_jit_compile(func[i]);
6108 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
6109 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
c7a89784 6110 err = -ENOTSUPP;
1c2a088a
AS
6111 goto out_free;
6112 }
6113 cond_resched();
6114 }
6115
6116 /* finally lock prog and jit images for all functions and
6117 * populate kallsyms
6118 */
f910cefa 6119 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
6120 bpf_prog_lock_ro(func[i]);
6121 bpf_prog_kallsyms_add(func[i]);
6122 }
7105e828
DB
6123
6124 /* Last step: make now unused interpreter insns from main
6125 * prog consistent for later dump requests, so they can
6126 * later look the same as if they were interpreted only.
6127 */
6128 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7105e828
DB
6129 if (insn->code != (BPF_JMP | BPF_CALL) ||
6130 insn->src_reg != BPF_PSEUDO_CALL)
6131 continue;
6132 insn->off = env->insn_aux_data[i].call_imm;
6133 subprog = find_subprog(env, i + insn->off + 1);
dbecd738 6134 insn->imm = subprog;
7105e828
DB
6135 }
6136
1c2a088a
AS
6137 prog->jited = 1;
6138 prog->bpf_func = func[0]->bpf_func;
6139 prog->aux->func = func;
f910cefa 6140 prog->aux->func_cnt = env->subprog_cnt;
1c2a088a
AS
6141 return 0;
6142out_free:
f910cefa 6143 for (i = 0; i < env->subprog_cnt; i++)
1c2a088a
AS
6144 if (func[i])
6145 bpf_jit_free(func[i]);
6146 kfree(func);
c7a89784 6147out_undo_insn:
1c2a088a
AS
6148 /* cleanup main prog to be interpreted */
6149 prog->jit_requested = 0;
6150 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
6151 if (insn->code != (BPF_JMP | BPF_CALL) ||
6152 insn->src_reg != BPF_PSEUDO_CALL)
6153 continue;
6154 insn->off = 0;
6155 insn->imm = env->insn_aux_data[i].call_imm;
6156 }
6157 return err;
6158}
6159
1ea47e01
AS
6160static int fixup_call_args(struct bpf_verifier_env *env)
6161{
19d28fbd 6162#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
6163 struct bpf_prog *prog = env->prog;
6164 struct bpf_insn *insn = prog->insnsi;
6165 int i, depth;
19d28fbd 6166#endif
e4052d06 6167 int err = 0;
1ea47e01 6168
e4052d06
QM
6169 if (env->prog->jit_requested &&
6170 !bpf_prog_is_dev_bound(env->prog->aux)) {
19d28fbd
DM
6171 err = jit_subprogs(env);
6172 if (err == 0)
1c2a088a 6173 return 0;
c7a89784
DB
6174 if (err == -EFAULT)
6175 return err;
19d28fbd
DM
6176 }
6177#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
6178 for (i = 0; i < prog->len; i++, insn++) {
6179 if (insn->code != (BPF_JMP | BPF_CALL) ||
6180 insn->src_reg != BPF_PSEUDO_CALL)
6181 continue;
6182 depth = get_callee_stack_depth(env, insn, i);
6183 if (depth < 0)
6184 return depth;
6185 bpf_patch_call_args(insn, depth);
6186 }
19d28fbd
DM
6187 err = 0;
6188#endif
6189 return err;
1ea47e01
AS
6190}
6191
79741b3b 6192/* fixup insn->imm field of bpf_call instructions
81ed18ab 6193 * and inline eligible helpers as explicit sequence of BPF instructions
e245c5c6
AS
6194 *
6195 * this function is called after eBPF program passed verification
6196 */
79741b3b 6197static int fixup_bpf_calls(struct bpf_verifier_env *env)
e245c5c6 6198{
79741b3b
AS
6199 struct bpf_prog *prog = env->prog;
6200 struct bpf_insn *insn = prog->insnsi;
e245c5c6 6201 const struct bpf_func_proto *fn;
79741b3b 6202 const int insn_cnt = prog->len;
09772d92 6203 const struct bpf_map_ops *ops;
c93552c4 6204 struct bpf_insn_aux_data *aux;
81ed18ab
AS
6205 struct bpf_insn insn_buf[16];
6206 struct bpf_prog *new_prog;
6207 struct bpf_map *map_ptr;
6208 int i, cnt, delta = 0;
e245c5c6 6209
79741b3b 6210 for (i = 0; i < insn_cnt; i++, insn++) {
f6b1b3bf
DB
6211 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
6212 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
6213 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
68fda450 6214 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
f6b1b3bf
DB
6215 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
6216 struct bpf_insn mask_and_div[] = {
6217 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
6218 /* Rx div 0 -> 0 */
6219 BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
6220 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
6221 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
6222 *insn,
6223 };
6224 struct bpf_insn mask_and_mod[] = {
6225 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
6226 /* Rx mod 0 -> Rx */
6227 BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
6228 *insn,
6229 };
6230 struct bpf_insn *patchlet;
6231
6232 if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
6233 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
6234 patchlet = mask_and_div + (is64 ? 1 : 0);
6235 cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
6236 } else {
6237 patchlet = mask_and_mod + (is64 ? 1 : 0);
6238 cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
6239 }
6240
6241 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
68fda450
AS
6242 if (!new_prog)
6243 return -ENOMEM;
6244
6245 delta += cnt - 1;
6246 env->prog = prog = new_prog;
6247 insn = new_prog->insnsi + i + delta;
6248 continue;
6249 }
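/* The patchlet above implements the eBPF semantics for a zero divisor
 * without trapping: BPF_DIV by zero sets the destination to 0, BPF_MOD by
 * zero leaves the destination unchanged, and for the 32-bit forms the
 * leading BPF_MOV32_REG truncates the divisor to 32 bits first (that insn
 * is skipped for the 64-bit forms).
 */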
6250
e0cea7ce
DB
6251 if (BPF_CLASS(insn->code) == BPF_LD &&
6252 (BPF_MODE(insn->code) == BPF_ABS ||
6253 BPF_MODE(insn->code) == BPF_IND)) {
6254 cnt = env->ops->gen_ld_abs(insn, insn_buf);
6255 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
6256 verbose(env, "bpf verifier is misconfigured\n");
6257 return -EINVAL;
6258 }
6259
6260 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
6261 if (!new_prog)
6262 return -ENOMEM;
6263
6264 delta += cnt - 1;
6265 env->prog = prog = new_prog;
6266 insn = new_prog->insnsi + i + delta;
6267 continue;
6268 }
6269
79741b3b
AS
6270 if (insn->code != (BPF_JMP | BPF_CALL))
6271 continue;
cc8b0b92
AS
6272 if (insn->src_reg == BPF_PSEUDO_CALL)
6273 continue;
e245c5c6 6274
79741b3b
AS
6275 if (insn->imm == BPF_FUNC_get_route_realm)
6276 prog->dst_needed = 1;
6277 if (insn->imm == BPF_FUNC_get_prandom_u32)
6278 bpf_user_rnd_init_once();
9802d865
JB
6279 if (insn->imm == BPF_FUNC_override_return)
6280 prog->kprobe_override = 1;
79741b3b 6281 if (insn->imm == BPF_FUNC_tail_call) {
7b9f6da1
DM
6282 /* If we tail call into other programs, we
6283 * cannot make any assumptions since they can
6284 * be replaced dynamically during runtime in
6285 * the program array.
6286 */
6287 prog->cb_access = 1;
80a58d02 6288 env->prog->aux->stack_depth = MAX_BPF_STACK;
e647815a 6289 env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
7b9f6da1 6290
79741b3b
AS
6291 /* mark bpf_tail_call as different opcode to avoid
6292 * conditional branch in the interpreter for every normal
6293 * call and to prevent accidental JITing by JIT compiler
6294 * that doesn't support bpf_tail_call yet
e245c5c6 6295 */
79741b3b 6296 insn->imm = 0;
71189fa9 6297 insn->code = BPF_JMP | BPF_TAIL_CALL;
b2157399 6298
c93552c4
DB
6299 aux = &env->insn_aux_data[i + delta];
6300 if (!bpf_map_ptr_unpriv(aux))
6301 continue;
6302
b2157399
AS
6303 /* instead of changing every JIT dealing with tail_call
6304 * emit two extra insns:
6305 * if (index >= max_entries) goto out;
6306 * index &= array->index_mask;
6307 * to avoid out-of-bounds cpu speculation
6308 */
c93552c4 6309 if (bpf_map_ptr_poisoned(aux)) {
40950343 6310 verbose(env, "tail_call abusing map_ptr\n");
b2157399
AS
6311 return -EINVAL;
6312 }
c93552c4
DB
6313
6314 map_ptr = BPF_MAP_PTR(aux->map_state);
b2157399
AS
6315 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
6316 map_ptr->max_entries, 2);
6317 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
6318 container_of(map_ptr,
6319 struct bpf_array,
6320 map)->index_mask);
6321 insn_buf[2] = *insn;
6322 cnt = 3;
6323 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
6324 if (!new_prog)
6325 return -ENOMEM;
6326
6327 delta += cnt - 1;
6328 env->prog = prog = new_prog;
6329 insn = new_prog->insnsi + i + delta;
79741b3b
AS
6330 continue;
6331 }
e245c5c6 6332
89c63074 6333 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
09772d92
DB
6334 * and other inlining handlers are currently limited to 64 bit
6335 * only.
89c63074 6336 */
60b58afc 6337 if (prog->jit_requested && BITS_PER_LONG == 64 &&
09772d92
DB
6338 (insn->imm == BPF_FUNC_map_lookup_elem ||
6339 insn->imm == BPF_FUNC_map_update_elem ||
84430d42
DB
6340 insn->imm == BPF_FUNC_map_delete_elem ||
6341 insn->imm == BPF_FUNC_map_push_elem ||
6342 insn->imm == BPF_FUNC_map_pop_elem ||
6343 insn->imm == BPF_FUNC_map_peek_elem)) {
c93552c4
DB
6344 aux = &env->insn_aux_data[i + delta];
6345 if (bpf_map_ptr_poisoned(aux))
6346 goto patch_call_imm;
6347
6348 map_ptr = BPF_MAP_PTR(aux->map_state);
09772d92
DB
6349 ops = map_ptr->ops;
6350 if (insn->imm == BPF_FUNC_map_lookup_elem &&
6351 ops->map_gen_lookup) {
6352 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
6353 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
6354 verbose(env, "bpf verifier is misconfigured\n");
6355 return -EINVAL;
6356 }
81ed18ab 6357
09772d92
DB
6358 new_prog = bpf_patch_insn_data(env, i + delta,
6359 insn_buf, cnt);
6360 if (!new_prog)
6361 return -ENOMEM;
81ed18ab 6362
09772d92
DB
6363 delta += cnt - 1;
6364 env->prog = prog = new_prog;
6365 insn = new_prog->insnsi + i + delta;
6366 continue;
6367 }
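 /* For illustration, a rough sketch (not verbatim from any map
 * implementation) of what an array map's map_gen_lookup() can emit
 * in place of the helper call, so the JIT sees straight-line code
 * instead of a function call:
 *
 *	r0 = *(u32 *)(r2 + 0)		// key points at a u32 index
 *	if r0 >= max_entries goto miss
 *	r0 *= round_up(value_size, 8)	// element stride
 *	r0 += address of array->value[0]
 *	goto done
 * miss:	r0 = 0			// NULL, like the helper
 * done:
 */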
81ed18ab 6368
09772d92
DB
6369 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
6370 (void *(*)(struct bpf_map *map, void *key))NULL));
6371 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
6372 (int (*)(struct bpf_map *map, void *key))NULL));
6373 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
6374 (int (*)(struct bpf_map *map, void *key, void *value,
6375 u64 flags))NULL));
84430d42
DB
6376 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
6377 (int (*)(struct bpf_map *map, void *value,
6378 u64 flags))NULL));
6379 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
6380 (int (*)(struct bpf_map *map, void *value))NULL));
6381 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
6382 (int (*)(struct bpf_map *map, void *value))NULL));
6383
09772d92
DB
6384 switch (insn->imm) {
6385 case BPF_FUNC_map_lookup_elem:
6386 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
6387 __bpf_call_base;
6388 continue;
6389 case BPF_FUNC_map_update_elem:
6390 insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
6391 __bpf_call_base;
6392 continue;
6393 case BPF_FUNC_map_delete_elem:
6394 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
6395 __bpf_call_base;
6396 continue;
84430d42
DB
6397 case BPF_FUNC_map_push_elem:
6398 insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
6399 __bpf_call_base;
6400 continue;
6401 case BPF_FUNC_map_pop_elem:
6402 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
6403 __bpf_call_base;
6404 continue;
6405 case BPF_FUNC_map_peek_elem:
6406 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
6407 __bpf_call_base;
6408 continue;
09772d92 6409 }
81ed18ab 6410
09772d92 6411 goto patch_call_imm;
81ed18ab
AS
6412 }
6413
6414patch_call_imm:
5e43f899 6415 fn = env->ops->get_func_proto(insn->imm, env->prog);
79741b3b
AS
6416 /* all functions that have a prototype and that the verifier
6417 * allowed programs to call must be real in-kernel functions
6418 */
6419 if (!fn->func) {
61bd5218
JK
6420 verbose(env,
6421 "kernel subsystem misconfigured func %s#%d\n",
79741b3b
AS
6422 func_id_name(insn->imm), insn->imm);
6423 return -EFAULT;
e245c5c6 6424 }
79741b3b 6425 insn->imm = fn->func - __bpf_call_base;
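 /* The 32-bit imm cannot hold the helper's absolute address, so it
 * is stored as a signed delta from __bpf_call_base; the interpreter
 * and the JITs recover the target as __bpf_call_base + insn->imm.
 * The direct map-op calls patched in above via BPF_CAST_CALL() use
 * the same relative encoding.
 */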
e245c5c6 6426 }
e245c5c6 6427
79741b3b
AS
6428 return 0;
6429}
e245c5c6 6430
58e2af8b 6431static void free_states(struct bpf_verifier_env *env)
f1bca824 6432{
58e2af8b 6433 struct bpf_verifier_state_list *sl, *sln;
f1bca824
AS
6434 int i;
6435
6436 if (!env->explored_states)
6437 return;
6438
6439 for (i = 0; i < env->prog->len; i++) {
6440 sl = env->explored_states[i];
6441
6442 if (sl)
6443 while (sl != STATE_LIST_MARK) {
6444 sln = sl->next;
1969db47 6445 free_verifier_state(&sl->state, false);
f1bca824
AS
6446 kfree(sl);
6447 sl = sln;
6448 }
6449 }
6450
6451 kfree(env->explored_states);
6452}
6453
838e9690
YS
6454int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
6455 union bpf_attr __user *uattr)
51580e79 6456{
58e2af8b 6457 struct bpf_verifier_env *env;
b9193c1b 6458 struct bpf_verifier_log *log;
51580e79
AS
6459 int ret = -EINVAL;
6460
eba0c929
AB
6461 /* no program types are registered, so no program can be valid */
6462 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
6463 return -EINVAL;
6464
58e2af8b 6465 /* 'struct bpf_verifier_env' can be global, but since it's not small,
cbd35700
AS
6466 * allocate/free it every time bpf_check() is called
6467 */
58e2af8b 6468 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
cbd35700
AS
6469 if (!env)
6470 return -ENOMEM;
61bd5218 6471 log = &env->log;
cbd35700 6472
fad953ce
KC
6473 env->insn_aux_data =
6474 vzalloc(array_size(sizeof(struct bpf_insn_aux_data),
6475 (*prog)->len));
3df126f3
JK
6476 ret = -ENOMEM;
6477 if (!env->insn_aux_data)
6478 goto err_free_env;
9bac3d6d 6479 env->prog = *prog;
00176a34 6480 env->ops = bpf_verifier_ops[env->prog->type];
0246e64d 6481
cbd35700
AS
6482 /* grab the mutex to protect a few globals used by the verifier */
6483 mutex_lock(&bpf_verifier_lock);
6484
6485 if (attr->log_level || attr->log_buf || attr->log_size) {
6486 /* user requested verbose verifier output
6487 * and supplied a buffer to store the verification trace
6488 */
e7bf8249
JK
6489 log->level = attr->log_level;
6490 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
6491 log->len_total = attr->log_size;
cbd35700
AS
6492
6493 ret = -EINVAL;
e7bf8249
JK
6494 /* log attributes have to be sane */
6495 if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
6496 !log->level || !log->ubuf)
3df126f3 6497 goto err_unlock;
cbd35700 6498 }
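 /* Userspace-side sketch, for illustration only (ptr_to_u64() is a
 * hypothetical helper that casts a pointer to __u64): a loader that
 * wants this trace would fill the prog-load attributes roughly as
 *
 *	char buf[1 << 20];
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = ptr_to_u64(insns),
 *		.insn_cnt  = insn_cnt,
 *		.license   = ptr_to_u64("GPL"),
 *		.log_level = 1,
 *		.log_buf   = ptr_to_u64(buf),
 *		.log_size  = sizeof(buf),
 *	};
 *	syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */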
1ad2f583
DB
6499
6500 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
6501 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
e07b98d9 6502 env->strict_alignment = true;
e9ee9efc
DM
6503 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
6504 env->strict_alignment = false;
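 /* Net effect of the three assignments above: alignment checking is
 * strict if BPF_F_STRICT_ALIGNMENT was requested or the architecture
 * lacks efficient unaligned access, and BPF_F_ANY_ALIGNMENT, being
 * evaluated last, overrides both and switches strict checking off.
 */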
cbd35700 6505
f4e3ec0d
JK
6506 ret = replace_map_fd_with_map_ptr(env);
6507 if (ret < 0)
6508 goto skip_full_check;
6509
cae1927c 6510 if (bpf_prog_is_dev_bound(env->prog->aux)) {
a40a2632 6511 ret = bpf_prog_offload_verifier_prep(env->prog);
ab3f0063 6512 if (ret)
f4e3ec0d 6513 goto skip_full_check;
ab3f0063
JK
6514 }
6515
9bac3d6d 6516 env->explored_states = kcalloc(env->prog->len,
58e2af8b 6517 sizeof(struct bpf_verifier_state_list *),
f1bca824
AS
6518 GFP_USER);
6519 ret = -ENOMEM;
6520 if (!env->explored_states)
6521 goto skip_full_check;
6522
cc8b0b92
AS
6523 env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
6524
475fb78f
AS
6525 ret = check_cfg(env);
6526 if (ret < 0)
6527 goto skip_full_check;
6528
838e9690
YS
6529 ret = check_btf_func(env->prog, env, attr, uattr);
6530 if (ret < 0)
6531 goto skip_full_check;
6532
17a52670 6533 ret = do_check(env);
8c01c4f8
CG
6534 if (env->cur_state) {
6535 free_verifier_state(env->cur_state, true);
6536 env->cur_state = NULL;
6537 }
cbd35700 6538
c941ce9c
QM
6539 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
6540 ret = bpf_prog_offload_finalize(env);
6541
0246e64d 6542skip_full_check:
638f5b90 6543 while (!pop_stack(env, NULL, NULL));
f1bca824 6544 free_states(env);
0246e64d 6545
c131187d
AS
6546 if (ret == 0)
6547 sanitize_dead_code(env);
6548
70a87ffe
AS
6549 if (ret == 0)
6550 ret = check_max_stack_depth(env);
6551
9bac3d6d
AS
6552 if (ret == 0)
6553 /* program is valid, convert *(u32*)(ctx + off) accesses */
6554 ret = convert_ctx_accesses(env);
6555
e245c5c6 6556 if (ret == 0)
79741b3b 6557 ret = fixup_bpf_calls(env);
e245c5c6 6558
1ea47e01
AS
6559 if (ret == 0)
6560 ret = fixup_call_args(env);
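 /* Summary of the post-verification rewrites above, each gated on all
 * previous steps having returned 0: dead instructions are neutralized
 * (sanitize_dead_code), the combined stack usage of bpf-to-bpf call
 * chains is validated (check_max_stack_depth), ctx accesses are
 * converted to the kernel-internal layout (convert_ctx_accesses),
 * helper calls are fixed up or inlined (fixup_bpf_calls), and
 * bpf-to-bpf call targets are resolved (fixup_call_args).
 */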
6561
a2a7d570 6562 if (log->level && bpf_verifier_log_full(log))
cbd35700 6563 ret = -ENOSPC;
a2a7d570 6564 if (log->level && !log->ubuf) {
cbd35700 6565 ret = -EFAULT;
a2a7d570 6566 goto err_release_maps;
cbd35700
AS
6567 }
6568
0246e64d
AS
6569 if (ret == 0 && env->used_map_cnt) {
6570 /* if program passed verifier, update used_maps in bpf_prog_info */
9bac3d6d
AS
6571 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
6572 sizeof(env->used_maps[0]),
6573 GFP_KERNEL);
0246e64d 6574
9bac3d6d 6575 if (!env->prog->aux->used_maps) {
0246e64d 6576 ret = -ENOMEM;
a2a7d570 6577 goto err_release_maps;
0246e64d
AS
6578 }
6579
9bac3d6d 6580 memcpy(env->prog->aux->used_maps, env->used_maps,
0246e64d 6581 sizeof(env->used_maps[0]) * env->used_map_cnt);
9bac3d6d 6582 env->prog->aux->used_map_cnt = env->used_map_cnt;
0246e64d
AS
6583
6584 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
6585 * bpf_ld_imm64 instructions
6586 */
6587 convert_pseudo_ld_imm64(env);
6588 }
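 /* When the program used maps, its ld_imm64 insns loaded with
 * BPF_PSEUDO_MAP_FD already carry the in-kernel map pointer in their
 * immediate (set up by replace_map_fd_with_map_ptr());
 * convert_pseudo_ld_imm64() clears src_reg so that the rest of the
 * kernel, including the JITs, sees a plain 64-bit immediate load.
 */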
cbd35700 6589
ba64e7d8
YS
6590 if (ret == 0)
6591 adjust_btf_func(env);
6592
a2a7d570 6593err_release_maps:
9bac3d6d 6594 if (!env->prog->aux->used_maps)
0246e64d 6595 /* if we didn't copy map pointers into bpf_prog_info, release
ab7f5bf0 6596 * them now. Otherwise free_used_maps() will release them.
0246e64d
AS
6597 */
6598 release_maps(env);
9bac3d6d 6599 *prog = env->prog;
3df126f3 6600err_unlock:
cbd35700 6601 mutex_unlock(&bpf_verifier_lock);
3df126f3
JK
6602 vfree(env->insn_aux_data);
6603err_free_env:
6604 kfree(env);
51580e79
AS
6605 return ret;
6606}