kernel/bpf/verifier.c
5b497af4 1// SPDX-License-Identifier: GPL-2.0-only
51580e79 2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
969bf05e 3 * Copyright (c) 2016 Facebook
fd978bf7 4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
51580e79 5 */
838e9690 6#include <uapi/linux/btf.h>
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/slab.h>
10#include <linux/bpf.h>
838e9690 11#include <linux/btf.h>
58e2af8b 12#include <linux/bpf_verifier.h>
13#include <linux/filter.h>
14#include <net/netlink.h>
15#include <linux/file.h>
16#include <linux/vmalloc.h>
ebb676da 17#include <linux/stringify.h>
18#include <linux/bsearch.h>
19#include <linux/sort.h>
c195651e 20#include <linux/perf_event.h>
d9762e84 21#include <linux/ctype.h>
6ba43b76 22#include <linux/error-injection.h>
9e4e01df 23#include <linux/bpf_lsm.h>
51580e79 24
25#include "disasm.h"
26
00176a34 27static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
91cc1a99 28#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
29 [_id] = & _name ## _verifier_ops,
30#define BPF_MAP_TYPE(_id, _ops)
f2e10bff 31#define BPF_LINK_TYPE(_id, _name)
32#include <linux/bpf_types.h>
33#undef BPF_PROG_TYPE
34#undef BPF_MAP_TYPE
f2e10bff 35#undef BPF_LINK_TYPE
36};
37
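/* For illustration (a sketch, based on the BPF_PROG_TYPE() entries in
 * <linux/bpf_types.h>): an entry such as
 *
 *	BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp, struct xdp_md, struct xdp_buff)
 *
 * expands in the table above to
 *
 *	[BPF_PROG_TYPE_XDP] = &xdp_verifier_ops,
 *
 * so each program type resolves to its own const struct bpf_verifier_ops.
 */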
38/* bpf_check() is a static code analyzer that walks eBPF program
39 * instruction by instruction and updates register/stack state.
40 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
41 *
42 * The first pass is depth-first-search to check that the program is a DAG.
43 * It rejects the following programs:
44 * - larger than BPF_MAXINSNS insns
45 * - if loop is present (detected via back-edge)
46 * - unreachable insns exist (shouldn't be a forest. program = one function)
47 * - out of bounds or malformed jumps
48 * The second pass is all possible path descent from the 1st insn.
 49 * Since it's analyzing all paths through the program, the length of the
eba38a96 50 * analysis is limited to 64k insn, which may be hit even if the total number of
 51 * insn is less than 4K, but there are too many branches that change stack/regs.
52 * Number of 'branches to be analyzed' is limited to 1k
53 *
54 * On entry to each instruction, each register has a type, and the instruction
55 * changes the types of the registers depending on instruction semantics.
56 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
57 * copied to R1.
58 *
59 * All registers are 64-bit.
60 * R0 - return register
61 * R1-R5 argument passing registers
62 * R6-R9 callee saved registers
63 * R10 - frame pointer read-only
64 *
65 * At the start of BPF program the register R1 contains a pointer to bpf_context
66 * and has type PTR_TO_CTX.
67 *
68 * Verifier tracks arithmetic operations on pointers in case:
69 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
70 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
71 * 1st insn copies R10 (which has FRAME_PTR) type into R1
72 * and 2nd arithmetic instruction is pattern matched to recognize
73 * that it wants to construct a pointer to some element within stack.
74 * So after 2nd insn, the register R1 has type PTR_TO_STACK
75 * (and -20 constant is saved for further stack bounds checking).
76 * Meaning that this reg is a pointer to stack plus known immediate constant.
77 *
f1174f77 78 * Most of the time the registers have SCALAR_VALUE type, which
51580e79 79 * means the register has some value, but it's not a valid pointer.
f1174f77 80 * (like pointer plus pointer becomes SCALAR_VALUE type)
81 *
82 * When verifier sees load or store instructions the type of base register
83 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
84 * four pointer types recognized by check_mem_access() function.
85 *
86 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
87 * and the range of [ptr, ptr + map's value_size) is accessible.
88 *
89 * registers used to pass values to function calls are checked against
90 * function argument constraints.
91 *
92 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
93 * It means that the register type passed to this function must be
94 * PTR_TO_STACK and it will be used inside the function as
95 * 'pointer to map element key'
96 *
97 * For example the argument constraints for bpf_map_lookup_elem():
98 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
99 * .arg1_type = ARG_CONST_MAP_PTR,
100 * .arg2_type = ARG_PTR_TO_MAP_KEY,
101 *
102 * ret_type says that this function returns 'pointer to map elem value or null'
103 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
104 * 2nd argument should be a pointer to stack, which will be used inside
105 * the helper function as a pointer to map element key.
106 *
107 * On the kernel side the helper function looks like:
108 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
109 * {
110 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
111 * void *key = (void *) (unsigned long) r2;
112 * void *value;
113 *
114 * here kernel can access 'key' and 'map' pointers safely, knowing that
115 * [key, key + map->key_size) bytes are valid and were initialized on
116 * the stack of eBPF program.
117 * }
118 *
119 * Corresponding eBPF program may look like:
120 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
121 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
122 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
123 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
124 * here verifier looks at prototype of map_lookup_elem() and sees:
125 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
126 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
127 *
128 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
129 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
130 * and were initialized prior to this call.
131 * If it's ok, then verifier allows this BPF_CALL insn and looks at
132 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
133 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 134 * returns either a pointer to map value or NULL.
135 *
136 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
137 * insn, the register holding that pointer in the true branch changes state to
138 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
139 * branch. See check_cond_jmp_op().
140 *
141 * After the call R0 is set to return type of the function and registers R1-R5
142 * are set to NOT_INIT to indicate that they are no longer readable.
143 *
144 * The following reference types represent a potential reference to a kernel
145 * resource which, after first being allocated, must be checked and freed by
146 * the BPF program:
147 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
148 *
149 * When the verifier sees a helper call return a reference type, it allocates a
150 * pointer id for the reference and stores it in the current function state.
151 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
152 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
153 * passes through a NULL-check conditional. For the branch wherein the state is
154 * changed to CONST_IMM, the verifier releases the reference.
155 *
156 * For each helper function that allocates a reference, such as
157 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
158 * bpf_sk_release(). When a reference type passes into the release function,
159 * the verifier also releases the reference. If any unchecked or unreleased
160 * reference remains at the end of the program, the verifier rejects it.
161 */
162
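/* As a minimal sketch of the reference tracking described above (assuming a
 * program type that may call these helpers; error handling omitted):
 *
 *	struct bpf_sock_tuple tuple = {};
 *	struct bpf_sock *sk;
 *
 *	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
 *			       BPF_F_CURRENT_NETNS, 0);
 *	if (sk)
 *		bpf_sk_release(sk);
 *
 * The lookup returns PTR_TO_SOCKET_OR_NULL with a fresh reference id; the
 * NULL check turns it into PTR_TO_SOCKET in the taken branch, and
 * bpf_sk_release() drops the reference. Omitting the release makes the
 * verifier reject the program for the unreleased reference.
 */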
17a52670 163/* verifier_state + insn_idx are pushed to stack when branch is encountered */
58e2af8b 164struct bpf_verifier_stack_elem {
 165 /* verifier state is 'st'
166 * before processing instruction 'insn_idx'
167 * and after processing instruction 'prev_insn_idx'
168 */
58e2af8b 169 struct bpf_verifier_state st;
170 int insn_idx;
171 int prev_insn_idx;
58e2af8b 172 struct bpf_verifier_stack_elem *next;
173 /* length of verifier log at the time this state was pushed on stack */
174 u32 log_pos;
175};
176
b285fcb7 177#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
ceefbc96 178#define BPF_COMPLEXITY_LIMIT_STATES 64
07016151 179
180#define BPF_MAP_KEY_POISON (1ULL << 63)
181#define BPF_MAP_KEY_SEEN (1ULL << 62)
182
183#define BPF_MAP_PTR_UNPRIV 1UL
184#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
185 POISON_POINTER_DELTA))
186#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
187
188static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
189{
d2e4c1e6 190 return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
191}
192
193static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
194{
d2e4c1e6 195 return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
196}
197
198static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
199 const struct bpf_map *map, bool unpriv)
200{
201 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
202 unpriv |= bpf_map_ptr_unpriv(aux);
203 aux->map_ptr_state = (unsigned long)map |
204 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
205}
206
207static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
208{
209 return aux->map_key_state & BPF_MAP_KEY_POISON;
210}
211
212static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
213{
214 return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
215}
216
217static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
218{
219 return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
220}
221
222static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
223{
224 bool poisoned = bpf_map_key_poisoned(aux);
225
226 aux->map_key_state = state | BPF_MAP_KEY_SEEN |
227 (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
c93552c4 228}
fad73a1a 229
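/* A sketch of how the map key tracking above is meant to be read (its callers
 * live further down in the verifier, around helper-call handling):
 *
 *	bpf_map_key_store(aux, 3);		    // constant key 3 seen
 *	bpf_map_key_immediate(aux);		    // -> 3
 *	bpf_map_key_store(aux, BPF_MAP_KEY_POISON); // non-constant key seen
 *	bpf_map_key_poisoned(aux);		    // -> true, and stays true
 *
 * BPF_MAP_KEY_SEEN distinguishes "never seen" from "seen with key 0", and the
 * POISON bit is sticky, so a single non-constant key disables any
 * constant-key optimization for that instruction.
 */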
230struct bpf_call_arg_meta {
231 struct bpf_map *map_ptr;
435faee1 232 bool raw_mode;
36bbef52 233 bool pkt_access;
234 int regno;
235 int access_size;
10060503 236 u64 msize_max_value;
1b986589 237 int ref_obj_id;
d83525ca 238 int func_id;
a7658e1a 239 u32 btf_id;
240};
241
242struct btf *btf_vmlinux;
243
244static DEFINE_MUTEX(bpf_verifier_lock);
245
246static const struct bpf_line_info *
247find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
248{
249 const struct bpf_line_info *linfo;
250 const struct bpf_prog *prog;
251 u32 i, nr_linfo;
252
253 prog = env->prog;
254 nr_linfo = prog->aux->nr_linfo;
255
256 if (!nr_linfo || insn_off >= prog->len)
257 return NULL;
258
259 linfo = prog->aux->linfo;
260 for (i = 1; i < nr_linfo; i++)
261 if (insn_off < linfo[i].insn_off)
262 break;
263
264 return &linfo[i - 1];
265}
266
267void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
268 va_list args)
cbd35700 269{
a2a7d570 270 unsigned int n;
cbd35700 271
a2a7d570 272 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
273
274 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
275 "verifier log line truncated - local buffer too short\n");
276
277 n = min(log->len_total - log->len_used - 1, n);
278 log->kbuf[n] = '\0';
279
280 if (log->level == BPF_LOG_KERNEL) {
281 pr_err("BPF:%s\n", log->kbuf);
282 return;
283 }
284 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
285 log->len_used += n;
286 else
287 log->ubuf = NULL;
cbd35700 288}
abe08840 289
290static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
291{
292 char zero = 0;
293
294 if (!bpf_verifier_log_needed(log))
295 return;
296
297 log->len_used = new_pos;
298 if (put_user(zero, log->ubuf + new_pos))
299 log->ubuf = NULL;
300}
301
302/* log_level controls verbosity level of eBPF verifier.
303 * bpf_verifier_log_write() is used to dump the verification trace to the log,
304 * so the user can figure out what's wrong with the program
430e68d1 305 */
306__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
307 const char *fmt, ...)
308{
309 va_list args;
310
311 if (!bpf_verifier_log_needed(&env->log))
312 return;
313
abe08840 314 va_start(args, fmt);
77d2e05a 315 bpf_verifier_vlog(&env->log, fmt, args);
316 va_end(args);
317}
318EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
319
320__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
321{
77d2e05a 322 struct bpf_verifier_env *env = private_data;
323 va_list args;
324
325 if (!bpf_verifier_log_needed(&env->log))
326 return;
327
abe08840 328 va_start(args, fmt);
77d2e05a 329 bpf_verifier_vlog(&env->log, fmt, args);
330 va_end(args);
331}
cbd35700 332
333__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
334 const char *fmt, ...)
335{
336 va_list args;
337
338 if (!bpf_verifier_log_needed(log))
339 return;
340
341 va_start(args, fmt);
342 bpf_verifier_vlog(log, fmt, args);
343 va_end(args);
344}
345
346static const char *ltrim(const char *s)
347{
348 while (isspace(*s))
349 s++;
350
351 return s;
352}
353
354__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
355 u32 insn_off,
356 const char *prefix_fmt, ...)
357{
358 const struct bpf_line_info *linfo;
359
360 if (!bpf_verifier_log_needed(&env->log))
361 return;
362
363 linfo = find_linfo(env, insn_off);
364 if (!linfo || linfo == env->prev_linfo)
365 return;
366
367 if (prefix_fmt) {
368 va_list args;
369
370 va_start(args, prefix_fmt);
371 bpf_verifier_vlog(&env->log, prefix_fmt, args);
372 va_end(args);
373 }
374
375 verbose(env, "%s\n",
376 ltrim(btf_name_by_offset(env->prog->aux->btf,
377 linfo->line_off)));
378
379 env->prev_linfo = linfo;
380}
381
382static bool type_is_pkt_pointer(enum bpf_reg_type type)
383{
384 return type == PTR_TO_PACKET ||
385 type == PTR_TO_PACKET_META;
386}
387
388static bool type_is_sk_pointer(enum bpf_reg_type type)
389{
390 return type == PTR_TO_SOCKET ||
655a51e5 391 type == PTR_TO_SOCK_COMMON ||
392 type == PTR_TO_TCP_SOCK ||
393 type == PTR_TO_XDP_SOCK;
394}
395
396static bool reg_type_may_be_null(enum bpf_reg_type type)
397{
fd978bf7 398 return type == PTR_TO_MAP_VALUE_OR_NULL ||
46f8bc92 399 type == PTR_TO_SOCKET_OR_NULL ||
400 type == PTR_TO_SOCK_COMMON_OR_NULL ||
401 type == PTR_TO_TCP_SOCK_OR_NULL;
402}
403
404static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
405{
406 return reg->type == PTR_TO_MAP_VALUE &&
407 map_value_has_spin_lock(reg->map_ptr);
408}
409
410static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
411{
412 return type == PTR_TO_SOCKET ||
413 type == PTR_TO_SOCKET_OR_NULL ||
414 type == PTR_TO_TCP_SOCK ||
415 type == PTR_TO_TCP_SOCK_OR_NULL;
416}
417
1b986589 418static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
fd978bf7 419{
1b986589 420 return type == ARG_PTR_TO_SOCK_COMMON;
421}
422
423/* Determine whether the function releases some resources allocated by another
424 * function call. The first reference type argument will be assumed to be
425 * released by release_reference().
426 */
427static bool is_release_function(enum bpf_func_id func_id)
428{
6acc9b43 429 return func_id == BPF_FUNC_sk_release;
430}
431
432static bool is_acquire_function(enum bpf_func_id func_id)
433{
434 return func_id == BPF_FUNC_sk_lookup_tcp ||
435 func_id == BPF_FUNC_sk_lookup_udp ||
436 func_id == BPF_FUNC_skc_lookup_tcp;
437}
438
439static bool is_ptr_cast_function(enum bpf_func_id func_id)
440{
441 return func_id == BPF_FUNC_tcp_sock ||
442 func_id == BPF_FUNC_sk_fullsock;
443}
444
445/* string representation of 'enum bpf_reg_type' */
446static const char * const reg_type_str[] = {
447 [NOT_INIT] = "?",
f1174f77 448 [SCALAR_VALUE] = "inv",
449 [PTR_TO_CTX] = "ctx",
450 [CONST_PTR_TO_MAP] = "map_ptr",
451 [PTR_TO_MAP_VALUE] = "map_value",
452 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
17a52670 453 [PTR_TO_STACK] = "fp",
969bf05e 454 [PTR_TO_PACKET] = "pkt",
de8f3a83 455 [PTR_TO_PACKET_META] = "pkt_meta",
969bf05e 456 [PTR_TO_PACKET_END] = "pkt_end",
d58e468b 457 [PTR_TO_FLOW_KEYS] = "flow_keys",
458 [PTR_TO_SOCKET] = "sock",
459 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
460 [PTR_TO_SOCK_COMMON] = "sock_common",
461 [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
462 [PTR_TO_TCP_SOCK] = "tcp_sock",
463 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
9df1c28b 464 [PTR_TO_TP_BUFFER] = "tp_buffer",
fada7fdc 465 [PTR_TO_XDP_SOCK] = "xdp_sock",
9e15db66 466 [PTR_TO_BTF_ID] = "ptr_",
467};
468
469static char slot_type_char[] = {
470 [STACK_INVALID] = '?',
471 [STACK_SPILL] = 'r',
472 [STACK_MISC] = 'm',
473 [STACK_ZERO] = '0',
474};
475
476static void print_liveness(struct bpf_verifier_env *env,
477 enum bpf_reg_liveness live)
478{
9242b5f5 479 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
480 verbose(env, "_");
481 if (live & REG_LIVE_READ)
482 verbose(env, "r");
483 if (live & REG_LIVE_WRITTEN)
484 verbose(env, "w");
485 if (live & REG_LIVE_DONE)
486 verbose(env, "D");
487}
488
489static struct bpf_func_state *func(struct bpf_verifier_env *env,
490 const struct bpf_reg_state *reg)
491{
492 struct bpf_verifier_state *cur = env->cur_state;
493
494 return cur->frame[reg->frameno];
495}
496
497const char *kernel_type_name(u32 id)
498{
499 return btf_name_by_offset(btf_vmlinux,
500 btf_type_by_id(btf_vmlinux, id)->name_off);
501}
502
61bd5218 503static void print_verifier_state(struct bpf_verifier_env *env,
f4d7e40a 504 const struct bpf_func_state *state)
17a52670 505{
f4d7e40a 506 const struct bpf_reg_state *reg;
507 enum bpf_reg_type t;
508 int i;
509
510 if (state->frameno)
511 verbose(env, " frame%d:", state->frameno);
17a52670 512 for (i = 0; i < MAX_BPF_REG; i++) {
513 reg = &state->regs[i];
514 t = reg->type;
515 if (t == NOT_INIT)
516 continue;
517 verbose(env, " R%d", i);
518 print_liveness(env, reg->live);
519 verbose(env, "=%s", reg_type_str[t]);
520 if (t == SCALAR_VALUE && reg->precise)
521 verbose(env, "P");
522 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
523 tnum_is_const(reg->var_off)) {
524 /* reg->off should be 0 for SCALAR_VALUE */
61bd5218 525 verbose(env, "%lld", reg->var_off.value + reg->off);
f1174f77 526 } else {
527 if (t == PTR_TO_BTF_ID)
528 verbose(env, "%s", kernel_type_name(reg->btf_id));
529 verbose(env, "(id=%d", reg->id);
530 if (reg_type_may_be_refcounted_or_null(t))
531 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
f1174f77 532 if (t != SCALAR_VALUE)
61bd5218 533 verbose(env, ",off=%d", reg->off);
de8f3a83 534 if (type_is_pkt_pointer(t))
61bd5218 535 verbose(env, ",r=%d", reg->range);
536 else if (t == CONST_PTR_TO_MAP ||
537 t == PTR_TO_MAP_VALUE ||
538 t == PTR_TO_MAP_VALUE_OR_NULL)
61bd5218 539 verbose(env, ",ks=%d,vs=%d",
540 reg->map_ptr->key_size,
541 reg->map_ptr->value_size);
542 if (tnum_is_const(reg->var_off)) {
543 /* Typically an immediate SCALAR_VALUE, but
544 * could be a pointer whose offset is too big
545 * for reg->off
546 */
61bd5218 547 verbose(env, ",imm=%llx", reg->var_off.value);
548 } else {
549 if (reg->smin_value != reg->umin_value &&
550 reg->smin_value != S64_MIN)
61bd5218 551 verbose(env, ",smin_value=%lld",
552 (long long)reg->smin_value);
553 if (reg->smax_value != reg->umax_value &&
554 reg->smax_value != S64_MAX)
61bd5218 555 verbose(env, ",smax_value=%lld",
556 (long long)reg->smax_value);
557 if (reg->umin_value != 0)
61bd5218 558 verbose(env, ",umin_value=%llu",
559 (unsigned long long)reg->umin_value);
560 if (reg->umax_value != U64_MAX)
61bd5218 561 verbose(env, ",umax_value=%llu",
562 (unsigned long long)reg->umax_value);
563 if (!tnum_is_unknown(reg->var_off)) {
564 char tn_buf[48];
f1174f77 565
7d1238f2 566 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 567 verbose(env, ",var_off=%s", tn_buf);
7d1238f2 568 }
569 if (reg->s32_min_value != reg->smin_value &&
570 reg->s32_min_value != S32_MIN)
571 verbose(env, ",s32_min_value=%d",
572 (int)(reg->s32_min_value));
573 if (reg->s32_max_value != reg->smax_value &&
574 reg->s32_max_value != S32_MAX)
575 verbose(env, ",s32_max_value=%d",
576 (int)(reg->s32_max_value));
577 if (reg->u32_min_value != reg->umin_value &&
578 reg->u32_min_value != U32_MIN)
579 verbose(env, ",u32_min_value=%d",
580 (int)(reg->u32_min_value));
581 if (reg->u32_max_value != reg->umax_value &&
582 reg->u32_max_value != U32_MAX)
583 verbose(env, ",u32_max_value=%d",
584 (int)(reg->u32_max_value));
f1174f77 585 }
61bd5218 586 verbose(env, ")");
f1174f77 587 }
17a52670 588 }
638f5b90 589 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
590 char types_buf[BPF_REG_SIZE + 1];
591 bool valid = false;
592 int j;
593
594 for (j = 0; j < BPF_REG_SIZE; j++) {
595 if (state->stack[i].slot_type[j] != STACK_INVALID)
596 valid = true;
597 types_buf[j] = slot_type_char[
598 state->stack[i].slot_type[j]];
599 }
600 types_buf[BPF_REG_SIZE] = 0;
601 if (!valid)
602 continue;
603 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
604 print_liveness(env, state->stack[i].spilled_ptr.live);
605 if (state->stack[i].slot_type[0] == STACK_SPILL) {
606 reg = &state->stack[i].spilled_ptr;
607 t = reg->type;
608 verbose(env, "=%s", reg_type_str[t]);
609 if (t == SCALAR_VALUE && reg->precise)
610 verbose(env, "P");
611 if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
612 verbose(env, "%lld", reg->var_off.value + reg->off);
613 } else {
8efea21d 614 verbose(env, "=%s", types_buf);
b5dc0163 615 }
17a52670 616 }
617 if (state->acquired_refs && state->refs[0].id) {
618 verbose(env, " refs=%d", state->refs[0].id);
619 for (i = 1; i < state->acquired_refs; i++)
620 if (state->refs[i].id)
621 verbose(env, ",%d", state->refs[i].id);
622 }
61bd5218 623 verbose(env, "\n");
624}
625
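/* A rough example of what print_verifier_state() emits for a small program
 * (exact registers and liveness letters depend on the program being checked):
 *
 *	 R1=ctx(id=0,off=0,imm=0) R6_w=inv(id=0) R10=fp0 fp-8_w=mmmmmmmm
 *
 * i.e. R1 is the context pointer, R6 is an unknown scalar that was just
 * written, R10 is the frame pointer and the eight bytes at fp-8 are
 * initialized but untyped (STACK_MISC).
 */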
626#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
627static int copy_##NAME##_state(struct bpf_func_state *dst, \
628 const struct bpf_func_state *src) \
629{ \
630 if (!src->FIELD) \
631 return 0; \
632 if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
633 /* internal bug, make state invalid to reject the program */ \
634 memset(dst, 0, sizeof(*dst)); \
635 return -EFAULT; \
636 } \
637 memcpy(dst->FIELD, src->FIELD, \
638 sizeof(*src->FIELD) * (src->COUNT / SIZE)); \
639 return 0; \
638f5b90 640}
641/* copy_reference_state() */
642COPY_STATE_FN(reference, acquired_refs, refs, 1)
643/* copy_stack_state() */
644COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
645#undef COPY_STATE_FN
646
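/* For reference, a sketch of what COPY_STATE_FN(reference, acquired_refs,
 * refs, 1) above expands to (with the division by SIZE folded away):
 *
 *	static int copy_reference_state(struct bpf_func_state *dst,
 *					const struct bpf_func_state *src)
 *	{
 *		if (!src->refs)
 *			return 0;
 *		if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
 *			memset(dst, 0, sizeof(*dst));
 *			return -EFAULT;
 *		}
 *		memcpy(dst->refs, src->refs,
 *		       sizeof(*src->refs) * src->acquired_refs);
 *		return 0;
 *	}
 *
 * The stack variant is identical except that COUNT is measured in bytes and
 * divided by BPF_REG_SIZE to get the number of stack slots to copy.
 */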
647#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
648static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
649 bool copy_old) \
650{ \
651 u32 old_size = state->COUNT; \
652 struct bpf_##NAME##_state *new_##FIELD; \
653 int slot = size / SIZE; \
654 \
655 if (size <= old_size || !size) { \
656 if (copy_old) \
657 return 0; \
658 state->COUNT = slot * SIZE; \
659 if (!size && old_size) { \
660 kfree(state->FIELD); \
661 state->FIELD = NULL; \
662 } \
663 return 0; \
664 } \
665 new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
666 GFP_KERNEL); \
667 if (!new_##FIELD) \
668 return -ENOMEM; \
669 if (copy_old) { \
670 if (state->FIELD) \
671 memcpy(new_##FIELD, state->FIELD, \
672 sizeof(*new_##FIELD) * (old_size / SIZE)); \
673 memset(new_##FIELD + old_size / SIZE, 0, \
674 sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
675 } \
676 state->COUNT = slot * SIZE; \
677 kfree(state->FIELD); \
678 state->FIELD = new_##FIELD; \
679 return 0; \
680}
681/* realloc_reference_state() */
682REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
683/* realloc_stack_state() */
684REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
685#undef REALLOC_STATE_FN
686
687/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 688 * make it consume a minimal amount of memory. check_stack_write() accesses
f4d7e40a 689 * from the program call into realloc_func_state() to grow the stack size.
690 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
691 * which realloc_stack_state() copies over. It points to previous
692 * bpf_verifier_state which is never reallocated.
638f5b90 693 */
694static int realloc_func_state(struct bpf_func_state *state, int stack_size,
695 int refs_size, bool copy_old)
638f5b90 696{
697 int err = realloc_reference_state(state, refs_size, copy_old);
698 if (err)
699 return err;
700 return realloc_stack_state(state, stack_size, copy_old);
701}
702
703/* Acquire a pointer id from the env and update the state->refs to include
704 * this new pointer reference.
705 * On success, returns a valid pointer id to associate with the register
706 * On failure, returns a negative errno.
638f5b90 707 */
fd978bf7 708static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
638f5b90 709{
710 struct bpf_func_state *state = cur_func(env);
711 int new_ofs = state->acquired_refs;
712 int id, err;
713
714 err = realloc_reference_state(state, state->acquired_refs + 1, true);
715 if (err)
716 return err;
717 id = ++env->id_gen;
718 state->refs[new_ofs].id = id;
719 state->refs[new_ofs].insn_idx = insn_idx;
638f5b90 720
721 return id;
722}
723
724/* release function corresponding to acquire_reference_state(). Idempotent. */
46f8bc92 725static int release_reference_state(struct bpf_func_state *state, int ptr_id)
726{
727 int i, last_idx;
728
729 last_idx = state->acquired_refs - 1;
730 for (i = 0; i < state->acquired_refs; i++) {
731 if (state->refs[i].id == ptr_id) {
732 if (last_idx && i != last_idx)
733 memcpy(&state->refs[i], &state->refs[last_idx],
734 sizeof(*state->refs));
735 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
736 state->acquired_refs--;
638f5b90 737 return 0;
638f5b90 738 }
638f5b90 739 }
46f8bc92 740 return -EINVAL;
741}
742
743static int transfer_reference_state(struct bpf_func_state *dst,
744 struct bpf_func_state *src)
745{
746 int err = realloc_reference_state(dst, src->acquired_refs, false);
747 if (err)
748 return err;
749 err = copy_reference_state(dst, src);
750 if (err)
751 return err;
752 return 0;
753}
754
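/* Example (a sketch) of how state->refs evolves: after two acquires the
 * table might be
 *
 *	refs[0] = { .id = 1, .insn_idx = 10 }
 *	refs[1] = { .id = 2, .insn_idx = 14 }
 *
 * release_reference_state(state, 1) copies refs[1] over slot 0, clears the
 * last slot and drops acquired_refs to 1; releasing an id that is not in the
 * table returns -EINVAL.
 */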
755static void free_func_state(struct bpf_func_state *state)
756{
757 if (!state)
758 return;
fd978bf7 759 kfree(state->refs);
760 kfree(state->stack);
761 kfree(state);
762}
763
764static void clear_jmp_history(struct bpf_verifier_state *state)
765{
766 kfree(state->jmp_history);
767 state->jmp_history = NULL;
768 state->jmp_history_cnt = 0;
769}
770
771static void free_verifier_state(struct bpf_verifier_state *state,
772 bool free_self)
638f5b90 773{
774 int i;
775
776 for (i = 0; i <= state->curframe; i++) {
777 free_func_state(state->frame[i]);
778 state->frame[i] = NULL;
779 }
b5dc0163 780 clear_jmp_history(state);
781 if (free_self)
782 kfree(state);
783}
784
785/* copy verifier state from src to dst growing dst stack space
786 * when necessary to accommodate larger src stack
787 */
788static int copy_func_state(struct bpf_func_state *dst,
789 const struct bpf_func_state *src)
790{
791 int err;
792
793 err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
794 false);
795 if (err)
796 return err;
797 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
798 err = copy_reference_state(dst, src);
799 if (err)
800 return err;
801 return copy_stack_state(dst, src);
802}
803
804static int copy_verifier_state(struct bpf_verifier_state *dst_state,
805 const struct bpf_verifier_state *src)
806{
807 struct bpf_func_state *dst;
b5dc0163 808 u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
809 int i, err;
810
811 if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
812 kfree(dst_state->jmp_history);
813 dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
814 if (!dst_state->jmp_history)
815 return -ENOMEM;
816 }
817 memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
818 dst_state->jmp_history_cnt = src->jmp_history_cnt;
819
 820 /* if dst has more stack frames than src frame, free them */
821 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
822 free_func_state(dst_state->frame[i]);
823 dst_state->frame[i] = NULL;
824 }
979d63d5 825 dst_state->speculative = src->speculative;
f4d7e40a 826 dst_state->curframe = src->curframe;
d83525ca 827 dst_state->active_spin_lock = src->active_spin_lock;
828 dst_state->branches = src->branches;
829 dst_state->parent = src->parent;
830 dst_state->first_insn_idx = src->first_insn_idx;
831 dst_state->last_insn_idx = src->last_insn_idx;
832 for (i = 0; i <= src->curframe; i++) {
833 dst = dst_state->frame[i];
834 if (!dst) {
835 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
836 if (!dst)
837 return -ENOMEM;
838 dst_state->frame[i] = dst;
839 }
840 err = copy_func_state(dst, src->frame[i]);
841 if (err)
842 return err;
843 }
844 return 0;
845}
846
847static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
848{
849 while (st) {
850 u32 br = --st->branches;
851
852 /* WARN_ON(br > 1) technically makes sense here,
853 * but see comment in push_stack(), hence:
854 */
855 WARN_ONCE((int)br < 0,
856 "BUG update_branch_counts:branches_to_explore=%d\n",
857 br);
858 if (br)
859 break;
860 st = st->parent;
861 }
862}
863
638f5b90 864static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
6f8a57cc 865 int *insn_idx, bool pop_log)
866{
867 struct bpf_verifier_state *cur = env->cur_state;
868 struct bpf_verifier_stack_elem *elem, *head = env->head;
869 int err;
870
871 if (env->head == NULL)
638f5b90 872 return -ENOENT;
17a52670 873
874 if (cur) {
875 err = copy_verifier_state(cur, &head->st);
876 if (err)
877 return err;
878 }
879 if (pop_log)
880 bpf_vlog_reset(&env->log, head->log_pos);
881 if (insn_idx)
882 *insn_idx = head->insn_idx;
17a52670 883 if (prev_insn_idx)
884 *prev_insn_idx = head->prev_insn_idx;
885 elem = head->next;
1969db47 886 free_verifier_state(&head->st, false);
638f5b90 887 kfree(head);
888 env->head = elem;
889 env->stack_size--;
638f5b90 890 return 0;
891}
892
58e2af8b 893static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
894 int insn_idx, int prev_insn_idx,
895 bool speculative)
17a52670 896{
638f5b90 897 struct bpf_verifier_state *cur = env->cur_state;
58e2af8b 898 struct bpf_verifier_stack_elem *elem;
638f5b90 899 int err;
17a52670 900
638f5b90 901 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
902 if (!elem)
903 goto err;
904
905 elem->insn_idx = insn_idx;
906 elem->prev_insn_idx = prev_insn_idx;
907 elem->next = env->head;
6f8a57cc 908 elem->log_pos = env->log.len_used;
909 env->head = elem;
910 env->stack_size++;
911 err = copy_verifier_state(&elem->st, cur);
912 if (err)
913 goto err;
979d63d5 914 elem->st.speculative |= speculative;
915 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
916 verbose(env, "The sequence of %d jumps is too complex.\n",
917 env->stack_size);
918 goto err;
919 }
920 if (elem->st.parent) {
921 ++elem->st.parent->branches;
922 /* WARN_ON(branches > 2) technically makes sense here,
923 * but
924 * 1. speculative states will bump 'branches' for non-branch
925 * instructions
926 * 2. is_state_visited() heuristics may decide not to create
927 * a new state for a sequence of branches and all such current
928 * and cloned states will be pointing to a single parent state
929 * which might have large 'branches' count.
930 */
931 }
932 return &elem->st;
933err:
934 free_verifier_state(env->cur_state, true);
935 env->cur_state = NULL;
17a52670 936 /* pop all elements and return */
6f8a57cc 937 while (!pop_stack(env, NULL, NULL, false));
938 return NULL;
939}
940
941#define CALLER_SAVED_REGS 6
942static const int caller_saved[CALLER_SAVED_REGS] = {
943 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
944};
945
946static void __mark_reg_not_init(const struct bpf_verifier_env *env,
947 struct bpf_reg_state *reg);
f1174f77 948
949/* Mark the unknown part of a register (variable offset or scalar value) as
950 * known to have the value @imm.
951 */
952static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
953{
954 /* Clear id, off, and union(map_ptr, range) */
955 memset(((u8 *)reg) + sizeof(reg->type), 0,
956 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
957 reg->var_off = tnum_const(imm);
958 reg->smin_value = (s64)imm;
959 reg->smax_value = (s64)imm;
960 reg->umin_value = imm;
961 reg->umax_value = imm;
962
963 reg->s32_min_value = (s32)imm;
964 reg->s32_max_value = (s32)imm;
965 reg->u32_min_value = (u32)imm;
966 reg->u32_max_value = (u32)imm;
967}
968
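/* For example, __mark_reg_known(reg, 5) above leaves the register with
 * var_off = (value=5, mask=0) and every bounds pair collapsed to 5:
 * smin/smax = 5, umin/umax = 5, and likewise for the 32-bit s32/u32 bounds.
 */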
969static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
970{
971 reg->var_off = tnum_const_subreg(reg->var_off, imm);
972 reg->s32_min_value = (s32)imm;
973 reg->s32_max_value = (s32)imm;
974 reg->u32_min_value = (u32)imm;
975 reg->u32_max_value = (u32)imm;
976}
977
978/* Mark the 'variable offset' part of a register as zero. This should be
979 * used only on registers holding a pointer type.
980 */
981static void __mark_reg_known_zero(struct bpf_reg_state *reg)
a9789ef9 982{
b03c9f9f 983 __mark_reg_known(reg, 0);
f1174f77 984}
a9789ef9 985
986static void __mark_reg_const_zero(struct bpf_reg_state *reg)
987{
988 __mark_reg_known(reg, 0);
989 reg->type = SCALAR_VALUE;
990}
991
992static void mark_reg_known_zero(struct bpf_verifier_env *env,
993 struct bpf_reg_state *regs, u32 regno)
994{
995 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 996 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
997 /* Something bad happened, let's kill all regs */
998 for (regno = 0; regno < MAX_BPF_REG; regno++)
f54c7898 999 __mark_reg_not_init(env, regs + regno);
1000 return;
1001 }
1002 __mark_reg_known_zero(regs + regno);
1003}
1004
1005static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1006{
1007 return type_is_pkt_pointer(reg->type);
1008}
1009
1010static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1011{
1012 return reg_is_pkt_pointer(reg) ||
1013 reg->type == PTR_TO_PACKET_END;
1014}
1015
1016/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1017static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1018 enum bpf_reg_type which)
1019{
1020 /* The register can already have a range from prior markings.
1021 * This is fine as long as it hasn't been advanced from its
1022 * origin.
1023 */
1024 return reg->type == which &&
1025 reg->id == 0 &&
1026 reg->off == 0 &&
1027 tnum_equals_const(reg->var_off, 0);
1028}
1029
1030/* Reset the min/max bounds of a register */
1031static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1032{
1033 reg->smin_value = S64_MIN;
1034 reg->smax_value = S64_MAX;
1035 reg->umin_value = 0;
1036 reg->umax_value = U64_MAX;
1037
1038 reg->s32_min_value = S32_MIN;
1039 reg->s32_max_value = S32_MAX;
1040 reg->u32_min_value = 0;
1041 reg->u32_max_value = U32_MAX;
1042}
1043
1044static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1045{
1046 reg->smin_value = S64_MIN;
1047 reg->smax_value = S64_MAX;
1048 reg->umin_value = 0;
1049 reg->umax_value = U64_MAX;
1050}
1051
1052static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1053{
1054 reg->s32_min_value = S32_MIN;
1055 reg->s32_max_value = S32_MAX;
1056 reg->u32_min_value = 0;
1057 reg->u32_max_value = U32_MAX;
1058}
1059
1060static void __update_reg32_bounds(struct bpf_reg_state *reg)
1061{
1062 struct tnum var32_off = tnum_subreg(reg->var_off);
1063
1064 /* min signed is max(sign bit) | min(other bits) */
1065 reg->s32_min_value = max_t(s32, reg->s32_min_value,
1066 var32_off.value | (var32_off.mask & S32_MIN));
1067 /* max signed is min(sign bit) | max(other bits) */
1068 reg->s32_max_value = min_t(s32, reg->s32_max_value,
1069 var32_off.value | (var32_off.mask & S32_MAX));
1070 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1071 reg->u32_max_value = min(reg->u32_max_value,
1072 (u32)(var32_off.value | var32_off.mask));
1073}
1074
1075static void __update_reg64_bounds(struct bpf_reg_state *reg)
1076{
1077 /* min signed is max(sign bit) | min(other bits) */
1078 reg->smin_value = max_t(s64, reg->smin_value,
1079 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1080 /* max signed is min(sign bit) | max(other bits) */
1081 reg->smax_value = min_t(s64, reg->smax_value,
1082 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1083 reg->umin_value = max(reg->umin_value, reg->var_off.value);
1084 reg->umax_value = min(reg->umax_value,
1085 reg->var_off.value | reg->var_off.mask);
1086}
1087
1088static void __update_reg_bounds(struct bpf_reg_state *reg)
1089{
1090 __update_reg32_bounds(reg);
1091 __update_reg64_bounds(reg);
1092}
1093
b03c9f9f 1094/* Uses signed min/max values to inform unsigned, and vice-versa */
1095static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1096{
1097 /* Learn sign from signed bounds.
1098 * If we cannot cross the sign boundary, then signed and unsigned bounds
1099 * are the same, so combine. This works even in the negative case, e.g.
1100 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1101 */
1102 if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1103 reg->s32_min_value = reg->u32_min_value =
1104 max_t(u32, reg->s32_min_value, reg->u32_min_value);
1105 reg->s32_max_value = reg->u32_max_value =
1106 min_t(u32, reg->s32_max_value, reg->u32_max_value);
1107 return;
1108 }
1109 /* Learn sign from unsigned bounds. Signed bounds cross the sign
1110 * boundary, so we must be careful.
1111 */
1112 if ((s32)reg->u32_max_value >= 0) {
1113 /* Positive. We can't learn anything from the smin, but smax
1114 * is positive, hence safe.
1115 */
1116 reg->s32_min_value = reg->u32_min_value;
1117 reg->s32_max_value = reg->u32_max_value =
1118 min_t(u32, reg->s32_max_value, reg->u32_max_value);
1119 } else if ((s32)reg->u32_min_value < 0) {
1120 /* Negative. We can't learn anything from the smax, but smin
1121 * is negative, hence safe.
1122 */
1123 reg->s32_min_value = reg->u32_min_value =
1124 max_t(u32, reg->s32_min_value, reg->u32_min_value);
1125 reg->s32_max_value = reg->u32_max_value;
1126 }
1127}
1128
1129static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1130{
1131 /* Learn sign from signed bounds.
1132 * If we cannot cross the sign boundary, then signed and unsigned bounds
1133 * are the same, so combine. This works even in the negative case, e.g.
1134 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1135 */
1136 if (reg->smin_value >= 0 || reg->smax_value < 0) {
1137 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1138 reg->umin_value);
1139 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1140 reg->umax_value);
1141 return;
1142 }
1143 /* Learn sign from unsigned bounds. Signed bounds cross the sign
1144 * boundary, so we must be careful.
1145 */
1146 if ((s64)reg->umax_value >= 0) {
1147 /* Positive. We can't learn anything from the smin, but smax
1148 * is positive, hence safe.
1149 */
1150 reg->smin_value = reg->umin_value;
1151 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1152 reg->umax_value);
1153 } else if ((s64)reg->umin_value < 0) {
1154 /* Negative. We can't learn anything from the smax, but smin
1155 * is negative, hence safe.
1156 */
1157 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1158 reg->umin_value);
1159 reg->smax_value = reg->umax_value;
1160 }
1161}
1162
1163static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1164{
1165 __reg32_deduce_bounds(reg);
1166 __reg64_deduce_bounds(reg);
1167}
1168
1169/* Attempts to improve var_off based on unsigned min/max information */
1170static void __reg_bound_offset(struct bpf_reg_state *reg)
1171{
1172 struct tnum var64_off = tnum_intersect(reg->var_off,
1173 tnum_range(reg->umin_value,
1174 reg->umax_value));
1175 struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1176 tnum_range(reg->u32_min_value,
1177 reg->u32_max_value));
1178
1179 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1180}
1181
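/* A worked example for __reg_bound_offset(): with umin_value = 0,
 * umax_value = 3 and var_off still (value=0, mask=0xff), tnum_range(0, 3)
 * yields (value=0, mask=0x3), so the intersection tightens var_off to
 * (value=0, mask=0x3), i.e. only the low two bits remain unknown. The same
 * is done independently for the 32-bit subregister half.
 */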
3f50f132 1182static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
b03c9f9f 1183{
1184 reg->umin_value = reg->u32_min_value;
1185 reg->umax_value = reg->u32_max_value;
1186 /* Attempt to pull 32-bit signed bounds into 64-bit bounds
1187 * but must be positive otherwise set to worse case bounds
1188 * and refine later from tnum.
1189 */
1190 if (reg->s32_min_value > 0)
1191 reg->smin_value = reg->s32_min_value;
1192 else
1193 reg->smin_value = 0;
1194 if (reg->s32_max_value > 0)
1195 reg->smax_value = reg->s32_max_value;
1196 else
1197 reg->smax_value = U32_MAX;
1198}
1199
1200static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1201{
1202 /* special case when 64-bit register has upper 32-bit register
1203 * zeroed. Typically happens after zext or <<32, >>32 sequence
 1204 * allowing us to use 32-bit bounds directly.
1205 */
1206 if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1207 __reg_assign_32_into_64(reg);
1208 } else {
1209 /* Otherwise the best we can do is push lower 32bit known and
1210 * unknown bits into register (var_off set from jmp logic)
1211 * then learn as much as possible from the 64-bit tnum
1212 * known and unknown bits. The previous smin/smax bounds are
1213 * invalid here because of jmp32 compare so mark them unknown
1214 * so they do not impact tnum bounds calculation.
1215 */
1216 __mark_reg64_unbounded(reg);
1217 __update_reg_bounds(reg);
1218 }
1219
1220 /* Intersecting with the old var_off might have improved our bounds
1221 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1222 * then new var_off is (0; 0x7f...fc) which improves our umax.
1223 */
1224 __reg_deduce_bounds(reg);
1225 __reg_bound_offset(reg);
1226 __update_reg_bounds(reg);
1227}
1228
1229static bool __reg64_bound_s32(s64 a)
1230{
1231 if (a > S32_MIN && a < S32_MAX)
1232 return true;
1233 return false;
1234}
1235
1236static bool __reg64_bound_u32(u64 a)
1237{
1238 if (a > U32_MIN && a < U32_MAX)
1239 return true;
1240 return false;
1241}
1242
1243static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1244{
1245 __mark_reg32_unbounded(reg);
1246
1247 if (__reg64_bound_s32(reg->smin_value))
1248 reg->s32_min_value = (s32)reg->smin_value;
1249 if (__reg64_bound_s32(reg->smax_value))
1250 reg->s32_max_value = (s32)reg->smax_value;
1251 if (__reg64_bound_u32(reg->umin_value))
1252 reg->u32_min_value = (u32)reg->umin_value;
1253 if (__reg64_bound_u32(reg->umax_value))
1254 reg->u32_max_value = (u32)reg->umax_value;
1255
1256 /* Intersecting with the old var_off might have improved our bounds
1257 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1258 * then new var_off is (0; 0x7f...fc) which improves our umax.
1259 */
1260 __reg_deduce_bounds(reg);
1261 __reg_bound_offset(reg);
1262 __update_reg_bounds(reg);
1263}
1264
f1174f77 1265/* Mark a register as having a completely unknown (scalar) value. */
1266static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1267 struct bpf_reg_state *reg)
f1174f77 1268{
1269 /*
1270 * Clear type, id, off, and union(map_ptr, range) and
1271 * padding between 'type' and union
1272 */
1273 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
f1174f77 1274 reg->type = SCALAR_VALUE;
f1174f77 1275 reg->var_off = tnum_unknown;
f4d7e40a 1276 reg->frameno = 0;
89f33dca 1277 reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks;
b03c9f9f 1278 __mark_reg_unbounded(reg);
1279}
1280
1281static void mark_reg_unknown(struct bpf_verifier_env *env,
1282 struct bpf_reg_state *regs, u32 regno)
1283{
1284 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 1285 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1286 /* Something bad happened, let's kill all regs except FP */
1287 for (regno = 0; regno < BPF_REG_FP; regno++)
f54c7898 1288 __mark_reg_not_init(env, regs + regno);
1289 return;
1290 }
f54c7898 1291 __mark_reg_unknown(env, regs + regno);
1292}
1293
1294static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1295 struct bpf_reg_state *reg)
f1174f77 1296{
f54c7898 1297 __mark_reg_unknown(env, reg);
1298 reg->type = NOT_INIT;
1299}
1300
1301static void mark_reg_not_init(struct bpf_verifier_env *env,
1302 struct bpf_reg_state *regs, u32 regno)
1303{
1304 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 1305 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1306 /* Something bad happened, let's kill all regs except FP */
1307 for (regno = 0; regno < BPF_REG_FP; regno++)
f54c7898 1308 __mark_reg_not_init(env, regs + regno);
1309 return;
1310 }
f54c7898 1311 __mark_reg_not_init(env, regs + regno);
1312}
1313
5327ed3d 1314#define DEF_NOT_SUBREG (0)
61bd5218 1315static void init_reg_state(struct bpf_verifier_env *env,
f4d7e40a 1316 struct bpf_func_state *state)
17a52670 1317{
f4d7e40a 1318 struct bpf_reg_state *regs = state->regs;
1319 int i;
1320
dc503a8a 1321 for (i = 0; i < MAX_BPF_REG; i++) {
61bd5218 1322 mark_reg_not_init(env, regs, i);
dc503a8a 1323 regs[i].live = REG_LIVE_NONE;
679c782d 1324 regs[i].parent = NULL;
5327ed3d 1325 regs[i].subreg_def = DEF_NOT_SUBREG;
dc503a8a 1326 }
1327
1328 /* frame pointer */
f1174f77 1329 regs[BPF_REG_FP].type = PTR_TO_STACK;
61bd5218 1330 mark_reg_known_zero(env, regs, BPF_REG_FP);
f4d7e40a 1331 regs[BPF_REG_FP].frameno = state->frameno;
1332}
1333
1334#define BPF_MAIN_FUNC (-1)
1335static void init_func_state(struct bpf_verifier_env *env,
1336 struct bpf_func_state *state,
1337 int callsite, int frameno, int subprogno)
1338{
1339 state->callsite = callsite;
1340 state->frameno = frameno;
1341 state->subprogno = subprogno;
1342 init_reg_state(env, state);
1343}
1344
1345enum reg_arg_type {
1346 SRC_OP, /* register is used as source operand */
1347 DST_OP, /* register is used as destination operand */
1348 DST_OP_NO_MARK /* same as above, check only, don't mark */
1349};
1350
1351static int cmp_subprogs(const void *a, const void *b)
1352{
1353 return ((struct bpf_subprog_info *)a)->start -
1354 ((struct bpf_subprog_info *)b)->start;
1355}
1356
1357static int find_subprog(struct bpf_verifier_env *env, int off)
1358{
9c8105bd 1359 struct bpf_subprog_info *p;
cc8b0b92 1360
1361 p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1362 sizeof(env->subprog_info[0]), cmp_subprogs);
1363 if (!p)
1364 return -ENOENT;
9c8105bd 1365 return p - env->subprog_info;
1366
1367}
1368
1369static int add_subprog(struct bpf_verifier_env *env, int off)
1370{
1371 int insn_cnt = env->prog->len;
1372 int ret;
1373
1374 if (off >= insn_cnt || off < 0) {
1375 verbose(env, "call to invalid destination\n");
1376 return -EINVAL;
1377 }
1378 ret = find_subprog(env, off);
1379 if (ret >= 0)
1380 return 0;
4cb3d99c 1381 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1382 verbose(env, "too many subprograms\n");
1383 return -E2BIG;
1384 }
1385 env->subprog_info[env->subprog_cnt++].start = off;
1386 sort(env->subprog_info, env->subprog_cnt,
1387 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1388 return 0;
1389}
1390
1391static int check_subprogs(struct bpf_verifier_env *env)
1392{
1393 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
9c8105bd 1394 struct bpf_subprog_info *subprog = env->subprog_info;
1395 struct bpf_insn *insn = env->prog->insnsi;
1396 int insn_cnt = env->prog->len;
1397
1398 /* Add entry function. */
1399 ret = add_subprog(env, 0);
1400 if (ret < 0)
1401 return ret;
1402
1403 /* determine subprog starts. The end is one before the next starts */
1404 for (i = 0; i < insn_cnt; i++) {
1405 if (insn[i].code != (BPF_JMP | BPF_CALL))
1406 continue;
1407 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1408 continue;
1409 if (!env->allow_ptr_leaks) {
1410 verbose(env, "function calls to other bpf functions are allowed for root only\n");
1411 return -EPERM;
1412 }
1413 ret = add_subprog(env, i + insn[i].imm + 1);
1414 if (ret < 0)
1415 return ret;
1416 }
1417
1418 /* Add a fake 'exit' subprog which could simplify subprog iteration
1419 * logic. 'subprog_cnt' should not be increased.
1420 */
1421 subprog[env->subprog_cnt].start = insn_cnt;
1422
06ee7115 1423 if (env->log.level & BPF_LOG_LEVEL2)
cc8b0b92 1424 for (i = 0; i < env->subprog_cnt; i++)
9c8105bd 1425 verbose(env, "func#%d @%d\n", i, subprog[i].start);
1426
1427 /* now check that all jumps are within the same subprog */
1428 subprog_start = subprog[cur_subprog].start;
1429 subprog_end = subprog[cur_subprog + 1].start;
1430 for (i = 0; i < insn_cnt; i++) {
1431 u8 code = insn[i].code;
1432
092ed096 1433 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
1434 goto next;
1435 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1436 goto next;
1437 off = i + insn[i].off + 1;
1438 if (off < subprog_start || off >= subprog_end) {
1439 verbose(env, "jump out of range from insn %d to %d\n", i, off);
1440 return -EINVAL;
1441 }
1442next:
1443 if (i == subprog_end - 1) {
1444 /* to avoid fall-through from one subprog into another
1445 * the last insn of the subprog should be either exit
1446 * or unconditional jump back
1447 */
1448 if (code != (BPF_JMP | BPF_EXIT) &&
1449 code != (BPF_JMP | BPF_JA)) {
1450 verbose(env, "last insn is not an exit or jmp\n");
1451 return -EINVAL;
1452 }
1453 subprog_start = subprog_end;
1454 cur_subprog++;
1455 if (cur_subprog < env->subprog_cnt)
9c8105bd 1456 subprog_end = subprog[cur_subprog + 1].start;
1457 }
1458 }
1459 return 0;
1460}
1461
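/* A small worked example for the subprog bookkeeping above: for a 12-insn
 * program whose only bpf-to-bpf call sits at insn 4 with imm = 3, the callee
 * starts at insn 4 + 3 + 1 = 8, so after check_subprogs():
 *
 *	subprog_info[0].start = 0	(main)
 *	subprog_info[1].start = 8	(callee)
 *	subprog_info[2].start = 12	(fake 'exit' subprog == prog->len)
 *
 * with env->subprog_cnt == 2; the fake entry only marks the end of the last
 * real subprog.
 */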
1462/* Parentage chain of this register (or stack slot) should take care of all
1463 * issues like callee-saved registers, stack slot allocation time, etc.
1464 */
f4d7e40a 1465static int mark_reg_read(struct bpf_verifier_env *env,
679c782d 1466 const struct bpf_reg_state *state,
5327ed3d 1467 struct bpf_reg_state *parent, u8 flag)
1468{
1469 bool writes = parent == state->parent; /* Observe write marks */
06ee7115 1470 int cnt = 0;
1471
1472 while (parent) {
1473 /* if read wasn't screened by an earlier write ... */
679c782d 1474 if (writes && state->live & REG_LIVE_WRITTEN)
dc503a8a 1475 break;
1476 if (parent->live & REG_LIVE_DONE) {
1477 verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1478 reg_type_str[parent->type],
1479 parent->var_off.value, parent->off);
1480 return -EFAULT;
1481 }
1482 /* The first condition is more likely to be true than the
 1483 * second, so check it first.
1484 */
1485 if ((parent->live & REG_LIVE_READ) == flag ||
1486 parent->live & REG_LIVE_READ64)
1487 /* The parentage chain never changes and
1488 * this parent was already marked as LIVE_READ.
1489 * There is no need to keep walking the chain again and
1490 * keep re-marking all parents as LIVE_READ.
1491 * This case happens when the same register is read
1492 * multiple times without writes into it in-between.
1493 * Also, if parent has the stronger REG_LIVE_READ64 set,
1494 * then no need to set the weak REG_LIVE_READ32.
1495 */
1496 break;
dc503a8a 1497 /* ... then we depend on parent's value */
1498 parent->live |= flag;
1499 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1500 if (flag == REG_LIVE_READ64)
1501 parent->live &= ~REG_LIVE_READ32;
1502 state = parent;
1503 parent = state->parent;
f4d7e40a 1504 writes = true;
06ee7115 1505 cnt++;
dc503a8a 1506 }
1507
1508 if (env->longest_mark_read_walk < cnt)
1509 env->longest_mark_read_walk = cnt;
f4d7e40a 1510 return 0;
1511}
1512
1513/* This function is supposed to be used by the following 32-bit optimization
1514 * code only. It returns TRUE if the source or destination register operates
1515 * on 64-bit, otherwise return FALSE.
1516 */
1517static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
1518 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
1519{
1520 u8 code, class, op;
1521
1522 code = insn->code;
1523 class = BPF_CLASS(code);
1524 op = BPF_OP(code);
1525 if (class == BPF_JMP) {
1526 /* BPF_EXIT for "main" will reach here. Return TRUE
1527 * conservatively.
1528 */
1529 if (op == BPF_EXIT)
1530 return true;
1531 if (op == BPF_CALL) {
1532 /* BPF to BPF call will reach here because of marking
1533 * caller saved clobber with DST_OP_NO_MARK for which we
1534 * don't care the register def because they are anyway
1535 * marked as NOT_INIT already.
1536 */
1537 if (insn->src_reg == BPF_PSEUDO_CALL)
1538 return false;
1539 /* Helper call will reach here because of arg type
1540 * check, conservatively return TRUE.
1541 */
1542 if (t == SRC_OP)
1543 return true;
1544
1545 return false;
1546 }
1547 }
1548
1549 if (class == BPF_ALU64 || class == BPF_JMP ||
1550 /* BPF_END always use BPF_ALU class. */
1551 (class == BPF_ALU && op == BPF_END && insn->imm == 64))
1552 return true;
1553
1554 if (class == BPF_ALU || class == BPF_JMP32)
1555 return false;
1556
1557 if (class == BPF_LDX) {
1558 if (t != SRC_OP)
1559 return BPF_SIZE(code) == BPF_DW;
1560 /* LDX source must be ptr. */
1561 return true;
1562 }
1563
1564 if (class == BPF_STX) {
1565 if (reg->type != SCALAR_VALUE)
1566 return true;
1567 return BPF_SIZE(code) == BPF_DW;
1568 }
1569
1570 if (class == BPF_LD) {
1571 u8 mode = BPF_MODE(code);
1572
1573 /* LD_IMM64 */
1574 if (mode == BPF_IMM)
1575 return true;
1576
1577 /* Both LD_IND and LD_ABS return 32-bit data. */
1578 if (t != SRC_OP)
1579 return false;
1580
1581 /* Implicit ctx ptr. */
1582 if (regno == BPF_REG_6)
1583 return true;
1584
1585 /* Explicit source could be any width. */
1586 return true;
1587 }
1588
1589 if (class == BPF_ST)
1590 /* The only source register for BPF_ST is a ptr. */
1591 return true;
1592
1593 /* Conservatively return true at default. */
1594 return true;
1595}
1596
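/* A few concrete cases for is_reg64() above (a sketch, not exhaustive):
 *
 *	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1)	    -> true  (64-bit ALU)
 *	BPF_MOV32_IMM(BPF_REG_1, 1)		    -> false (BPF_ALU class)
 *	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, 0) -> false for the dst def,
 *						       true for the src pointer
 *	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0) -> true in both roles
 */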
1597/* Return TRUE if INSN doesn't have explicit value define. */
1598static bool insn_no_def(struct bpf_insn *insn)
1599{
1600 u8 class = BPF_CLASS(insn->code);
1601
1602 return (class == BPF_JMP || class == BPF_JMP32 ||
1603 class == BPF_STX || class == BPF_ST);
1604}
1605
1606/* Return TRUE if INSN has defined any 32-bit value explicitly. */
1607static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1608{
1609 if (insn_no_def(insn))
1610 return false;
1611
1612 return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1613}
1614
5327ed3d
JW
1615static void mark_insn_zext(struct bpf_verifier_env *env,
1616 struct bpf_reg_state *reg)
1617{
1618 s32 def_idx = reg->subreg_def;
1619
1620 if (def_idx == DEF_NOT_SUBREG)
1621 return;
1622
1623 env->insn_aux_data[def_idx - 1].zext_dst = true;
 1624 /* The dst will be zero extended, so it won't be a sub-register anymore. */
1625 reg->subreg_def = DEF_NOT_SUBREG;
1626}
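/* Rough sketch of how subreg_def is consumed (insn indexes are made up):
 *
 *   5: w3 = w2      // 32-bit def, so reg[3].subreg_def = 6 (insn_idx + 1)
 *   ...
 *   9: r4 = r3      // 64-bit read of r3: mark_insn_zext() sets
 *                   // insn_aux_data[5].zext_dst = true so the later zext
 *                   // rewrite pass can insert an explicit zero extension
 *                   // after insn 5 if the JIT doesn't provide it for free.
 */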
1627
dc503a8a 1628static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
17a52670
AS
1629 enum reg_arg_type t)
1630{
f4d7e40a
AS
1631 struct bpf_verifier_state *vstate = env->cur_state;
1632 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5327ed3d 1633 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
c342dc10 1634 struct bpf_reg_state *reg, *regs = state->regs;
5327ed3d 1635 bool rw64;
dc503a8a 1636
17a52670 1637 if (regno >= MAX_BPF_REG) {
61bd5218 1638 verbose(env, "R%d is invalid\n", regno);
17a52670
AS
1639 return -EINVAL;
1640 }
1641
c342dc10 1642 reg = &regs[regno];
5327ed3d 1643 rw64 = is_reg64(env, insn, regno, reg, t);
17a52670
AS
1644 if (t == SRC_OP) {
1645 /* check whether register used as source operand can be read */
c342dc10 1646 if (reg->type == NOT_INIT) {
61bd5218 1647 verbose(env, "R%d !read_ok\n", regno);
17a52670
AS
1648 return -EACCES;
1649 }
679c782d 1650 /* We don't need to worry about FP liveness because it's read-only */
c342dc10
JW
1651 if (regno == BPF_REG_FP)
1652 return 0;
1653
5327ed3d
JW
1654 if (rw64)
1655 mark_insn_zext(env, reg);
1656
1657 return mark_reg_read(env, reg, reg->parent,
1658 rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
17a52670
AS
1659 } else {
1660 /* check whether register used as dest operand can be written to */
1661 if (regno == BPF_REG_FP) {
61bd5218 1662 verbose(env, "frame pointer is read only\n");
17a52670
AS
1663 return -EACCES;
1664 }
c342dc10 1665 reg->live |= REG_LIVE_WRITTEN;
5327ed3d 1666 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
17a52670 1667 if (t == DST_OP)
61bd5218 1668 mark_reg_unknown(env, regs, regno);
17a52670
AS
1669 }
1670 return 0;
1671}
1672
b5dc0163
AS
1673/* for any branch, call, exit record the history of jmps in the given state */
1674static int push_jmp_history(struct bpf_verifier_env *env,
1675 struct bpf_verifier_state *cur)
1676{
1677 u32 cnt = cur->jmp_history_cnt;
1678 struct bpf_idx_pair *p;
1679
1680 cnt++;
1681 p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1682 if (!p)
1683 return -ENOMEM;
1684 p[cnt - 1].idx = env->insn_idx;
1685 p[cnt - 1].prev_idx = env->prev_insn_idx;
1686 cur->jmp_history = p;
1687 cur->jmp_history_cnt = cnt;
1688 return 0;
1689}
1690
 1691/* Backtrack one insn at a time. If idx is not at the top of the recorded
 1692 * history then the previous instruction came from straight-line execution.
1693 */
1694static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
1695 u32 *history)
1696{
1697 u32 cnt = *history;
1698
1699 if (cnt && st->jmp_history[cnt - 1].idx == i) {
1700 i = st->jmp_history[cnt - 1].prev_idx;
1701 (*history)--;
1702 } else {
1703 i--;
1704 }
1705 return i;
1706}
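/* Rough example of the walk (insn indexes are made up and entries pushed
 * at other pruning points are omitted):
 *
 *   10: if r1 > 5 goto +4    // taken, execution continues at insn 15
 *   ...
 *   jmp_history contains {idx = 15, prev_idx = 10}
 *
 * Backtracking from i = 16: the top entry's idx is 15 != 16, so i simply
 * becomes 15 (straight line). At i = 15 the top entry matches, so the walk
 * jumps back to prev_idx = 10 and the entry is consumed.
 */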
1707
 1708/* For a given verifier state backtrack_insn() is called from the last insn to
 1709 * the first insn. Its purpose is to compute a bitmask of registers and
 1710 * stack slots that need precision in the parent verifier state.
1711 */
1712static int backtrack_insn(struct bpf_verifier_env *env, int idx,
1713 u32 *reg_mask, u64 *stack_mask)
1714{
1715 const struct bpf_insn_cbs cbs = {
1716 .cb_print = verbose,
1717 .private_data = env,
1718 };
1719 struct bpf_insn *insn = env->prog->insnsi + idx;
1720 u8 class = BPF_CLASS(insn->code);
1721 u8 opcode = BPF_OP(insn->code);
1722 u8 mode = BPF_MODE(insn->code);
1723 u32 dreg = 1u << insn->dst_reg;
1724 u32 sreg = 1u << insn->src_reg;
1725 u32 spi;
1726
1727 if (insn->code == 0)
1728 return 0;
1729 if (env->log.level & BPF_LOG_LEVEL) {
1730 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1731 verbose(env, "%d: ", idx);
1732 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1733 }
1734
1735 if (class == BPF_ALU || class == BPF_ALU64) {
1736 if (!(*reg_mask & dreg))
1737 return 0;
1738 if (opcode == BPF_MOV) {
1739 if (BPF_SRC(insn->code) == BPF_X) {
1740 /* dreg = sreg
1741 * dreg needs precision after this insn
1742 * sreg needs precision before this insn
1743 */
1744 *reg_mask &= ~dreg;
1745 *reg_mask |= sreg;
1746 } else {
1747 /* dreg = K
1748 * dreg needs precision after this insn.
1749 * Corresponding register is already marked
1750 * as precise=true in this verifier state.
1751 * No further markings in parent are necessary
1752 */
1753 *reg_mask &= ~dreg;
1754 }
1755 } else {
1756 if (BPF_SRC(insn->code) == BPF_X) {
1757 /* dreg += sreg
1758 * both dreg and sreg need precision
1759 * before this insn
1760 */
1761 *reg_mask |= sreg;
1762 } /* else dreg += K
1763 * dreg still needs precision before this insn
1764 */
1765 }
1766 } else if (class == BPF_LDX) {
1767 if (!(*reg_mask & dreg))
1768 return 0;
1769 *reg_mask &= ~dreg;
1770
1771 /* scalars can only be spilled into stack w/o losing precision.
1772 * Load from any other memory can be zero extended.
1773 * The desire to keep that precision is already indicated
1774 * by 'precise' mark in corresponding register of this state.
1775 * No further tracking necessary.
1776 */
1777 if (insn->src_reg != BPF_REG_FP)
1778 return 0;
1779 if (BPF_SIZE(insn->code) != BPF_DW)
1780 return 0;
1781
1782 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1783 * that [fp - off] slot contains scalar that needs to be
1784 * tracked with precision
1785 */
1786 spi = (-insn->off - 1) / BPF_REG_SIZE;
1787 if (spi >= 64) {
1788 verbose(env, "BUG spi %d\n", spi);
1789 WARN_ONCE(1, "verifier backtracking bug");
1790 return -EFAULT;
1791 }
1792 *stack_mask |= 1ull << spi;
b3b50f05 1793 } else if (class == BPF_STX || class == BPF_ST) {
b5dc0163 1794 if (*reg_mask & dreg)
b3b50f05 1795 /* stx & st shouldn't be using _scalar_ dst_reg
b5dc0163
AS
1796 * to access memory. It means backtracking
1797 * encountered a case of pointer subtraction.
1798 */
1799 return -ENOTSUPP;
1800 /* scalars can only be spilled into stack */
1801 if (insn->dst_reg != BPF_REG_FP)
1802 return 0;
1803 if (BPF_SIZE(insn->code) != BPF_DW)
1804 return 0;
1805 spi = (-insn->off - 1) / BPF_REG_SIZE;
1806 if (spi >= 64) {
1807 verbose(env, "BUG spi %d\n", spi);
1808 WARN_ONCE(1, "verifier backtracking bug");
1809 return -EFAULT;
1810 }
1811 if (!(*stack_mask & (1ull << spi)))
1812 return 0;
1813 *stack_mask &= ~(1ull << spi);
b3b50f05
AN
1814 if (class == BPF_STX)
1815 *reg_mask |= sreg;
b5dc0163
AS
1816 } else if (class == BPF_JMP || class == BPF_JMP32) {
1817 if (opcode == BPF_CALL) {
1818 if (insn->src_reg == BPF_PSEUDO_CALL)
1819 return -ENOTSUPP;
1820 /* regular helper call sets R0 */
1821 *reg_mask &= ~1;
1822 if (*reg_mask & 0x3f) {
1823 /* if backtracing was looking for registers R1-R5
1824 * they should have been found already.
1825 */
1826 verbose(env, "BUG regs %x\n", *reg_mask);
1827 WARN_ONCE(1, "verifier backtracking bug");
1828 return -EFAULT;
1829 }
1830 } else if (opcode == BPF_EXIT) {
1831 return -ENOTSUPP;
1832 }
1833 } else if (class == BPF_LD) {
1834 if (!(*reg_mask & dreg))
1835 return 0;
1836 *reg_mask &= ~dreg;
1837 /* It's ld_imm64 or ld_abs or ld_ind.
1838 * For ld_imm64 no further tracking of precision
1839 * into parent is necessary
1840 */
1841 if (mode == BPF_IND || mode == BPF_ABS)
1842 /* to be analyzed */
1843 return -ENOTSUPP;
b5dc0163
AS
1844 }
1845 return 0;
1846}
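/* Worked example of a single backtrack_insn() step (registers are
 * illustrative):
 *
 *   insn:    r5 = r9          (BPF_ALU64 | BPF_MOV | BPF_X)
 *   before:  reg_mask = 0x020 (tracking r5), stack_mask = 0
 *   after:   reg_mask = 0x200 (tracking r9), stack_mask = 0
 *
 * Precision of r5 after this insn requires precision of r9 before it,
 * so the r5 bit is cleared and the r9 bit is set.
 */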
1847
1848/* the scalar precision tracking algorithm:
1849 * . at the start all registers have precise=false.
1850 * . scalar ranges are tracked as normal through alu and jmp insns.
1851 * . once precise value of the scalar register is used in:
1852 * . ptr + scalar alu
1853 * . if (scalar cond K|scalar)
1854 * . helper_call(.., scalar, ...) where ARG_CONST is expected
 1855 * backtrack through the verifier states and mark all registers and
 1856 * stack slots with spilled constants that contributed to these scalar
 1857 * registers as needing to be precise.
1858 * . during state pruning two registers (or spilled stack slots)
1859 * are equivalent if both are not precise.
1860 *
 1861 * Note the verifier cannot simply walk the register parentage chain,
 1862 * since many different registers and stack slots could have been
 1863 * used to compute a single precise scalar.
1864 *
 1865 * The approach of starting with precise=true for all registers and then
 1866 * backtracking to mark a register as not precise when the verifier detects
 1867 * that the program doesn't care about the specific value (e.g., when a helper
 1868 * takes a register as an ARG_ANYTHING parameter) is not safe.
1869 *
 1870 * It's ok to walk a single parentage chain of the verifier states.
 1871 * It's possible that this backtracking will go all the way to the 1st insn.
1872 * All other branches will be explored for needing precision later.
1873 *
1874 * The backtracking needs to deal with cases like:
1875 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1876 * r9 -= r8
1877 * r5 = r9
1878 * if r5 > 0x79f goto pc+7
1879 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1880 * r5 += 1
1881 * ...
1882 * call bpf_perf_event_output#25
1883 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1884 *
1885 * and this case:
1886 * r6 = 1
1887 * call foo // uses callee's r6 inside to compute r0
1888 * r0 += r6
1889 * if r0 == 0 goto
1890 *
1891 * to track above reg_mask/stack_mask needs to be independent for each frame.
1892 *
1893 * Also if parent's curframe > frame where backtracking started,
 1894 * the verifier needs to mark registers in both frames, otherwise callees
1895 * may incorrectly prune callers. This is similar to
1896 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1897 *
1898 * For now backtracking falls back into conservative marking.
1899 */
1900static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1901 struct bpf_verifier_state *st)
1902{
1903 struct bpf_func_state *func;
1904 struct bpf_reg_state *reg;
1905 int i, j;
1906
1907 /* big hammer: mark all scalars precise in this path.
1908 * pop_stack may still get !precise scalars.
1909 */
1910 for (; st; st = st->parent)
1911 for (i = 0; i <= st->curframe; i++) {
1912 func = st->frame[i];
1913 for (j = 0; j < BPF_REG_FP; j++) {
1914 reg = &func->regs[j];
1915 if (reg->type != SCALAR_VALUE)
1916 continue;
1917 reg->precise = true;
1918 }
1919 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
1920 if (func->stack[j].slot_type[0] != STACK_SPILL)
1921 continue;
1922 reg = &func->stack[j].spilled_ptr;
1923 if (reg->type != SCALAR_VALUE)
1924 continue;
1925 reg->precise = true;
1926 }
1927 }
1928}
1929
a3ce685d
AS
1930static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
1931 int spi)
b5dc0163
AS
1932{
1933 struct bpf_verifier_state *st = env->cur_state;
1934 int first_idx = st->first_insn_idx;
1935 int last_idx = env->insn_idx;
1936 struct bpf_func_state *func;
1937 struct bpf_reg_state *reg;
a3ce685d
AS
1938 u32 reg_mask = regno >= 0 ? 1u << regno : 0;
1939 u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
b5dc0163 1940 bool skip_first = true;
a3ce685d 1941 bool new_marks = false;
b5dc0163
AS
1942 int i, err;
1943
1944 if (!env->allow_ptr_leaks)
1945 /* backtracking is root only for now */
1946 return 0;
1947
1948 func = st->frame[st->curframe];
a3ce685d
AS
1949 if (regno >= 0) {
1950 reg = &func->regs[regno];
1951 if (reg->type != SCALAR_VALUE) {
 1952 WARN_ONCE(1, "backtracking misuse");
1953 return -EFAULT;
1954 }
1955 if (!reg->precise)
1956 new_marks = true;
1957 else
1958 reg_mask = 0;
1959 reg->precise = true;
b5dc0163 1960 }
b5dc0163 1961
a3ce685d
AS
1962 while (spi >= 0) {
1963 if (func->stack[spi].slot_type[0] != STACK_SPILL) {
1964 stack_mask = 0;
1965 break;
1966 }
1967 reg = &func->stack[spi].spilled_ptr;
1968 if (reg->type != SCALAR_VALUE) {
1969 stack_mask = 0;
1970 break;
1971 }
1972 if (!reg->precise)
1973 new_marks = true;
1974 else
1975 stack_mask = 0;
1976 reg->precise = true;
1977 break;
1978 }
1979
1980 if (!new_marks)
1981 return 0;
1982 if (!reg_mask && !stack_mask)
1983 return 0;
b5dc0163
AS
1984 for (;;) {
1985 DECLARE_BITMAP(mask, 64);
b5dc0163
AS
1986 u32 history = st->jmp_history_cnt;
1987
1988 if (env->log.level & BPF_LOG_LEVEL)
1989 verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
1990 for (i = last_idx;;) {
1991 if (skip_first) {
1992 err = 0;
1993 skip_first = false;
1994 } else {
1995 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
1996 }
1997 if (err == -ENOTSUPP) {
1998 mark_all_scalars_precise(env, st);
1999 return 0;
2000 } else if (err) {
2001 return err;
2002 }
2003 if (!reg_mask && !stack_mask)
2004 /* Found assignment(s) into tracked register in this state.
2005 * Since this state is already marked, just return.
2006 * Nothing to be tracked further in the parent state.
2007 */
2008 return 0;
2009 if (i == first_idx)
2010 break;
2011 i = get_prev_insn_idx(st, i, &history);
2012 if (i >= env->prog->len) {
2013 /* This can happen if backtracking reached insn 0
2014 * and there are still reg_mask or stack_mask
2015 * to backtrack.
2016 * It means the backtracking missed the spot where
2017 * particular register was initialized with a constant.
2018 */
2019 verbose(env, "BUG backtracking idx %d\n", i);
2020 WARN_ONCE(1, "verifier backtracking bug");
2021 return -EFAULT;
2022 }
2023 }
2024 st = st->parent;
2025 if (!st)
2026 break;
2027
a3ce685d 2028 new_marks = false;
b5dc0163
AS
2029 func = st->frame[st->curframe];
2030 bitmap_from_u64(mask, reg_mask);
2031 for_each_set_bit(i, mask, 32) {
2032 reg = &func->regs[i];
a3ce685d
AS
2033 if (reg->type != SCALAR_VALUE) {
2034 reg_mask &= ~(1u << i);
b5dc0163 2035 continue;
a3ce685d 2036 }
b5dc0163
AS
2037 if (!reg->precise)
2038 new_marks = true;
2039 reg->precise = true;
2040 }
2041
2042 bitmap_from_u64(mask, stack_mask);
2043 for_each_set_bit(i, mask, 64) {
2044 if (i >= func->allocated_stack / BPF_REG_SIZE) {
2339cd6c
AS
2045 /* the sequence of instructions:
2046 * 2: (bf) r3 = r10
2047 * 3: (7b) *(u64 *)(r3 -8) = r0
2048 * 4: (79) r4 = *(u64 *)(r10 -8)
2049 * doesn't contain jmps. It's backtracked
2050 * as a single block.
2051 * During backtracking insn 3 is not recognized as
2052 * stack access, so at the end of backtracking
2053 * stack slot fp-8 is still marked in stack_mask.
2054 * However the parent state may not have accessed
2055 * fp-8 and it's "unallocated" stack space.
2056 * In such case fallback to conservative.
b5dc0163 2057 */
2339cd6c
AS
2058 mark_all_scalars_precise(env, st);
2059 return 0;
b5dc0163
AS
2060 }
2061
a3ce685d
AS
2062 if (func->stack[i].slot_type[0] != STACK_SPILL) {
2063 stack_mask &= ~(1ull << i);
b5dc0163 2064 continue;
a3ce685d 2065 }
b5dc0163 2066 reg = &func->stack[i].spilled_ptr;
a3ce685d
AS
2067 if (reg->type != SCALAR_VALUE) {
2068 stack_mask &= ~(1ull << i);
b5dc0163 2069 continue;
a3ce685d 2070 }
b5dc0163
AS
2071 if (!reg->precise)
2072 new_marks = true;
2073 reg->precise = true;
2074 }
2075 if (env->log.level & BPF_LOG_LEVEL) {
2076 print_verifier_state(env, func);
2077 verbose(env, "parent %s regs=%x stack=%llx marks\n",
2078 new_marks ? "didn't have" : "already had",
2079 reg_mask, stack_mask);
2080 }
2081
a3ce685d
AS
2082 if (!reg_mask && !stack_mask)
2083 break;
b5dc0163
AS
2084 if (!new_marks)
2085 break;
2086
2087 last_idx = st->last_insn_idx;
2088 first_idx = st->first_insn_idx;
2089 }
2090 return 0;
2091}
2092
a3ce685d
AS
2093static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2094{
2095 return __mark_chain_precision(env, regno, -1);
2096}
2097
2098static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2099{
2100 return __mark_chain_precision(env, -1, spi);
2101}
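/* Rough example of when these wrappers are invoked (program is made up):
 *
 *   1: r2 = 8
 *   2: ...
 *   3: if r2 > 5 goto +N     // the branch outcome depends on the exact
 *                            // value of r2, so mark_chain_precision() is
 *                            // called for r2 and backtracks to insn 1,
 *                            // marking r2 precise. From then on states
 *                            // that differ only in the value of r2 are
 *                            // no longer pruned as equivalent.
 */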
b5dc0163 2102
1be7f75d
AS
2103static bool is_spillable_regtype(enum bpf_reg_type type)
2104{
2105 switch (type) {
2106 case PTR_TO_MAP_VALUE:
2107 case PTR_TO_MAP_VALUE_OR_NULL:
2108 case PTR_TO_STACK:
2109 case PTR_TO_CTX:
969bf05e 2110 case PTR_TO_PACKET:
de8f3a83 2111 case PTR_TO_PACKET_META:
969bf05e 2112 case PTR_TO_PACKET_END:
d58e468b 2113 case PTR_TO_FLOW_KEYS:
1be7f75d 2114 case CONST_PTR_TO_MAP:
c64b7983
JS
2115 case PTR_TO_SOCKET:
2116 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
2117 case PTR_TO_SOCK_COMMON:
2118 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
2119 case PTR_TO_TCP_SOCK:
2120 case PTR_TO_TCP_SOCK_OR_NULL:
fada7fdc 2121 case PTR_TO_XDP_SOCK:
65726b5b 2122 case PTR_TO_BTF_ID:
1be7f75d
AS
2123 return true;
2124 default:
2125 return false;
2126 }
2127}
2128
cc2b14d5
AS
2129/* Does this register contain a constant zero? */
2130static bool register_is_null(struct bpf_reg_state *reg)
2131{
2132 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2133}
2134
f7cf25b2
AS
2135static bool register_is_const(struct bpf_reg_state *reg)
2136{
2137 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2138}
2139
6e7e63cb
JH
2140static bool __is_pointer_value(bool allow_ptr_leaks,
2141 const struct bpf_reg_state *reg)
2142{
2143 if (allow_ptr_leaks)
2144 return false;
2145
2146 return reg->type != SCALAR_VALUE;
2147}
2148
f7cf25b2
AS
2149static void save_register_state(struct bpf_func_state *state,
2150 int spi, struct bpf_reg_state *reg)
2151{
2152 int i;
2153
2154 state->stack[spi].spilled_ptr = *reg;
2155 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2156
2157 for (i = 0; i < BPF_REG_SIZE; i++)
2158 state->stack[spi].slot_type[i] = STACK_SPILL;
2159}
2160
17a52670
AS
2161/* check_stack_read/write functions track spill/fill of registers,
2162 * stack boundary and alignment are checked in check_mem_access()
2163 */
61bd5218 2164static int check_stack_write(struct bpf_verifier_env *env,
f4d7e40a 2165 struct bpf_func_state *state, /* func where register points to */
af86ca4e 2166 int off, int size, int value_regno, int insn_idx)
17a52670 2167{
f4d7e40a 2168 struct bpf_func_state *cur; /* state of the current function */
638f5b90 2169 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
b5dc0163 2170 u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
f7cf25b2 2171 struct bpf_reg_state *reg = NULL;
638f5b90 2172
f4d7e40a 2173 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
fd978bf7 2174 state->acquired_refs, true);
638f5b90
AS
2175 if (err)
2176 return err;
9c399760
AS
2177 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2178 * so it's aligned access and [off, off + size) are within stack limits
2179 */
638f5b90
AS
2180 if (!env->allow_ptr_leaks &&
2181 state->stack[spi].slot_type[0] == STACK_SPILL &&
2182 size != BPF_REG_SIZE) {
2183 verbose(env, "attempt to corrupt spilled pointer on stack\n");
2184 return -EACCES;
2185 }
17a52670 2186
f4d7e40a 2187 cur = env->cur_state->frame[env->cur_state->curframe];
f7cf25b2
AS
2188 if (value_regno >= 0)
2189 reg = &cur->regs[value_regno];
17a52670 2190
f7cf25b2
AS
2191 if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
2192 !register_is_null(reg) && env->allow_ptr_leaks) {
b5dc0163
AS
2193 if (dst_reg != BPF_REG_FP) {
 2194 /* The backtracking logic can only recognize an explicit
 2195 * stack slot address like [fp - 8]. Any other spill of a
 2196 * scalar via a different register has to be conservative.
 2197 * Backtrack from here and mark as precise all registers
 2198 * that contributed to 'reg' being a constant.
2199 */
2200 err = mark_chain_precision(env, value_regno);
2201 if (err)
2202 return err;
2203 }
f7cf25b2
AS
2204 save_register_state(state, spi, reg);
2205 } else if (reg && is_spillable_regtype(reg->type)) {
17a52670 2206 /* register containing pointer is being spilled into stack */
9c399760 2207 if (size != BPF_REG_SIZE) {
f7cf25b2 2208 verbose_linfo(env, insn_idx, "; ");
61bd5218 2209 verbose(env, "invalid size of register spill\n");
17a52670
AS
2210 return -EACCES;
2211 }
2212
f7cf25b2 2213 if (state != cur && reg->type == PTR_TO_STACK) {
f4d7e40a
AS
2214 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2215 return -EINVAL;
2216 }
2217
f7cf25b2
AS
2218 if (!env->allow_ptr_leaks) {
2219 bool sanitize = false;
17a52670 2220
f7cf25b2
AS
2221 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
2222 register_is_const(&state->stack[spi].spilled_ptr))
2223 sanitize = true;
2224 for (i = 0; i < BPF_REG_SIZE; i++)
2225 if (state->stack[spi].slot_type[i] == STACK_MISC) {
2226 sanitize = true;
2227 break;
2228 }
2229 if (sanitize) {
af86ca4e
AS
2230 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
2231 int soff = (-spi - 1) * BPF_REG_SIZE;
2232
 2233 /* detected reuse of an integer stack slot with a pointer,
 2234 * which means either llvm is reusing the stack slot or
 2235 * an attacker is trying to exploit CVE-2018-3639
 2236 * (speculative store bypass).
 2237 * Have to sanitize that slot with a preemptive
 2238 * store of zero.
2239 */
2240 if (*poff && *poff != soff) {
2241 /* disallow programs where single insn stores
2242 * into two different stack slots, since verifier
2243 * cannot sanitize them
2244 */
2245 verbose(env,
2246 "insn %d cannot access two stack slots fp%d and fp%d",
2247 insn_idx, *poff, soff);
2248 return -EINVAL;
2249 }
2250 *poff = soff;
2251 }
af86ca4e 2252 }
f7cf25b2 2253 save_register_state(state, spi, reg);
9c399760 2254 } else {
cc2b14d5
AS
2255 u8 type = STACK_MISC;
2256
679c782d
EC
2257 /* regular write of data into stack destroys any spilled ptr */
2258 state->stack[spi].spilled_ptr.type = NOT_INIT;
0bae2d4d
JW
2259 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2260 if (state->stack[spi].slot_type[0] == STACK_SPILL)
2261 for (i = 0; i < BPF_REG_SIZE; i++)
2262 state->stack[spi].slot_type[i] = STACK_MISC;
9c399760 2263
cc2b14d5
AS
2264 /* only mark the slot as written if all 8 bytes were written
2265 * otherwise read propagation may incorrectly stop too soon
2266 * when stack slots are partially written.
2267 * This heuristic means that read propagation will be
2268 * conservative, since it will add reg_live_read marks
 2269 * to stack slots all the way to the first state when a program
 2270 * writes+reads less than 8 bytes.
2271 */
2272 if (size == BPF_REG_SIZE)
2273 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2274
2275 /* when we zero initialize stack slots mark them as such */
b5dc0163
AS
2276 if (reg && register_is_null(reg)) {
2277 /* backtracking doesn't work for STACK_ZERO yet. */
2278 err = mark_chain_precision(env, value_regno);
2279 if (err)
2280 return err;
cc2b14d5 2281 type = STACK_ZERO;
b5dc0163 2282 }
cc2b14d5 2283
0bae2d4d 2284 /* Mark slots affected by this stack write. */
9c399760 2285 for (i = 0; i < size; i++)
638f5b90 2286 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
cc2b14d5 2287 type;
17a52670
AS
2288 }
2289 return 0;
2290}
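/* Illustrative outcomes of check_stack_write() (offsets and registers
 * are made up):
 *
 *   *(u64 *)(r10 - 8) = r1   with r1 = PTR_TO_MAP_VALUE
 *       -> fp-8 becomes STACK_SPILL and spilled_ptr remembers the pointer.
 *
 *   *(u32 *)(r10 - 16) = r2  with r2 an arbitrary scalar
 *       -> only 4 of the 8 slot_type bytes become STACK_MISC and the slot
 *          is not marked REG_LIVE_WRITTEN, keeping read propagation
 *          conservative.
 *
 *   *(u64 *)(r10 - 24) = r3  with r3 a known zero
 *       -> all 8 bytes become STACK_ZERO, so a later read can be tracked
 *          as constant zero.
 */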
2291
61bd5218 2292static int check_stack_read(struct bpf_verifier_env *env,
f4d7e40a
AS
2293 struct bpf_func_state *reg_state /* func where register points to */,
2294 int off, int size, int value_regno)
17a52670 2295{
f4d7e40a
AS
2296 struct bpf_verifier_state *vstate = env->cur_state;
2297 struct bpf_func_state *state = vstate->frame[vstate->curframe];
638f5b90 2298 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
f7cf25b2 2299 struct bpf_reg_state *reg;
638f5b90 2300 u8 *stype;
17a52670 2301
f4d7e40a 2302 if (reg_state->allocated_stack <= slot) {
638f5b90
AS
2303 verbose(env, "invalid read from stack off %d+0 size %d\n",
2304 off, size);
2305 return -EACCES;
2306 }
f4d7e40a 2307 stype = reg_state->stack[spi].slot_type;
f7cf25b2 2308 reg = &reg_state->stack[spi].spilled_ptr;
17a52670 2309
638f5b90 2310 if (stype[0] == STACK_SPILL) {
9c399760 2311 if (size != BPF_REG_SIZE) {
f7cf25b2
AS
2312 if (reg->type != SCALAR_VALUE) {
2313 verbose_linfo(env, env->insn_idx, "; ");
2314 verbose(env, "invalid size of register fill\n");
2315 return -EACCES;
2316 }
2317 if (value_regno >= 0) {
2318 mark_reg_unknown(env, state->regs, value_regno);
2319 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2320 }
2321 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2322 return 0;
17a52670 2323 }
9c399760 2324 for (i = 1; i < BPF_REG_SIZE; i++) {
638f5b90 2325 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
61bd5218 2326 verbose(env, "corrupted spill memory\n");
17a52670
AS
2327 return -EACCES;
2328 }
2329 }
2330
dc503a8a 2331 if (value_regno >= 0) {
17a52670 2332 /* restore register state from stack */
f7cf25b2 2333 state->regs[value_regno] = *reg;
2f18f62e
AS
2334 /* mark reg as written since spilled pointer state likely
2335 * has its liveness marks cleared by is_state_visited()
2336 * which resets stack/reg liveness for state transitions
2337 */
2338 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
6e7e63cb
JH
2339 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2340 /* If value_regno==-1, the caller is asking us whether
2341 * it is acceptable to use this value as a SCALAR_VALUE
2342 * (e.g. for XADD).
2343 * We must not allow unprivileged callers to do that
2344 * with spilled pointers.
2345 */
2346 verbose(env, "leaking pointer from stack off %d\n",
2347 off);
2348 return -EACCES;
dc503a8a 2349 }
f7cf25b2 2350 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
17a52670 2351 } else {
cc2b14d5
AS
2352 int zeros = 0;
2353
17a52670 2354 for (i = 0; i < size; i++) {
cc2b14d5
AS
2355 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
2356 continue;
2357 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
2358 zeros++;
2359 continue;
17a52670 2360 }
cc2b14d5
AS
2361 verbose(env, "invalid read from stack off %d+%d size %d\n",
2362 off, i, size);
2363 return -EACCES;
2364 }
f7cf25b2 2365 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
cc2b14d5
AS
2366 if (value_regno >= 0) {
2367 if (zeros == size) {
2368 /* any size read into register is zero extended,
2369 * so the whole register == const_zero
2370 */
2371 __mark_reg_const_zero(&state->regs[value_regno]);
b5dc0163
AS
2372 /* backtracking doesn't support STACK_ZERO yet,
2373 * so mark it precise here, so that later
2374 * backtracking can stop here.
2375 * Backtracking may not need this if this register
2376 * doesn't participate in pointer adjustment.
2377 * Forward propagation of precise flag is not
2378 * necessary either. This mark is only to stop
2379 * backtracking. Any register that contributed
2380 * to const 0 was marked precise before spill.
2381 */
2382 state->regs[value_regno].precise = true;
cc2b14d5
AS
2383 } else {
2384 /* have read misc data from the stack */
2385 mark_reg_unknown(env, state->regs, value_regno);
2386 }
2387 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
17a52670 2388 }
17a52670 2389 }
f7cf25b2 2390 return 0;
17a52670
AS
2391}
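/* Illustrative fills for check_stack_read() (offsets are made up):
 *
 *   r1 = *(u64 *)(r10 - 8)   where fp-8 is a STACK_SPILL of a pointer
 *       -> r1 gets the full spilled register state back.
 *
 *   r1 = *(u32 *)(r10 - 8)   where fp-8 is a STACK_SPILL of a pointer
 *       -> rejected with "invalid size of register fill".
 *
 *   r1 = *(u32 *)(r10 - 16)  where all four bytes read are STACK_ZERO
 *       -> r1 becomes a known constant zero and is marked precise, since
 *          backtracking can't see through STACK_ZERO.
 */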
2392
e4298d25
DB
2393static int check_stack_access(struct bpf_verifier_env *env,
2394 const struct bpf_reg_state *reg,
2395 int off, int size)
2396{
2397 /* Stack accesses must be at a fixed offset, so that we
2398 * can determine what type of data were returned. See
2399 * check_stack_read().
2400 */
2401 if (!tnum_is_const(reg->var_off)) {
2402 char tn_buf[48];
2403
2404 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1fbd20f8 2405 verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
e4298d25
DB
2406 tn_buf, off, size);
2407 return -EACCES;
2408 }
2409
2410 if (off >= 0 || off < -MAX_BPF_STACK) {
2411 verbose(env, "invalid stack off=%d size=%d\n", off, size);
2412 return -EACCES;
2413 }
2414
2415 return 0;
2416}
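/* Example of what the constant var_off requirement rejects (illustrative):
 *
 *   r2 = r10
 *   r2 += r3                 // r3 is an unknown scalar
 *   r1 = *(u64 *)(r2 - 8)    // var_off is no longer constant
 *       -> "variable stack access var_off=..." even if every possible
 *          offset would stay inside the stack.
 */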
2417
591fe988
DB
2418static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
2419 int off, int size, enum bpf_access_type type)
2420{
2421 struct bpf_reg_state *regs = cur_regs(env);
2422 struct bpf_map *map = regs[regno].map_ptr;
2423 u32 cap = bpf_map_flags_to_cap(map);
2424
2425 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2426 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
2427 map->value_size, off, size);
2428 return -EACCES;
2429 }
2430
2431 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2432 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
2433 map->value_size, off, size);
2434 return -EACCES;
2435 }
2436
2437 return 0;
2438}
2439
17a52670 2440/* check read/write into map element returned by bpf_map_lookup_elem() */
f1174f77 2441static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 2442 int size, bool zero_size_allowed)
17a52670 2443{
638f5b90
AS
2444 struct bpf_reg_state *regs = cur_regs(env);
2445 struct bpf_map *map = regs[regno].map_ptr;
17a52670 2446
9fd29c08
YS
2447 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
2448 off + size > map->value_size) {
61bd5218 2449 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
17a52670
AS
2450 map->value_size, off, size);
2451 return -EACCES;
2452 }
2453 return 0;
2454}
2455
f1174f77
EC
2456/* check read/write into a map element with possible variable offset */
2457static int check_map_access(struct bpf_verifier_env *env, u32 regno,
9fd29c08 2458 int off, int size, bool zero_size_allowed)
dbcfe5f7 2459{
f4d7e40a
AS
2460 struct bpf_verifier_state *vstate = env->cur_state;
2461 struct bpf_func_state *state = vstate->frame[vstate->curframe];
dbcfe5f7
GB
2462 struct bpf_reg_state *reg = &state->regs[regno];
2463 int err;
2464
f1174f77
EC
2465 /* We may have adjusted the register to this map value, so we
2466 * need to try adding each of min_value and max_value to off
2467 * to make sure our theoretical access will be safe.
dbcfe5f7 2468 */
06ee7115 2469 if (env->log.level & BPF_LOG_LEVEL)
61bd5218 2470 print_verifier_state(env, state);
b7137c4e 2471
dbcfe5f7
GB
2472 /* The minimum value is only important with signed
2473 * comparisons where we can't assume the floor of a
2474 * value is 0. If we are using signed variables for our
 2475 * indexes we need to make sure that whatever we use
2476 * will have a set floor within our range.
2477 */
b7137c4e
DB
2478 if (reg->smin_value < 0 &&
2479 (reg->smin_value == S64_MIN ||
2480 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
2481 reg->smin_value + off < 0)) {
61bd5218 2482 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
dbcfe5f7
GB
2483 regno);
2484 return -EACCES;
2485 }
9fd29c08
YS
2486 err = __check_map_access(env, regno, reg->smin_value + off, size,
2487 zero_size_allowed);
dbcfe5f7 2488 if (err) {
61bd5218
JK
2489 verbose(env, "R%d min value is outside of the array range\n",
2490 regno);
dbcfe5f7
GB
2491 return err;
2492 }
2493
b03c9f9f
EC
2494 /* If we haven't set a max value then we need to bail since we can't be
2495 * sure we won't do bad things.
2496 * If reg->umax_value + off could overflow, treat that as unbounded too.
dbcfe5f7 2497 */
b03c9f9f 2498 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
61bd5218 2499 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
dbcfe5f7
GB
2500 regno);
2501 return -EACCES;
2502 }
9fd29c08
YS
2503 err = __check_map_access(env, regno, reg->umax_value + off, size,
2504 zero_size_allowed);
f1174f77 2505 if (err)
61bd5218
JK
2506 verbose(env, "R%d max value is outside of the array range\n",
2507 regno);
d83525ca
AS
2508
2509 if (map_value_has_spin_lock(reg->map_ptr)) {
2510 u32 lock = reg->map_ptr->spin_lock_off;
2511
2512 /* if any part of struct bpf_spin_lock can be touched by
2513 * load/store reject this program.
2514 * To check that [x1, x2) overlaps with [y1, y2)
2515 * it is sufficient to check x1 < y2 && y1 < x2.
2516 */
2517 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
2518 lock < reg->umax_value + off + size) {
2519 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2520 return -EACCES;
2521 }
2522 }
f1174f77 2523 return err;
dbcfe5f7
GB
2524}
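/* Worked example for the variable-offset checks above (numbers are
 * made up):
 *
 *   map->value_size = 48, off = 8, size = 4
 *   reg->smin_value = 0, reg->umax_value = 40
 *
 *   __check_map_access(.., 8 + 0, 4)  -> ok, bytes 8..11 are in bounds
 *   __check_map_access(.., 8 + 40, 4) -> bytes 48..51 are past the end,
 *          so the access is rejected ("max value is outside of the array
 *          range").
 */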
2525
969bf05e
AS
2526#define MAX_PACKET_OFF 0xffff
2527
58e2af8b 2528static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3a0af8fd
TG
2529 const struct bpf_call_arg_meta *meta,
2530 enum bpf_access_type t)
4acf6c0b 2531{
36bbef52 2532 switch (env->prog->type) {
5d66fa7d 2533 /* Program types only with direct read access go here! */
3a0af8fd
TG
2534 case BPF_PROG_TYPE_LWT_IN:
2535 case BPF_PROG_TYPE_LWT_OUT:
004d4b27 2536 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2dbb9b9e 2537 case BPF_PROG_TYPE_SK_REUSEPORT:
5d66fa7d 2538 case BPF_PROG_TYPE_FLOW_DISSECTOR:
d5563d36 2539 case BPF_PROG_TYPE_CGROUP_SKB:
3a0af8fd
TG
2540 if (t == BPF_WRITE)
2541 return false;
7e57fbb2 2542 /* fallthrough */
5d66fa7d
DB
2543
2544 /* Program types with direct read + write access go here! */
36bbef52
DB
2545 case BPF_PROG_TYPE_SCHED_CLS:
2546 case BPF_PROG_TYPE_SCHED_ACT:
4acf6c0b 2547 case BPF_PROG_TYPE_XDP:
3a0af8fd 2548 case BPF_PROG_TYPE_LWT_XMIT:
8a31db56 2549 case BPF_PROG_TYPE_SK_SKB:
4f738adb 2550 case BPF_PROG_TYPE_SK_MSG:
36bbef52
DB
2551 if (meta)
2552 return meta->pkt_access;
2553
2554 env->seen_direct_write = true;
4acf6c0b 2555 return true;
0d01da6a
SF
2556
2557 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2558 if (t == BPF_WRITE)
2559 env->seen_direct_write = true;
2560
2561 return true;
2562
4acf6c0b
BB
2563 default:
2564 return false;
2565 }
2566}
2567
f1174f77 2568static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
9fd29c08 2569 int off, int size, bool zero_size_allowed)
969bf05e 2570{
638f5b90 2571 struct bpf_reg_state *regs = cur_regs(env);
58e2af8b 2572 struct bpf_reg_state *reg = &regs[regno];
969bf05e 2573
9fd29c08
YS
2574 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
2575 (u64)off + size > reg->range) {
61bd5218 2576 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
d91b28ed 2577 off, size, regno, reg->id, reg->off, reg->range);
969bf05e
AS
2578 return -EACCES;
2579 }
2580 return 0;
2581}
2582
f1174f77 2583static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 2584 int size, bool zero_size_allowed)
f1174f77 2585{
638f5b90 2586 struct bpf_reg_state *regs = cur_regs(env);
f1174f77
EC
2587 struct bpf_reg_state *reg = &regs[regno];
2588 int err;
2589
2590 /* We may have added a variable offset to the packet pointer; but any
2591 * reg->range we have comes after that. We are only checking the fixed
2592 * offset.
2593 */
2594
2595 /* We don't allow negative numbers, because we aren't tracking enough
2596 * detail to prove they're safe.
2597 */
b03c9f9f 2598 if (reg->smin_value < 0) {
61bd5218 2599 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
f1174f77
EC
2600 regno);
2601 return -EACCES;
2602 }
9fd29c08 2603 err = __check_packet_access(env, regno, off, size, zero_size_allowed);
f1174f77 2604 if (err) {
61bd5218 2605 verbose(env, "R%d offset is outside of the packet\n", regno);
f1174f77
EC
2606 return err;
2607 }
e647815a
JW
2608
2609 /* __check_packet_access has made sure "off + size - 1" is within u16.
2610 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
 2611 * otherwise find_good_pkt_pointers would have refused to set the range info
 2612 * and __check_packet_access would have rejected this pkt access.
2613 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2614 */
2615 env->prog->aux->max_pkt_offset =
2616 max_t(u32, env->prog->aux->max_pkt_offset,
2617 off + reg->umax_value + size - 1);
2618
f1174f77
EC
2619 return err;
2620}
2621
2622/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
31fd8581 2623static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
9e15db66
AS
2624 enum bpf_access_type t, enum bpf_reg_type *reg_type,
2625 u32 *btf_id)
17a52670 2626{
f96da094
DB
2627 struct bpf_insn_access_aux info = {
2628 .reg_type = *reg_type,
9e15db66 2629 .log = &env->log,
f96da094 2630 };
31fd8581 2631
4f9218aa 2632 if (env->ops->is_valid_access &&
5e43f899 2633 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
f96da094
DB
2634 /* A non zero info.ctx_field_size indicates that this field is a
2635 * candidate for later verifier transformation to load the whole
2636 * field and then apply a mask when accessed with a narrower
2637 * access than actual ctx access size. A zero info.ctx_field_size
2638 * will only allow for whole field access and rejects any other
2639 * type of narrower access.
31fd8581 2640 */
23994631 2641 *reg_type = info.reg_type;
31fd8581 2642
9e15db66
AS
2643 if (*reg_type == PTR_TO_BTF_ID)
2644 *btf_id = info.btf_id;
2645 else
2646 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
32bbe007
AS
2647 /* remember the offset of last byte accessed in ctx */
2648 if (env->prog->aux->max_ctx_offset < off + size)
2649 env->prog->aux->max_ctx_offset = off + size;
17a52670 2650 return 0;
32bbe007 2651 }
17a52670 2652
61bd5218 2653 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
17a52670
AS
2654 return -EACCES;
2655}
2656
d58e468b
PP
2657static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
2658 int size)
2659{
2660 if (size < 0 || off < 0 ||
2661 (u64)off + size > sizeof(struct bpf_flow_keys)) {
2662 verbose(env, "invalid access to flow keys off=%d size=%d\n",
2663 off, size);
2664 return -EACCES;
2665 }
2666 return 0;
2667}
2668
5f456649
MKL
2669static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
2670 u32 regno, int off, int size,
2671 enum bpf_access_type t)
c64b7983
JS
2672{
2673 struct bpf_reg_state *regs = cur_regs(env);
2674 struct bpf_reg_state *reg = &regs[regno];
5f456649 2675 struct bpf_insn_access_aux info = {};
46f8bc92 2676 bool valid;
c64b7983
JS
2677
2678 if (reg->smin_value < 0) {
2679 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2680 regno);
2681 return -EACCES;
2682 }
2683
46f8bc92
MKL
2684 switch (reg->type) {
2685 case PTR_TO_SOCK_COMMON:
2686 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
2687 break;
2688 case PTR_TO_SOCKET:
2689 valid = bpf_sock_is_valid_access(off, size, t, &info);
2690 break;
655a51e5
MKL
2691 case PTR_TO_TCP_SOCK:
2692 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
2693 break;
fada7fdc
JL
2694 case PTR_TO_XDP_SOCK:
2695 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
2696 break;
46f8bc92
MKL
2697 default:
2698 valid = false;
c64b7983
JS
2699 }
2700
5f456649 2701
46f8bc92
MKL
2702 if (valid) {
2703 env->insn_aux_data[insn_idx].ctx_field_size =
2704 info.ctx_field_size;
2705 return 0;
2706 }
2707
2708 verbose(env, "R%d invalid %s access off=%d size=%d\n",
2709 regno, reg_type_str[reg->type], off, size);
2710
2711 return -EACCES;
c64b7983
JS
2712}
2713
2a159c6f
DB
2714static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2715{
2716 return cur_regs(env) + regno;
2717}
2718
4cabc5b1
DB
2719static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
2720{
2a159c6f 2721 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4cabc5b1
DB
2722}
2723
f37a8cb8
DB
2724static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
2725{
2a159c6f 2726 const struct bpf_reg_state *reg = reg_state(env, regno);
f37a8cb8 2727
46f8bc92
MKL
2728 return reg->type == PTR_TO_CTX;
2729}
2730
2731static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
2732{
2733 const struct bpf_reg_state *reg = reg_state(env, regno);
2734
2735 return type_is_sk_pointer(reg->type);
f37a8cb8
DB
2736}
2737
ca369602
DB
2738static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
2739{
2a159c6f 2740 const struct bpf_reg_state *reg = reg_state(env, regno);
ca369602
DB
2741
2742 return type_is_pkt_pointer(reg->type);
2743}
2744
4b5defde
DB
2745static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
2746{
2747 const struct bpf_reg_state *reg = reg_state(env, regno);
2748
 2749 /* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
2750 return reg->type == PTR_TO_FLOW_KEYS;
2751}
2752
61bd5218
JK
2753static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
2754 const struct bpf_reg_state *reg,
d1174416 2755 int off, int size, bool strict)
969bf05e 2756{
f1174f77 2757 struct tnum reg_off;
e07b98d9 2758 int ip_align;
d1174416
DM
2759
2760 /* Byte size accesses are always allowed. */
2761 if (!strict || size == 1)
2762 return 0;
2763
e4eda884
DM
2764 /* For platforms that do not have a Kconfig enabling
2765 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
2766 * NET_IP_ALIGN is universally set to '2'. And on platforms
2767 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
2768 * to this code only in strict mode where we want to emulate
2769 * the NET_IP_ALIGN==2 checking. Therefore use an
2770 * unconditional IP align value of '2'.
e07b98d9 2771 */
e4eda884 2772 ip_align = 2;
f1174f77
EC
2773
2774 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
2775 if (!tnum_is_aligned(reg_off, size)) {
2776 char tn_buf[48];
2777
2778 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218
JK
2779 verbose(env,
2780 "misaligned packet access off %d+%s+%d+%d size %d\n",
f1174f77 2781 ip_align, tn_buf, reg->off, off, size);
969bf05e
AS
2782 return -EACCES;
2783 }
79adffcd 2784
969bf05e
AS
2785 return 0;
2786}
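/* Alignment example in strict mode (offsets are illustrative and assume a
 * constant var_off of 0):
 *
 *   reg->off = 14 (e.g. past an Ethernet header), off = 0, size = 4
 *       -> reg_off = 2 + 14 + 0 = 16, 16 % 4 == 0, access is allowed
 *
 *   reg->off = 15, off = 0, size = 4
 *       -> reg_off = 2 + 15 + 0 = 17, not 4-byte aligned, access is
 *          rejected as "misaligned packet access"
 */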
2787
61bd5218
JK
2788static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
2789 const struct bpf_reg_state *reg,
f1174f77
EC
2790 const char *pointer_desc,
2791 int off, int size, bool strict)
79adffcd 2792{
f1174f77
EC
2793 struct tnum reg_off;
2794
2795 /* Byte size accesses are always allowed. */
2796 if (!strict || size == 1)
2797 return 0;
2798
2799 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
2800 if (!tnum_is_aligned(reg_off, size)) {
2801 char tn_buf[48];
2802
2803 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 2804 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
f1174f77 2805 pointer_desc, tn_buf, reg->off, off, size);
79adffcd
DB
2806 return -EACCES;
2807 }
2808
969bf05e
AS
2809 return 0;
2810}
2811
e07b98d9 2812static int check_ptr_alignment(struct bpf_verifier_env *env,
ca369602
DB
2813 const struct bpf_reg_state *reg, int off,
2814 int size, bool strict_alignment_once)
79adffcd 2815{
ca369602 2816 bool strict = env->strict_alignment || strict_alignment_once;
f1174f77 2817 const char *pointer_desc = "";
d1174416 2818
79adffcd
DB
2819 switch (reg->type) {
2820 case PTR_TO_PACKET:
de8f3a83
DB
2821 case PTR_TO_PACKET_META:
2822 /* Special case, because of NET_IP_ALIGN. Given metadata sits
2823 * right in front, treat it the very same way.
2824 */
61bd5218 2825 return check_pkt_ptr_alignment(env, reg, off, size, strict);
d58e468b
PP
2826 case PTR_TO_FLOW_KEYS:
2827 pointer_desc = "flow keys ";
2828 break;
f1174f77
EC
2829 case PTR_TO_MAP_VALUE:
2830 pointer_desc = "value ";
2831 break;
2832 case PTR_TO_CTX:
2833 pointer_desc = "context ";
2834 break;
2835 case PTR_TO_STACK:
2836 pointer_desc = "stack ";
a5ec6ae1
JH
2837 /* The stack spill tracking logic in check_stack_write()
2838 * and check_stack_read() relies on stack accesses being
2839 * aligned.
2840 */
2841 strict = true;
f1174f77 2842 break;
c64b7983
JS
2843 case PTR_TO_SOCKET:
2844 pointer_desc = "sock ";
2845 break;
46f8bc92
MKL
2846 case PTR_TO_SOCK_COMMON:
2847 pointer_desc = "sock_common ";
2848 break;
655a51e5
MKL
2849 case PTR_TO_TCP_SOCK:
2850 pointer_desc = "tcp_sock ";
2851 break;
fada7fdc
JL
2852 case PTR_TO_XDP_SOCK:
2853 pointer_desc = "xdp_sock ";
2854 break;
79adffcd 2855 default:
f1174f77 2856 break;
79adffcd 2857 }
61bd5218
JK
2858 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
2859 strict);
79adffcd
DB
2860}
2861
f4d7e40a
AS
2862static int update_stack_depth(struct bpf_verifier_env *env,
2863 const struct bpf_func_state *func,
2864 int off)
2865{
9c8105bd 2866 u16 stack = env->subprog_info[func->subprogno].stack_depth;
f4d7e40a
AS
2867
2868 if (stack >= -off)
2869 return 0;
2870
2871 /* update known max for given subprogram */
9c8105bd 2872 env->subprog_info[func->subprogno].stack_depth = -off;
70a87ffe
AS
2873 return 0;
2874}
f4d7e40a 2875
70a87ffe
AS
2876/* starting from main bpf function walk all instructions of the function
 2877 * and recursively walk all callees that the given function can call.
2878 * Ignore jump and exit insns.
2879 * Since recursion is prevented by check_cfg() this algorithm
2880 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
2881 */
2882static int check_max_stack_depth(struct bpf_verifier_env *env)
2883{
9c8105bd
JW
2884 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
2885 struct bpf_subprog_info *subprog = env->subprog_info;
70a87ffe 2886 struct bpf_insn *insn = env->prog->insnsi;
70a87ffe
AS
2887 int ret_insn[MAX_CALL_FRAMES];
2888 int ret_prog[MAX_CALL_FRAMES];
f4d7e40a 2889
70a87ffe
AS
2890process_func:
 2891 /* round up to 32 bytes, since this is the granularity
 2892 * of the interpreter stack size
2893 */
9c8105bd 2894 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe 2895 if (depth > MAX_BPF_STACK) {
f4d7e40a 2896 verbose(env, "combined stack size of %d calls is %d. Too large\n",
70a87ffe 2897 frame + 1, depth);
f4d7e40a
AS
2898 return -EACCES;
2899 }
70a87ffe 2900continue_func:
4cb3d99c 2901 subprog_end = subprog[idx + 1].start;
70a87ffe
AS
2902 for (; i < subprog_end; i++) {
2903 if (insn[i].code != (BPF_JMP | BPF_CALL))
2904 continue;
2905 if (insn[i].src_reg != BPF_PSEUDO_CALL)
2906 continue;
2907 /* remember insn and function to return to */
2908 ret_insn[frame] = i + 1;
9c8105bd 2909 ret_prog[frame] = idx;
70a87ffe
AS
2910
2911 /* find the callee */
2912 i = i + insn[i].imm + 1;
9c8105bd
JW
2913 idx = find_subprog(env, i);
2914 if (idx < 0) {
70a87ffe
AS
2915 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
2916 i);
2917 return -EFAULT;
2918 }
70a87ffe
AS
2919 frame++;
2920 if (frame >= MAX_CALL_FRAMES) {
927cb781
PC
2921 verbose(env, "the call stack of %d frames is too deep !\n",
2922 frame);
2923 return -E2BIG;
70a87ffe
AS
2924 }
2925 goto process_func;
2926 }
2927 /* end of for() loop means the last insn of the 'subprog'
2928 * was reached. Doesn't matter whether it was JA or EXIT
2929 */
2930 if (frame == 0)
2931 return 0;
9c8105bd 2932 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe
AS
2933 frame--;
2934 i = ret_insn[frame];
9c8105bd 2935 idx = ret_prog[frame];
70a87ffe 2936 goto continue_func;
f4d7e40a
AS
2937}
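/* Example of the depth accounting (per-function stack sizes are made up):
 *
 *   main (40 bytes) -> A (100 bytes) -> B (200 bytes)
 *
 *   depth = round_up(40, 32) + round_up(100, 32) + round_up(200, 32)
 *         = 64 + 128 + 224 = 416 <= MAX_BPF_STACK (512) -> accepted
 *
 * Had B used 330 bytes the total would be 64 + 128 + 352 = 544 and the
 * program would be rejected with "combined stack size ... Too large".
 */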
2938
19d28fbd 2939#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
2940static int get_callee_stack_depth(struct bpf_verifier_env *env,
2941 const struct bpf_insn *insn, int idx)
2942{
2943 int start = idx + insn->imm + 1, subprog;
2944
2945 subprog = find_subprog(env, start);
2946 if (subprog < 0) {
2947 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
2948 start);
2949 return -EFAULT;
2950 }
9c8105bd 2951 return env->subprog_info[subprog].stack_depth;
1ea47e01 2952}
19d28fbd 2953#endif
1ea47e01 2954
51c39bb1
AS
2955int check_ctx_reg(struct bpf_verifier_env *env,
2956 const struct bpf_reg_state *reg, int regno)
58990d1f
DB
2957{
2958 /* Access to ctx or passing it to a helper is only allowed in
2959 * its original, unmodified form.
2960 */
2961
2962 if (reg->off) {
2963 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
2964 regno, reg->off);
2965 return -EACCES;
2966 }
2967
2968 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
2969 char tn_buf[48];
2970
2971 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2972 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
2973 return -EACCES;
2974 }
2975
2976 return 0;
2977}
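/* Example of what this rejects (illustrative):
 *
 *   r1 = ctx
 *   r1 += 8
 *   r2 = *(u32 *)(r1 + 0)    // reg->off == 8
 *       -> "dereference of modified ctx ptr R1 off=8 disallowed"
 *
 * The equivalent load written as *(u32 *)(r1 + 8) on an unmodified r1 is
 * fine, because a constant insn->off can be handled by the ctx access
 * rewrite while a modified register offset cannot.
 */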
2978
9df1c28b
MM
2979static int check_tp_buffer_access(struct bpf_verifier_env *env,
2980 const struct bpf_reg_state *reg,
2981 int regno, int off, int size)
2982{
2983 if (off < 0) {
2984 verbose(env,
2985 "R%d invalid tracepoint buffer access: off=%d, size=%d",
2986 regno, off, size);
2987 return -EACCES;
2988 }
2989 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
2990 char tn_buf[48];
2991
2992 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2993 verbose(env,
2994 "R%d invalid variable buffer offset: off=%d, var_off=%s",
2995 regno, off, tn_buf);
2996 return -EACCES;
2997 }
2998 if (off + size > env->prog->aux->max_tp_access)
2999 env->prog->aux->max_tp_access = off + size;
3000
3001 return 0;
3002}
3003
3f50f132
JF
 3004/* BPF architecture zero extends alu32 ops into 64-bit registers */
3005static void zext_32_to_64(struct bpf_reg_state *reg)
3006{
3007 reg->var_off = tnum_subreg(reg->var_off);
3008 __reg_assign_32_into_64(reg);
3009}
9df1c28b 3010
0c17d1d2
JH
3011/* truncate register to smaller size (in bytes)
3012 * must be called with size < BPF_REG_SIZE
3013 */
3014static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
3015{
3016 u64 mask;
3017
3018 /* clear high bits in bit representation */
3019 reg->var_off = tnum_cast(reg->var_off, size);
3020
3021 /* fix arithmetic bounds */
3022 mask = ((u64)1 << (size * 8)) - 1;
3023 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
3024 reg->umin_value &= mask;
3025 reg->umax_value &= mask;
3026 } else {
3027 reg->umin_value = 0;
3028 reg->umax_value = mask;
3029 }
3030 reg->smin_value = reg->umin_value;
3031 reg->smax_value = reg->umax_value;
3f50f132
JF
3032
 3033 /* If size is smaller than a 32-bit register, the 32-bit register
 3034 * values are also truncated, so we push the 64-bit bounds into
 3035 * the 32-bit bounds. The bounds above were already truncated to < 32 bits.
3036 */
3037 if (size >= 4)
3038 return;
3039 __reg_combine_64_into_32(reg);
0c17d1d2
JH
3040}
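/* Example of the bounds fixup (values are illustrative):
 *
 *   before: umin_value = 0x0, umax_value = 0x1ffff
 *   coerce_reg_to_size(reg, 2): mask = 0xffff
 *       (umin & ~mask) == 0 but (umax & ~mask) == 0x10000, so the bounds
 *       can't just be masked and become umin = 0, umax = 0xffff;
 *       smin/smax are then set from the new unsigned bounds.
 */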
3041
a23740ec
AN
3042static bool bpf_map_is_rdonly(const struct bpf_map *map)
3043{
3044 return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
3045}
3046
3047static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
3048{
3049 void *ptr;
3050 u64 addr;
3051 int err;
3052
3053 err = map->ops->map_direct_value_addr(map, &addr, off);
3054 if (err)
3055 return err;
2dedd7d2 3056 ptr = (void *)(long)addr + off;
a23740ec
AN
3057
3058 switch (size) {
3059 case sizeof(u8):
3060 *val = (u64)*(u8 *)ptr;
3061 break;
3062 case sizeof(u16):
3063 *val = (u64)*(u16 *)ptr;
3064 break;
3065 case sizeof(u32):
3066 *val = (u64)*(u32 *)ptr;
3067 break;
3068 case sizeof(u64):
3069 *val = *(u64 *)ptr;
3070 break;
3071 default:
3072 return -EINVAL;
3073 }
3074 return 0;
3075}
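/* Sketch of how this is used by the PTR_TO_MAP_VALUE handling in
 * check_mem_access() below: for a frozen read-only array map (e.g. one
 * backing a .rodata section), a load such as
 *
 *   r1 = *(u32 *)(r2 + 4)    // r2 = map value pointer, constant var_off
 *
 * is resolved at verification time via map_direct_value_addr() and
 * bpf_map_direct_read(), and the destination register becomes a known
 * scalar constant instead of an unknown value.
 */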
3076
9e15db66
AS
3077static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
3078 struct bpf_reg_state *regs,
3079 int regno, int off, int size,
3080 enum bpf_access_type atype,
3081 int value_regno)
3082{
3083 struct bpf_reg_state *reg = regs + regno;
3084 const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
3085 const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3086 u32 btf_id;
3087 int ret;
3088
9e15db66
AS
3089 if (off < 0) {
3090 verbose(env,
3091 "R%d is ptr_%s invalid negative access: off=%d\n",
3092 regno, tname, off);
3093 return -EACCES;
3094 }
3095 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3096 char tn_buf[48];
3097
3098 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3099 verbose(env,
3100 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
3101 regno, tname, off, tn_buf);
3102 return -EACCES;
3103 }
3104
27ae7997
MKL
3105 if (env->ops->btf_struct_access) {
3106 ret = env->ops->btf_struct_access(&env->log, t, off, size,
3107 atype, &btf_id);
3108 } else {
3109 if (atype != BPF_READ) {
3110 verbose(env, "only read is supported\n");
3111 return -EACCES;
3112 }
3113
3114 ret = btf_struct_access(&env->log, t, off, size, atype,
3115 &btf_id);
3116 }
3117
9e15db66
AS
3118 if (ret < 0)
3119 return ret;
3120
8ff3571f 3121 if (atype == BPF_READ && value_regno >= 0) {
27ae7997
MKL
3122 if (ret == SCALAR_VALUE) {
3123 mark_reg_unknown(env, regs, value_regno);
3124 return 0;
3125 }
3126 mark_reg_known_zero(env, regs, value_regno);
3127 regs[value_regno].type = PTR_TO_BTF_ID;
3128 regs[value_regno].btf_id = btf_id;
9e15db66 3129 }
27ae7997 3130
9e15db66
AS
3131 return 0;
3132}
3133
17a52670
AS
3134/* check whether memory at (regno + off) is accessible for t = (read | write)
3135 * if t==write, value_regno is a register which value is stored into memory
3136 * if t==read, value_regno is a register which will receive the value from memory
3137 * if t==write && value_regno==-1, some unknown value is stored into memory
3138 * if t==read && value_regno==-1, don't care what we read from memory
3139 */
ca369602
DB
3140static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
3141 int off, int bpf_size, enum bpf_access_type t,
3142 int value_regno, bool strict_alignment_once)
17a52670 3143{
638f5b90
AS
3144 struct bpf_reg_state *regs = cur_regs(env);
3145 struct bpf_reg_state *reg = regs + regno;
f4d7e40a 3146 struct bpf_func_state *state;
17a52670
AS
3147 int size, err = 0;
3148
3149 size = bpf_size_to_bytes(bpf_size);
3150 if (size < 0)
3151 return size;
3152
f1174f77 3153 /* alignment checks will add in reg->off themselves */
ca369602 3154 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
969bf05e
AS
3155 if (err)
3156 return err;
17a52670 3157
f1174f77
EC
3158 /* for access checks, reg->off is just part of off */
3159 off += reg->off;
3160
3161 if (reg->type == PTR_TO_MAP_VALUE) {
1be7f75d
AS
3162 if (t == BPF_WRITE && value_regno >= 0 &&
3163 is_pointer_value(env, value_regno)) {
61bd5218 3164 verbose(env, "R%d leaks addr into map\n", value_regno);
1be7f75d
AS
3165 return -EACCES;
3166 }
591fe988
DB
3167 err = check_map_access_type(env, regno, off, size, t);
3168 if (err)
3169 return err;
9fd29c08 3170 err = check_map_access(env, regno, off, size, false);
a23740ec
AN
3171 if (!err && t == BPF_READ && value_regno >= 0) {
3172 struct bpf_map *map = reg->map_ptr;
3173
3174 /* if map is read-only, track its contents as scalars */
3175 if (tnum_is_const(reg->var_off) &&
3176 bpf_map_is_rdonly(map) &&
3177 map->ops->map_direct_value_addr) {
3178 int map_off = off + reg->var_off.value;
3179 u64 val = 0;
3180
3181 err = bpf_map_direct_read(map, map_off, size,
3182 &val);
3183 if (err)
3184 return err;
3185
3186 regs[value_regno].type = SCALAR_VALUE;
3187 __mark_reg_known(&regs[value_regno], val);
3188 } else {
3189 mark_reg_unknown(env, regs, value_regno);
3190 }
3191 }
1a0dc1ac 3192 } else if (reg->type == PTR_TO_CTX) {
f1174f77 3193 enum bpf_reg_type reg_type = SCALAR_VALUE;
9e15db66 3194 u32 btf_id = 0;
19de99f7 3195
1be7f75d
AS
3196 if (t == BPF_WRITE && value_regno >= 0 &&
3197 is_pointer_value(env, value_regno)) {
61bd5218 3198 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1be7f75d
AS
3199 return -EACCES;
3200 }
f1174f77 3201
58990d1f
DB
3202 err = check_ctx_reg(env, reg, regno);
3203 if (err < 0)
3204 return err;
3205
9e15db66
AS
3206 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
3207 if (err)
3208 verbose_linfo(env, insn_idx, "; ");
969bf05e 3209 if (!err && t == BPF_READ && value_regno >= 0) {
f1174f77 3210 /* ctx access returns either a scalar, or a
de8f3a83
DB
3211 * PTR_TO_PACKET[_META,_END]. In the latter
3212 * case, we know the offset is zero.
f1174f77 3213 */
46f8bc92 3214 if (reg_type == SCALAR_VALUE) {
638f5b90 3215 mark_reg_unknown(env, regs, value_regno);
46f8bc92 3216 } else {
638f5b90 3217 mark_reg_known_zero(env, regs,
61bd5218 3218 value_regno);
46f8bc92
MKL
3219 if (reg_type_may_be_null(reg_type))
3220 regs[value_regno].id = ++env->id_gen;
5327ed3d
JW
 3221 /* A load of a ctx field could have a different
 3222 * actual load size than the one encoded in the
 3223 * insn. When the dst is a PTR, it is for sure not
3224 * a sub-register.
3225 */
3226 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
9e15db66
AS
3227 if (reg_type == PTR_TO_BTF_ID)
3228 regs[value_regno].btf_id = btf_id;
46f8bc92 3229 }
638f5b90 3230 regs[value_regno].type = reg_type;
969bf05e 3231 }
17a52670 3232
f1174f77 3233 } else if (reg->type == PTR_TO_STACK) {
f1174f77 3234 off += reg->var_off.value;
e4298d25
DB
3235 err = check_stack_access(env, reg, off, size);
3236 if (err)
3237 return err;
8726679a 3238
f4d7e40a
AS
3239 state = func(env, reg);
3240 err = update_stack_depth(env, state, off);
3241 if (err)
3242 return err;
8726679a 3243
638f5b90 3244 if (t == BPF_WRITE)
61bd5218 3245 err = check_stack_write(env, state, off, size,
af86ca4e 3246 value_regno, insn_idx);
638f5b90 3247 else
61bd5218
JK
3248 err = check_stack_read(env, state, off, size,
3249 value_regno);
de8f3a83 3250 } else if (reg_is_pkt_pointer(reg)) {
3a0af8fd 3251 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
61bd5218 3252 verbose(env, "cannot write into packet\n");
969bf05e
AS
3253 return -EACCES;
3254 }
4acf6c0b
BB
3255 if (t == BPF_WRITE && value_regno >= 0 &&
3256 is_pointer_value(env, value_regno)) {
61bd5218
JK
3257 verbose(env, "R%d leaks addr into packet\n",
3258 value_regno);
4acf6c0b
BB
3259 return -EACCES;
3260 }
9fd29c08 3261 err = check_packet_access(env, regno, off, size, false);
969bf05e 3262 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 3263 mark_reg_unknown(env, regs, value_regno);
d58e468b
PP
3264 } else if (reg->type == PTR_TO_FLOW_KEYS) {
3265 if (t == BPF_WRITE && value_regno >= 0 &&
3266 is_pointer_value(env, value_regno)) {
3267 verbose(env, "R%d leaks addr into flow keys\n",
3268 value_regno);
3269 return -EACCES;
3270 }
3271
3272 err = check_flow_keys_access(env, off, size);
3273 if (!err && t == BPF_READ && value_regno >= 0)
3274 mark_reg_unknown(env, regs, value_regno);
46f8bc92 3275 } else if (type_is_sk_pointer(reg->type)) {
c64b7983 3276 if (t == BPF_WRITE) {
46f8bc92
MKL
3277 verbose(env, "R%d cannot write into %s\n",
3278 regno, reg_type_str[reg->type]);
c64b7983
JS
3279 return -EACCES;
3280 }
5f456649 3281 err = check_sock_access(env, insn_idx, regno, off, size, t);
c64b7983
JS
3282 if (!err && value_regno >= 0)
3283 mark_reg_unknown(env, regs, value_regno);
9df1c28b
MM
3284 } else if (reg->type == PTR_TO_TP_BUFFER) {
3285 err = check_tp_buffer_access(env, reg, regno, off, size);
3286 if (!err && t == BPF_READ && value_regno >= 0)
3287 mark_reg_unknown(env, regs, value_regno);
9e15db66
AS
3288 } else if (reg->type == PTR_TO_BTF_ID) {
3289 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
3290 value_regno);
17a52670 3291 } else {
61bd5218
JK
3292 verbose(env, "R%d invalid mem access '%s'\n", regno,
3293 reg_type_str[reg->type]);
17a52670
AS
3294 return -EACCES;
3295 }
969bf05e 3296
f1174f77 3297 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
638f5b90 3298 regs[value_regno].type == SCALAR_VALUE) {
f1174f77 3299 /* b/h/w load zero-extends, mark upper bits as known 0 */
0c17d1d2 3300 coerce_reg_to_size(&regs[value_regno], size);
969bf05e 3301 }
17a52670
AS
3302 return err;
3303}
3304
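/* A minimal illustrative sketch of a program rejected by the
 * PTR_TO_MAP_VALUE branch above: for an unprivileged loader, storing a
 * pointer (here the frame pointer R10) into a map value fails with
 * "R10 leaks addr into map". 'map_fd' is a placeholder file descriptor;
 * the insn macros are the ones from include/linux/filter.h.
 *
 *   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),            key = 0 on the stack
 *   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 *   BPF_LD_MAP_FD(BPF_REG_1, map_fd),
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
 *   BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, 0),    write fp into map value
 *   BPF_MOV64_IMM(BPF_REG_0, 0),
 *   BPF_EXIT_INSN(),
 */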
31fd8581 3305static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
17a52670 3306{
17a52670
AS
3307 int err;
3308
3309 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
3310 insn->imm != 0) {
61bd5218 3311 verbose(env, "BPF_XADD uses reserved fields\n");
17a52670
AS
3312 return -EINVAL;
3313 }
3314
3315 /* check src1 operand */
dc503a8a 3316 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
3317 if (err)
3318 return err;
3319
3320 /* check src2 operand */
dc503a8a 3321 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
3322 if (err)
3323 return err;
3324
6bdf6abc 3325 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 3326 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6bdf6abc
DB
3327 return -EACCES;
3328 }
3329
ca369602 3330 if (is_ctx_reg(env, insn->dst_reg) ||
4b5defde 3331 is_pkt_reg(env, insn->dst_reg) ||
46f8bc92
MKL
3332 is_flow_key_reg(env, insn->dst_reg) ||
3333 is_sk_reg(env, insn->dst_reg)) {
ca369602 3334 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2a159c6f
DB
3335 insn->dst_reg,
3336 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
3337 return -EACCES;
3338 }
3339
17a52670 3340 /* check whether atomic_add can read the memory */
31fd8581 3341 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 3342 BPF_SIZE(insn->code), BPF_READ, -1, true);
17a52670
AS
3343 if (err)
3344 return err;
3345
3346 /* check whether atomic_add can write into the same memory */
31fd8581 3347 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 3348 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
17a52670
AS
3349}
3350
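/* A minimal illustrative sketch of what check_xadd() accepts and rejects:
 * atomic adds are fine on stack (and map value) memory, but not on ctx,
 * packet, flow-key or socket pointers. Assumes R1 still holds the ctx.
 *
 *   BPF_MOV64_IMM(BPF_REG_0, 1),
 *   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 *   BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),  ok: atomic add on stack
 *   BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_0, 0),    rejected: R1 is ctx
 *   BPF_EXIT_INSN(),
 */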
2011fccf
AI
3351static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
3352 int off, int access_size,
3353 bool zero_size_allowed)
3354{
3355 struct bpf_reg_state *reg = reg_state(env, regno);
3356
3357 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
3358 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
3359 if (tnum_is_const(reg->var_off)) {
3360 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
3361 regno, off, access_size);
3362 } else {
3363 char tn_buf[48];
3364
3365 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3366 verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
3367 regno, tn_buf, access_size);
3368 }
3369 return -EACCES;
3370 }
3371 return 0;
3372}
3373
17a52670
AS
3374/* when register 'regno' is passed into a function that will read 'access_size'
3375 * bytes from that pointer, make sure that it's within the stack boundary
f1174f77
EC
3376 * and that all elements of the stack are initialized.
3377 * Unlike most pointer bounds-checking functions, this one doesn't take an
3378 * 'off' argument, so it has to add in reg->off itself.
17a52670 3379 */
58e2af8b 3380static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
435faee1
DB
3381 int access_size, bool zero_size_allowed,
3382 struct bpf_call_arg_meta *meta)
17a52670 3383{
2a159c6f 3384 struct bpf_reg_state *reg = reg_state(env, regno);
f4d7e40a 3385 struct bpf_func_state *state = func(env, reg);
f7cf25b2 3386 int err, min_off, max_off, i, j, slot, spi;
17a52670 3387
914cb781 3388 if (reg->type != PTR_TO_STACK) {
f1174f77 3389 /* Allow zero-byte read from NULL, regardless of pointer type */
8e2fe1d9 3390 if (zero_size_allowed && access_size == 0 &&
914cb781 3391 register_is_null(reg))
8e2fe1d9
DB
3392 return 0;
3393
61bd5218 3394 verbose(env, "R%d type=%s expected=%s\n", regno,
914cb781 3395 reg_type_str[reg->type],
8e2fe1d9 3396 reg_type_str[PTR_TO_STACK]);
17a52670 3397 return -EACCES;
8e2fe1d9 3398 }
17a52670 3399
2011fccf
AI
3400 if (tnum_is_const(reg->var_off)) {
3401 min_off = max_off = reg->var_off.value + reg->off;
3402 err = __check_stack_boundary(env, regno, min_off, access_size,
3403 zero_size_allowed);
3404 if (err)
3405 return err;
3406 } else {
088ec26d
AI
3407 /* Variable offset is prohibited for unprivileged mode for
3408 * simplicity since it requires corresponding support in
3409 * Spectre masking for stack ALU.
3410 * See also retrieve_ptr_limit().
3411 */
3412 if (!env->allow_ptr_leaks) {
3413 char tn_buf[48];
f1174f77 3414
088ec26d
AI
3415 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3416 verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
3417 regno, tn_buf);
3418 return -EACCES;
3419 }
f2bcd05e
AI
3420 /* Only an initialized buffer on the stack may be accessed
3421 * with a variable offset. With an uninitialized buffer it's
3422 * hard to guarantee that the whole memory is marked as
3423 * initialized on helper return, since the exact bounds are
3424 * unknown, which may cause uninitialized stack data to leak.
3425 */
3426 if (meta && meta->raw_mode)
3427 meta = NULL;
3428
107c26a7
AI
3429 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
3430 reg->smax_value <= -BPF_MAX_VAR_OFF) {
3431 verbose(env, "R%d unbounded indirect variable offset stack access\n",
3432 regno);
3433 return -EACCES;
3434 }
2011fccf 3435 min_off = reg->smin_value + reg->off;
107c26a7 3436 max_off = reg->smax_value + reg->off;
2011fccf
AI
3437 err = __check_stack_boundary(env, regno, min_off, access_size,
3438 zero_size_allowed);
107c26a7
AI
3439 if (err) {
3440 verbose(env, "R%d min value is outside of stack bound\n",
3441 regno);
2011fccf 3442 return err;
107c26a7 3443 }
2011fccf
AI
3444 err = __check_stack_boundary(env, regno, max_off, access_size,
3445 zero_size_allowed);
107c26a7
AI
3446 if (err) {
3447 verbose(env, "R%d max value is outside of stack bound\n",
3448 regno);
2011fccf 3449 return err;
107c26a7 3450 }
17a52670
AS
3451 }
3452
435faee1
DB
3453 if (meta && meta->raw_mode) {
3454 meta->access_size = access_size;
3455 meta->regno = regno;
3456 return 0;
3457 }
3458
2011fccf 3459 for (i = min_off; i < max_off + access_size; i++) {
cc2b14d5
AS
3460 u8 *stype;
3461
2011fccf 3462 slot = -i - 1;
638f5b90 3463 spi = slot / BPF_REG_SIZE;
cc2b14d5
AS
3464 if (state->allocated_stack <= slot)
3465 goto err;
3466 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3467 if (*stype == STACK_MISC)
3468 goto mark;
3469 if (*stype == STACK_ZERO) {
3470 /* helper can write anything into the stack */
3471 *stype = STACK_MISC;
3472 goto mark;
17a52670 3473 }
f7cf25b2
AS
3474 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
3475 state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
f54c7898 3476 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
f7cf25b2
AS
3477 for (j = 0; j < BPF_REG_SIZE; j++)
3478 state->stack[spi].slot_type[j] = STACK_MISC;
3479 goto mark;
3480 }
3481
cc2b14d5 3482err:
2011fccf
AI
3483 if (tnum_is_const(reg->var_off)) {
3484 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
3485 min_off, i - min_off, access_size);
3486 } else {
3487 char tn_buf[48];
3488
3489 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3490 verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
3491 tn_buf, i - min_off, access_size);
3492 }
cc2b14d5
AS
3493 return -EACCES;
3494mark:
3495 /* reading any byte out of 8-byte 'spill_slot' will cause
3496 * the whole slot to be marked as 'read'
3497 */
679c782d 3498 mark_reg_read(env, &state->stack[spi].spilled_ptr,
5327ed3d
JW
3499 state->stack[spi].spilled_ptr.parent,
3500 REG_LIVE_READ64);
17a52670 3501 }
2011fccf 3502 return update_stack_depth(env, state, min_off);
17a52670
AS
3503}
3504
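/* A minimal illustrative sketch, in BPF program C, of the rule enforced by
 * the loop above: a helper that reads ARG_PTR_TO_MEM from the stack (for
 * example bpf_perf_event_output()) needs a fully initialized buffer,
 * otherwise the verifier reports "invalid indirect read from stack".
 * 'events' is a placeholder BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
 *
 *   char buf[16];
 *
 *   __builtin_memset(buf, 0, sizeof(buf));            without this: rejected
 *   bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, buf, sizeof(buf));
 */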
06c1c049
GB
3505static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
3506 int access_size, bool zero_size_allowed,
3507 struct bpf_call_arg_meta *meta)
3508{
638f5b90 3509 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
06c1c049 3510
f1174f77 3511 switch (reg->type) {
06c1c049 3512 case PTR_TO_PACKET:
de8f3a83 3513 case PTR_TO_PACKET_META:
9fd29c08
YS
3514 return check_packet_access(env, regno, reg->off, access_size,
3515 zero_size_allowed);
06c1c049 3516 case PTR_TO_MAP_VALUE:
591fe988
DB
3517 if (check_map_access_type(env, regno, reg->off, access_size,
3518 meta && meta->raw_mode ? BPF_WRITE :
3519 BPF_READ))
3520 return -EACCES;
9fd29c08
YS
3521 return check_map_access(env, regno, reg->off, access_size,
3522 zero_size_allowed);
f1174f77 3523 default: /* scalar_value|ptr_to_stack or invalid ptr */
06c1c049
GB
3524 return check_stack_boundary(env, regno, access_size,
3525 zero_size_allowed, meta);
3526 }
3527}
3528
d83525ca
AS
3529/* Implementation details:
3530 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
3531 * Two bpf_map_lookups (even with the same key) will have different reg->id.
3532 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
3533 * value_or_null->value transition, since the verifier only cares about
3534 * the range of access to valid map value pointer and doesn't care about actual
3535 * address of the map element.
3536 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
3537 * reg->id > 0 after value_or_null->value transition. By doing so
3538 * two bpf_map_lookups will be considered two different pointers that
3539 * point to different bpf_spin_locks.
3540 * The verifier allows taking only one bpf_spin_lock at a time to avoid
3541 * deadlocks.
3542 * Since only one bpf_spin_lock is allowed, the checks are simpler than
3543 * the reg_is_refcounted() logic. The verifier needs to remember only
3544 * one spin_lock instead of an array of acquired_refs.
3545 * cur_state->active_spin_lock remembers which map value element got locked
3546 * and clears it after bpf_spin_unlock.
3547 */
3548static int process_spin_lock(struct bpf_verifier_env *env, int regno,
3549 bool is_lock)
3550{
3551 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
3552 struct bpf_verifier_state *cur = env->cur_state;
3553 bool is_const = tnum_is_const(reg->var_off);
3554 struct bpf_map *map = reg->map_ptr;
3555 u64 val = reg->var_off.value;
3556
3557 if (reg->type != PTR_TO_MAP_VALUE) {
3558 verbose(env, "R%d is not a pointer to map_value\n", regno);
3559 return -EINVAL;
3560 }
3561 if (!is_const) {
3562 verbose(env,
3563 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
3564 regno);
3565 return -EINVAL;
3566 }
3567 if (!map->btf) {
3568 verbose(env,
3569 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
3570 map->name);
3571 return -EINVAL;
3572 }
3573 if (!map_value_has_spin_lock(map)) {
3574 if (map->spin_lock_off == -E2BIG)
3575 verbose(env,
3576 "map '%s' has more than one 'struct bpf_spin_lock'\n",
3577 map->name);
3578 else if (map->spin_lock_off == -ENOENT)
3579 verbose(env,
3580 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
3581 map->name);
3582 else
3583 verbose(env,
3584 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
3585 map->name);
3586 return -EINVAL;
3587 }
3588 if (map->spin_lock_off != val + reg->off) {
3589 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
3590 val + reg->off);
3591 return -EINVAL;
3592 }
3593 if (is_lock) {
3594 if (cur->active_spin_lock) {
3595 verbose(env,
3596 "Locking two bpf_spin_locks are not allowed\n");
3597 return -EINVAL;
3598 }
3599 cur->active_spin_lock = reg->id;
3600 } else {
3601 if (!cur->active_spin_lock) {
3602 verbose(env, "bpf_spin_unlock without taking a lock\n");
3603 return -EINVAL;
3604 }
3605 if (cur->active_spin_lock != reg->id) {
3606 verbose(env, "bpf_spin_unlock of different lock\n");
3607 return -EINVAL;
3608 }
3609 cur->active_spin_lock = 0;
3610 }
3611 return 0;
3612}
3613
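/* A minimal illustrative sketch, in BPF program C, of the usage that
 * process_spin_lock() validates: the map value must be described by BTF and
 * contain exactly one struct bpf_spin_lock at a constant offset. 'vals' and
 * 'key' are placeholder names.
 *
 *   struct val {
 *           int cnt;
 *           struct bpf_spin_lock lock;
 *   };
 *
 *   struct val *v = bpf_map_lookup_elem(&vals, &key);
 *   if (!v)
 *           return 0;
 *   bpf_spin_lock(&v->lock);
 *   v->cnt++;
 *   bpf_spin_unlock(&v->lock);
 */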
90133415
DB
3614static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
3615{
3616 return type == ARG_PTR_TO_MEM ||
3617 type == ARG_PTR_TO_MEM_OR_NULL ||
3618 type == ARG_PTR_TO_UNINIT_MEM;
3619}
3620
3621static bool arg_type_is_mem_size(enum bpf_arg_type type)
3622{
3623 return type == ARG_CONST_SIZE ||
3624 type == ARG_CONST_SIZE_OR_ZERO;
3625}
3626
57c3bb72
AI
3627static bool arg_type_is_int_ptr(enum bpf_arg_type type)
3628{
3629 return type == ARG_PTR_TO_INT ||
3630 type == ARG_PTR_TO_LONG;
3631}
3632
3633static int int_ptr_type_to_size(enum bpf_arg_type type)
3634{
3635 if (type == ARG_PTR_TO_INT)
3636 return sizeof(u32);
3637 else if (type == ARG_PTR_TO_LONG)
3638 return sizeof(u64);
3639
3640 return -EINVAL;
3641}
3642
58e2af8b 3643static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
33ff9823
DB
3644 enum bpf_arg_type arg_type,
3645 struct bpf_call_arg_meta *meta)
17a52670 3646{
638f5b90 3647 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6841de8b 3648 enum bpf_reg_type expected_type, type = reg->type;
17a52670
AS
3649 int err = 0;
3650
80f1d68c 3651 if (arg_type == ARG_DONTCARE)
17a52670
AS
3652 return 0;
3653
dc503a8a
EC
3654 err = check_reg_arg(env, regno, SRC_OP);
3655 if (err)
3656 return err;
17a52670 3657
1be7f75d
AS
3658 if (arg_type == ARG_ANYTHING) {
3659 if (is_pointer_value(env, regno)) {
61bd5218
JK
3660 verbose(env, "R%d leaks addr into helper function\n",
3661 regno);
1be7f75d
AS
3662 return -EACCES;
3663 }
80f1d68c 3664 return 0;
1be7f75d 3665 }
80f1d68c 3666
de8f3a83 3667 if (type_is_pkt_pointer(type) &&
3a0af8fd 3668 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
61bd5218 3669 verbose(env, "helper access to the packet is not allowed\n");
6841de8b
AS
3670 return -EACCES;
3671 }
3672
8e2fe1d9 3673 if (arg_type == ARG_PTR_TO_MAP_KEY ||
2ea864c5 3674 arg_type == ARG_PTR_TO_MAP_VALUE ||
6ac99e8f
MKL
3675 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
3676 arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
17a52670 3677 expected_type = PTR_TO_STACK;
6ac99e8f
MKL
3678 if (register_is_null(reg) &&
3679 arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL)
3680 /* final test in check_stack_boundary() */;
3681 else if (!type_is_pkt_pointer(type) &&
3682 type != PTR_TO_MAP_VALUE &&
3683 type != expected_type)
6841de8b 3684 goto err_type;
39f19ebb
AS
3685 } else if (arg_type == ARG_CONST_SIZE ||
3686 arg_type == ARG_CONST_SIZE_OR_ZERO) {
f1174f77
EC
3687 expected_type = SCALAR_VALUE;
3688 if (type != expected_type)
6841de8b 3689 goto err_type;
17a52670
AS
3690 } else if (arg_type == ARG_CONST_MAP_PTR) {
3691 expected_type = CONST_PTR_TO_MAP;
6841de8b
AS
3692 if (type != expected_type)
3693 goto err_type;
f318903c
DB
3694 } else if (arg_type == ARG_PTR_TO_CTX ||
3695 arg_type == ARG_PTR_TO_CTX_OR_NULL) {
608cd71a 3696 expected_type = PTR_TO_CTX;
f318903c
DB
3697 if (!(register_is_null(reg) &&
3698 arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
3699 if (type != expected_type)
3700 goto err_type;
3701 err = check_ctx_reg(env, reg, regno);
3702 if (err < 0)
3703 return err;
3704 }
46f8bc92
MKL
3705 } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
3706 expected_type = PTR_TO_SOCK_COMMON;
3707 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
3708 if (!type_is_sk_pointer(type))
3709 goto err_type;
1b986589
MKL
3710 if (reg->ref_obj_id) {
3711 if (meta->ref_obj_id) {
3712 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
3713 regno, reg->ref_obj_id,
3714 meta->ref_obj_id);
3715 return -EFAULT;
3716 }
3717 meta->ref_obj_id = reg->ref_obj_id;
fd978bf7 3718 }
6ac99e8f
MKL
3719 } else if (arg_type == ARG_PTR_TO_SOCKET) {
3720 expected_type = PTR_TO_SOCKET;
3721 if (type != expected_type)
3722 goto err_type;
a7658e1a
AS
3723 } else if (arg_type == ARG_PTR_TO_BTF_ID) {
3724 expected_type = PTR_TO_BTF_ID;
3725 if (type != expected_type)
3726 goto err_type;
3727 if (reg->btf_id != meta->btf_id) {
3728 verbose(env, "Helper has type %s got %s in R%d\n",
3729 kernel_type_name(meta->btf_id),
3730 kernel_type_name(reg->btf_id), regno);
3731
3732 return -EACCES;
3733 }
3734 if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) {
3735 verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
3736 regno);
3737 return -EACCES;
3738 }
d83525ca
AS
3739 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
3740 if (meta->func_id == BPF_FUNC_spin_lock) {
3741 if (process_spin_lock(env, regno, true))
3742 return -EACCES;
3743 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
3744 if (process_spin_lock(env, regno, false))
3745 return -EACCES;
3746 } else {
3747 verbose(env, "verifier internal error\n");
3748 return -EFAULT;
3749 }
90133415 3750 } else if (arg_type_is_mem_ptr(arg_type)) {
8e2fe1d9
DB
3751 expected_type = PTR_TO_STACK;
3752 /* One exception here. In case function allows for NULL to be
f1174f77 3753 * passed in as argument, it's a SCALAR_VALUE type. Final test
8e2fe1d9
DB
3754 * happens during stack boundary checking.
3755 */
914cb781 3756 if (register_is_null(reg) &&
db1ac496 3757 arg_type == ARG_PTR_TO_MEM_OR_NULL)
6841de8b 3758 /* final test in check_stack_boundary() */;
de8f3a83
DB
3759 else if (!type_is_pkt_pointer(type) &&
3760 type != PTR_TO_MAP_VALUE &&
f1174f77 3761 type != expected_type)
6841de8b 3762 goto err_type;
39f19ebb 3763 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
57c3bb72
AI
3764 } else if (arg_type_is_int_ptr(arg_type)) {
3765 expected_type = PTR_TO_STACK;
3766 if (!type_is_pkt_pointer(type) &&
3767 type != PTR_TO_MAP_VALUE &&
3768 type != expected_type)
3769 goto err_type;
17a52670 3770 } else {
61bd5218 3771 verbose(env, "unsupported arg_type %d\n", arg_type);
17a52670
AS
3772 return -EFAULT;
3773 }
3774
17a52670
AS
3775 if (arg_type == ARG_CONST_MAP_PTR) {
3776 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
33ff9823 3777 meta->map_ptr = reg->map_ptr;
17a52670
AS
3778 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
3779 /* bpf_map_xxx(..., map_ptr, ..., key) call:
3780 * check that [key, key + map->key_size) are within
3781 * stack limits and initialized
3782 */
33ff9823 3783 if (!meta->map_ptr) {
17a52670
AS
3784 /* in function declaration map_ptr must come before
3785 * map_key, so that it's verified and known before
3786 * we have to check map_key here. Otherwise it means
3787 * that kernel subsystem misconfigured verifier
3788 */
61bd5218 3789 verbose(env, "invalid map_ptr to access map->key\n");
17a52670
AS
3790 return -EACCES;
3791 }
d71962f3
PC
3792 err = check_helper_mem_access(env, regno,
3793 meta->map_ptr->key_size, false,
3794 NULL);
2ea864c5 3795 } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
6ac99e8f
MKL
3796 (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
3797 !register_is_null(reg)) ||
2ea864c5 3798 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670
AS
3799 /* bpf_map_xxx(..., map_ptr, ..., value) call:
3800 * check [value, value + map->value_size) validity
3801 */
33ff9823 3802 if (!meta->map_ptr) {
17a52670 3803 /* kernel subsystem misconfigured verifier */
61bd5218 3804 verbose(env, "invalid map_ptr to access map->value\n");
17a52670
AS
3805 return -EACCES;
3806 }
2ea864c5 3807 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
d71962f3
PC
3808 err = check_helper_mem_access(env, regno,
3809 meta->map_ptr->value_size, false,
2ea864c5 3810 meta);
90133415 3811 } else if (arg_type_is_mem_size(arg_type)) {
39f19ebb 3812 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
17a52670 3813
10060503
JF
3814 /* This is used to refine r0 return value bounds for helpers
3815 * that enforce this value as an upper bound on return values.
3816 * See do_refine_retval_range() for helpers that can refine
3817 * the return value. The C type of the helper argument is u32, so we
3818 * pull the register bound from umax_value; if it is negative, the
3819 * verifier errors out. Only upper bounds can be learned because the
3820 * retval is an int type and negative retvals are allowed.
849fa506 3821 */
10060503 3822 meta->msize_max_value = reg->umax_value;
849fa506 3823
f1174f77
EC
3824 /* The register is SCALAR_VALUE; the access check
3825 * happens using its boundaries.
06c1c049 3826 */
f1174f77 3827 if (!tnum_is_const(reg->var_off))
06c1c049
GB
3828 /* For unprivileged variable accesses, disable raw
3829 * mode so that the program is required to
3830 * initialize all the memory that the helper could
3831 * just partially fill up.
3832 */
3833 meta = NULL;
3834
b03c9f9f 3835 if (reg->smin_value < 0) {
61bd5218 3836 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
f1174f77
EC
3837 regno);
3838 return -EACCES;
3839 }
06c1c049 3840
b03c9f9f 3841 if (reg->umin_value == 0) {
f1174f77
EC
3842 err = check_helper_mem_access(env, regno - 1, 0,
3843 zero_size_allowed,
3844 meta);
06c1c049
GB
3845 if (err)
3846 return err;
06c1c049 3847 }
f1174f77 3848
b03c9f9f 3849 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
61bd5218 3850 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
f1174f77
EC
3851 regno);
3852 return -EACCES;
3853 }
3854 err = check_helper_mem_access(env, regno - 1,
b03c9f9f 3855 reg->umax_value,
f1174f77 3856 zero_size_allowed, meta);
b5dc0163
AS
3857 if (!err)
3858 err = mark_chain_precision(env, regno);
57c3bb72
AI
3859 } else if (arg_type_is_int_ptr(arg_type)) {
3860 int size = int_ptr_type_to_size(arg_type);
3861
3862 err = check_helper_mem_access(env, regno, size, false, meta);
3863 if (err)
3864 return err;
3865 err = check_ptr_alignment(env, reg, 0, size, true);
17a52670
AS
3866 }
3867
3868 return err;
6841de8b 3869err_type:
61bd5218 3870 verbose(env, "R%d type=%s expected=%s\n", regno,
6841de8b
AS
3871 reg_type_str[type], reg_type_str[expected_type]);
3872 return -EACCES;
17a52670
AS
3873}
3874
61bd5218
JK
3875static int check_map_func_compatibility(struct bpf_verifier_env *env,
3876 struct bpf_map *map, int func_id)
35578d79 3877{
35578d79
KX
3878 if (!map)
3879 return 0;
3880
6aff67c8
AS
3881 /* We need a two way check, first is from map perspective ... */
3882 switch (map->map_type) {
3883 case BPF_MAP_TYPE_PROG_ARRAY:
3884 if (func_id != BPF_FUNC_tail_call)
3885 goto error;
3886 break;
3887 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
3888 if (func_id != BPF_FUNC_perf_event_read &&
908432ca 3889 func_id != BPF_FUNC_perf_event_output &&
a7658e1a 3890 func_id != BPF_FUNC_skb_output &&
d831ee84
EC
3891 func_id != BPF_FUNC_perf_event_read_value &&
3892 func_id != BPF_FUNC_xdp_output)
6aff67c8
AS
3893 goto error;
3894 break;
3895 case BPF_MAP_TYPE_STACK_TRACE:
3896 if (func_id != BPF_FUNC_get_stackid)
3897 goto error;
3898 break;
4ed8ec52 3899 case BPF_MAP_TYPE_CGROUP_ARRAY:
60747ef4 3900 if (func_id != BPF_FUNC_skb_under_cgroup &&
60d20f91 3901 func_id != BPF_FUNC_current_task_under_cgroup)
4a482f34
MKL
3902 goto error;
3903 break;
cd339431 3904 case BPF_MAP_TYPE_CGROUP_STORAGE:
b741f163 3905 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
cd339431
RG
3906 if (func_id != BPF_FUNC_get_local_storage)
3907 goto error;
3908 break;
546ac1ff 3909 case BPF_MAP_TYPE_DEVMAP:
6f9d451a 3910 case BPF_MAP_TYPE_DEVMAP_HASH:
0cdbb4b0
THJ
3911 if (func_id != BPF_FUNC_redirect_map &&
3912 func_id != BPF_FUNC_map_lookup_elem)
546ac1ff
JF
3913 goto error;
3914 break;
fbfc504a
BT
3915 /* Restrict bpf side of cpumap and xskmap, open when use-cases
3916 * appear.
3917 */
6710e112
JDB
3918 case BPF_MAP_TYPE_CPUMAP:
3919 if (func_id != BPF_FUNC_redirect_map)
3920 goto error;
3921 break;
fada7fdc
JL
3922 case BPF_MAP_TYPE_XSKMAP:
3923 if (func_id != BPF_FUNC_redirect_map &&
3924 func_id != BPF_FUNC_map_lookup_elem)
3925 goto error;
3926 break;
56f668df 3927 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
bcc6b1b7 3928 case BPF_MAP_TYPE_HASH_OF_MAPS:
56f668df
MKL
3929 if (func_id != BPF_FUNC_map_lookup_elem)
3930 goto error;
16a43625 3931 break;
174a79ff
JF
3932 case BPF_MAP_TYPE_SOCKMAP:
3933 if (func_id != BPF_FUNC_sk_redirect_map &&
3934 func_id != BPF_FUNC_sock_map_update &&
4f738adb 3935 func_id != BPF_FUNC_map_delete_elem &&
9fed9000
JS
3936 func_id != BPF_FUNC_msg_redirect_map &&
3937 func_id != BPF_FUNC_sk_select_reuseport)
174a79ff
JF
3938 goto error;
3939 break;
81110384
JF
3940 case BPF_MAP_TYPE_SOCKHASH:
3941 if (func_id != BPF_FUNC_sk_redirect_hash &&
3942 func_id != BPF_FUNC_sock_hash_update &&
3943 func_id != BPF_FUNC_map_delete_elem &&
9fed9000
JS
3944 func_id != BPF_FUNC_msg_redirect_hash &&
3945 func_id != BPF_FUNC_sk_select_reuseport)
81110384
JF
3946 goto error;
3947 break;
2dbb9b9e
MKL
3948 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
3949 if (func_id != BPF_FUNC_sk_select_reuseport)
3950 goto error;
3951 break;
f1a2e44a
MV
3952 case BPF_MAP_TYPE_QUEUE:
3953 case BPF_MAP_TYPE_STACK:
3954 if (func_id != BPF_FUNC_map_peek_elem &&
3955 func_id != BPF_FUNC_map_pop_elem &&
3956 func_id != BPF_FUNC_map_push_elem)
3957 goto error;
3958 break;
6ac99e8f
MKL
3959 case BPF_MAP_TYPE_SK_STORAGE:
3960 if (func_id != BPF_FUNC_sk_storage_get &&
3961 func_id != BPF_FUNC_sk_storage_delete)
3962 goto error;
3963 break;
6aff67c8
AS
3964 default:
3965 break;
3966 }
3967
3968 /* ... and second from the function itself. */
3969 switch (func_id) {
3970 case BPF_FUNC_tail_call:
3971 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
3972 goto error;
f910cefa 3973 if (env->subprog_cnt > 1) {
f4d7e40a
AS
3974 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
3975 return -EINVAL;
3976 }
6aff67c8
AS
3977 break;
3978 case BPF_FUNC_perf_event_read:
3979 case BPF_FUNC_perf_event_output:
908432ca 3980 case BPF_FUNC_perf_event_read_value:
a7658e1a 3981 case BPF_FUNC_skb_output:
d831ee84 3982 case BPF_FUNC_xdp_output:
6aff67c8
AS
3983 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
3984 goto error;
3985 break;
3986 case BPF_FUNC_get_stackid:
3987 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
3988 goto error;
3989 break;
60d20f91 3990 case BPF_FUNC_current_task_under_cgroup:
747ea55e 3991 case BPF_FUNC_skb_under_cgroup:
4a482f34
MKL
3992 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
3993 goto error;
3994 break;
97f91a7c 3995 case BPF_FUNC_redirect_map:
9c270af3 3996 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
6f9d451a 3997 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
fbfc504a
BT
3998 map->map_type != BPF_MAP_TYPE_CPUMAP &&
3999 map->map_type != BPF_MAP_TYPE_XSKMAP)
97f91a7c
JF
4000 goto error;
4001 break;
174a79ff 4002 case BPF_FUNC_sk_redirect_map:
4f738adb 4003 case BPF_FUNC_msg_redirect_map:
81110384 4004 case BPF_FUNC_sock_map_update:
174a79ff
JF
4005 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
4006 goto error;
4007 break;
81110384
JF
4008 case BPF_FUNC_sk_redirect_hash:
4009 case BPF_FUNC_msg_redirect_hash:
4010 case BPF_FUNC_sock_hash_update:
4011 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
174a79ff
JF
4012 goto error;
4013 break;
cd339431 4014 case BPF_FUNC_get_local_storage:
b741f163
RG
4015 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
4016 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
cd339431
RG
4017 goto error;
4018 break;
2dbb9b9e 4019 case BPF_FUNC_sk_select_reuseport:
9fed9000
JS
4020 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
4021 map->map_type != BPF_MAP_TYPE_SOCKMAP &&
4022 map->map_type != BPF_MAP_TYPE_SOCKHASH)
2dbb9b9e
MKL
4023 goto error;
4024 break;
f1a2e44a
MV
4025 case BPF_FUNC_map_peek_elem:
4026 case BPF_FUNC_map_pop_elem:
4027 case BPF_FUNC_map_push_elem:
4028 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
4029 map->map_type != BPF_MAP_TYPE_STACK)
4030 goto error;
4031 break;
6ac99e8f
MKL
4032 case BPF_FUNC_sk_storage_get:
4033 case BPF_FUNC_sk_storage_delete:
4034 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
4035 goto error;
4036 break;
6aff67c8
AS
4037 default:
4038 break;
35578d79
KX
4039 }
4040
4041 return 0;
6aff67c8 4042error:
61bd5218 4043 verbose(env, "cannot pass map_type %d into func %s#%d\n",
ebb676da 4044 map->map_type, func_id_name(func_id), func_id);
6aff67c8 4045 return -EINVAL;
35578d79
KX
4046}
4047
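/* A minimal illustrative sketch of the mismatch the two-way check above
 * catches: bpf_tail_call() only works with a BPF_MAP_TYPE_PROG_ARRAY, so
 * declaring 'jmp_table' (a placeholder name) as, say, a hash map fails with
 * "cannot pass map_type ... into func bpf_tail_call#...".
 *
 *   bpf_tail_call(ctx, &jmp_table, 0);     accepted only if jmp_table is a
 *                                          BPF_MAP_TYPE_PROG_ARRAY
 */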
90133415 4048static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
435faee1
DB
4049{
4050 int count = 0;
4051
39f19ebb 4052 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 4053 count++;
39f19ebb 4054 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 4055 count++;
39f19ebb 4056 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 4057 count++;
39f19ebb 4058 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 4059 count++;
39f19ebb 4060 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
435faee1
DB
4061 count++;
4062
90133415
DB
4063 /* We only support one arg being in raw mode at the moment,
4064 * which is sufficient for the helper functions we have
4065 * right now.
4066 */
4067 return count <= 1;
4068}
4069
4070static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
4071 enum bpf_arg_type arg_next)
4072{
4073 return (arg_type_is_mem_ptr(arg_curr) &&
4074 !arg_type_is_mem_size(arg_next)) ||
4075 (!arg_type_is_mem_ptr(arg_curr) &&
4076 arg_type_is_mem_size(arg_next));
4077}
4078
4079static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
4080{
4081 /* bpf_xxx(..., buf, len) call will access 'len'
4082 * bytes from memory 'buf'. Both arg types need
4083 * to be paired, so make sure there's no buggy
4084 * helper function specification.
4085 */
4086 if (arg_type_is_mem_size(fn->arg1_type) ||
4087 arg_type_is_mem_ptr(fn->arg5_type) ||
4088 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
4089 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
4090 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
4091 check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
4092 return false;
4093
4094 return true;
4095}
4096
1b986589 4097static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
fd978bf7
JS
4098{
4099 int count = 0;
4100
1b986589 4101 if (arg_type_may_be_refcounted(fn->arg1_type))
fd978bf7 4102 count++;
1b986589 4103 if (arg_type_may_be_refcounted(fn->arg2_type))
fd978bf7 4104 count++;
1b986589 4105 if (arg_type_may_be_refcounted(fn->arg3_type))
fd978bf7 4106 count++;
1b986589 4107 if (arg_type_may_be_refcounted(fn->arg4_type))
fd978bf7 4108 count++;
1b986589 4109 if (arg_type_may_be_refcounted(fn->arg5_type))
fd978bf7
JS
4110 count++;
4111
1b986589
MKL
4112 /* A reference acquiring function cannot acquire
4113 * another refcounted ptr.
4114 */
4115 if (is_acquire_function(func_id) && count)
4116 return false;
4117
fd978bf7
JS
4118 /* We only support one arg being unreferenced at the moment,
4119 * which is sufficient for the helper functions we have right now.
4120 */
4121 return count <= 1;
4122}
4123
1b986589 4124static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
90133415
DB
4125{
4126 return check_raw_mode_ok(fn) &&
fd978bf7 4127 check_arg_pair_ok(fn) &&
1b986589 4128 check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
435faee1
DB
4129}
4130
de8f3a83
DB
4131/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
4132 * are now invalid, so turn them into unknown SCALAR_VALUE.
f1174f77 4133 */
f4d7e40a
AS
4134static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
4135 struct bpf_func_state *state)
969bf05e 4136{
58e2af8b 4137 struct bpf_reg_state *regs = state->regs, *reg;
969bf05e
AS
4138 int i;
4139
4140 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 4141 if (reg_is_pkt_pointer_any(&regs[i]))
61bd5218 4142 mark_reg_unknown(env, regs, i);
969bf05e 4143
f3709f69
JS
4144 bpf_for_each_spilled_reg(i, state, reg) {
4145 if (!reg)
969bf05e 4146 continue;
de8f3a83 4147 if (reg_is_pkt_pointer_any(reg))
f54c7898 4148 __mark_reg_unknown(env, reg);
969bf05e
AS
4149 }
4150}
4151
f4d7e40a
AS
4152static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
4153{
4154 struct bpf_verifier_state *vstate = env->cur_state;
4155 int i;
4156
4157 for (i = 0; i <= vstate->curframe; i++)
4158 __clear_all_pkt_pointers(env, vstate->frame[i]);
4159}
4160
fd978bf7 4161static void release_reg_references(struct bpf_verifier_env *env,
1b986589
MKL
4162 struct bpf_func_state *state,
4163 int ref_obj_id)
fd978bf7
JS
4164{
4165 struct bpf_reg_state *regs = state->regs, *reg;
4166 int i;
4167
4168 for (i = 0; i < MAX_BPF_REG; i++)
1b986589 4169 if (regs[i].ref_obj_id == ref_obj_id)
fd978bf7
JS
4170 mark_reg_unknown(env, regs, i);
4171
4172 bpf_for_each_spilled_reg(i, state, reg) {
4173 if (!reg)
4174 continue;
1b986589 4175 if (reg->ref_obj_id == ref_obj_id)
f54c7898 4176 __mark_reg_unknown(env, reg);
fd978bf7
JS
4177 }
4178}
4179
4180/* The pointer with the specified id has released its reference to kernel
4181 * resources. Identify all copies of the same pointer and clear the reference.
4182 */
4183static int release_reference(struct bpf_verifier_env *env,
1b986589 4184 int ref_obj_id)
fd978bf7
JS
4185{
4186 struct bpf_verifier_state *vstate = env->cur_state;
1b986589 4187 int err;
fd978bf7
JS
4188 int i;
4189
1b986589
MKL
4190 err = release_reference_state(cur_func(env), ref_obj_id);
4191 if (err)
4192 return err;
4193
fd978bf7 4194 for (i = 0; i <= vstate->curframe; i++)
1b986589 4195 release_reg_references(env, vstate->frame[i], ref_obj_id);
fd978bf7 4196
1b986589 4197 return 0;
fd978bf7
JS
4198}
4199
51c39bb1
AS
4200static void clear_caller_saved_regs(struct bpf_verifier_env *env,
4201 struct bpf_reg_state *regs)
4202{
4203 int i;
4204
4205 /* after the call registers r0 - r5 were scratched */
4206 for (i = 0; i < CALLER_SAVED_REGS; i++) {
4207 mark_reg_not_init(env, regs, caller_saved[i]);
4208 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
4209 }
4210}
4211
f4d7e40a
AS
4212static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
4213 int *insn_idx)
4214{
4215 struct bpf_verifier_state *state = env->cur_state;
51c39bb1 4216 struct bpf_func_info_aux *func_info_aux;
f4d7e40a 4217 struct bpf_func_state *caller, *callee;
fd978bf7 4218 int i, err, subprog, target_insn;
51c39bb1 4219 bool is_global = false;
f4d7e40a 4220
aada9ce6 4221 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
f4d7e40a 4222 verbose(env, "the call stack of %d frames is too deep\n",
aada9ce6 4223 state->curframe + 2);
f4d7e40a
AS
4224 return -E2BIG;
4225 }
4226
4227 target_insn = *insn_idx + insn->imm;
4228 subprog = find_subprog(env, target_insn + 1);
4229 if (subprog < 0) {
4230 verbose(env, "verifier bug. No program starts at insn %d\n",
4231 target_insn + 1);
4232 return -EFAULT;
4233 }
4234
4235 caller = state->frame[state->curframe];
4236 if (state->frame[state->curframe + 1]) {
4237 verbose(env, "verifier bug. Frame %d already allocated\n",
4238 state->curframe + 1);
4239 return -EFAULT;
4240 }
4241
51c39bb1
AS
4242 func_info_aux = env->prog->aux->func_info_aux;
4243 if (func_info_aux)
4244 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
4245 err = btf_check_func_arg_match(env, subprog, caller->regs);
4246 if (err == -EFAULT)
4247 return err;
4248 if (is_global) {
4249 if (err) {
4250 verbose(env, "Caller passes invalid args into func#%d\n",
4251 subprog);
4252 return err;
4253 } else {
4254 if (env->log.level & BPF_LOG_LEVEL)
4255 verbose(env,
4256 "Func#%d is global and valid. Skipping.\n",
4257 subprog);
4258 clear_caller_saved_regs(env, caller->regs);
4259
4260 /* All global functions return SCALAR_VALUE */
4261 mark_reg_unknown(env, caller->regs, BPF_REG_0);
4262
4263 /* continue with next insn after call */
4264 return 0;
4265 }
4266 }
4267
f4d7e40a
AS
4268 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
4269 if (!callee)
4270 return -ENOMEM;
4271 state->frame[state->curframe + 1] = callee;
4272
4273 /* callee cannot access r0, r6 - r9 for reading and has to write
4274 * into its own stack before reading from it.
4275 * callee can read/write into caller's stack
4276 */
4277 init_func_state(env, callee,
4278 /* remember the callsite, it will be used by bpf_exit */
4279 *insn_idx /* callsite */,
4280 state->curframe + 1 /* frameno within this callchain */,
f910cefa 4281 subprog /* subprog number within this prog */);
f4d7e40a 4282
fd978bf7
JS
4283 /* Transfer references to the callee */
4284 err = transfer_reference_state(callee, caller);
4285 if (err)
4286 return err;
4287
679c782d
EC
4288 /* copy r1 - r5 args that callee can access. The copy includes parent
4289 * pointers, which connects us up to the liveness chain
4290 */
f4d7e40a
AS
4291 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
4292 callee->regs[i] = caller->regs[i];
4293
51c39bb1 4294 clear_caller_saved_regs(env, caller->regs);
f4d7e40a
AS
4295
4296 /* only increment it after check_reg_arg() finished */
4297 state->curframe++;
4298
4299 /* and go analyze first insn of the callee */
4300 *insn_idx = target_insn;
4301
06ee7115 4302 if (env->log.level & BPF_LOG_LEVEL) {
f4d7e40a
AS
4303 verbose(env, "caller:\n");
4304 print_verifier_state(env, caller);
4305 verbose(env, "callee:\n");
4306 print_verifier_state(env, callee);
4307 }
4308 return 0;
4309}
4310
4311static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
4312{
4313 struct bpf_verifier_state *state = env->cur_state;
4314 struct bpf_func_state *caller, *callee;
4315 struct bpf_reg_state *r0;
fd978bf7 4316 int err;
f4d7e40a
AS
4317
4318 callee = state->frame[state->curframe];
4319 r0 = &callee->regs[BPF_REG_0];
4320 if (r0->type == PTR_TO_STACK) {
4321 /* Technically it's ok to return the caller's stack pointer
4322 * (or the caller's caller's pointer) back to the caller,
4323 * since those pointers are still valid. Only the current stack
4324 * pointer becomes invalid as soon as the function exits,
4325 * but let's be conservative.
4326 */
4327 verbose(env, "cannot return stack pointer to the caller\n");
4328 return -EINVAL;
4329 }
4330
4331 state->curframe--;
4332 caller = state->frame[state->curframe];
4333 /* return to the caller whatever r0 had in the callee */
4334 caller->regs[BPF_REG_0] = *r0;
4335
fd978bf7
JS
4336 /* Transfer references to the caller */
4337 err = transfer_reference_state(caller, callee);
4338 if (err)
4339 return err;
4340
f4d7e40a 4341 *insn_idx = callee->callsite + 1;
06ee7115 4342 if (env->log.level & BPF_LOG_LEVEL) {
f4d7e40a
AS
4343 verbose(env, "returning from callee:\n");
4344 print_verifier_state(env, callee);
4345 verbose(env, "to caller at %d:\n", *insn_idx);
4346 print_verifier_state(env, caller);
4347 }
4348 /* clear everything in the callee */
4349 free_func_state(callee);
4350 state->frame[state->curframe + 1] = NULL;
4351 return 0;
4352}
4353
849fa506
YS
4354static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
4355 int func_id,
4356 struct bpf_call_arg_meta *meta)
4357{
4358 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
4359
4360 if (ret_type != RET_INTEGER ||
4361 (func_id != BPF_FUNC_get_stack &&
4362 func_id != BPF_FUNC_probe_read_str))
4363 return;
4364
10060503 4365 ret_reg->smax_value = meta->msize_max_value;
fa123ac0 4366 ret_reg->s32_max_value = meta->msize_max_value;
849fa506
YS
4367 __reg_deduce_bounds(ret_reg);
4368 __reg_bound_offset(ret_reg);
10060503 4369 __update_reg_bounds(ret_reg);
849fa506
YS
4370}
4371
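/* A minimal illustrative sketch, in BPF program C, of the refinement above:
 * for bpf_get_stack() the umax of the size argument becomes the smax of r0,
 * so after filtering out negative returns the result is a bounded length.
 *
 *   char buf[64];
 *   long n = bpf_get_stack(ctx, buf, sizeof(buf), 0);
 *
 *   if (n < 0)
 *           return 0;
 *   at this point the verifier knows 0 <= n <= sizeof(buf)
 */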
c93552c4
DB
4372static int
4373record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
4374 int func_id, int insn_idx)
4375{
4376 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
591fe988 4377 struct bpf_map *map = meta->map_ptr;
c93552c4
DB
4378
4379 if (func_id != BPF_FUNC_tail_call &&
09772d92
DB
4380 func_id != BPF_FUNC_map_lookup_elem &&
4381 func_id != BPF_FUNC_map_update_elem &&
f1a2e44a
MV
4382 func_id != BPF_FUNC_map_delete_elem &&
4383 func_id != BPF_FUNC_map_push_elem &&
4384 func_id != BPF_FUNC_map_pop_elem &&
4385 func_id != BPF_FUNC_map_peek_elem)
c93552c4 4386 return 0;
09772d92 4387
591fe988 4388 if (map == NULL) {
c93552c4
DB
4389 verbose(env, "kernel subsystem misconfigured verifier\n");
4390 return -EINVAL;
4391 }
4392
591fe988
DB
4393 /* In the case of a read-only map, some additional restrictions
4394 * need to be applied in order to prevent the program from
4395 * altering the state of the map.
4396 */
4397 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
4398 (func_id == BPF_FUNC_map_delete_elem ||
4399 func_id == BPF_FUNC_map_update_elem ||
4400 func_id == BPF_FUNC_map_push_elem ||
4401 func_id == BPF_FUNC_map_pop_elem)) {
4402 verbose(env, "write into map forbidden\n");
4403 return -EACCES;
4404 }
4405
d2e4c1e6 4406 if (!BPF_MAP_PTR(aux->map_ptr_state))
c93552c4
DB
4407 bpf_map_ptr_store(aux, meta->map_ptr,
4408 meta->map_ptr->unpriv_array);
d2e4c1e6 4409 else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
c93552c4
DB
4410 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
4411 meta->map_ptr->unpriv_array);
4412 return 0;
4413}
4414
d2e4c1e6
DB
4415static int
4416record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
4417 int func_id, int insn_idx)
4418{
4419 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
4420 struct bpf_reg_state *regs = cur_regs(env), *reg;
4421 struct bpf_map *map = meta->map_ptr;
4422 struct tnum range;
4423 u64 val;
cc52d914 4424 int err;
d2e4c1e6
DB
4425
4426 if (func_id != BPF_FUNC_tail_call)
4427 return 0;
4428 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
4429 verbose(env, "kernel subsystem misconfigured verifier\n");
4430 return -EINVAL;
4431 }
4432
4433 range = tnum_range(0, map->max_entries - 1);
4434 reg = &regs[BPF_REG_3];
4435
4436 if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
4437 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
4438 return 0;
4439 }
4440
cc52d914
DB
4441 err = mark_chain_precision(env, BPF_REG_3);
4442 if (err)
4443 return err;
4444
d2e4c1e6
DB
4445 val = reg->var_off.value;
4446 if (bpf_map_key_unseen(aux))
4447 bpf_map_key_store(aux, val);
4448 else if (!bpf_map_key_poisoned(aux) &&
4449 bpf_map_key_immediate(aux) != val)
4450 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
4451 return 0;
4452}
4453
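/* A minimal illustrative sketch of what record_func_key() tracks: a
 * tail call with a constant, in-range index records that key so that
 * fixup_bpf_calls() can later specialize the call, while a variable index
 * poisons the key and keeps the generic path. 'jmp_table' and 'idx' are
 * placeholder names.
 *
 *   bpf_tail_call(ctx, &jmp_table, 2);      constant key 2 is recorded
 *   bpf_tail_call(ctx, &jmp_table, idx);    unknown idx: key is poisoned
 */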
fd978bf7
JS
4454static int check_reference_leak(struct bpf_verifier_env *env)
4455{
4456 struct bpf_func_state *state = cur_func(env);
4457 int i;
4458
4459 for (i = 0; i < state->acquired_refs; i++) {
4460 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
4461 state->refs[i].id, state->refs[i].insn_idx);
4462 }
4463 return state->acquired_refs ? -EINVAL : 0;
4464}
4465
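/* A minimal illustrative sketch, in BPF program C, of the leak this check
 * reports: an acquired socket reference must be released on every path
 * before BPF_EXIT (and before any tail call), or the verifier prints
 * "Unreleased reference id=... alloc_insn=...". 'tuple' is assumed to be a
 * populated struct bpf_sock_tuple on the stack.
 *
 *   struct bpf_sock *sk;
 *
 *   sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
 *                          BPF_F_CURRENT_NETNS, 0);
 *   if (sk)
 *           bpf_sk_release(sk);            dropping this line leaks the ref
 */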
f4d7e40a 4466static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
17a52670 4467{
17a52670 4468 const struct bpf_func_proto *fn = NULL;
638f5b90 4469 struct bpf_reg_state *regs;
33ff9823 4470 struct bpf_call_arg_meta meta;
969bf05e 4471 bool changes_data;
17a52670
AS
4472 int i, err;
4473
4474 /* find function prototype */
4475 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
61bd5218
JK
4476 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
4477 func_id);
17a52670
AS
4478 return -EINVAL;
4479 }
4480
00176a34 4481 if (env->ops->get_func_proto)
5e43f899 4482 fn = env->ops->get_func_proto(func_id, env->prog);
17a52670 4483 if (!fn) {
61bd5218
JK
4484 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
4485 func_id);
17a52670
AS
4486 return -EINVAL;
4487 }
4488
4489 /* eBPF programs must be GPL compatible to use GPL-ed functions */
24701ece 4490 if (!env->prog->gpl_compatible && fn->gpl_only) {
3fe2867c 4491 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
17a52670
AS
4492 return -EINVAL;
4493 }
4494
04514d13 4495 /* With LD_ABS/IND some JITs save/restore skb from r1. */
17bedab2 4496 changes_data = bpf_helper_changes_pkt_data(fn->func);
04514d13
DB
4497 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
4498 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
4499 func_id_name(func_id), func_id);
4500 return -EINVAL;
4501 }
969bf05e 4502
33ff9823 4503 memset(&meta, 0, sizeof(meta));
36bbef52 4504 meta.pkt_access = fn->pkt_access;
33ff9823 4505
1b986589 4506 err = check_func_proto(fn, func_id);
435faee1 4507 if (err) {
61bd5218 4508 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
ebb676da 4509 func_id_name(func_id), func_id);
435faee1
DB
4510 return err;
4511 }
4512
d83525ca 4513 meta.func_id = func_id;
17a52670 4514 /* check args */
a7658e1a 4515 for (i = 0; i < 5; i++) {
9cc31b3a
AS
4516 err = btf_resolve_helper_id(&env->log, fn, i);
4517 if (err > 0)
4518 meta.btf_id = err;
a7658e1a
AS
4519 err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
4520 if (err)
4521 return err;
4522 }
17a52670 4523
c93552c4
DB
4524 err = record_func_map(env, &meta, func_id, insn_idx);
4525 if (err)
4526 return err;
4527
d2e4c1e6
DB
4528 err = record_func_key(env, &meta, func_id, insn_idx);
4529 if (err)
4530 return err;
4531
435faee1
DB
4532 /* Mark slots with STACK_MISC in case of raw mode, stack offset
4533 * is inferred from register state.
4534 */
4535 for (i = 0; i < meta.access_size; i++) {
ca369602
DB
4536 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
4537 BPF_WRITE, -1, false);
435faee1
DB
4538 if (err)
4539 return err;
4540 }
4541
fd978bf7
JS
4542 if (func_id == BPF_FUNC_tail_call) {
4543 err = check_reference_leak(env);
4544 if (err) {
4545 verbose(env, "tail_call would lead to reference leak\n");
4546 return err;
4547 }
4548 } else if (is_release_function(func_id)) {
1b986589 4549 err = release_reference(env, meta.ref_obj_id);
46f8bc92
MKL
4550 if (err) {
4551 verbose(env, "func %s#%d reference has not been acquired before\n",
4552 func_id_name(func_id), func_id);
fd978bf7 4553 return err;
46f8bc92 4554 }
fd978bf7
JS
4555 }
4556
638f5b90 4557 regs = cur_regs(env);
cd339431
RG
4558
4559 /* check that flags argument in get_local_storage(map, flags) is 0,
4560 * this is required because get_local_storage() can't return an error.
4561 */
4562 if (func_id == BPF_FUNC_get_local_storage &&
4563 !register_is_null(&regs[BPF_REG_2])) {
4564 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
4565 return -EINVAL;
4566 }
4567
17a52670 4568 /* reset caller saved regs */
dc503a8a 4569 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 4570 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
4571 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
4572 }
17a52670 4573
5327ed3d
JW
4574 /* helper call returns 64-bit value. */
4575 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
4576
dc503a8a 4577 /* update return register (already marked as written above) */
17a52670 4578 if (fn->ret_type == RET_INTEGER) {
f1174f77 4579 /* sets type to SCALAR_VALUE */
61bd5218 4580 mark_reg_unknown(env, regs, BPF_REG_0);
17a52670
AS
4581 } else if (fn->ret_type == RET_VOID) {
4582 regs[BPF_REG_0].type = NOT_INIT;
3e6a4b3e
RG
4583 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
4584 fn->ret_type == RET_PTR_TO_MAP_VALUE) {
f1174f77 4585 /* There is no offset yet applied, variable or fixed */
61bd5218 4586 mark_reg_known_zero(env, regs, BPF_REG_0);
17a52670
AS
4587 /* remember map_ptr, so that check_map_access()
4588 * can check 'value_size' boundary of memory access
4589 * to map element returned from bpf_map_lookup_elem()
4590 */
33ff9823 4591 if (meta.map_ptr == NULL) {
61bd5218
JK
4592 verbose(env,
4593 "kernel subsystem misconfigured verifier\n");
17a52670
AS
4594 return -EINVAL;
4595 }
33ff9823 4596 regs[BPF_REG_0].map_ptr = meta.map_ptr;
4d31f301
DB
4597 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
4598 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
e16d2f1a
AS
4599 if (map_value_has_spin_lock(meta.map_ptr))
4600 regs[BPF_REG_0].id = ++env->id_gen;
4d31f301
DB
4601 } else {
4602 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
4603 regs[BPF_REG_0].id = ++env->id_gen;
4604 }
c64b7983
JS
4605 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
4606 mark_reg_known_zero(env, regs, BPF_REG_0);
4607 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
0f3adc28 4608 regs[BPF_REG_0].id = ++env->id_gen;
85a51f8c
LB
4609 } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
4610 mark_reg_known_zero(env, regs, BPF_REG_0);
4611 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
4612 regs[BPF_REG_0].id = ++env->id_gen;
655a51e5
MKL
4613 } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
4614 mark_reg_known_zero(env, regs, BPF_REG_0);
4615 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
4616 regs[BPF_REG_0].id = ++env->id_gen;
17a52670 4617 } else {
61bd5218 4618 verbose(env, "unknown return type %d of func %s#%d\n",
ebb676da 4619 fn->ret_type, func_id_name(func_id), func_id);
17a52670
AS
4620 return -EINVAL;
4621 }
04fd61ab 4622
0f3adc28 4623 if (is_ptr_cast_function(func_id)) {
1b986589
MKL
4624 /* For release_reference() */
4625 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
0f3adc28
LB
4626 } else if (is_acquire_function(func_id)) {
4627 int id = acquire_reference_state(env, insn_idx);
4628
4629 if (id < 0)
4630 return id;
4631 /* For mark_ptr_or_null_reg() */
4632 regs[BPF_REG_0].id = id;
4633 /* For release_reference() */
4634 regs[BPF_REG_0].ref_obj_id = id;
4635 }
1b986589 4636
849fa506
YS
4637 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
4638
61bd5218 4639 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
35578d79
KX
4640 if (err)
4641 return err;
04fd61ab 4642
c195651e
YS
4643 if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
4644 const char *err_str;
4645
4646#ifdef CONFIG_PERF_EVENTS
4647 err = get_callchain_buffers(sysctl_perf_event_max_stack);
4648 err_str = "cannot get callchain buffer for func %s#%d\n";
4649#else
4650 err = -ENOTSUPP;
4651 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
4652#endif
4653 if (err) {
4654 verbose(env, err_str, func_id_name(func_id), func_id);
4655 return err;
4656 }
4657
4658 env->prog->has_callchain_buf = true;
4659 }
4660
969bf05e
AS
4661 if (changes_data)
4662 clear_all_pkt_pointers(env);
4663 return 0;
4664}
4665
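/* A minimal illustrative sketch, in BPF program C, of the return-value
 * handling above: bpf_map_lookup_elem() returns RET_PTR_TO_MAP_VALUE_OR_NULL,
 * so R0 must be NULL-checked before it can be dereferenced. 'counters' and
 * 'key' are placeholder names.
 *
 *   long *v = bpf_map_lookup_elem(&counters, &key);
 *
 *   if (!v)
 *           return 0;
 *   __sync_fetch_and_add(v, 1);
 */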
b03c9f9f
EC
4666static bool signed_add_overflows(s64 a, s64 b)
4667{
4668 /* Do the add in u64, where overflow is well-defined */
4669 s64 res = (s64)((u64)a + (u64)b);
4670
4671 if (b < 0)
4672 return res > a;
4673 return res < a;
4674}
4675
3f50f132
JF
4676static bool signed_add32_overflows(s32 a, s32 b)
4677{
4678 /* Do the add in u32, where overflow is well-defined */
4679 s32 res = (s32)((u32)a + (u32)b);
4680
4681 if (b < 0)
4682 return res > a;
4683 return res < a;
4684}
4685
4686static bool signed_sub_overflows(s64 a, s64 b)
b03c9f9f
EC
4687{
4688 /* Do the sub in u64, where overflow is well-defined */
4689 s64 res = (s64)((u64)a - (u64)b);
4690
4691 if (b < 0)
4692 return res < a;
4693 return res > a;
969bf05e
AS
4694}
4695
3f50f132
JF
4696static bool signed_sub32_overflows(s32 a, s32 b)
4697{
4698 /* Do the sub in u64, where overflow is well-defined */
4699 s32 res = (s32)((u32)a - (u32)b);
4700
4701 if (b < 0)
4702 return res < a;
4703 return res > a;
4704}
4705
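/* Worked examples for the overflow helpers above:
 * signed_add_overflows(S64_MAX, 1): the u64 add wraps to S64_MIN; b >= 0 and
 * res < a, so it returns true. signed_sub_overflows(S64_MIN, 1): the u64 sub
 * wraps to S64_MAX; b >= 0 and res > a, so it returns true. The 32-bit
 * variants behave the same way around S32_MAX/S32_MIN.
 */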
bb7f0f98
AS
4706static bool check_reg_sane_offset(struct bpf_verifier_env *env,
4707 const struct bpf_reg_state *reg,
4708 enum bpf_reg_type type)
4709{
4710 bool known = tnum_is_const(reg->var_off);
4711 s64 val = reg->var_off.value;
4712 s64 smin = reg->smin_value;
4713
4714 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
4715 verbose(env, "math between %s pointer and %lld is not allowed\n",
4716 reg_type_str[type], val);
4717 return false;
4718 }
4719
4720 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
4721 verbose(env, "%s pointer offset %d is not allowed\n",
4722 reg_type_str[type], reg->off);
4723 return false;
4724 }
4725
4726 if (smin == S64_MIN) {
4727 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
4728 reg_type_str[type]);
4729 return false;
4730 }
4731
4732 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
4733 verbose(env, "value %lld makes %s pointer be out of bounds\n",
4734 smin, reg_type_str[type]);
4735 return false;
4736 }
4737
4738 return true;
4739}
4740
979d63d5
DB
4741static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
4742{
4743 return &env->insn_aux_data[env->insn_idx];
4744}
4745
4746static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
4747 u32 *ptr_limit, u8 opcode, bool off_is_neg)
4748{
4749 bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
4750 (opcode == BPF_SUB && !off_is_neg);
4751 u32 off;
4752
4753 switch (ptr_reg->type) {
4754 case PTR_TO_STACK:
088ec26d
AI
4755 /* Indirect variable offset stack access is prohibited in
4756 * unprivileged mode so it's not handled here.
4757 */
979d63d5
DB
4758 off = ptr_reg->off + ptr_reg->var_off.value;
4759 if (mask_to_left)
4760 *ptr_limit = MAX_BPF_STACK + off;
4761 else
4762 *ptr_limit = -off;
4763 return 0;
4764 case PTR_TO_MAP_VALUE:
4765 if (mask_to_left) {
4766 *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
4767 } else {
4768 off = ptr_reg->smin_value + ptr_reg->off;
4769 *ptr_limit = ptr_reg->map_ptr->value_size - off;
4770 }
4771 return 0;
4772 default:
4773 return -EINVAL;
4774 }
4775}
4776
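/* A worked example for retrieve_ptr_limit(): for a PTR_TO_STACK register
 * with off == -16 and a known-zero var_off, BPF_ADD of a non-negative
 * offset yields *ptr_limit = 16 (the distance up to the frame pointer),
 * while BPF_ADD of a negative offset yields *ptr_limit = MAX_BPF_STACK - 16
 * (496 with MAX_BPF_STACK == 512, the distance down to the lowest stack
 * slot).
 */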
d3bd7413
DB
4777static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
4778 const struct bpf_insn *insn)
4779{
4780 return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
4781}
4782
4783static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
4784 u32 alu_state, u32 alu_limit)
4785{
4786 /* If we arrived here from different branches with different
4787 * state or limits to sanitize, then this won't work.
4788 */
4789 if (aux->alu_state &&
4790 (aux->alu_state != alu_state ||
4791 aux->alu_limit != alu_limit))
4792 return -EACCES;
4793
4794 /* Corresponding fixup done in fixup_bpf_calls(). */
4795 aux->alu_state = alu_state;
4796 aux->alu_limit = alu_limit;
4797 return 0;
4798}
4799
4800static int sanitize_val_alu(struct bpf_verifier_env *env,
4801 struct bpf_insn *insn)
4802{
4803 struct bpf_insn_aux_data *aux = cur_aux(env);
4804
4805 if (can_skip_alu_sanitation(env, insn))
4806 return 0;
4807
4808 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
4809}
4810
979d63d5
DB
4811static int sanitize_ptr_alu(struct bpf_verifier_env *env,
4812 struct bpf_insn *insn,
4813 const struct bpf_reg_state *ptr_reg,
4814 struct bpf_reg_state *dst_reg,
4815 bool off_is_neg)
4816{
4817 struct bpf_verifier_state *vstate = env->cur_state;
4818 struct bpf_insn_aux_data *aux = cur_aux(env);
4819 bool ptr_is_dst_reg = ptr_reg == dst_reg;
4820 u8 opcode = BPF_OP(insn->code);
4821 u32 alu_state, alu_limit;
4822 struct bpf_reg_state tmp;
4823 bool ret;
4824
d3bd7413 4825 if (can_skip_alu_sanitation(env, insn))
979d63d5
DB
4826 return 0;
4827
4828 /* We already marked aux for masking from non-speculative
4829 * paths, thus we got here in the first place. We only care
4830 * to explore bad access from here.
4831 */
4832 if (vstate->speculative)
4833 goto do_sim;
4834
4835 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
4836 alu_state |= ptr_is_dst_reg ?
4837 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
4838
4839 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
4840 return 0;
d3bd7413 4841 if (update_alu_sanitation_state(aux, alu_state, alu_limit))
979d63d5 4842 return -EACCES;
979d63d5
DB
4843do_sim:
4844 /* Simulate and find potential out-of-bounds access under
4845 * speculative execution from truncation as a result of
4846 * masking when off was not within expected range. If off
4847 * sits in dst, then we temporarily need to move ptr there
4848 * to simulate dst (== 0) +/-= ptr. Needed, for example,
4849 * for cases where we use K-based arithmetic in one direction
4850 * and truncated reg-based in the other in order to explore
4851 * bad access.
4852 */
4853 if (!ptr_is_dst_reg) {
4854 tmp = *dst_reg;
4855 *dst_reg = *ptr_reg;
4856 }
4857 ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
0803278b 4858 if (!ptr_is_dst_reg && ret)
979d63d5
DB
4859 *dst_reg = tmp;
4860 return !ret ? -EFAULT : 0;
4861}
4862
f1174f77 4863/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
f1174f77
EC
4864 * Caller should also handle BPF_MOV case separately.
4865 * If we return -EACCES, caller may want to try again treating pointer as a
4866 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
4867 */
4868static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
4869 struct bpf_insn *insn,
4870 const struct bpf_reg_state *ptr_reg,
4871 const struct bpf_reg_state *off_reg)
969bf05e 4872{
f4d7e40a
AS
4873 struct bpf_verifier_state *vstate = env->cur_state;
4874 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4875 struct bpf_reg_state *regs = state->regs, *dst_reg;
f1174f77 4876 bool known = tnum_is_const(off_reg->var_off);
b03c9f9f
EC
4877 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
4878 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
4879 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
4880 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
9d7eceed 4881 u32 dst = insn->dst_reg, src = insn->src_reg;
969bf05e 4882 u8 opcode = BPF_OP(insn->code);
979d63d5 4883 int ret;
969bf05e 4884
f1174f77 4885 dst_reg = &regs[dst];
969bf05e 4886
6f16101e
DB
4887 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
4888 smin_val > smax_val || umin_val > umax_val) {
4889 /* Taint dst register if offset had invalid bounds derived from
4890 * e.g. dead branches.
4891 */
f54c7898 4892 __mark_reg_unknown(env, dst_reg);
6f16101e 4893 return 0;
f1174f77
EC
4894 }
4895
4896 if (BPF_CLASS(insn->code) != BPF_ALU64) {
4897 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
82abbf8d
AS
4898 verbose(env,
4899 "R%d 32-bit pointer arithmetic prohibited\n",
4900 dst);
f1174f77 4901 return -EACCES;
969bf05e
AS
4902 }
4903
aad2eeaf
JS
4904 switch (ptr_reg->type) {
4905 case PTR_TO_MAP_VALUE_OR_NULL:
4906 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
4907 dst, reg_type_str[ptr_reg->type]);
f1174f77 4908 return -EACCES;
aad2eeaf
JS
4909 case CONST_PTR_TO_MAP:
4910 case PTR_TO_PACKET_END:
c64b7983
JS
4911 case PTR_TO_SOCKET:
4912 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
4913 case PTR_TO_SOCK_COMMON:
4914 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
4915 case PTR_TO_TCP_SOCK:
4916 case PTR_TO_TCP_SOCK_OR_NULL:
fada7fdc 4917 case PTR_TO_XDP_SOCK:
aad2eeaf
JS
4918 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
4919 dst, reg_type_str[ptr_reg->type]);
f1174f77 4920 return -EACCES;
9d7eceed
DB
4921 case PTR_TO_MAP_VALUE:
4922 if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
4923 verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
4924 off_reg == dst_reg ? dst : src);
4925 return -EACCES;
4926 }
4927 /* fall-through */
aad2eeaf
JS
4928 default:
4929 break;
f1174f77
EC
4930 }
4931
4932 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
4933 * The id may be overwritten later if we create a new variable offset.
969bf05e 4934 */
f1174f77
EC
4935 dst_reg->type = ptr_reg->type;
4936 dst_reg->id = ptr_reg->id;
969bf05e 4937
bb7f0f98
AS
4938 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
4939 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
4940 return -EINVAL;
4941
3f50f132
JF
4942 /* pointer types do not carry 32-bit bounds at the moment. */
4943 __mark_reg32_unbounded(dst_reg);
4944
f1174f77
EC
4945 switch (opcode) {
4946 case BPF_ADD:
979d63d5
DB
4947 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
4948 if (ret < 0) {
4949 verbose(env, "R%d tried to add from different maps or paths\n", dst);
4950 return ret;
4951 }
f1174f77
EC
4952 /* We can take a fixed offset as long as it doesn't overflow
4953 * the s32 'off' field
969bf05e 4954 */
b03c9f9f
EC
4955 if (known && (ptr_reg->off + smin_val ==
4956 (s64)(s32)(ptr_reg->off + smin_val))) {
f1174f77 4957 /* pointer += K. Accumulate it into fixed offset */
b03c9f9f
EC
4958 dst_reg->smin_value = smin_ptr;
4959 dst_reg->smax_value = smax_ptr;
4960 dst_reg->umin_value = umin_ptr;
4961 dst_reg->umax_value = umax_ptr;
f1174f77 4962 dst_reg->var_off = ptr_reg->var_off;
b03c9f9f 4963 dst_reg->off = ptr_reg->off + smin_val;
0962590e 4964 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
4965 break;
4966 }
f1174f77
EC
4967 /* A new variable offset is created. Note that off_reg->off
4968 * == 0, since it's a scalar.
4969 * dst_reg gets the pointer type and since some positive
4970 * integer value was added to the pointer, give it a new 'id'
4971 * if it's a PTR_TO_PACKET.
 4972 * This creates a new 'base' pointer, off_reg (variable) gets
4973 * added into the variable offset, and we copy the fixed offset
4974 * from ptr_reg.
969bf05e 4975 */
b03c9f9f
EC
4976 if (signed_add_overflows(smin_ptr, smin_val) ||
4977 signed_add_overflows(smax_ptr, smax_val)) {
4978 dst_reg->smin_value = S64_MIN;
4979 dst_reg->smax_value = S64_MAX;
4980 } else {
4981 dst_reg->smin_value = smin_ptr + smin_val;
4982 dst_reg->smax_value = smax_ptr + smax_val;
4983 }
4984 if (umin_ptr + umin_val < umin_ptr ||
4985 umax_ptr + umax_val < umax_ptr) {
4986 dst_reg->umin_value = 0;
4987 dst_reg->umax_value = U64_MAX;
4988 } else {
4989 dst_reg->umin_value = umin_ptr + umin_val;
4990 dst_reg->umax_value = umax_ptr + umax_val;
4991 }
f1174f77
EC
4992 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
4993 dst_reg->off = ptr_reg->off;
0962590e 4994 dst_reg->raw = ptr_reg->raw;
de8f3a83 4995 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
4996 dst_reg->id = ++env->id_gen;
4997 /* something was added to pkt_ptr, set range to zero */
0962590e 4998 dst_reg->raw = 0;
f1174f77
EC
4999 }
5000 break;
5001 case BPF_SUB:
979d63d5
DB
5002 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
5003 if (ret < 0) {
5004 verbose(env, "R%d tried to sub from different maps or paths\n", dst);
5005 return ret;
5006 }
f1174f77
EC
5007 if (dst_reg == off_reg) {
5008 /* scalar -= pointer. Creates an unknown scalar */
82abbf8d
AS
5009 verbose(env, "R%d tried to subtract pointer from scalar\n",
5010 dst);
f1174f77
EC
5011 return -EACCES;
5012 }
5013 /* We don't allow subtraction from FP, because (according to
5014 * test_verifier.c test "invalid fp arithmetic", JITs might not
5015 * be able to deal with it.
969bf05e 5016 */
f1174f77 5017 if (ptr_reg->type == PTR_TO_STACK) {
82abbf8d
AS
5018 verbose(env, "R%d subtraction from stack pointer prohibited\n",
5019 dst);
f1174f77
EC
5020 return -EACCES;
5021 }
b03c9f9f
EC
5022 if (known && (ptr_reg->off - smin_val ==
5023 (s64)(s32)(ptr_reg->off - smin_val))) {
f1174f77 5024 /* pointer -= K. Subtract it from fixed offset */
b03c9f9f
EC
5025 dst_reg->smin_value = smin_ptr;
5026 dst_reg->smax_value = smax_ptr;
5027 dst_reg->umin_value = umin_ptr;
5028 dst_reg->umax_value = umax_ptr;
f1174f77
EC
5029 dst_reg->var_off = ptr_reg->var_off;
5030 dst_reg->id = ptr_reg->id;
b03c9f9f 5031 dst_reg->off = ptr_reg->off - smin_val;
0962590e 5032 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
5033 break;
5034 }
f1174f77
EC
5035 /* A new variable offset is created. If the subtrahend is known
5036 * nonnegative, then any reg->range we had before is still good.
969bf05e 5037 */
b03c9f9f
EC
5038 if (signed_sub_overflows(smin_ptr, smax_val) ||
5039 signed_sub_overflows(smax_ptr, smin_val)) {
5040 /* Overflow possible, we know nothing */
5041 dst_reg->smin_value = S64_MIN;
5042 dst_reg->smax_value = S64_MAX;
5043 } else {
5044 dst_reg->smin_value = smin_ptr - smax_val;
5045 dst_reg->smax_value = smax_ptr - smin_val;
5046 }
5047 if (umin_ptr < umax_val) {
5048 /* Overflow possible, we know nothing */
5049 dst_reg->umin_value = 0;
5050 dst_reg->umax_value = U64_MAX;
5051 } else {
5052 /* Cannot overflow (as long as bounds are consistent) */
5053 dst_reg->umin_value = umin_ptr - umax_val;
5054 dst_reg->umax_value = umax_ptr - umin_val;
5055 }
f1174f77
EC
5056 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
5057 dst_reg->off = ptr_reg->off;
0962590e 5058 dst_reg->raw = ptr_reg->raw;
de8f3a83 5059 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
5060 dst_reg->id = ++env->id_gen;
 5061 /* a negative subtrahend may have advanced pkt_ptr, set range to zero */
b03c9f9f 5062 if (smin_val < 0)
0962590e 5063 dst_reg->raw = 0;
43188702 5064 }
f1174f77
EC
5065 break;
5066 case BPF_AND:
5067 case BPF_OR:
5068 case BPF_XOR:
82abbf8d
AS
5069 /* bitwise ops on pointers are troublesome, prohibit. */
5070 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
5071 dst, bpf_alu_string[opcode >> 4]);
f1174f77
EC
5072 return -EACCES;
5073 default:
5074 /* other operators (e.g. MUL,LSH) produce non-pointer results */
82abbf8d
AS
5075 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
5076 dst, bpf_alu_string[opcode >> 4]);
f1174f77 5077 return -EACCES;
43188702
JF
5078 }
5079
bb7f0f98
AS
5080 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
5081 return -EINVAL;
5082
b03c9f9f
EC
5083 __update_reg_bounds(dst_reg);
5084 __reg_deduce_bounds(dst_reg);
5085 __reg_bound_offset(dst_reg);
0d6303db
DB
5086
5087 /* For unprivileged we require that resulting offset must be in bounds
5088 * in order to be able to sanitize access later on.
5089 */
e4298d25
DB
5090 if (!env->allow_ptr_leaks) {
5091 if (dst_reg->type == PTR_TO_MAP_VALUE &&
5092 check_map_access(env, dst, dst_reg->off, 1, false)) {
5093 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
5094 "prohibited for !root\n", dst);
5095 return -EACCES;
5096 } else if (dst_reg->type == PTR_TO_STACK &&
5097 check_stack_access(env, dst_reg, dst_reg->off +
5098 dst_reg->var_off.value, 1)) {
5099 verbose(env, "R%d stack pointer arithmetic goes out of range, "
5100 "prohibited for !root\n", dst);
5101 return -EACCES;
5102 }
0d6303db
DB
5103 }
5104
43188702
JF
5105 return 0;
5106}
5107
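/* The scalar32_min_max_*() helpers below only adjust the 32-bit (u32_/s32_)
 * bounds of dst_reg; the 64-bit bounds are handled by the matching
 * scalar_min_max_*() helpers and var_off by the caller.
 * e.g. for a 32-bit BPF_ADD with dst in [10, 20] and src in [1, 2], the
 * resulting u32 range is [11, 22] (unless the addition may wrap).
 */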
3f50f132
JF
5108static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
5109 struct bpf_reg_state *src_reg)
5110{
5111 s32 smin_val = src_reg->s32_min_value;
5112 s32 smax_val = src_reg->s32_max_value;
5113 u32 umin_val = src_reg->u32_min_value;
5114 u32 umax_val = src_reg->u32_max_value;
5115
5116 if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
5117 signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
5118 dst_reg->s32_min_value = S32_MIN;
5119 dst_reg->s32_max_value = S32_MAX;
5120 } else {
5121 dst_reg->s32_min_value += smin_val;
5122 dst_reg->s32_max_value += smax_val;
5123 }
5124 if (dst_reg->u32_min_value + umin_val < umin_val ||
5125 dst_reg->u32_max_value + umax_val < umax_val) {
5126 dst_reg->u32_min_value = 0;
5127 dst_reg->u32_max_value = U32_MAX;
5128 } else {
5129 dst_reg->u32_min_value += umin_val;
5130 dst_reg->u32_max_value += umax_val;
5131 }
5132}
5133
07cd2631
JF
5134static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
5135 struct bpf_reg_state *src_reg)
5136{
5137 s64 smin_val = src_reg->smin_value;
5138 s64 smax_val = src_reg->smax_value;
5139 u64 umin_val = src_reg->umin_value;
5140 u64 umax_val = src_reg->umax_value;
5141
5142 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
5143 signed_add_overflows(dst_reg->smax_value, smax_val)) {
5144 dst_reg->smin_value = S64_MIN;
5145 dst_reg->smax_value = S64_MAX;
5146 } else {
5147 dst_reg->smin_value += smin_val;
5148 dst_reg->smax_value += smax_val;
5149 }
5150 if (dst_reg->umin_value + umin_val < umin_val ||
5151 dst_reg->umax_value + umax_val < umax_val) {
5152 dst_reg->umin_value = 0;
5153 dst_reg->umax_value = U64_MAX;
5154 } else {
5155 dst_reg->umin_value += umin_val;
5156 dst_reg->umax_value += umax_val;
5157 }
3f50f132
JF
5158}
5159
5160static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
5161 struct bpf_reg_state *src_reg)
5162{
5163 s32 smin_val = src_reg->s32_min_value;
5164 s32 smax_val = src_reg->s32_max_value;
5165 u32 umin_val = src_reg->u32_min_value;
5166 u32 umax_val = src_reg->u32_max_value;
5167
5168 if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
5169 signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
5170 /* Overflow possible, we know nothing */
5171 dst_reg->s32_min_value = S32_MIN;
5172 dst_reg->s32_max_value = S32_MAX;
5173 } else {
5174 dst_reg->s32_min_value -= smax_val;
5175 dst_reg->s32_max_value -= smin_val;
5176 }
5177 if (dst_reg->u32_min_value < umax_val) {
5178 /* Overflow possible, we know nothing */
5179 dst_reg->u32_min_value = 0;
5180 dst_reg->u32_max_value = U32_MAX;
5181 } else {
5182 /* Cannot overflow (as long as bounds are consistent) */
5183 dst_reg->u32_min_value -= umax_val;
5184 dst_reg->u32_max_value -= umin_val;
5185 }
07cd2631
JF
5186}
5187
5188static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
5189 struct bpf_reg_state *src_reg)
5190{
5191 s64 smin_val = src_reg->smin_value;
5192 s64 smax_val = src_reg->smax_value;
5193 u64 umin_val = src_reg->umin_value;
5194 u64 umax_val = src_reg->umax_value;
5195
5196 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
5197 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
5198 /* Overflow possible, we know nothing */
5199 dst_reg->smin_value = S64_MIN;
5200 dst_reg->smax_value = S64_MAX;
5201 } else {
5202 dst_reg->smin_value -= smax_val;
5203 dst_reg->smax_value -= smin_val;
5204 }
5205 if (dst_reg->umin_value < umax_val) {
5206 /* Overflow possible, we know nothing */
5207 dst_reg->umin_value = 0;
5208 dst_reg->umax_value = U64_MAX;
5209 } else {
5210 /* Cannot overflow (as long as bounds are consistent) */
5211 dst_reg->umin_value -= umax_val;
5212 dst_reg->umax_value -= umin_val;
5213 }
3f50f132
JF
5214}
5215
5216static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
5217 struct bpf_reg_state *src_reg)
5218{
5219 s32 smin_val = src_reg->s32_min_value;
5220 u32 umin_val = src_reg->u32_min_value;
5221 u32 umax_val = src_reg->u32_max_value;
5222
5223 if (smin_val < 0 || dst_reg->s32_min_value < 0) {
5224 /* Ain't nobody got time to multiply that sign */
5225 __mark_reg32_unbounded(dst_reg);
5226 return;
5227 }
5228 /* Both values are positive, so we can work with unsigned and
5229 * copy the result to signed (unless it exceeds S32_MAX).
5230 */
5231 if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
5232 /* Potential overflow, we know nothing */
5233 __mark_reg32_unbounded(dst_reg);
5234 return;
5235 }
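	/* Both factors are at most U16_MAX here, so the u32
	 * multiplications below cannot wrap around.
	 */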
5236 dst_reg->u32_min_value *= umin_val;
5237 dst_reg->u32_max_value *= umax_val;
5238 if (dst_reg->u32_max_value > S32_MAX) {
5239 /* Overflow possible, we know nothing */
5240 dst_reg->s32_min_value = S32_MIN;
5241 dst_reg->s32_max_value = S32_MAX;
5242 } else {
5243 dst_reg->s32_min_value = dst_reg->u32_min_value;
5244 dst_reg->s32_max_value = dst_reg->u32_max_value;
5245 }
07cd2631
JF
5246}
5247
5248static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
5249 struct bpf_reg_state *src_reg)
5250{
5251 s64 smin_val = src_reg->smin_value;
5252 u64 umin_val = src_reg->umin_value;
5253 u64 umax_val = src_reg->umax_value;
5254
07cd2631
JF
5255 if (smin_val < 0 || dst_reg->smin_value < 0) {
5256 /* Ain't nobody got time to multiply that sign */
3f50f132 5257 __mark_reg64_unbounded(dst_reg);
07cd2631
JF
5258 return;
5259 }
5260 /* Both values are positive, so we can work with unsigned and
5261 * copy the result to signed (unless it exceeds S64_MAX).
5262 */
5263 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
5264 /* Potential overflow, we know nothing */
3f50f132 5265 __mark_reg64_unbounded(dst_reg);
07cd2631
JF
5266 return;
5267 }
5268 dst_reg->umin_value *= umin_val;
5269 dst_reg->umax_value *= umax_val;
5270 if (dst_reg->umax_value > S64_MAX) {
5271 /* Overflow possible, we know nothing */
5272 dst_reg->smin_value = S64_MIN;
5273 dst_reg->smax_value = S64_MAX;
5274 } else {
5275 dst_reg->smin_value = dst_reg->umin_value;
5276 dst_reg->smax_value = dst_reg->umax_value;
5277 }
5278}
5279
3f50f132
JF
5280static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
5281 struct bpf_reg_state *src_reg)
5282{
5283 bool src_known = tnum_subreg_is_const(src_reg->var_off);
5284 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
5285 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
5286 s32 smin_val = src_reg->s32_min_value;
5287 u32 umax_val = src_reg->u32_max_value;
5288
 5289 /* Assuming scalar_min_max_and() will be called so it's safe
 5290 * to skip updating the register for the known 32-bit case.
5291 */
5292 if (src_known && dst_known)
5293 return;
5294
5295 /* We get our minimum from the var_off, since that's inherently
5296 * bitwise. Our maximum is the minimum of the operands' maxima.
5297 */
5298 dst_reg->u32_min_value = var32_off.value;
5299 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
5300 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
5301 /* Lose signed bounds when ANDing negative numbers,
5302 * ain't nobody got time for that.
5303 */
5304 dst_reg->s32_min_value = S32_MIN;
5305 dst_reg->s32_max_value = S32_MAX;
5306 } else {
5307 /* ANDing two positives gives a positive, so safe to
 5308 * cast the result into s32.
5309 */
5310 dst_reg->s32_min_value = dst_reg->u32_min_value;
5311 dst_reg->s32_max_value = dst_reg->u32_max_value;
5312 }
5313
5314}
5315
07cd2631
JF
5316static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
5317 struct bpf_reg_state *src_reg)
5318{
3f50f132
JF
5319 bool src_known = tnum_is_const(src_reg->var_off);
5320 bool dst_known = tnum_is_const(dst_reg->var_off);
07cd2631
JF
5321 s64 smin_val = src_reg->smin_value;
5322 u64 umax_val = src_reg->umax_value;
5323
3f50f132
JF
5324 if (src_known && dst_known) {
5325 __mark_reg_known(dst_reg, dst_reg->var_off.value &
5326 src_reg->var_off.value);
5327 return;
5328 }
5329
07cd2631
JF
5330 /* We get our minimum from the var_off, since that's inherently
5331 * bitwise. Our maximum is the minimum of the operands' maxima.
5332 */
07cd2631
JF
5333 dst_reg->umin_value = dst_reg->var_off.value;
5334 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
5335 if (dst_reg->smin_value < 0 || smin_val < 0) {
5336 /* Lose signed bounds when ANDing negative numbers,
5337 * ain't nobody got time for that.
5338 */
5339 dst_reg->smin_value = S64_MIN;
5340 dst_reg->smax_value = S64_MAX;
5341 } else {
5342 /* ANDing two positives gives a positive, so safe to
5343 * cast result into s64.
5344 */
5345 dst_reg->smin_value = dst_reg->umin_value;
5346 dst_reg->smax_value = dst_reg->umax_value;
5347 }
5348 /* We may learn something more from the var_off */
5349 __update_reg_bounds(dst_reg);
5350}
5351
3f50f132
JF
5352static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
5353 struct bpf_reg_state *src_reg)
5354{
5355 bool src_known = tnum_subreg_is_const(src_reg->var_off);
5356 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
5357 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
 5358 s32 smin_val = src_reg->s32_min_value;
 5359 u32 umin_val = src_reg->u32_min_value;
5360
 5361 /* Assuming scalar_min_max_or() will be called so it is safe
 5362 * to skip updating the register for the known case.
5363 */
5364 if (src_known && dst_known)
5365 return;
5366
5367 /* We get our maximum from the var_off, and our minimum is the
5368 * maximum of the operands' minima
5369 */
5370 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
5371 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
5372 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
5373 /* Lose signed bounds when ORing negative numbers,
5374 * ain't nobody got time for that.
5375 */
5376 dst_reg->s32_min_value = S32_MIN;
5377 dst_reg->s32_max_value = S32_MAX;
5378 } else {
5379 /* ORing two positives gives a positive, so safe to
 5380 * cast the result into s32.
5381 */
 5382 dst_reg->s32_min_value = dst_reg->u32_min_value;
 5383 dst_reg->s32_max_value = dst_reg->u32_max_value;
5384 }
5385}
5386
07cd2631
JF
5387static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
5388 struct bpf_reg_state *src_reg)
5389{
3f50f132
JF
5390 bool src_known = tnum_is_const(src_reg->var_off);
5391 bool dst_known = tnum_is_const(dst_reg->var_off);
07cd2631
JF
5392 s64 smin_val = src_reg->smin_value;
5393 u64 umin_val = src_reg->umin_value;
5394
3f50f132
JF
5395 if (src_known && dst_known) {
5396 __mark_reg_known(dst_reg, dst_reg->var_off.value |
5397 src_reg->var_off.value);
5398 return;
5399 }
5400
07cd2631
JF
5401 /* We get our maximum from the var_off, and our minimum is the
5402 * maximum of the operands' minima
5403 */
07cd2631
JF
5404 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
5405 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
5406 if (dst_reg->smin_value < 0 || smin_val < 0) {
5407 /* Lose signed bounds when ORing negative numbers,
5408 * ain't nobody got time for that.
5409 */
5410 dst_reg->smin_value = S64_MIN;
5411 dst_reg->smax_value = S64_MAX;
5412 } else {
5413 /* ORing two positives gives a positive, so safe to
5414 * cast result into s64.
5415 */
5416 dst_reg->smin_value = dst_reg->umin_value;
5417 dst_reg->smax_value = dst_reg->umax_value;
5418 }
5419 /* We may learn something more from the var_off */
5420 __update_reg_bounds(dst_reg);
5421}
5422
3f50f132
JF
5423static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
5424 u64 umin_val, u64 umax_val)
07cd2631 5425{
07cd2631
JF
5426 /* We lose all sign bit information (except what we can pick
5427 * up from var_off)
5428 */
3f50f132
JF
5429 dst_reg->s32_min_value = S32_MIN;
5430 dst_reg->s32_max_value = S32_MAX;
5431 /* If we might shift our top bit out, then we know nothing */
5432 if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
5433 dst_reg->u32_min_value = 0;
5434 dst_reg->u32_max_value = U32_MAX;
5435 } else {
5436 dst_reg->u32_min_value <<= umin_val;
5437 dst_reg->u32_max_value <<= umax_val;
5438 }
5439}
5440
5441static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
5442 struct bpf_reg_state *src_reg)
5443{
5444 u32 umax_val = src_reg->u32_max_value;
5445 u32 umin_val = src_reg->u32_min_value;
5446 /* u32 alu operation will zext upper bits */
5447 struct tnum subreg = tnum_subreg(dst_reg->var_off);
5448
5449 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
5450 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
 5451 /* Not required, but to be careful, mark the reg64 bounds as unknown so
 5452 * that we are forced to pick them up from the tnum and zext later; if
 5453 * some path skips this step we are still safe.
5454 */
5455 __mark_reg64_unbounded(dst_reg);
5456 __update_reg32_bounds(dst_reg);
5457}
5458
5459static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
5460 u64 umin_val, u64 umax_val)
5461{
5462 /* Special case <<32 because it is a common compiler pattern to sign
5463 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
5464 * positive we know this shift will also be positive so we can track
5465 * bounds correctly. Otherwise we lose all sign bit information except
5466 * what we can pick up from var_off. Perhaps we can generalize this
5467 * later to shifts of any length.
5468 */
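	/* e.g. if the 32-bit bounds are [0, 100] and the shift is exactly 32,
	 * the signed 64-bit bounds become [0, (s64)100 << 32] instead of
	 * being discarded entirely.
	 */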
5469 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
5470 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
5471 else
5472 dst_reg->smax_value = S64_MAX;
5473
5474 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
5475 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
5476 else
5477 dst_reg->smin_value = S64_MIN;
5478
07cd2631
JF
5479 /* If we might shift our top bit out, then we know nothing */
5480 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
5481 dst_reg->umin_value = 0;
5482 dst_reg->umax_value = U64_MAX;
5483 } else {
5484 dst_reg->umin_value <<= umin_val;
5485 dst_reg->umax_value <<= umax_val;
5486 }
3f50f132
JF
5487}
5488
5489static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
5490 struct bpf_reg_state *src_reg)
5491{
5492 u64 umax_val = src_reg->umax_value;
5493 u64 umin_val = src_reg->umin_value;
5494
5495 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
5496 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
5497 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
5498
07cd2631
JF
5499 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
5500 /* We may learn something more from the var_off */
5501 __update_reg_bounds(dst_reg);
5502}
5503
3f50f132
JF
5504static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
5505 struct bpf_reg_state *src_reg)
5506{
5507 struct tnum subreg = tnum_subreg(dst_reg->var_off);
5508 u32 umax_val = src_reg->u32_max_value;
5509 u32 umin_val = src_reg->u32_min_value;
5510
5511 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
5512 * be negative, then either:
5513 * 1) src_reg might be zero, so the sign bit of the result is
5514 * unknown, so we lose our signed bounds
5515 * 2) it's known negative, thus the unsigned bounds capture the
5516 * signed bounds
5517 * 3) the signed bounds cross zero, so they tell us nothing
5518 * about the result
5519 * If the value in dst_reg is known nonnegative, then again the
 5520 * unsigned bounds capture the signed bounds.
5521 * Thus, in all cases it suffices to blow away our signed bounds
5522 * and rely on inferring new ones from the unsigned bounds and
5523 * var_off of the result.
5524 */
5525 dst_reg->s32_min_value = S32_MIN;
5526 dst_reg->s32_max_value = S32_MAX;
5527
5528 dst_reg->var_off = tnum_rshift(subreg, umin_val);
5529 dst_reg->u32_min_value >>= umax_val;
5530 dst_reg->u32_max_value >>= umin_val;
5531
5532 __mark_reg64_unbounded(dst_reg);
5533 __update_reg32_bounds(dst_reg);
5534}
5535
07cd2631
JF
5536static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
5537 struct bpf_reg_state *src_reg)
5538{
5539 u64 umax_val = src_reg->umax_value;
5540 u64 umin_val = src_reg->umin_value;
5541
5542 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
5543 * be negative, then either:
5544 * 1) src_reg might be zero, so the sign bit of the result is
5545 * unknown, so we lose our signed bounds
5546 * 2) it's known negative, thus the unsigned bounds capture the
5547 * signed bounds
5548 * 3) the signed bounds cross zero, so they tell us nothing
5549 * about the result
5550 * If the value in dst_reg is known nonnegative, then again the
 5551 * unsigned bounds capture the signed bounds.
5552 * Thus, in all cases it suffices to blow away our signed bounds
5553 * and rely on inferring new ones from the unsigned bounds and
5554 * var_off of the result.
5555 */
5556 dst_reg->smin_value = S64_MIN;
5557 dst_reg->smax_value = S64_MAX;
5558 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
5559 dst_reg->umin_value >>= umax_val;
5560 dst_reg->umax_value >>= umin_val;
3f50f132
JF
5561
 5562 /* It's not easy to operate on alu32 bounds here because it depends
 5563 * on bits being shifted in. Take the easy way out and mark them unbounded
 5564 * so we can recalculate later from the tnum.
5565 */
5566 __mark_reg32_unbounded(dst_reg);
07cd2631
JF
5567 __update_reg_bounds(dst_reg);
5568}
5569
3f50f132
JF
5570static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
5571 struct bpf_reg_state *src_reg)
07cd2631 5572{
3f50f132 5573 u64 umin_val = src_reg->u32_min_value;
07cd2631
JF
5574
5575 /* Upon reaching here, src_known is true and
5576 * umax_val is equal to umin_val.
5577 */
3f50f132
JF
5578 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
5579 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
07cd2631 5580
3f50f132
JF
5581 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
5582
5583 /* blow away the dst_reg umin_value/umax_value and rely on
5584 * dst_reg var_off to refine the result.
5585 */
5586 dst_reg->u32_min_value = 0;
5587 dst_reg->u32_max_value = U32_MAX;
5588
5589 __mark_reg64_unbounded(dst_reg);
5590 __update_reg32_bounds(dst_reg);
5591}
5592
5593static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
5594 struct bpf_reg_state *src_reg)
5595{
5596 u64 umin_val = src_reg->umin_value;
5597
5598 /* Upon reaching here, src_known is true and umax_val is equal
5599 * to umin_val.
5600 */
5601 dst_reg->smin_value >>= umin_val;
5602 dst_reg->smax_value >>= umin_val;
5603
5604 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
07cd2631
JF
5605
5606 /* blow away the dst_reg umin_value/umax_value and rely on
5607 * dst_reg var_off to refine the result.
5608 */
5609 dst_reg->umin_value = 0;
5610 dst_reg->umax_value = U64_MAX;
3f50f132
JF
5611
 5612 /* It's not easy to operate on alu32 bounds here because it depends
 5613 * on bits being shifted in from the upper 32 bits. Take the easy way out
 5614 * and mark them unbounded so we can recalculate later from the tnum.
5615 */
5616 __mark_reg32_unbounded(dst_reg);
07cd2631
JF
5617 __update_reg_bounds(dst_reg);
5618}
5619
468f6eaf
JH
5620/* WARNING: This function does calculations on 64-bit values, but the actual
5621 * execution may occur on 32-bit values. Therefore, things like bitshifts
5622 * need extra checks in the 32-bit case.
5623 */
f1174f77
EC
5624static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
5625 struct bpf_insn *insn,
5626 struct bpf_reg_state *dst_reg,
5627 struct bpf_reg_state src_reg)
969bf05e 5628{
638f5b90 5629 struct bpf_reg_state *regs = cur_regs(env);
48461135 5630 u8 opcode = BPF_OP(insn->code);
b0b3fb67 5631 bool src_known;
b03c9f9f
EC
5632 s64 smin_val, smax_val;
5633 u64 umin_val, umax_val;
3f50f132
JF
5634 s32 s32_min_val, s32_max_val;
5635 u32 u32_min_val, u32_max_val;
468f6eaf 5636 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
d3bd7413
DB
5637 u32 dst = insn->dst_reg;
5638 int ret;
3f50f132 5639 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
b799207e 5640
b03c9f9f
EC
5641 smin_val = src_reg.smin_value;
5642 smax_val = src_reg.smax_value;
5643 umin_val = src_reg.umin_value;
5644 umax_val = src_reg.umax_value;
f23cc643 5645
3f50f132
JF
5646 s32_min_val = src_reg.s32_min_value;
5647 s32_max_val = src_reg.s32_max_value;
5648 u32_min_val = src_reg.u32_min_value;
5649 u32_max_val = src_reg.u32_max_value;
5650
5651 if (alu32) {
5652 src_known = tnum_subreg_is_const(src_reg.var_off);
3f50f132
JF
5653 if ((src_known &&
5654 (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
5655 s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
5656 /* Taint dst register if offset had invalid bounds
5657 * derived from e.g. dead branches.
5658 */
5659 __mark_reg_unknown(env, dst_reg);
5660 return 0;
5661 }
5662 } else {
5663 src_known = tnum_is_const(src_reg.var_off);
3f50f132
JF
5664 if ((src_known &&
5665 (smin_val != smax_val || umin_val != umax_val)) ||
5666 smin_val > smax_val || umin_val > umax_val) {
5667 /* Taint dst register if offset had invalid bounds
5668 * derived from e.g. dead branches.
5669 */
5670 __mark_reg_unknown(env, dst_reg);
5671 return 0;
5672 }
6f16101e
DB
5673 }
5674
bb7f0f98
AS
5675 if (!src_known &&
5676 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
f54c7898 5677 __mark_reg_unknown(env, dst_reg);
bb7f0f98
AS
5678 return 0;
5679 }
5680
3f50f132
JF
5681 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
 5682 * There are two classes of instructions: for the first class we track both
 5683 * alu32 and alu64 sign/unsigned bounds independently; this provides the
 5684 * greatest amount of precision when alu operations are mixed with jmp32
 5685 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
5686 * and BPF_OR. This is possible because these ops have fairly easy to
5687 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
5688 * See alu32 verifier tests for examples. The second class of
5689 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
5690 * with regards to tracking sign/unsigned bounds because the bits may
5691 * cross subreg boundaries in the alu64 case. When this happens we mark
5692 * the reg unbounded in the subreg bound space and use the resulting
5693 * tnum to calculate an approximation of the sign/unsigned bounds.
5694 */
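	/* e.g. an ALU32 BPF_ADD updates the 32-bit bounds via
	 * scalar32_min_max_add(), the 64-bit bounds via scalar_min_max_add(),
	 * recomputes var_off with tnum_add(), and finally zext_32_to_64()
	 * zero-extends the result since ALU32 ops clear the upper 32 bits.
	 */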
48461135
JB
5695 switch (opcode) {
5696 case BPF_ADD:
d3bd7413
DB
5697 ret = sanitize_val_alu(env, insn);
5698 if (ret < 0) {
5699 verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
5700 return ret;
5701 }
3f50f132 5702 scalar32_min_max_add(dst_reg, &src_reg);
07cd2631 5703 scalar_min_max_add(dst_reg, &src_reg);
3f50f132 5704 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
48461135
JB
5705 break;
5706 case BPF_SUB:
d3bd7413
DB
5707 ret = sanitize_val_alu(env, insn);
5708 if (ret < 0) {
5709 verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
5710 return ret;
5711 }
3f50f132 5712 scalar32_min_max_sub(dst_reg, &src_reg);
07cd2631 5713 scalar_min_max_sub(dst_reg, &src_reg);
3f50f132 5714 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
48461135
JB
5715 break;
5716 case BPF_MUL:
3f50f132
JF
5717 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
5718 scalar32_min_max_mul(dst_reg, &src_reg);
07cd2631 5719 scalar_min_max_mul(dst_reg, &src_reg);
48461135
JB
5720 break;
5721 case BPF_AND:
3f50f132
JF
5722 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
5723 scalar32_min_max_and(dst_reg, &src_reg);
07cd2631 5724 scalar_min_max_and(dst_reg, &src_reg);
f1174f77
EC
5725 break;
5726 case BPF_OR:
3f50f132
JF
5727 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
5728 scalar32_min_max_or(dst_reg, &src_reg);
07cd2631 5729 scalar_min_max_or(dst_reg, &src_reg);
48461135
JB
5730 break;
5731 case BPF_LSH:
468f6eaf
JH
5732 if (umax_val >= insn_bitness) {
5733 /* Shifts greater than 31 or 63 are undefined.
5734 * This includes shifts by a negative number.
b03c9f9f 5735 */
61bd5218 5736 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
5737 break;
5738 }
3f50f132
JF
5739 if (alu32)
5740 scalar32_min_max_lsh(dst_reg, &src_reg);
5741 else
5742 scalar_min_max_lsh(dst_reg, &src_reg);
48461135
JB
5743 break;
5744 case BPF_RSH:
468f6eaf
JH
5745 if (umax_val >= insn_bitness) {
5746 /* Shifts greater than 31 or 63 are undefined.
5747 * This includes shifts by a negative number.
b03c9f9f 5748 */
61bd5218 5749 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
5750 break;
5751 }
3f50f132
JF
5752 if (alu32)
5753 scalar32_min_max_rsh(dst_reg, &src_reg);
5754 else
5755 scalar_min_max_rsh(dst_reg, &src_reg);
48461135 5756 break;
9cbe1f5a
YS
5757 case BPF_ARSH:
5758 if (umax_val >= insn_bitness) {
5759 /* Shifts greater than 31 or 63 are undefined.
5760 * This includes shifts by a negative number.
5761 */
5762 mark_reg_unknown(env, regs, insn->dst_reg);
5763 break;
5764 }
3f50f132
JF
5765 if (alu32)
5766 scalar32_min_max_arsh(dst_reg, &src_reg);
5767 else
5768 scalar_min_max_arsh(dst_reg, &src_reg);
9cbe1f5a 5769 break;
48461135 5770 default:
61bd5218 5771 mark_reg_unknown(env, regs, insn->dst_reg);
48461135
JB
5772 break;
5773 }
5774
3f50f132
JF
5775 /* ALU32 ops are zero extended into 64bit register */
5776 if (alu32)
5777 zext_32_to_64(dst_reg);
468f6eaf 5778
294f2fc6 5779 __update_reg_bounds(dst_reg);
b03c9f9f
EC
5780 __reg_deduce_bounds(dst_reg);
5781 __reg_bound_offset(dst_reg);
f1174f77
EC
5782 return 0;
5783}
5784
5785/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
5786 * and var_off.
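 * e.g. for 'r1 += r2': if r1 is a pointer and r2 a scalar we go through
 * adjust_ptr_min_max_vals(); if r1 is a scalar and r2 a pointer the roles are
 * swapped; if both are scalars we end up in adjust_scalar_min_max_vals().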
5787 */
5788static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
5789 struct bpf_insn *insn)
5790{
f4d7e40a
AS
5791 struct bpf_verifier_state *vstate = env->cur_state;
5792 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5793 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
f1174f77
EC
5794 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
5795 u8 opcode = BPF_OP(insn->code);
b5dc0163 5796 int err;
f1174f77
EC
5797
5798 dst_reg = &regs[insn->dst_reg];
f1174f77
EC
5799 src_reg = NULL;
5800 if (dst_reg->type != SCALAR_VALUE)
5801 ptr_reg = dst_reg;
5802 if (BPF_SRC(insn->code) == BPF_X) {
5803 src_reg = &regs[insn->src_reg];
f1174f77
EC
5804 if (src_reg->type != SCALAR_VALUE) {
5805 if (dst_reg->type != SCALAR_VALUE) {
5806 /* Combining two pointers by any ALU op yields
82abbf8d
AS
5807 * an arbitrary scalar. Disallow all math except
5808 * pointer subtraction
f1174f77 5809 */
dd066823 5810 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
82abbf8d
AS
5811 mark_reg_unknown(env, regs, insn->dst_reg);
5812 return 0;
f1174f77 5813 }
82abbf8d
AS
5814 verbose(env, "R%d pointer %s pointer prohibited\n",
5815 insn->dst_reg,
5816 bpf_alu_string[opcode >> 4]);
5817 return -EACCES;
f1174f77
EC
5818 } else {
5819 /* scalar += pointer
5820 * This is legal, but we have to reverse our
5821 * src/dest handling in computing the range
5822 */
b5dc0163
AS
5823 err = mark_chain_precision(env, insn->dst_reg);
5824 if (err)
5825 return err;
82abbf8d
AS
5826 return adjust_ptr_min_max_vals(env, insn,
5827 src_reg, dst_reg);
f1174f77
EC
5828 }
5829 } else if (ptr_reg) {
5830 /* pointer += scalar */
b5dc0163
AS
5831 err = mark_chain_precision(env, insn->src_reg);
5832 if (err)
5833 return err;
82abbf8d
AS
5834 return adjust_ptr_min_max_vals(env, insn,
5835 dst_reg, src_reg);
f1174f77
EC
5836 }
5837 } else {
5838 /* Pretend the src is a reg with a known value, since we only
5839 * need to be able to read from this state.
5840 */
5841 off_reg.type = SCALAR_VALUE;
b03c9f9f 5842 __mark_reg_known(&off_reg, insn->imm);
f1174f77 5843 src_reg = &off_reg;
82abbf8d
AS
5844 if (ptr_reg) /* pointer += K */
5845 return adjust_ptr_min_max_vals(env, insn,
5846 ptr_reg, src_reg);
f1174f77
EC
5847 }
5848
5849 /* Got here implies adding two SCALAR_VALUEs */
5850 if (WARN_ON_ONCE(ptr_reg)) {
f4d7e40a 5851 print_verifier_state(env, state);
61bd5218 5852 verbose(env, "verifier internal error: unexpected ptr_reg\n");
f1174f77
EC
5853 return -EINVAL;
5854 }
5855 if (WARN_ON(!src_reg)) {
f4d7e40a 5856 print_verifier_state(env, state);
61bd5218 5857 verbose(env, "verifier internal error: no src_reg\n");
f1174f77
EC
5858 return -EINVAL;
5859 }
5860 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
48461135
JB
5861}
5862
17a52670 5863/* check validity of 32-bit and 64-bit arithmetic operations */
58e2af8b 5864static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 5865{
638f5b90 5866 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
5867 u8 opcode = BPF_OP(insn->code);
5868 int err;
5869
5870 if (opcode == BPF_END || opcode == BPF_NEG) {
5871 if (opcode == BPF_NEG) {
5872 if (BPF_SRC(insn->code) != 0 ||
5873 insn->src_reg != BPF_REG_0 ||
5874 insn->off != 0 || insn->imm != 0) {
61bd5218 5875 verbose(env, "BPF_NEG uses reserved fields\n");
17a52670
AS
5876 return -EINVAL;
5877 }
5878 } else {
5879 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
e67b8a68
EC
5880 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
5881 BPF_CLASS(insn->code) == BPF_ALU64) {
61bd5218 5882 verbose(env, "BPF_END uses reserved fields\n");
17a52670
AS
5883 return -EINVAL;
5884 }
5885 }
5886
5887 /* check src operand */
dc503a8a 5888 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
5889 if (err)
5890 return err;
5891
1be7f75d 5892 if (is_pointer_value(env, insn->dst_reg)) {
61bd5218 5893 verbose(env, "R%d pointer arithmetic prohibited\n",
1be7f75d
AS
5894 insn->dst_reg);
5895 return -EACCES;
5896 }
5897
17a52670 5898 /* check dest operand */
dc503a8a 5899 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
5900 if (err)
5901 return err;
5902
5903 } else if (opcode == BPF_MOV) {
5904
5905 if (BPF_SRC(insn->code) == BPF_X) {
5906 if (insn->imm != 0 || insn->off != 0) {
61bd5218 5907 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
5908 return -EINVAL;
5909 }
5910
5911 /* check src operand */
dc503a8a 5912 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
5913 if (err)
5914 return err;
5915 } else {
5916 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 5917 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
5918 return -EINVAL;
5919 }
5920 }
5921
fbeb1603
AF
5922 /* check dest operand, mark as required later */
5923 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
5924 if (err)
5925 return err;
5926
5927 if (BPF_SRC(insn->code) == BPF_X) {
e434b8cd
JW
5928 struct bpf_reg_state *src_reg = regs + insn->src_reg;
5929 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
5930
17a52670
AS
5931 if (BPF_CLASS(insn->code) == BPF_ALU64) {
5932 /* case: R1 = R2
5933 * copy register state to dest reg
5934 */
e434b8cd
JW
5935 *dst_reg = *src_reg;
5936 dst_reg->live |= REG_LIVE_WRITTEN;
5327ed3d 5937 dst_reg->subreg_def = DEF_NOT_SUBREG;
17a52670 5938 } else {
f1174f77 5939 /* R1 = (u32) R2 */
1be7f75d 5940 if (is_pointer_value(env, insn->src_reg)) {
61bd5218
JK
5941 verbose(env,
5942 "R%d partial copy of pointer\n",
1be7f75d
AS
5943 insn->src_reg);
5944 return -EACCES;
e434b8cd
JW
5945 } else if (src_reg->type == SCALAR_VALUE) {
5946 *dst_reg = *src_reg;
5947 dst_reg->live |= REG_LIVE_WRITTEN;
5327ed3d 5948 dst_reg->subreg_def = env->insn_idx + 1;
e434b8cd
JW
5949 } else {
5950 mark_reg_unknown(env, regs,
5951 insn->dst_reg);
1be7f75d 5952 }
3f50f132 5953 zext_32_to_64(dst_reg);
17a52670
AS
5954 }
5955 } else {
5956 /* case: R = imm
5957 * remember the value we stored into this reg
5958 */
fbeb1603
AF
5959 /* clear any state __mark_reg_known doesn't set */
5960 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77 5961 regs[insn->dst_reg].type = SCALAR_VALUE;
95a762e2
JH
5962 if (BPF_CLASS(insn->code) == BPF_ALU64) {
5963 __mark_reg_known(regs + insn->dst_reg,
5964 insn->imm);
5965 } else {
5966 __mark_reg_known(regs + insn->dst_reg,
5967 (u32)insn->imm);
5968 }
17a52670
AS
5969 }
5970
5971 } else if (opcode > BPF_END) {
61bd5218 5972 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17a52670
AS
5973 return -EINVAL;
5974
5975 } else { /* all other ALU ops: and, sub, xor, add, ... */
5976
17a52670
AS
5977 if (BPF_SRC(insn->code) == BPF_X) {
5978 if (insn->imm != 0 || insn->off != 0) {
61bd5218 5979 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
5980 return -EINVAL;
5981 }
5982 /* check src1 operand */
dc503a8a 5983 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
5984 if (err)
5985 return err;
5986 } else {
5987 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 5988 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
5989 return -EINVAL;
5990 }
5991 }
5992
5993 /* check src2 operand */
dc503a8a 5994 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
5995 if (err)
5996 return err;
5997
5998 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
5999 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
61bd5218 6000 verbose(env, "div by zero\n");
17a52670
AS
6001 return -EINVAL;
6002 }
6003
229394e8
RV
6004 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
6005 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
6006 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
6007
6008 if (insn->imm < 0 || insn->imm >= size) {
61bd5218 6009 verbose(env, "invalid shift %d\n", insn->imm);
229394e8
RV
6010 return -EINVAL;
6011 }
6012 }
6013
1a0dc1ac 6014 /* check dest operand */
dc503a8a 6015 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
1a0dc1ac
AS
6016 if (err)
6017 return err;
6018
f1174f77 6019 return adjust_reg_min_max_vals(env, insn);
17a52670
AS
6020 }
6021
6022 return 0;
6023}
6024
c6a9efa1
PC
6025static void __find_good_pkt_pointers(struct bpf_func_state *state,
6026 struct bpf_reg_state *dst_reg,
6027 enum bpf_reg_type type, u16 new_range)
6028{
6029 struct bpf_reg_state *reg;
6030 int i;
6031
6032 for (i = 0; i < MAX_BPF_REG; i++) {
6033 reg = &state->regs[i];
6034 if (reg->type == type && reg->id == dst_reg->id)
6035 /* keep the maximum range already checked */
6036 reg->range = max(reg->range, new_range);
6037 }
6038
6039 bpf_for_each_spilled_reg(i, state, reg) {
6040 if (!reg)
6041 continue;
6042 if (reg->type == type && reg->id == dst_reg->id)
6043 reg->range = max(reg->range, new_range);
6044 }
6045}
6046
f4d7e40a 6047static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
de8f3a83 6048 struct bpf_reg_state *dst_reg,
f8ddadc4 6049 enum bpf_reg_type type,
fb2a311a 6050 bool range_right_open)
969bf05e 6051{
fb2a311a 6052 u16 new_range;
c6a9efa1 6053 int i;
2d2be8ca 6054
fb2a311a
DB
6055 if (dst_reg->off < 0 ||
6056 (dst_reg->off == 0 && range_right_open))
f1174f77
EC
6057 /* This doesn't give us any range */
6058 return;
6059
b03c9f9f
EC
6060 if (dst_reg->umax_value > MAX_PACKET_OFF ||
6061 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
f1174f77
EC
6062 /* Risk of overflow. For instance, ptr + (1<<63) may be less
6063 * than pkt_end, but that's because it's also less than pkt.
6064 */
6065 return;
6066
fb2a311a
DB
6067 new_range = dst_reg->off;
6068 if (range_right_open)
6069 new_range--;
6070
6071 /* Examples for register markings:
2d2be8ca 6072 *
fb2a311a 6073 * pkt_data in dst register:
2d2be8ca
DB
6074 *
6075 * r2 = r3;
6076 * r2 += 8;
6077 * if (r2 > pkt_end) goto <handle exception>
6078 * <access okay>
6079 *
b4e432f1
DB
6080 * r2 = r3;
6081 * r2 += 8;
6082 * if (r2 < pkt_end) goto <access okay>
6083 * <handle exception>
6084 *
2d2be8ca
DB
6085 * Where:
6086 * r2 == dst_reg, pkt_end == src_reg
6087 * r2=pkt(id=n,off=8,r=0)
6088 * r3=pkt(id=n,off=0,r=0)
6089 *
fb2a311a 6090 * pkt_data in src register:
2d2be8ca
DB
6091 *
6092 * r2 = r3;
6093 * r2 += 8;
6094 * if (pkt_end >= r2) goto <access okay>
6095 * <handle exception>
6096 *
b4e432f1
DB
6097 * r2 = r3;
6098 * r2 += 8;
6099 * if (pkt_end <= r2) goto <handle exception>
6100 * <access okay>
6101 *
2d2be8ca
DB
6102 * Where:
6103 * pkt_end == dst_reg, r2 == src_reg
6104 * r2=pkt(id=n,off=8,r=0)
6105 * r3=pkt(id=n,off=0,r=0)
6106 *
6107 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
fb2a311a
DB
6108 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
6109 * and [r3, r3 + 8-1) respectively is safe to access depending on
6110 * the check.
969bf05e 6111 */
2d2be8ca 6112
f1174f77
EC
6113 /* If our ids match, then we must have the same max_value. And we
6114 * don't care about the other reg's fixed offset, since if it's too big
6115 * the range won't allow anything.
6116 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
6117 */
c6a9efa1
PC
6118 for (i = 0; i <= vstate->curframe; i++)
6119 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
6120 new_range);
969bf05e
AS
6121}
6122
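/* Decide the branch direction using only the 32-bit subregister bounds;
 * used for JMP32 instructions. Returns 1 (taken), 0 (not taken) or
 * -1 (unknown), mirroring is_branch64_taken() below.
 */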
3f50f132 6123static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
4f7b3e82 6124{
3f50f132
JF
6125 struct tnum subreg = tnum_subreg(reg->var_off);
6126 s32 sval = (s32)val;
a72dafaf 6127
3f50f132
JF
6128 switch (opcode) {
6129 case BPF_JEQ:
6130 if (tnum_is_const(subreg))
6131 return !!tnum_equals_const(subreg, val);
6132 break;
6133 case BPF_JNE:
6134 if (tnum_is_const(subreg))
6135 return !tnum_equals_const(subreg, val);
6136 break;
6137 case BPF_JSET:
6138 if ((~subreg.mask & subreg.value) & val)
6139 return 1;
6140 if (!((subreg.mask | subreg.value) & val))
6141 return 0;
6142 break;
6143 case BPF_JGT:
6144 if (reg->u32_min_value > val)
6145 return 1;
6146 else if (reg->u32_max_value <= val)
6147 return 0;
6148 break;
6149 case BPF_JSGT:
6150 if (reg->s32_min_value > sval)
6151 return 1;
6152 else if (reg->s32_max_value < sval)
6153 return 0;
6154 break;
6155 case BPF_JLT:
6156 if (reg->u32_max_value < val)
6157 return 1;
6158 else if (reg->u32_min_value >= val)
6159 return 0;
6160 break;
6161 case BPF_JSLT:
6162 if (reg->s32_max_value < sval)
6163 return 1;
6164 else if (reg->s32_min_value >= sval)
6165 return 0;
6166 break;
6167 case BPF_JGE:
6168 if (reg->u32_min_value >= val)
6169 return 1;
6170 else if (reg->u32_max_value < val)
6171 return 0;
6172 break;
6173 case BPF_JSGE:
6174 if (reg->s32_min_value >= sval)
6175 return 1;
6176 else if (reg->s32_max_value < sval)
6177 return 0;
6178 break;
6179 case BPF_JLE:
6180 if (reg->u32_max_value <= val)
6181 return 1;
6182 else if (reg->u32_min_value > val)
6183 return 0;
6184 break;
6185 case BPF_JSLE:
6186 if (reg->s32_max_value <= sval)
6187 return 1;
6188 else if (reg->s32_min_value > sval)
6189 return 0;
6190 break;
6191 }
4f7b3e82 6192
3f50f132
JF
6193 return -1;
6194}
092ed096 6195
3f50f132
JF
6196
6197static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
6198{
6199 s64 sval = (s64)val;
a72dafaf 6200
4f7b3e82
AS
6201 switch (opcode) {
6202 case BPF_JEQ:
6203 if (tnum_is_const(reg->var_off))
6204 return !!tnum_equals_const(reg->var_off, val);
6205 break;
6206 case BPF_JNE:
6207 if (tnum_is_const(reg->var_off))
6208 return !tnum_equals_const(reg->var_off, val);
6209 break;
960ea056
JK
6210 case BPF_JSET:
6211 if ((~reg->var_off.mask & reg->var_off.value) & val)
6212 return 1;
6213 if (!((reg->var_off.mask | reg->var_off.value) & val))
6214 return 0;
6215 break;
4f7b3e82
AS
6216 case BPF_JGT:
6217 if (reg->umin_value > val)
6218 return 1;
6219 else if (reg->umax_value <= val)
6220 return 0;
6221 break;
6222 case BPF_JSGT:
a72dafaf 6223 if (reg->smin_value > sval)
4f7b3e82 6224 return 1;
a72dafaf 6225 else if (reg->smax_value < sval)
4f7b3e82
AS
6226 return 0;
6227 break;
6228 case BPF_JLT:
6229 if (reg->umax_value < val)
6230 return 1;
6231 else if (reg->umin_value >= val)
6232 return 0;
6233 break;
6234 case BPF_JSLT:
a72dafaf 6235 if (reg->smax_value < sval)
4f7b3e82 6236 return 1;
a72dafaf 6237 else if (reg->smin_value >= sval)
4f7b3e82
AS
6238 return 0;
6239 break;
6240 case BPF_JGE:
6241 if (reg->umin_value >= val)
6242 return 1;
6243 else if (reg->umax_value < val)
6244 return 0;
6245 break;
6246 case BPF_JSGE:
a72dafaf 6247 if (reg->smin_value >= sval)
4f7b3e82 6248 return 1;
a72dafaf 6249 else if (reg->smax_value < sval)
4f7b3e82
AS
6250 return 0;
6251 break;
6252 case BPF_JLE:
6253 if (reg->umax_value <= val)
6254 return 1;
6255 else if (reg->umin_value > val)
6256 return 0;
6257 break;
6258 case BPF_JSLE:
a72dafaf 6259 if (reg->smax_value <= sval)
4f7b3e82 6260 return 1;
a72dafaf 6261 else if (reg->smin_value > sval)
4f7b3e82
AS
6262 return 0;
6263 break;
6264 }
6265
6266 return -1;
6267}
6268
3f50f132
JF
6269/* compute branch direction of the expression "if (reg opcode val) goto target;"
6270 * and return:
6271 * 1 - branch will be taken and "goto target" will be executed
6272 * 0 - branch will not be taken and fall-through to next insn
6273 * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
6274 * range [0,10]
604dca5e 6275 */
3f50f132
JF
6276static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
6277 bool is_jmp32)
604dca5e 6278{
3f50f132
JF
6279 if (__is_pointer_value(false, reg))
6280 return -1;
604dca5e 6281
3f50f132
JF
6282 if (is_jmp32)
6283 return is_branch32_taken(reg, val, opcode);
6284 return is_branch64_taken(reg, val, opcode);
604dca5e
JH
6285}
6286
48461135
JB
6287/* Adjusts the register min/max values in the case that the dst_reg is the
6288 * variable register that we are working on, and src_reg is a constant or we're
6289 * simply doing a BPF_K check.
f1174f77 6290 * In JEQ/JNE cases we also adjust the var_off values.
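 * e.g. for 'if r1 > 10 goto ...' with r1 in [0, 100]: the true branch
 * learns umin_value = 11 while the false branch learns umax_value = 10.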
48461135
JB
6291 */
6292static void reg_set_min_max(struct bpf_reg_state *true_reg,
3f50f132
JF
6293 struct bpf_reg_state *false_reg,
6294 u64 val, u32 val32,
092ed096 6295 u8 opcode, bool is_jmp32)
48461135 6296{
3f50f132
JF
6297 struct tnum false_32off = tnum_subreg(false_reg->var_off);
6298 struct tnum false_64off = false_reg->var_off;
6299 struct tnum true_32off = tnum_subreg(true_reg->var_off);
6300 struct tnum true_64off = true_reg->var_off;
6301 s64 sval = (s64)val;
6302 s32 sval32 = (s32)val32;
a72dafaf 6303
f1174f77
EC
6304 /* If the dst_reg is a pointer, we can't learn anything about its
6305 * variable offset from the compare (unless src_reg were a pointer into
6306 * the same object, but we don't bother with that.
6307 * Since false_reg and true_reg have the same type by construction, we
6308 * only need to check one of them for pointerness.
6309 */
6310 if (__is_pointer_value(false, false_reg))
6311 return;
4cabc5b1 6312
48461135
JB
6313 switch (opcode) {
6314 case BPF_JEQ:
48461135 6315 case BPF_JNE:
a72dafaf
JW
6316 {
6317 struct bpf_reg_state *reg =
6318 opcode == BPF_JEQ ? true_reg : false_reg;
6319
6320 /* For BPF_JEQ, if this is false we know nothing Jon Snow, but
6321 * if it is true we know the value for sure. Likewise for
6322 * BPF_JNE.
48461135 6323 */
3f50f132
JF
6324 if (is_jmp32)
6325 __mark_reg32_known(reg, val32);
6326 else
092ed096 6327 __mark_reg_known(reg, val);
48461135 6328 break;
a72dafaf 6329 }
960ea056 6330 case BPF_JSET:
3f50f132
JF
6331 if (is_jmp32) {
6332 false_32off = tnum_and(false_32off, tnum_const(~val32));
6333 if (is_power_of_2(val32))
6334 true_32off = tnum_or(true_32off,
6335 tnum_const(val32));
6336 } else {
6337 false_64off = tnum_and(false_64off, tnum_const(~val));
6338 if (is_power_of_2(val))
6339 true_64off = tnum_or(true_64off,
6340 tnum_const(val));
6341 }
960ea056 6342 break;
48461135 6343 case BPF_JGE:
a72dafaf
JW
6344 case BPF_JGT:
6345 {
3f50f132
JF
6346 if (is_jmp32) {
6347 u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
6348 u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
6349
6350 false_reg->u32_max_value = min(false_reg->u32_max_value,
6351 false_umax);
6352 true_reg->u32_min_value = max(true_reg->u32_min_value,
6353 true_umin);
6354 } else {
6355 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
6356 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
6357
6358 false_reg->umax_value = min(false_reg->umax_value, false_umax);
6359 true_reg->umin_value = max(true_reg->umin_value, true_umin);
6360 }
b03c9f9f 6361 break;
a72dafaf 6362 }
48461135 6363 case BPF_JSGE:
a72dafaf
JW
6364 case BPF_JSGT:
6365 {
3f50f132
JF
6366 if (is_jmp32) {
6367 s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
6368 s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
a72dafaf 6369
3f50f132
JF
6370 false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
6371 true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
6372 } else {
6373 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
6374 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
6375
6376 false_reg->smax_value = min(false_reg->smax_value, false_smax);
6377 true_reg->smin_value = max(true_reg->smin_value, true_smin);
6378 }
48461135 6379 break;
a72dafaf 6380 }
b4e432f1 6381 case BPF_JLE:
a72dafaf
JW
6382 case BPF_JLT:
6383 {
3f50f132
JF
6384 if (is_jmp32) {
6385 u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
6386 u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
6387
6388 false_reg->u32_min_value = max(false_reg->u32_min_value,
6389 false_umin);
6390 true_reg->u32_max_value = min(true_reg->u32_max_value,
6391 true_umax);
6392 } else {
6393 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
6394 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
6395
6396 false_reg->umin_value = max(false_reg->umin_value, false_umin);
6397 true_reg->umax_value = min(true_reg->umax_value, true_umax);
6398 }
b4e432f1 6399 break;
a72dafaf 6400 }
b4e432f1 6401 case BPF_JSLE:
a72dafaf
JW
6402 case BPF_JSLT:
6403 {
3f50f132
JF
6404 if (is_jmp32) {
6405 s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
6406 s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
a72dafaf 6407
3f50f132
JF
6408 false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
6409 true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
6410 } else {
6411 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
6412 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
6413
6414 false_reg->smin_value = max(false_reg->smin_value, false_smin);
6415 true_reg->smax_value = min(true_reg->smax_value, true_smax);
6416 }
b4e432f1 6417 break;
a72dafaf 6418 }
48461135 6419 default:
0fc31b10 6420 return;
48461135
JB
6421 }
6422
3f50f132
JF
6423 if (is_jmp32) {
6424 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
6425 tnum_subreg(false_32off));
6426 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
6427 tnum_subreg(true_32off));
6428 __reg_combine_32_into_64(false_reg);
6429 __reg_combine_32_into_64(true_reg);
6430 } else {
6431 false_reg->var_off = false_64off;
6432 true_reg->var_off = true_64off;
6433 __reg_combine_64_into_32(false_reg);
6434 __reg_combine_64_into_32(true_reg);
6435 }
48461135
JB
6436}
6437
f1174f77
EC
6438/* Same as above, but for the case that dst_reg holds a constant and src_reg is
6439 * the variable reg.
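 * e.g. 'if 10 < r2 goto ...' is rewritten via opcode_flip[] below into the
 * equivalent 'if r2 > 10 goto ...' and then handled by reg_set_min_max().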
48461135
JB
6440 */
6441static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3f50f132
JF
6442 struct bpf_reg_state *false_reg,
6443 u64 val, u32 val32,
092ed096 6444 u8 opcode, bool is_jmp32)
48461135 6445{
0fc31b10
JH
6446 /* How can we transform "a <op> b" into "b <op> a"? */
6447 static const u8 opcode_flip[16] = {
6448 /* these stay the same */
6449 [BPF_JEQ >> 4] = BPF_JEQ,
6450 [BPF_JNE >> 4] = BPF_JNE,
6451 [BPF_JSET >> 4] = BPF_JSET,
6452 /* these swap "lesser" and "greater" (L and G in the opcodes) */
6453 [BPF_JGE >> 4] = BPF_JLE,
6454 [BPF_JGT >> 4] = BPF_JLT,
6455 [BPF_JLE >> 4] = BPF_JGE,
6456 [BPF_JLT >> 4] = BPF_JGT,
6457 [BPF_JSGE >> 4] = BPF_JSLE,
6458 [BPF_JSGT >> 4] = BPF_JSLT,
6459 [BPF_JSLE >> 4] = BPF_JSGE,
6460 [BPF_JSLT >> 4] = BPF_JSGT
6461 };
6462 opcode = opcode_flip[opcode >> 4];
6463 /* This uses zero as "not present in table"; luckily the zero opcode,
6464 * BPF_JA, can't get here.
b03c9f9f 6465 */
0fc31b10 6466 if (opcode)
3f50f132 6467 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
f1174f77
EC
6468}
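/* Illustrative sketch (not part of the kernel source): the opcode_flip idea
 * above in isolation. "val <op> reg" holds exactly when "reg <flip(op)> val"
 * holds, which is why reg_set_min_max_inv() can hand the work to
 * reg_set_min_max() with only the opcode mirrored. The two constants mirror
 * the UAPI opcode values; everything else is hypothetical.
 */
#include <assert.h>

#define TOY_JGT 0x20
#define TOY_JLT 0xa0

static unsigned char toy_flip(unsigned char op)
{
	static const unsigned char flip[16] = {
		[TOY_JGT >> 4] = TOY_JLT,
		[TOY_JLT >> 4] = TOY_JGT,
		/* ... remaining entries as in opcode_flip[] above ... */
	};
	return flip[op >> 4];
}

int main(void)
{
	unsigned long long val = 100, reg = 7;

	assert((val > reg) == (reg < val));	/* flipping preserves the predicate */
	assert(toy_flip(TOY_JGT) == TOY_JLT);
	return 0;
}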
6469
6470/* Regs are known to be equal, so intersect their min/max/var_off */
6471static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
6472 struct bpf_reg_state *dst_reg)
6473{
b03c9f9f
EC
6474 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
6475 dst_reg->umin_value);
6476 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
6477 dst_reg->umax_value);
6478 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
6479 dst_reg->smin_value);
6480 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
6481 dst_reg->smax_value);
f1174f77
EC
6482 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
6483 dst_reg->var_off);
b03c9f9f
EC
6484 /* We might have learned new bounds from the var_off. */
6485 __update_reg_bounds(src_reg);
6486 __update_reg_bounds(dst_reg);
6487 /* We might have learned something about the sign bit. */
6488 __reg_deduce_bounds(src_reg);
6489 __reg_deduce_bounds(dst_reg);
6490 /* We might have learned some bits from the bounds. */
6491 __reg_bound_offset(src_reg);
6492 __reg_bound_offset(dst_reg);
6493 /* Intersecting with the old var_off might have improved our bounds
6494 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
6495 * then new var_off is (0; 0x7f...fc) which improves our umax.
6496 */
6497 __update_reg_bounds(src_reg);
6498 __update_reg_bounds(dst_reg);
f1174f77
EC
6499}
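/* Illustrative sketch (not part of the kernel source): what
 * __reg_combine_min_max() above does to plain unsigned ranges once two
 * scalars are known equal. If r1 is in [0, 100] and r2 is in [50, 200],
 * then after a taken "r1 == r2" both must lie in the intersection
 * [50, 100]. Names are hypothetical; only the unsigned half is shown.
 */
#include <stdint.h>

struct toy_range { uint64_t umin, umax; };

static void toy_combine_equal(struct toy_range *a, struct toy_range *b)
{
	uint64_t umin = a->umin > b->umin ? a->umin : b->umin;	/* max of the mins */
	uint64_t umax = a->umax < b->umax ? a->umax : b->umax;	/* min of the maxs */

	a->umin = b->umin = umin;
	a->umax = b->umax = umax;
}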
6500
6501static void reg_combine_min_max(struct bpf_reg_state *true_src,
6502 struct bpf_reg_state *true_dst,
6503 struct bpf_reg_state *false_src,
6504 struct bpf_reg_state *false_dst,
6505 u8 opcode)
6506{
6507 switch (opcode) {
6508 case BPF_JEQ:
6509 __reg_combine_min_max(true_src, true_dst);
6510 break;
6511 case BPF_JNE:
6512 __reg_combine_min_max(false_src, false_dst);
b03c9f9f 6513 break;
4cabc5b1 6514 }
48461135
JB
6515}
6516
fd978bf7
JS
6517static void mark_ptr_or_null_reg(struct bpf_func_state *state,
6518 struct bpf_reg_state *reg, u32 id,
840b9615 6519 bool is_null)
57a09bf0 6520{
840b9615 6521 if (reg_type_may_be_null(reg->type) && reg->id == id) {
f1174f77
EC
6522 /* Old offset (both fixed and variable parts) should
6523 * have been known-zero, because we don't allow pointer
6524 * arithmetic on pointers that might be NULL.
6525 */
b03c9f9f
EC
6526 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
6527 !tnum_equals_const(reg->var_off, 0) ||
f1174f77 6528 reg->off)) {
b03c9f9f
EC
6529 __mark_reg_known_zero(reg);
6530 reg->off = 0;
f1174f77
EC
6531 }
6532 if (is_null) {
6533 reg->type = SCALAR_VALUE;
840b9615
JS
6534 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
6535 if (reg->map_ptr->inner_map_meta) {
6536 reg->type = CONST_PTR_TO_MAP;
6537 reg->map_ptr = reg->map_ptr->inner_map_meta;
fada7fdc
JL
6538 } else if (reg->map_ptr->map_type ==
6539 BPF_MAP_TYPE_XSKMAP) {
6540 reg->type = PTR_TO_XDP_SOCK;
840b9615
JS
6541 } else {
6542 reg->type = PTR_TO_MAP_VALUE;
6543 }
c64b7983
JS
6544 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
6545 reg->type = PTR_TO_SOCKET;
46f8bc92
MKL
6546 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
6547 reg->type = PTR_TO_SOCK_COMMON;
655a51e5
MKL
6548 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
6549 reg->type = PTR_TO_TCP_SOCK;
56f668df 6550 }
1b986589
MKL
6551 if (is_null) {
6552 /* We don't need id and ref_obj_id from this point
6553 * onwards, so reset them to give state pruning
6554 * a chance to take effect.
6555 */
6556 reg->id = 0;
6557 reg->ref_obj_id = 0;
6558 } else if (!reg_may_point_to_spin_lock(reg)) {
6559 /* For not-NULL ptr, reg->ref_obj_id will be reset
6560 * in release_reg_references().
6561 *
6562 * reg->id is still used by spin_lock ptr. Other
6563 * than spin_lock ptr type, reg->id can be reset.
fd978bf7
JS
6564 */
6565 reg->id = 0;
56f668df 6566 }
57a09bf0
TG
6567 }
6568}
6569
c6a9efa1
PC
6570static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
6571 bool is_null)
6572{
6573 struct bpf_reg_state *reg;
6574 int i;
6575
6576 for (i = 0; i < MAX_BPF_REG; i++)
6577 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
6578
6579 bpf_for_each_spilled_reg(i, state, reg) {
6580 if (!reg)
6581 continue;
6582 mark_ptr_or_null_reg(state, reg, id, is_null);
6583 }
6584}
6585
57a09bf0
TG
6586/* The logic is similar to find_good_pkt_pointers(), both could eventually
6587 * be folded together at some point.
6588 */
840b9615
JS
6589static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
6590 bool is_null)
57a09bf0 6591{
f4d7e40a 6592 struct bpf_func_state *state = vstate->frame[vstate->curframe];
c6a9efa1 6593 struct bpf_reg_state *regs = state->regs;
1b986589 6594 u32 ref_obj_id = regs[regno].ref_obj_id;
a08dd0da 6595 u32 id = regs[regno].id;
c6a9efa1 6596 int i;
57a09bf0 6597
1b986589
MKL
6598 if (ref_obj_id && ref_obj_id == id && is_null)
6599 /* regs[regno] is in the " == NULL" branch.
6600 * No one could have freed the reference state before
6601 * doing the NULL check.
6602 */
6603 WARN_ON_ONCE(release_reference_state(state, id));
fd978bf7 6604
c6a9efa1
PC
6605 for (i = 0; i <= vstate->curframe; i++)
6606 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
57a09bf0
TG
6607}
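/* Illustrative sketch (not part of the kernel source): the BPF C pattern
 * that drives mark_ptr_or_null_regs() above. bpf_map_lookup_elem() returns
 * a PTR_TO_MAP_VALUE_OR_NULL; the "if (!v)" check lets the verifier mark v
 * as PTR_TO_MAP_VALUE on the fall-through path and as a known-zero scalar
 * on the taken path. Map and program names are hypothetical; libbpf's
 * SEC()/__uint()/__type() conventions are assumed.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");

SEC("socket")
int count_packets(struct __sk_buff *skb)
{
	__u32 key = 0;
	__u64 *v;

	v = bpf_map_lookup_elem(&counters, &key);
	if (!v)			/* "== NULL" branch: v becomes SCALAR_VALUE 0 */
		return 0;
	*v += 1;		/* here v is PTR_TO_MAP_VALUE, so the deref is allowed */
	return 0;
}

char _license[] SEC("license") = "GPL";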
6608
5beca081
DB
6609static bool try_match_pkt_pointers(const struct bpf_insn *insn,
6610 struct bpf_reg_state *dst_reg,
6611 struct bpf_reg_state *src_reg,
6612 struct bpf_verifier_state *this_branch,
6613 struct bpf_verifier_state *other_branch)
6614{
6615 if (BPF_SRC(insn->code) != BPF_X)
6616 return false;
6617
092ed096
JW
6618 /* Pointers are always 64-bit. */
6619 if (BPF_CLASS(insn->code) == BPF_JMP32)
6620 return false;
6621
5beca081
DB
6622 switch (BPF_OP(insn->code)) {
6623 case BPF_JGT:
6624 if ((dst_reg->type == PTR_TO_PACKET &&
6625 src_reg->type == PTR_TO_PACKET_END) ||
6626 (dst_reg->type == PTR_TO_PACKET_META &&
6627 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
6628 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
6629 find_good_pkt_pointers(this_branch, dst_reg,
6630 dst_reg->type, false);
6631 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6632 src_reg->type == PTR_TO_PACKET) ||
6633 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6634 src_reg->type == PTR_TO_PACKET_META)) {
6635 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
6636 find_good_pkt_pointers(other_branch, src_reg,
6637 src_reg->type, true);
6638 } else {
6639 return false;
6640 }
6641 break;
6642 case BPF_JLT:
6643 if ((dst_reg->type == PTR_TO_PACKET &&
6644 src_reg->type == PTR_TO_PACKET_END) ||
6645 (dst_reg->type == PTR_TO_PACKET_META &&
6646 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
6647 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
6648 find_good_pkt_pointers(other_branch, dst_reg,
6649 dst_reg->type, true);
6650 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6651 src_reg->type == PTR_TO_PACKET) ||
6652 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6653 src_reg->type == PTR_TO_PACKET_META)) {
6654 /* pkt_end < pkt_data', pkt_data < pkt_meta' */
6655 find_good_pkt_pointers(this_branch, src_reg,
6656 src_reg->type, false);
6657 } else {
6658 return false;
6659 }
6660 break;
6661 case BPF_JGE:
6662 if ((dst_reg->type == PTR_TO_PACKET &&
6663 src_reg->type == PTR_TO_PACKET_END) ||
6664 (dst_reg->type == PTR_TO_PACKET_META &&
6665 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
6666 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
6667 find_good_pkt_pointers(this_branch, dst_reg,
6668 dst_reg->type, true);
6669 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6670 src_reg->type == PTR_TO_PACKET) ||
6671 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6672 src_reg->type == PTR_TO_PACKET_META)) {
6673 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
6674 find_good_pkt_pointers(other_branch, src_reg,
6675 src_reg->type, false);
6676 } else {
6677 return false;
6678 }
6679 break;
6680 case BPF_JLE:
6681 if ((dst_reg->type == PTR_TO_PACKET &&
6682 src_reg->type == PTR_TO_PACKET_END) ||
6683 (dst_reg->type == PTR_TO_PACKET_META &&
6684 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
6685 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
6686 find_good_pkt_pointers(other_branch, dst_reg,
6687 dst_reg->type, false);
6688 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
6689 src_reg->type == PTR_TO_PACKET) ||
6690 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
6691 src_reg->type == PTR_TO_PACKET_META)) {
6692 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
6693 find_good_pkt_pointers(this_branch, src_reg,
6694 src_reg->type, true);
6695 } else {
6696 return false;
6697 }
6698 break;
6699 default:
6700 return false;
6701 }
6702
6703 return true;
6704}
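/* Illustrative sketch (not part of the kernel source): the packet-access
 * pattern that try_match_pkt_pointers()/find_good_pkt_pointers() recognize.
 * "(void *)(eth + 1) > data_end" is a PTR_TO_PACKET vs PTR_TO_PACKET_END
 * BPF_JGT; on the fall-through branch the verifier records that
 * sizeof(*eth) bytes past 'data' are readable. The program name is
 * hypothetical; libbpf conventions are assumed.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int parse_eth(struct xdp_md *ctx)
{
	void *data     = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;

	if ((void *)(eth + 1) > data_end)	/* pkt_data' > pkt_end */
		return XDP_PASS;		/* may be out of bounds: don't touch it */

	/* here the first sizeof(*eth) bytes are provably in range */
	return eth->h_proto ? XDP_PASS : XDP_DROP;
}

char _license[] SEC("license") = "GPL";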
6705
58e2af8b 6706static int check_cond_jmp_op(struct bpf_verifier_env *env,
17a52670
AS
6707 struct bpf_insn *insn, int *insn_idx)
6708{
f4d7e40a
AS
6709 struct bpf_verifier_state *this_branch = env->cur_state;
6710 struct bpf_verifier_state *other_branch;
6711 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
fb8d251e 6712 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
17a52670 6713 u8 opcode = BPF_OP(insn->code);
092ed096 6714 bool is_jmp32;
fb8d251e 6715 int pred = -1;
17a52670
AS
6716 int err;
6717
092ed096
JW
6718 /* Only conditional jumps are expected to reach here. */
6719 if (opcode == BPF_JA || opcode > BPF_JSLE) {
6720 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
17a52670
AS
6721 return -EINVAL;
6722 }
6723
6724 if (BPF_SRC(insn->code) == BPF_X) {
6725 if (insn->imm != 0) {
092ed096 6726 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
6727 return -EINVAL;
6728 }
6729
6730 /* check src1 operand */
dc503a8a 6731 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
6732 if (err)
6733 return err;
1be7f75d
AS
6734
6735 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 6736 verbose(env, "R%d pointer comparison prohibited\n",
1be7f75d
AS
6737 insn->src_reg);
6738 return -EACCES;
6739 }
fb8d251e 6740 src_reg = &regs[insn->src_reg];
17a52670
AS
6741 } else {
6742 if (insn->src_reg != BPF_REG_0) {
092ed096 6743 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
6744 return -EINVAL;
6745 }
6746 }
6747
6748 /* check src2 operand */
dc503a8a 6749 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
6750 if (err)
6751 return err;
6752
1a0dc1ac 6753 dst_reg = &regs[insn->dst_reg];
092ed096 6754 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
1a0dc1ac 6755
3f50f132
JF
6756 if (BPF_SRC(insn->code) == BPF_K) {
6757 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
6758 } else if (src_reg->type == SCALAR_VALUE &&
6759 is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
6760 pred = is_branch_taken(dst_reg,
6761 tnum_subreg(src_reg->var_off).value,
6762 opcode,
6763 is_jmp32);
6764 } else if (src_reg->type == SCALAR_VALUE &&
6765 !is_jmp32 && tnum_is_const(src_reg->var_off)) {
6766 pred = is_branch_taken(dst_reg,
6767 src_reg->var_off.value,
6768 opcode,
6769 is_jmp32);
6770 }
6771
b5dc0163
AS
6772 if (pred >= 0) {
6773 err = mark_chain_precision(env, insn->dst_reg);
6774 if (BPF_SRC(insn->code) == BPF_X && !err)
6775 err = mark_chain_precision(env, insn->src_reg);
6776 if (err)
6777 return err;
6778 }
fb8d251e
AS
6779 if (pred == 1) {
6780 /* only follow the goto, ignore fall-through */
6781 *insn_idx += insn->off;
6782 return 0;
6783 } else if (pred == 0) {
6784 /* only follow fall-through branch, since
6785 * that's where the program will go
6786 */
6787 return 0;
17a52670
AS
6788 }
6789
979d63d5
DB
6790 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
6791 false);
17a52670
AS
6792 if (!other_branch)
6793 return -EFAULT;
f4d7e40a 6794 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
17a52670 6795
48461135
JB
6796 /* detect if we are comparing against a constant value so we can adjust
6797 * our min/max values for our dst register.
f1174f77
EC
6798 * this is only legit if both are scalars (or pointers to the same
6799 * object, I suppose, but we don't support that right now), because
6800 * otherwise the different base pointers mean the offsets aren't
6801 * comparable.
48461135
JB
6802 */
6803 if (BPF_SRC(insn->code) == BPF_X) {
092ed096 6804 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
092ed096 6805
f1174f77 6806 if (dst_reg->type == SCALAR_VALUE &&
092ed096
JW
6807 src_reg->type == SCALAR_VALUE) {
6808 if (tnum_is_const(src_reg->var_off) ||
3f50f132
JF
6809 (is_jmp32 &&
6810 tnum_is_const(tnum_subreg(src_reg->var_off))))
f4d7e40a 6811 reg_set_min_max(&other_branch_regs[insn->dst_reg],
092ed096 6812 dst_reg,
3f50f132
JF
6813 src_reg->var_off.value,
6814 tnum_subreg(src_reg->var_off).value,
092ed096
JW
6815 opcode, is_jmp32);
6816 else if (tnum_is_const(dst_reg->var_off) ||
3f50f132
JF
6817 (is_jmp32 &&
6818 tnum_is_const(tnum_subreg(dst_reg->var_off))))
f4d7e40a 6819 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
092ed096 6820 src_reg,
3f50f132
JF
6821 dst_reg->var_off.value,
6822 tnum_subreg(dst_reg->var_off).value,
092ed096
JW
6823 opcode, is_jmp32);
6824 else if (!is_jmp32 &&
6825 (opcode == BPF_JEQ || opcode == BPF_JNE))
f1174f77 6826 /* Comparing for equality, we can combine knowledge */
f4d7e40a
AS
6827 reg_combine_min_max(&other_branch_regs[insn->src_reg],
6828 &other_branch_regs[insn->dst_reg],
092ed096 6829 src_reg, dst_reg, opcode);
f1174f77
EC
6830 }
6831 } else if (dst_reg->type == SCALAR_VALUE) {
f4d7e40a 6832 reg_set_min_max(&other_branch_regs[insn->dst_reg],
3f50f132
JF
6833 dst_reg, insn->imm, (u32)insn->imm,
6834 opcode, is_jmp32);
48461135
JB
6835 }
6836
092ed096
JW
6837 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
6838 * NOTE: these optimizations below are related to pointer comparison,
6839 * which will never be JMP32.
6840 */
6841 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
1a0dc1ac 6842 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
840b9615
JS
6843 reg_type_may_be_null(dst_reg->type)) {
6844 /* Mark all identical registers in each branch as either
57a09bf0
TG
6845 * safe or unknown depending on the R == 0 or R != 0 conditional.
6846 */
840b9615
JS
6847 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
6848 opcode == BPF_JNE);
6849 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
6850 opcode == BPF_JEQ);
5beca081
DB
6851 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
6852 this_branch, other_branch) &&
6853 is_pointer_value(env, insn->dst_reg)) {
61bd5218
JK
6854 verbose(env, "R%d pointer comparison prohibited\n",
6855 insn->dst_reg);
1be7f75d 6856 return -EACCES;
17a52670 6857 }
06ee7115 6858 if (env->log.level & BPF_LOG_LEVEL)
f4d7e40a 6859 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
17a52670
AS
6860 return 0;
6861}
6862
17a52670 6863/* verify BPF_LD_IMM64 instruction */
58e2af8b 6864static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 6865{
d8eca5bb 6866 struct bpf_insn_aux_data *aux = cur_aux(env);
638f5b90 6867 struct bpf_reg_state *regs = cur_regs(env);
d8eca5bb 6868 struct bpf_map *map;
17a52670
AS
6869 int err;
6870
6871 if (BPF_SIZE(insn->code) != BPF_DW) {
61bd5218 6872 verbose(env, "invalid BPF_LD_IMM insn\n");
17a52670
AS
6873 return -EINVAL;
6874 }
6875 if (insn->off != 0) {
61bd5218 6876 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
17a52670
AS
6877 return -EINVAL;
6878 }
6879
dc503a8a 6880 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
6881 if (err)
6882 return err;
6883
6b173873 6884 if (insn->src_reg == 0) {
6b173873
JK
6885 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
6886
f1174f77 6887 regs[insn->dst_reg].type = SCALAR_VALUE;
b03c9f9f 6888 __mark_reg_known(&regs[insn->dst_reg], imm);
17a52670 6889 return 0;
6b173873 6890 }
17a52670 6891
d8eca5bb
DB
6892 map = env->used_maps[aux->map_index];
6893 mark_reg_known_zero(env, regs, insn->dst_reg);
6894 regs[insn->dst_reg].map_ptr = map;
6895
6896 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
6897 regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
6898 regs[insn->dst_reg].off = aux->map_off;
6899 if (map_value_has_spin_lock(map))
6900 regs[insn->dst_reg].id = ++env->id_gen;
6901 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
6902 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
6903 } else {
6904 verbose(env, "bpf verifier is misconfigured\n");
6905 return -EINVAL;
6906 }
17a52670 6907
17a52670
AS
6908 return 0;
6909}
6910
96be4325
DB
6911static bool may_access_skb(enum bpf_prog_type type)
6912{
6913 switch (type) {
6914 case BPF_PROG_TYPE_SOCKET_FILTER:
6915 case BPF_PROG_TYPE_SCHED_CLS:
94caee8c 6916 case BPF_PROG_TYPE_SCHED_ACT:
96be4325
DB
6917 return true;
6918 default:
6919 return false;
6920 }
6921}
6922
ddd872bc
AS
6923/* verify safety of LD_ABS|LD_IND instructions:
6924 * - they can only appear in the programs where ctx == skb
6925 * - since they are wrappers of function calls, they scratch R1-R5 registers,
6926 * preserve R6-R9, and store return value into R0
6927 *
6928 * Implicit input:
6929 * ctx == skb == R6 == CTX
6930 *
6931 * Explicit input:
6932 * SRC == any register
6933 * IMM == 32-bit immediate
6934 *
6935 * Output:
6936 * R0 - 8/16/32-bit skb data converted to cpu endianness
6937 */
58e2af8b 6938static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
ddd872bc 6939{
638f5b90 6940 struct bpf_reg_state *regs = cur_regs(env);
6d4f151a 6941 static const int ctx_reg = BPF_REG_6;
ddd872bc 6942 u8 mode = BPF_MODE(insn->code);
ddd872bc
AS
6943 int i, err;
6944
24701ece 6945 if (!may_access_skb(env->prog->type)) {
61bd5218 6946 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
ddd872bc
AS
6947 return -EINVAL;
6948 }
6949
e0cea7ce
DB
6950 if (!env->ops->gen_ld_abs) {
6951 verbose(env, "bpf verifier is misconfigured\n");
6952 return -EINVAL;
6953 }
6954
f910cefa 6955 if (env->subprog_cnt > 1) {
f4d7e40a
AS
6956 /* when a program has LD_ABS insns, JITs and the interpreter assume
6957 * that r1 == ctx == skb, which is not the case for callees
6958 * that can have arbitrary arguments. It's problematic
6959 * for main prog as well since JITs would need to analyze
6960 * all functions in order to make proper register save/restore
6961 * decisions in the main prog. Hence disallow LD_ABS with calls
6962 */
6963 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
6964 return -EINVAL;
6965 }
6966
ddd872bc 6967 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
d82bccc6 6968 BPF_SIZE(insn->code) == BPF_DW ||
ddd872bc 6969 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
61bd5218 6970 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
ddd872bc
AS
6971 return -EINVAL;
6972 }
6973
6974 /* check whether implicit source operand (register R6) is readable */
6d4f151a 6975 err = check_reg_arg(env, ctx_reg, SRC_OP);
ddd872bc
AS
6976 if (err)
6977 return err;
6978
fd978bf7
JS
6979 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
6980 * gen_ld_abs() may terminate the program at runtime, leading to
6981 * a reference leak.
6982 */
6983 err = check_reference_leak(env);
6984 if (err) {
6985 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
6986 return err;
6987 }
6988
d83525ca
AS
6989 if (env->cur_state->active_spin_lock) {
6990 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
6991 return -EINVAL;
6992 }
6993
6d4f151a 6994 if (regs[ctx_reg].type != PTR_TO_CTX) {
61bd5218
JK
6995 verbose(env,
6996 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
ddd872bc
AS
6997 return -EINVAL;
6998 }
6999
7000 if (mode == BPF_IND) {
7001 /* check explicit source operand */
dc503a8a 7002 err = check_reg_arg(env, insn->src_reg, SRC_OP);
ddd872bc
AS
7003 if (err)
7004 return err;
7005 }
7006
6d4f151a
DB
7007 err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
7008 if (err < 0)
7009 return err;
7010
ddd872bc 7011 /* reset caller saved regs to unreadable */
dc503a8a 7012 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 7013 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
7014 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
7015 }
ddd872bc
AS
7016
7017 /* mark destination R0 register as readable, since it contains
dc503a8a
EC
7018 * the value fetched from the packet.
7019 * Already marked as written above.
ddd872bc 7020 */
61bd5218 7021 mark_reg_unknown(env, regs, BPF_REG_0);
5327ed3d
JW
7022 /* ld_abs loads up to 32 bits of skb data. */
7023 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
ddd872bc
AS
7024 return 0;
7025}
7026
390ee7e2
AS
7027static int check_return_code(struct bpf_verifier_env *env)
7028{
5cf1e914 7029 struct tnum enforce_attach_type_range = tnum_unknown;
27ae7997 7030 const struct bpf_prog *prog = env->prog;
390ee7e2
AS
7031 struct bpf_reg_state *reg;
7032 struct tnum range = tnum_range(0, 1);
27ae7997
MKL
7033 int err;
7034
9e4e01df
KS
7035 /* LSM and struct_ops func-ptr's return type could be "void" */
7036 if ((env->prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
7037 env->prog->type == BPF_PROG_TYPE_LSM) &&
27ae7997
MKL
7038 !prog->aux->attach_func_proto->type)
7039 return 0;
7040
7041 /* The eBPF calling convention is such that R0 is used
7042 * to return the value from an eBPF program.
7043 * Make sure that it's readable at the time
7044 * of bpf_exit, which means that the program wrote
7045 * something into it earlier.
7046 */
7047 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
7048 if (err)
7049 return err;
7050
7051 if (is_pointer_value(env, BPF_REG_0)) {
7052 verbose(env, "R0 leaks addr as return value\n");
7053 return -EACCES;
7054 }
390ee7e2
AS
7055
7056 switch (env->prog->type) {
983695fa
DB
7057 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
7058 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
7059 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
7060 range = tnum_range(1, 1);
ed4ed404 7061 break;
390ee7e2 7062 case BPF_PROG_TYPE_CGROUP_SKB:
5cf1e914 7063 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
7064 range = tnum_range(0, 3);
7065 enforce_attach_type_range = tnum_range(2, 3);
7066 }
ed4ed404 7067 break;
390ee7e2
AS
7068 case BPF_PROG_TYPE_CGROUP_SOCK:
7069 case BPF_PROG_TYPE_SOCK_OPS:
ebc614f6 7070 case BPF_PROG_TYPE_CGROUP_DEVICE:
7b146ceb 7071 case BPF_PROG_TYPE_CGROUP_SYSCTL:
0d01da6a 7072 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
390ee7e2 7073 break;
15ab09bd
AS
7074 case BPF_PROG_TYPE_RAW_TRACEPOINT:
7075 if (!env->prog->aux->attach_btf_id)
7076 return 0;
7077 range = tnum_const(0);
7078 break;
390ee7e2
AS
7079 default:
7080 return 0;
7081 }
7082
638f5b90 7083 reg = cur_regs(env) + BPF_REG_0;
390ee7e2 7084 if (reg->type != SCALAR_VALUE) {
61bd5218 7085 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
390ee7e2
AS
7086 reg_type_str[reg->type]);
7087 return -EINVAL;
7088 }
7089
7090 if (!tnum_in(range, reg->var_off)) {
5cf1e914 7091 char tn_buf[48];
7092
61bd5218 7093 verbose(env, "At program exit the register R0 ");
390ee7e2 7094 if (!tnum_is_unknown(reg->var_off)) {
390ee7e2 7095 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 7096 verbose(env, "has value %s", tn_buf);
390ee7e2 7097 } else {
61bd5218 7098 verbose(env, "has unknown scalar value");
390ee7e2 7099 }
5cf1e914 7100 tnum_strn(tn_buf, sizeof(tn_buf), range);
983695fa 7101 verbose(env, " should have been in %s\n", tn_buf);
390ee7e2
AS
7102 return -EINVAL;
7103 }
5cf1e914 7104
7105 if (!tnum_is_unknown(enforce_attach_type_range) &&
7106 tnum_in(enforce_attach_type_range, reg->var_off))
7107 env->prog->enforce_expected_attach_type = 1;
390ee7e2
AS
7108 return 0;
7109}
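/* Illustrative sketch (not part of the kernel source): a program that
 * check_return_code() above rejects. For BPF_PROG_TYPE_CGROUP_SKB attached
 * at ingress the allowed return range is tnum_range(0, 1), so returning 2
 * trips the "At program exit the register R0 ... should have been in ..."
 * path above and the load fails. The program name is hypothetical; libbpf
 * conventions are assumed.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/ingress")
int bad_retval(struct __sk_buff *skb)
{
	return 2;	/* outside [0, 1]: rejected at load time */
}

char _license[] SEC("license") = "GPL";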
7110
475fb78f
AS
7111/* non-recursive DFS pseudo code
7112 * 1 procedure DFS-iterative(G,v):
7113 * 2 label v as discovered
7114 * 3 let S be a stack
7115 * 4 S.push(v)
7116 * 5 while S is not empty
7117 * 6 t <- S.pop()
7118 * 7 if t is what we're looking for:
7119 * 8 return t
7120 * 9 for all edges e in G.adjacentEdges(t) do
7121 * 10 if edge e is already labelled
7122 * 11 continue with the next edge
7123 * 12 w <- G.adjacentVertex(t,e)
7124 * 13 if vertex w is not discovered and not explored
7125 * 14 label e as tree-edge
7126 * 15 label w as discovered
7127 * 16 S.push(w)
7128 * 17 continue at 5
7129 * 18 else if vertex w is discovered
7130 * 19 label e as back-edge
7131 * 20 else
7132 * 21 // vertex w is explored
7133 * 22 label e as forward- or cross-edge
7134 * 23 label t as explored
7135 * 24 S.pop()
7136 *
7137 * convention:
7138 * 0x10 - discovered
7139 * 0x11 - discovered and fall-through edge labelled
7140 * 0x12 - discovered and fall-through and branch edges labelled
7141 * 0x20 - explored
7142 */
7143
7144enum {
7145 DISCOVERED = 0x10,
7146 EXPLORED = 0x20,
7147 FALLTHROUGH = 1,
7148 BRANCH = 2,
7149};
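/* Illustrative sketch (not part of the kernel source): back-edge detection
 * on a tiny adjacency-matrix graph using the same discovered/explored
 * convention as above. This version recurses for brevity; check_cfg()
 * below implements the explicit-stack variant from the pseudo-code. The
 * graph and all names are made up.
 */
#include <stdbool.h>
#include <stdio.h>

enum { TOY_DISCOVERED = 0x10, TOY_EXPLORED = 0x20 };	/* mirrors the enum above */
#define TOY_N 4

static int toy_adj[TOY_N][TOY_N] = {
	/* 0 -> 1, 1 -> 2, 2 -> 3, 3 -> 1 (edge back into the loop header) */
	[0][1] = 1, [1][2] = 1, [2][3] = 1, [3][1] = 1,
};
static int toy_state[TOY_N];

static bool toy_has_back_edge(int v)
{
	toy_state[v] = TOY_DISCOVERED;
	for (int w = 0; w < TOY_N; w++) {
		if (!toy_adj[v][w])
			continue;
		if (toy_state[w] == TOY_DISCOVERED)
			return true;		/* w is still on the DFS path */
		if (toy_state[w] == 0 && toy_has_back_edge(w))
			return true;
	}
	toy_state[v] = TOY_EXPLORED;
	return false;
}

int main(void)
{
	printf("back edge: %s\n", toy_has_back_edge(0) ? "yes" : "no");	/* yes */
	return 0;
}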
7150
dc2a4ebc
AS
7151static u32 state_htab_size(struct bpf_verifier_env *env)
7152{
7153 return env->prog->len;
7154}
7155
5d839021
AS
7156static struct bpf_verifier_state_list **explored_state(
7157 struct bpf_verifier_env *env,
7158 int idx)
7159{
dc2a4ebc
AS
7160 struct bpf_verifier_state *cur = env->cur_state;
7161 struct bpf_func_state *state = cur->frame[cur->curframe];
7162
7163 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
5d839021
AS
7164}
7165
7166static void init_explored_state(struct bpf_verifier_env *env, int idx)
7167{
a8f500af 7168 env->insn_aux_data[idx].prune_point = true;
5d839021 7169}
f1bca824 7170
475fb78f
AS
7171/* t, w, e - match pseudo-code above:
7172 * t - index of current instruction
7173 * w - next instruction
7174 * e - edge
7175 */
2589726d
AS
7176static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
7177 bool loop_ok)
475fb78f 7178{
7df737e9
AS
7179 int *insn_stack = env->cfg.insn_stack;
7180 int *insn_state = env->cfg.insn_state;
7181
475fb78f
AS
7182 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
7183 return 0;
7184
7185 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
7186 return 0;
7187
7188 if (w < 0 || w >= env->prog->len) {
d9762e84 7189 verbose_linfo(env, t, "%d: ", t);
61bd5218 7190 verbose(env, "jump out of range from insn %d to %d\n", t, w);
475fb78f
AS
7191 return -EINVAL;
7192 }
7193
f1bca824
AS
7194 if (e == BRANCH)
7195 /* mark branch target for state pruning */
5d839021 7196 init_explored_state(env, w);
f1bca824 7197
475fb78f
AS
7198 if (insn_state[w] == 0) {
7199 /* tree-edge */
7200 insn_state[t] = DISCOVERED | e;
7201 insn_state[w] = DISCOVERED;
7df737e9 7202 if (env->cfg.cur_stack >= env->prog->len)
475fb78f 7203 return -E2BIG;
7df737e9 7204 insn_stack[env->cfg.cur_stack++] = w;
475fb78f
AS
7205 return 1;
7206 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
2589726d
AS
7207 if (loop_ok && env->allow_ptr_leaks)
7208 return 0;
d9762e84
MKL
7209 verbose_linfo(env, t, "%d: ", t);
7210 verbose_linfo(env, w, "%d: ", w);
61bd5218 7211 verbose(env, "back-edge from insn %d to %d\n", t, w);
475fb78f
AS
7212 return -EINVAL;
7213 } else if (insn_state[w] == EXPLORED) {
7214 /* forward- or cross-edge */
7215 insn_state[t] = DISCOVERED | e;
7216 } else {
61bd5218 7217 verbose(env, "insn state internal bug\n");
475fb78f
AS
7218 return -EFAULT;
7219 }
7220 return 0;
7221}
7222
7223/* non-recursive depth-first-search to detect loops in BPF program
7224 * loop == back-edge in directed graph
7225 */
58e2af8b 7226static int check_cfg(struct bpf_verifier_env *env)
475fb78f
AS
7227{
7228 struct bpf_insn *insns = env->prog->insnsi;
7229 int insn_cnt = env->prog->len;
7df737e9 7230 int *insn_stack, *insn_state;
475fb78f
AS
7231 int ret = 0;
7232 int i, t;
7233
7df737e9 7234 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
475fb78f
AS
7235 if (!insn_state)
7236 return -ENOMEM;
7237
7df737e9 7238 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
475fb78f 7239 if (!insn_stack) {
71dde681 7240 kvfree(insn_state);
475fb78f
AS
7241 return -ENOMEM;
7242 }
7243
7244 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
7245 insn_stack[0] = 0; /* 0 is the first instruction */
7df737e9 7246 env->cfg.cur_stack = 1;
475fb78f
AS
7247
7248peek_stack:
7df737e9 7249 if (env->cfg.cur_stack == 0)
475fb78f 7250 goto check_state;
7df737e9 7251 t = insn_stack[env->cfg.cur_stack - 1];
475fb78f 7252
092ed096
JW
7253 if (BPF_CLASS(insns[t].code) == BPF_JMP ||
7254 BPF_CLASS(insns[t].code) == BPF_JMP32) {
475fb78f
AS
7255 u8 opcode = BPF_OP(insns[t].code);
7256
7257 if (opcode == BPF_EXIT) {
7258 goto mark_explored;
7259 } else if (opcode == BPF_CALL) {
2589726d 7260 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
475fb78f
AS
7261 if (ret == 1)
7262 goto peek_stack;
7263 else if (ret < 0)
7264 goto err_free;
07016151 7265 if (t + 1 < insn_cnt)
5d839021 7266 init_explored_state(env, t + 1);
cc8b0b92 7267 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
5d839021 7268 init_explored_state(env, t);
2589726d
AS
7269 ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
7270 env, false);
cc8b0b92
AS
7271 if (ret == 1)
7272 goto peek_stack;
7273 else if (ret < 0)
7274 goto err_free;
7275 }
475fb78f
AS
7276 } else if (opcode == BPF_JA) {
7277 if (BPF_SRC(insns[t].code) != BPF_K) {
7278 ret = -EINVAL;
7279 goto err_free;
7280 }
7281 /* unconditional jump with single edge */
7282 ret = push_insn(t, t + insns[t].off + 1,
2589726d 7283 FALLTHROUGH, env, true);
475fb78f
AS
7284 if (ret == 1)
7285 goto peek_stack;
7286 else if (ret < 0)
7287 goto err_free;
b5dc0163
AS
7288 /* unconditional jmp is not a good pruning point,
7289 * but it's marked, since backtracking needs
7290 * to record jmp history in is_state_visited().
7291 */
7292 init_explored_state(env, t + insns[t].off + 1);
f1bca824
AS
7293 /* tell verifier to check for equivalent states
7294 * after every call and jump
7295 */
c3de6317 7296 if (t + 1 < insn_cnt)
5d839021 7297 init_explored_state(env, t + 1);
475fb78f
AS
7298 } else {
7299 /* conditional jump with two edges */
5d839021 7300 init_explored_state(env, t);
2589726d 7301 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
475fb78f
AS
7302 if (ret == 1)
7303 goto peek_stack;
7304 else if (ret < 0)
7305 goto err_free;
7306
2589726d 7307 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
475fb78f
AS
7308 if (ret == 1)
7309 goto peek_stack;
7310 else if (ret < 0)
7311 goto err_free;
7312 }
7313 } else {
7314 /* all other non-branch instructions with single
7315 * fall-through edge
7316 */
2589726d 7317 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
475fb78f
AS
7318 if (ret == 1)
7319 goto peek_stack;
7320 else if (ret < 0)
7321 goto err_free;
7322 }
7323
7324mark_explored:
7325 insn_state[t] = EXPLORED;
7df737e9 7326 if (env->cfg.cur_stack-- <= 0) {
61bd5218 7327 verbose(env, "pop stack internal bug\n");
475fb78f
AS
7328 ret = -EFAULT;
7329 goto err_free;
7330 }
7331 goto peek_stack;
7332
7333check_state:
7334 for (i = 0; i < insn_cnt; i++) {
7335 if (insn_state[i] != EXPLORED) {
61bd5218 7336 verbose(env, "unreachable insn %d\n", i);
475fb78f
AS
7337 ret = -EINVAL;
7338 goto err_free;
7339 }
7340 }
7341 ret = 0; /* cfg looks good */
7342
7343err_free:
71dde681
AS
7344 kvfree(insn_state);
7345 kvfree(insn_stack);
7df737e9 7346 env->cfg.insn_state = env->cfg.insn_stack = NULL;
475fb78f
AS
7347 return ret;
7348}
7349
838e9690
YS
7350/* The minimum supported BTF func info size */
7351#define MIN_BPF_FUNCINFO_SIZE 8
7352#define MAX_FUNCINFO_REC_SIZE 252
7353
c454a46b
MKL
7354static int check_btf_func(struct bpf_verifier_env *env,
7355 const union bpf_attr *attr,
7356 union bpf_attr __user *uattr)
838e9690 7357{
d0b2818e 7358 u32 i, nfuncs, urec_size, min_size;
838e9690 7359 u32 krec_size = sizeof(struct bpf_func_info);
c454a46b 7360 struct bpf_func_info *krecord;
8c1b6e69 7361 struct bpf_func_info_aux *info_aux = NULL;
838e9690 7362 const struct btf_type *type;
c454a46b
MKL
7363 struct bpf_prog *prog;
7364 const struct btf *btf;
838e9690 7365 void __user *urecord;
d0b2818e 7366 u32 prev_offset = 0;
838e9690
YS
7367 int ret = 0;
7368
7369 nfuncs = attr->func_info_cnt;
7370 if (!nfuncs)
7371 return 0;
7372
7373 if (nfuncs != env->subprog_cnt) {
7374 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
7375 return -EINVAL;
7376 }
7377
7378 urec_size = attr->func_info_rec_size;
7379 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
7380 urec_size > MAX_FUNCINFO_REC_SIZE ||
7381 urec_size % sizeof(u32)) {
7382 verbose(env, "invalid func info rec size %u\n", urec_size);
7383 return -EINVAL;
7384 }
7385
c454a46b
MKL
7386 prog = env->prog;
7387 btf = prog->aux->btf;
838e9690
YS
7388
7389 urecord = u64_to_user_ptr(attr->func_info);
7390 min_size = min_t(u32, krec_size, urec_size);
7391
ba64e7d8 7392 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
c454a46b
MKL
7393 if (!krecord)
7394 return -ENOMEM;
8c1b6e69
AS
7395 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
7396 if (!info_aux)
7397 goto err_free;
ba64e7d8 7398
838e9690
YS
7399 for (i = 0; i < nfuncs; i++) {
7400 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
7401 if (ret) {
7402 if (ret == -E2BIG) {
7403 verbose(env, "nonzero tailing record in func info");
7404 /* set the size the kernel expects so the loader can zero
7405 * out the rest of the record.
7406 */
7407 if (put_user(min_size, &uattr->func_info_rec_size))
7408 ret = -EFAULT;
7409 }
c454a46b 7410 goto err_free;
838e9690
YS
7411 }
7412
ba64e7d8 7413 if (copy_from_user(&krecord[i], urecord, min_size)) {
838e9690 7414 ret = -EFAULT;
c454a46b 7415 goto err_free;
838e9690
YS
7416 }
7417
d30d42e0 7418 /* check insn_off */
838e9690 7419 if (i == 0) {
d30d42e0 7420 if (krecord[i].insn_off) {
838e9690 7421 verbose(env,
d30d42e0
MKL
7422 "nonzero insn_off %u for the first func info record",
7423 krecord[i].insn_off);
838e9690 7424 ret = -EINVAL;
c454a46b 7425 goto err_free;
838e9690 7426 }
d30d42e0 7427 } else if (krecord[i].insn_off <= prev_offset) {
838e9690
YS
7428 verbose(env,
7429 "same or smaller insn offset (%u) than previous func info record (%u)",
d30d42e0 7430 krecord[i].insn_off, prev_offset);
838e9690 7431 ret = -EINVAL;
c454a46b 7432 goto err_free;
838e9690
YS
7433 }
7434
d30d42e0 7435 if (env->subprog_info[i].start != krecord[i].insn_off) {
838e9690
YS
7436 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
7437 ret = -EINVAL;
c454a46b 7438 goto err_free;
838e9690
YS
7439 }
7440
7441 /* check type_id */
ba64e7d8 7442 type = btf_type_by_id(btf, krecord[i].type_id);
51c39bb1 7443 if (!type || !btf_type_is_func(type)) {
838e9690 7444 verbose(env, "invalid type id %d in func info",
ba64e7d8 7445 krecord[i].type_id);
838e9690 7446 ret = -EINVAL;
c454a46b 7447 goto err_free;
838e9690 7448 }
51c39bb1 7449 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
d30d42e0 7450 prev_offset = krecord[i].insn_off;
838e9690
YS
7451 urecord += urec_size;
7452 }
7453
ba64e7d8
YS
7454 prog->aux->func_info = krecord;
7455 prog->aux->func_info_cnt = nfuncs;
8c1b6e69 7456 prog->aux->func_info_aux = info_aux;
838e9690
YS
7457 return 0;
7458
c454a46b 7459err_free:
ba64e7d8 7460 kvfree(krecord);
8c1b6e69 7461 kfree(info_aux);
838e9690
YS
7462 return ret;
7463}
7464
ba64e7d8
YS
7465static void adjust_btf_func(struct bpf_verifier_env *env)
7466{
8c1b6e69 7467 struct bpf_prog_aux *aux = env->prog->aux;
ba64e7d8
YS
7468 int i;
7469
8c1b6e69 7470 if (!aux->func_info)
ba64e7d8
YS
7471 return;
7472
7473 for (i = 0; i < env->subprog_cnt; i++)
8c1b6e69 7474 aux->func_info[i].insn_off = env->subprog_info[i].start;
ba64e7d8
YS
7475}
7476
c454a46b
MKL
7477#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
7478 sizeof(((struct bpf_line_info *)(0))->line_col))
7479#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
7480
7481static int check_btf_line(struct bpf_verifier_env *env,
7482 const union bpf_attr *attr,
7483 union bpf_attr __user *uattr)
7484{
7485 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
7486 struct bpf_subprog_info *sub;
7487 struct bpf_line_info *linfo;
7488 struct bpf_prog *prog;
7489 const struct btf *btf;
7490 void __user *ulinfo;
7491 int err;
7492
7493 nr_linfo = attr->line_info_cnt;
7494 if (!nr_linfo)
7495 return 0;
7496
7497 rec_size = attr->line_info_rec_size;
7498 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
7499 rec_size > MAX_LINEINFO_REC_SIZE ||
7500 rec_size & (sizeof(u32) - 1))
7501 return -EINVAL;
7502
7503 /* Need to zero it in case userspace passes
7504 * in a smaller bpf_line_info object.
7505 */
7506 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
7507 GFP_KERNEL | __GFP_NOWARN);
7508 if (!linfo)
7509 return -ENOMEM;
7510
7511 prog = env->prog;
7512 btf = prog->aux->btf;
7513
7514 s = 0;
7515 sub = env->subprog_info;
7516 ulinfo = u64_to_user_ptr(attr->line_info);
7517 expected_size = sizeof(struct bpf_line_info);
7518 ncopy = min_t(u32, expected_size, rec_size);
7519 for (i = 0; i < nr_linfo; i++) {
7520 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
7521 if (err) {
7522 if (err == -E2BIG) {
7523 verbose(env, "nonzero tailing record in line_info");
7524 if (put_user(expected_size,
7525 &uattr->line_info_rec_size))
7526 err = -EFAULT;
7527 }
7528 goto err_free;
7529 }
7530
7531 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
7532 err = -EFAULT;
7533 goto err_free;
7534 }
7535
7536 /*
7537 * Check insn_off to ensure
7538 * 1) strictly increasing AND
7539 * 2) bounded by prog->len
7540 *
7541 * The linfo[0].insn_off == 0 check logically falls into
7542 * the later "missing bpf_line_info for func..." case
7543 * because the first linfo[0].insn_off must also belong to
7544 * the first sub, and the first sub must have
7545 * subprog_info[0].start == 0.
7546 */
7547 if ((i && linfo[i].insn_off <= prev_offset) ||
7548 linfo[i].insn_off >= prog->len) {
7549 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
7550 i, linfo[i].insn_off, prev_offset,
7551 prog->len);
7552 err = -EINVAL;
7553 goto err_free;
7554 }
7555
fdbaa0be
MKL
7556 if (!prog->insnsi[linfo[i].insn_off].code) {
7557 verbose(env,
7558 "Invalid insn code at line_info[%u].insn_off\n",
7559 i);
7560 err = -EINVAL;
7561 goto err_free;
7562 }
7563
23127b33
MKL
7564 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
7565 !btf_name_by_offset(btf, linfo[i].file_name_off)) {
c454a46b
MKL
7566 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
7567 err = -EINVAL;
7568 goto err_free;
7569 }
7570
7571 if (s != env->subprog_cnt) {
7572 if (linfo[i].insn_off == sub[s].start) {
7573 sub[s].linfo_idx = i;
7574 s++;
7575 } else if (sub[s].start < linfo[i].insn_off) {
7576 verbose(env, "missing bpf_line_info for func#%u\n", s);
7577 err = -EINVAL;
7578 goto err_free;
7579 }
7580 }
7581
7582 prev_offset = linfo[i].insn_off;
7583 ulinfo += rec_size;
7584 }
7585
7586 if (s != env->subprog_cnt) {
7587 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
7588 env->subprog_cnt - s, s);
7589 err = -EINVAL;
7590 goto err_free;
7591 }
7592
7593 prog->aux->linfo = linfo;
7594 prog->aux->nr_linfo = nr_linfo;
7595
7596 return 0;
7597
7598err_free:
7599 kvfree(linfo);
7600 return err;
7601}
7602
7603static int check_btf_info(struct bpf_verifier_env *env,
7604 const union bpf_attr *attr,
7605 union bpf_attr __user *uattr)
7606{
7607 struct btf *btf;
7608 int err;
7609
7610 if (!attr->func_info_cnt && !attr->line_info_cnt)
7611 return 0;
7612
7613 btf = btf_get_by_fd(attr->prog_btf_fd);
7614 if (IS_ERR(btf))
7615 return PTR_ERR(btf);
7616 env->prog->aux->btf = btf;
7617
7618 err = check_btf_func(env, attr, uattr);
7619 if (err)
7620 return err;
7621
7622 err = check_btf_line(env, attr, uattr);
7623 if (err)
7624 return err;
7625
7626 return 0;
ba64e7d8
YS
7627}
7628
f1174f77
EC
7629/* check %cur's range satisfies %old's */
7630static bool range_within(struct bpf_reg_state *old,
7631 struct bpf_reg_state *cur)
7632{
b03c9f9f
EC
7633 return old->umin_value <= cur->umin_value &&
7634 old->umax_value >= cur->umax_value &&
7635 old->smin_value <= cur->smin_value &&
7636 old->smax_value >= cur->smax_value;
f1174f77
EC
7637}
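/* Illustrative sketch (not part of the kernel source): range_within() above
 * asks whether every value the current register can hold was already
 * possible in the old, known-safe state; only then may the old verdict be
 * reused for pruning. Names are hypothetical and only the unsigned half is
 * shown.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct toy_range { uint64_t umin, umax; };

static bool toy_range_within(struct toy_range old, struct toy_range cur)
{
	return old.umin <= cur.umin && old.umax >= cur.umax;
}

int main(void)
{
	struct toy_range old = { 0, 255 }, cur = { 10, 20 };

	assert(toy_range_within(old, cur));	/* old verdict covers cur */
	assert(!toy_range_within(cur, old));	/* but not the other way around */
	return 0;
}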
7638
7639/* Maximum number of register states that can exist at once */
7640#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
7641struct idpair {
7642 u32 old;
7643 u32 cur;
7644};
7645
7646/* If in the old state two registers had the same id, then they need to have
7647 * the same id in the new state as well. But that id could be different from
7648 * the old state, so we need to track the mapping from old to new ids.
7649 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
7650 * regs with old id 5 must also have new id 9 for the new state to be safe. But
7651 * regs with a different old id could still have new id 9; we don't care about
7652 * that.
7653 * So we look through our idmap to see if this old id has been seen before. If
7654 * so, we require the new id to match; otherwise, we add the id pair to the map.
969bf05e 7655 */
f1174f77 7656static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
969bf05e 7657{
f1174f77 7658 unsigned int i;
969bf05e 7659
f1174f77
EC
7660 for (i = 0; i < ID_MAP_SIZE; i++) {
7661 if (!idmap[i].old) {
7662 /* Reached an empty slot; haven't seen this id before */
7663 idmap[i].old = old_id;
7664 idmap[i].cur = cur_id;
7665 return true;
7666 }
7667 if (idmap[i].old == old_id)
7668 return idmap[i].cur == cur_id;
7669 }
7670 /* We ran out of idmap slots, which should be impossible */
7671 WARN_ON_ONCE(1);
7672 return false;
7673}
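/* Illustrative sketch (not part of the kernel source): what check_ids()
 * above enforces. If the old, already-verified state had two registers
 * sharing id 5, the current state is only equivalent when those registers
 * again share a single id (say 9); the concrete id value is irrelevant.
 * Names are made up.
 */
#include <assert.h>
#include <stdbool.h>
#include <string.h>

#define TOY_MAP_SIZE 4
struct toy_idpair { unsigned int old, cur; };

static bool toy_check_ids(unsigned int old_id, unsigned int cur_id,
			  struct toy_idpair *idmap)
{
	for (int i = 0; i < TOY_MAP_SIZE; i++) {
		if (!idmap[i].old) {
			idmap[i].old = old_id;	/* first sighting: record the pair */
			idmap[i].cur = cur_id;
			return true;
		}
		if (idmap[i].old == old_id)
			return idmap[i].cur == cur_id;
	}
	return false;				/* ran out of slots */
}

int main(void)
{
	struct toy_idpair idmap[TOY_MAP_SIZE];

	memset(idmap, 0, sizeof(idmap));
	assert(toy_check_ids(5, 9, idmap));	/* records 5 -> 9 */
	assert(toy_check_ids(5, 9, idmap));	/* consistent */
	assert(!toy_check_ids(5, 7, idmap));	/* old id 5 must map to 9 */
	return 0;
}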
7674
9242b5f5
AS
7675static void clean_func_state(struct bpf_verifier_env *env,
7676 struct bpf_func_state *st)
7677{
7678 enum bpf_reg_liveness live;
7679 int i, j;
7680
7681 for (i = 0; i < BPF_REG_FP; i++) {
7682 live = st->regs[i].live;
7683 /* liveness must not touch this register anymore */
7684 st->regs[i].live |= REG_LIVE_DONE;
7685 if (!(live & REG_LIVE_READ))
7686 /* since the register is unused, clear its state
7687 * to make further comparison simpler
7688 */
f54c7898 7689 __mark_reg_not_init(env, &st->regs[i]);
9242b5f5
AS
7690 }
7691
7692 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
7693 live = st->stack[i].spilled_ptr.live;
7694 /* liveness must not touch this stack slot anymore */
7695 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
7696 if (!(live & REG_LIVE_READ)) {
f54c7898 7697 __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
9242b5f5
AS
7698 for (j = 0; j < BPF_REG_SIZE; j++)
7699 st->stack[i].slot_type[j] = STACK_INVALID;
7700 }
7701 }
7702}
7703
7704static void clean_verifier_state(struct bpf_verifier_env *env,
7705 struct bpf_verifier_state *st)
7706{
7707 int i;
7708
7709 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
7710 /* all regs in this state in all frames were already marked */
7711 return;
7712
7713 for (i = 0; i <= st->curframe; i++)
7714 clean_func_state(env, st->frame[i]);
7715}
7716
7717/* the parentage chains form a tree.
7718 * the verifier states are added to state lists at given insn and
7719 * pushed into state stack for future exploration.
7720 * when the verifier reaches the bpf_exit insn, some of the verifier states
7721 * stored in the state lists have their final liveness state already,
7722 * but a lot of states will get revised from liveness point of view when
7723 * the verifier explores other branches.
7724 * Example:
7725 * 1: r0 = 1
7726 * 2: if r1 == 100 goto pc+1
7727 * 3: r0 = 2
7728 * 4: exit
7729 * when the verifier reaches exit insn the register r0 in the state list of
7730 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
7731 * of insn 2 and goes exploring further. At the insn 4 it will walk the
7732 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
7733 *
7734 * Since the verifier pushes the branch states as it sees them while exploring
7735 * the program, the fact of walking a branch instruction for the second
7736 * time means that all states below this branch were already explored and
7737 * their final liveness marks are already propagated.
7738 * Hence when the verifier completes the search of state list in is_state_visited()
7739 * we can call this clean_live_states() function to mark all liveness states
7740 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
7741 * will not be used.
7742 * This function also clears the registers and stack for states that are !READ
7743 * to simplify state merging.
7744 *
7745 * An important note here is that walking the same branch instruction in the callee
7746 * doesn't mean that the states are DONE. The verifier has to compare
7747 * the callsites
7748 */
7749static void clean_live_states(struct bpf_verifier_env *env, int insn,
7750 struct bpf_verifier_state *cur)
7751{
7752 struct bpf_verifier_state_list *sl;
7753 int i;
7754
5d839021 7755 sl = *explored_state(env, insn);
a8f500af 7756 while (sl) {
2589726d
AS
7757 if (sl->state.branches)
7758 goto next;
dc2a4ebc
AS
7759 if (sl->state.insn_idx != insn ||
7760 sl->state.curframe != cur->curframe)
9242b5f5
AS
7761 goto next;
7762 for (i = 0; i <= cur->curframe; i++)
7763 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
7764 goto next;
7765 clean_verifier_state(env, &sl->state);
7766next:
7767 sl = sl->next;
7768 }
7769}
7770
f1174f77 7771/* Returns true if (rold safe implies rcur safe) */
1b688a19
EC
7772static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
7773 struct idpair *idmap)
f1174f77 7774{
f4d7e40a
AS
7775 bool equal;
7776
dc503a8a
EC
7777 if (!(rold->live & REG_LIVE_READ))
7778 /* explored state didn't use this */
7779 return true;
7780
679c782d 7781 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
f4d7e40a
AS
7782
7783 if (rold->type == PTR_TO_STACK)
7784 /* two stack pointers are equal only if they're pointing to
7785 * the same stack frame, since fp-8 in foo != fp-8 in bar
7786 */
7787 return equal && rold->frameno == rcur->frameno;
7788
7789 if (equal)
969bf05e
AS
7790 return true;
7791
f1174f77
EC
7792 if (rold->type == NOT_INIT)
7793 /* explored state can't have used this */
969bf05e 7794 return true;
f1174f77
EC
7795 if (rcur->type == NOT_INIT)
7796 return false;
7797 switch (rold->type) {
7798 case SCALAR_VALUE:
7799 if (rcur->type == SCALAR_VALUE) {
b5dc0163
AS
7800 if (!rold->precise && !rcur->precise)
7801 return true;
f1174f77
EC
7802 /* new val must satisfy old val knowledge */
7803 return range_within(rold, rcur) &&
7804 tnum_in(rold->var_off, rcur->var_off);
7805 } else {
179d1c56
JH
7806 /* We're trying to use a pointer in place of a scalar.
7807 * Even if the scalar was unbounded, this could lead to
7808 * pointer leaks because scalars are allowed to leak
7809 * while pointers are not. We could make this safe in
7810 * special cases if root is calling us, but it's
7811 * probably not worth the hassle.
f1174f77 7812 */
179d1c56 7813 return false;
f1174f77
EC
7814 }
7815 case PTR_TO_MAP_VALUE:
1b688a19
EC
7816 /* If the new min/max/var_off satisfy the old ones and
7817 * everything else matches, we are OK.
d83525ca
AS
7818 * 'id' is not compared, since it's only used for maps with
7819 * bpf_spin_lock inside map element and in such cases if
7820 * the rest of the prog is valid for one map element then
7821 * it's valid for all map elements regardless of the key
7822 * used in bpf_map_lookup()
1b688a19
EC
7823 */
7824 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
7825 range_within(rold, rcur) &&
7826 tnum_in(rold->var_off, rcur->var_off);
f1174f77
EC
7827 case PTR_TO_MAP_VALUE_OR_NULL:
7828 /* a PTR_TO_MAP_VALUE could be safe to use as a
7829 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
7830 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
7831 * checked, doing so could have affected others with the same
7832 * id, and we can't check for that because we lost the id when
7833 * we converted to a PTR_TO_MAP_VALUE.
7834 */
7835 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
7836 return false;
7837 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
7838 return false;
7839 /* Check our ids match any regs they're supposed to */
7840 return check_ids(rold->id, rcur->id, idmap);
de8f3a83 7841 case PTR_TO_PACKET_META:
f1174f77 7842 case PTR_TO_PACKET:
de8f3a83 7843 if (rcur->type != rold->type)
f1174f77
EC
7844 return false;
7845 /* We must have at least as much range as the old ptr
7846 * did, so that any accesses which were safe before are
7847 * still safe. This is true even if old range < old off,
7848 * since someone could have accessed through (ptr - k), or
7849 * even done ptr -= k in a register, to get a safe access.
7850 */
7851 if (rold->range > rcur->range)
7852 return false;
7853 /* If the offsets don't match, we can't trust our alignment;
7854 * nor can we be sure that we won't fall out of range.
7855 */
7856 if (rold->off != rcur->off)
7857 return false;
7858 /* id relations must be preserved */
7859 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
7860 return false;
7861 /* new val must satisfy old val knowledge */
7862 return range_within(rold, rcur) &&
7863 tnum_in(rold->var_off, rcur->var_off);
7864 case PTR_TO_CTX:
7865 case CONST_PTR_TO_MAP:
f1174f77 7866 case PTR_TO_PACKET_END:
d58e468b 7867 case PTR_TO_FLOW_KEYS:
c64b7983
JS
7868 case PTR_TO_SOCKET:
7869 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
7870 case PTR_TO_SOCK_COMMON:
7871 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
7872 case PTR_TO_TCP_SOCK:
7873 case PTR_TO_TCP_SOCK_OR_NULL:
fada7fdc 7874 case PTR_TO_XDP_SOCK:
f1174f77
EC
7875 /* Only valid matches are exact, which memcmp() above
7876 * would have accepted
7877 */
7878 default:
7879 /* Don't know what's going on, just say it's not safe */
7880 return false;
7881 }
969bf05e 7882
f1174f77
EC
7883 /* Shouldn't get here; if we do, say it's not safe */
7884 WARN_ON_ONCE(1);
969bf05e
AS
7885 return false;
7886}
7887
f4d7e40a
AS
7888static bool stacksafe(struct bpf_func_state *old,
7889 struct bpf_func_state *cur,
638f5b90
AS
7890 struct idpair *idmap)
7891{
7892 int i, spi;
7893
638f5b90
AS
7894 /* walk slots of the explored stack and ignore any additional
7895 * slots in the current stack, since explored(safe) state
7896 * didn't use them
7897 */
7898 for (i = 0; i < old->allocated_stack; i++) {
7899 spi = i / BPF_REG_SIZE;
7900
b233920c
AS
7901 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
7902 i += BPF_REG_SIZE - 1;
cc2b14d5 7903 /* explored state didn't use this */
fd05e57b 7904 continue;
b233920c 7905 }
cc2b14d5 7906
638f5b90
AS
7907 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
7908 continue;
19e2dbb7
AS
7909
7910 /* explored stack has more populated slots than current stack
7911 * and these slots were used
7912 */
7913 if (i >= cur->allocated_stack)
7914 return false;
7915
cc2b14d5
AS
7916 /* if old state was safe with misc data in the stack
7917 * it will be safe with zero-initialized stack.
7918 * The opposite is not true
7919 */
7920 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
7921 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
7922 continue;
638f5b90
AS
7923 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
7924 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
7925 /* Ex: old explored (safe) state has STACK_SPILL in
7926 * this stack slot, but current has STACK_MISC ->
7927 * these verifier states are not equivalent,
7928 * return false to continue verification of this path
7929 */
7930 return false;
7931 if (i % BPF_REG_SIZE)
7932 continue;
7933 if (old->stack[spi].slot_type[0] != STACK_SPILL)
7934 continue;
7935 if (!regsafe(&old->stack[spi].spilled_ptr,
7936 &cur->stack[spi].spilled_ptr,
7937 idmap))
7938 /* when explored and current stack slot are both storing
7939 * spilled registers, check that the stored pointer types
7940 * are the same as well.
7941 * Ex: explored safe path could have stored
7942 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
7943 * but current path has stored:
7944 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
7945 * such verifier states are not equivalent.
7946 * return false to continue verification of this path
7947 */
7948 return false;
7949 }
7950 return true;
7951}
7952
fd978bf7
JS
7953static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
7954{
7955 if (old->acquired_refs != cur->acquired_refs)
7956 return false;
7957 return !memcmp(old->refs, cur->refs,
7958 sizeof(*old->refs) * old->acquired_refs);
7959}
7960
f1bca824
AS
7961/* compare two verifier states
7962 *
7963 * all states stored in state_list are known to be valid, since
7964 * verifier reached 'bpf_exit' instruction through them
7965 *
7966 * this function is called when verifier exploring different branches of
7967 * execution popped from the state stack. If it sees an old state that has
7968 * more strict register state and more strict stack state then this execution
7969 * branch doesn't need to be explored further, since verifier already
7970 * concluded that more strict state leads to valid finish.
7971 *
7972 * Therefore two states are equivalent if register state is more conservative
7973 * and explored stack state is more conservative than the current one.
7974 * Example:
7975 * explored current
7976 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
7977 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
7978 *
7979 * In other words, if the current stack state (the one being explored) has more
7980 * valid slots than the old one that already passed validation, it means
7981 * the verifier can stop exploring and conclude that the current state is valid too
7982 *
7983 * Similarly with registers. If explored state has register type as invalid
7984 * whereas register type in current state is meaningful, it means that
7985 * the current state will reach 'bpf_exit' instruction safely
7986 */
f4d7e40a
AS
7987static bool func_states_equal(struct bpf_func_state *old,
7988 struct bpf_func_state *cur)
f1bca824 7989{
f1174f77
EC
7990 struct idpair *idmap;
7991 bool ret = false;
f1bca824
AS
7992 int i;
7993
f1174f77
EC
7994 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
7995 /* If we failed to allocate the idmap, just say it's not safe */
7996 if (!idmap)
1a0dc1ac 7997 return false;
f1174f77
EC
7998
7999 for (i = 0; i < MAX_BPF_REG; i++) {
1b688a19 8000 if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
f1174f77 8001 goto out_free;
f1bca824
AS
8002 }
8003
638f5b90
AS
8004 if (!stacksafe(old, cur, idmap))
8005 goto out_free;
fd978bf7
JS
8006
8007 if (!refsafe(old, cur))
8008 goto out_free;
f1174f77
EC
8009 ret = true;
8010out_free:
8011 kfree(idmap);
8012 return ret;
f1bca824
AS
8013}
8014
f4d7e40a
AS
8015static bool states_equal(struct bpf_verifier_env *env,
8016 struct bpf_verifier_state *old,
8017 struct bpf_verifier_state *cur)
8018{
8019 int i;
8020
8021 if (old->curframe != cur->curframe)
8022 return false;
8023
979d63d5
DB
8024 /* Verification state from speculative execution simulation
8025 * must never prune a non-speculative execution one.
8026 */
8027 if (old->speculative && !cur->speculative)
8028 return false;
8029
d83525ca
AS
8030 if (old->active_spin_lock != cur->active_spin_lock)
8031 return false;
8032
f4d7e40a
AS
8033 /* for states to be equal callsites have to be the same
8034 * and all frame states need to be equivalent
8035 */
8036 for (i = 0; i <= old->curframe; i++) {
8037 if (old->frame[i]->callsite != cur->frame[i]->callsite)
8038 return false;
8039 if (!func_states_equal(old->frame[i], cur->frame[i]))
8040 return false;
8041 }
8042 return true;
8043}
8044
5327ed3d
JW
8045/* Return 0 if no propagation happened. Return negative error code if error
8046 * happened. Otherwise, return the propagated bit.
8047 */
55e7f3b5
JW
8048static int propagate_liveness_reg(struct bpf_verifier_env *env,
8049 struct bpf_reg_state *reg,
8050 struct bpf_reg_state *parent_reg)
8051{
5327ed3d
JW
8052 u8 parent_flag = parent_reg->live & REG_LIVE_READ;
8053 u8 flag = reg->live & REG_LIVE_READ;
55e7f3b5
JW
8054 int err;
8055
5327ed3d
JW
8056 /* When we get here, the read flags of PARENT_REG or REG could be any of
8057 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
8058 * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
8059 */
8060 if (parent_flag == REG_LIVE_READ64 ||
8061 /* Or if there is no read flag from REG. */
8062 !flag ||
8063 /* Or if the read flag from REG is the same as PARENT_REG. */
8064 parent_flag == flag)
55e7f3b5
JW
8065 return 0;
8066
5327ed3d 8067 err = mark_reg_read(env, reg, parent_reg, flag);
55e7f3b5
JW
8068 if (err)
8069 return err;
8070
5327ed3d 8071 return flag;
55e7f3b5
JW
8072}
8073
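The early-return conditions above amount to a small pure function; a minimal sketch (the LIVE_* values below are simplified placeholders, not the kernel's REG_LIVE_* encoding) makes the rule explicit: nothing is propagated when the parent already carries the strongest READ64 mark, when the child carries no read mark, or when both marks are equal.

#define LIVE_NONE	0
#define LIVE_READ32	1
#define LIVE_READ64	2

/* returns the read mark that must be copied up to the parent, or LIVE_NONE */
static int read_mark_to_propagate(int parent_flag, int child_flag)
{
	if (parent_flag == LIVE_READ64 || !child_flag || parent_flag == child_flag)
		return LIVE_NONE;
	return child_flag;
}

int main(void)
{
	/* a READ32 mark in the child upgrades a parent that has no mark yet */
	return read_mark_to_propagate(LIVE_NONE, LIVE_READ32) == LIVE_READ32 ? 0 : 1;
}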
8e9cd9ce 8074/* A write screens off any subsequent reads; but write marks come from the
f4d7e40a
AS
8075 * straight-line code between a state and its parent. When we arrive at an
8076 * equivalent state (jump target or such) we didn't arrive by the straight-line
8077 * code, so read marks in the state must propagate to the parent regardless
8078 * of the state's write marks. That's what 'parent == state->parent' comparison
679c782d 8079 * in mark_reg_read() is for.
8e9cd9ce 8080 */
f4d7e40a
AS
8081static int propagate_liveness(struct bpf_verifier_env *env,
8082 const struct bpf_verifier_state *vstate,
8083 struct bpf_verifier_state *vparent)
dc503a8a 8084{
3f8cafa4 8085 struct bpf_reg_state *state_reg, *parent_reg;
f4d7e40a 8086 struct bpf_func_state *state, *parent;
3f8cafa4 8087 int i, frame, err = 0;
dc503a8a 8088
f4d7e40a
AS
8089 if (vparent->curframe != vstate->curframe) {
8090 WARN(1, "propagate_live: parent frame %d current frame %d\n",
8091 vparent->curframe, vstate->curframe);
8092 return -EFAULT;
8093 }
dc503a8a
EC
8094 /* Propagate read liveness of registers... */
8095 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
83d16312 8096 for (frame = 0; frame <= vstate->curframe; frame++) {
3f8cafa4
JW
8097 parent = vparent->frame[frame];
8098 state = vstate->frame[frame];
8099 parent_reg = parent->regs;
8100 state_reg = state->regs;
83d16312
JK
8101 /* We don't need to worry about FP liveness, it's read-only */
8102 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
55e7f3b5
JW
8103 err = propagate_liveness_reg(env, &state_reg[i],
8104 &parent_reg[i]);
5327ed3d 8105 if (err < 0)
3f8cafa4 8106 return err;
5327ed3d
JW
8107 if (err == REG_LIVE_READ64)
8108 mark_insn_zext(env, &parent_reg[i]);
dc503a8a 8109 }
f4d7e40a 8110
1b04aee7 8111 /* Propagate stack slots. */
f4d7e40a
AS
8112 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
8113 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
3f8cafa4
JW
8114 parent_reg = &parent->stack[i].spilled_ptr;
8115 state_reg = &state->stack[i].spilled_ptr;
55e7f3b5
JW
8116 err = propagate_liveness_reg(env, state_reg,
8117 parent_reg);
5327ed3d 8118 if (err < 0)
3f8cafa4 8119 return err;
dc503a8a
EC
8120 }
8121 }
5327ed3d 8122 return 0;
dc503a8a
EC
8123}
8124
a3ce685d
AS
8125/* find precise scalars in the previous equivalent state and
8126 * propagate them into the current state
8127 */
8128static int propagate_precision(struct bpf_verifier_env *env,
8129 const struct bpf_verifier_state *old)
8130{
8131 struct bpf_reg_state *state_reg;
8132 struct bpf_func_state *state;
8133 int i, err = 0;
8134
8135 state = old->frame[old->curframe];
8136 state_reg = state->regs;
8137 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
8138 if (state_reg->type != SCALAR_VALUE ||
8139 !state_reg->precise)
8140 continue;
8141 if (env->log.level & BPF_LOG_LEVEL2)
8142 verbose(env, "propagating r%d\n", i);
8143 err = mark_chain_precision(env, i);
8144 if (err < 0)
8145 return err;
8146 }
8147
8148 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
8149 if (state->stack[i].slot_type[0] != STACK_SPILL)
8150 continue;
8151 state_reg = &state->stack[i].spilled_ptr;
8152 if (state_reg->type != SCALAR_VALUE ||
8153 !state_reg->precise)
8154 continue;
8155 if (env->log.level & BPF_LOG_LEVEL2)
8156 verbose(env, "propagating fp%d\n",
8157 (-i - 1) * BPF_REG_SIZE);
8158 err = mark_chain_precision_stack(env, i);
8159 if (err < 0)
8160 return err;
8161 }
8162 return 0;
8163}
8164
2589726d
AS
8165static bool states_maybe_looping(struct bpf_verifier_state *old,
8166 struct bpf_verifier_state *cur)
8167{
8168 struct bpf_func_state *fold, *fcur;
8169 int i, fr = cur->curframe;
8170
8171 if (old->curframe != fr)
8172 return false;
8173
8174 fold = old->frame[fr];
8175 fcur = cur->frame[fr];
8176 for (i = 0; i < MAX_BPF_REG; i++)
8177 if (memcmp(&fold->regs[i], &fcur->regs[i],
8178 offsetof(struct bpf_reg_state, parent)))
8179 return false;
8180 return true;
8181}
8182
8183
58e2af8b 8184static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
f1bca824 8185{
58e2af8b 8186 struct bpf_verifier_state_list *new_sl;
9f4686c4 8187 struct bpf_verifier_state_list *sl, **pprev;
679c782d 8188 struct bpf_verifier_state *cur = env->cur_state, *new;
ceefbc96 8189 int i, j, err, states_cnt = 0;
10d274e8 8190 bool add_new_state = env->test_state_freq ? true : false;
f1bca824 8191
b5dc0163 8192 cur->last_insn_idx = env->prev_insn_idx;
a8f500af 8193 if (!env->insn_aux_data[insn_idx].prune_point)
f1bca824
AS
8194 /* this 'insn_idx' instruction wasn't marked, so we will not
8195 * be doing state search here
8196 */
8197 return 0;
8198
2589726d
AS
8199 /* bpf progs typically have a pruning point every 4 instructions
8200 * http://vger.kernel.org/bpfconf2019.html#session-1
8201 * Do not add a new state for future pruning if the verifier hasn't seen
8202 * at least 2 jumps and at least 8 instructions.
8203 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
8204 * In tests that amounts to up to a 50% reduction in total verifier
8205 * memory consumption and a 20% verifier time speedup.
8206 */
8207 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
8208 env->insn_processed - env->prev_insn_processed >= 8)
8209 add_new_state = true;
8210
a8f500af
AS
8211 pprev = explored_state(env, insn_idx);
8212 sl = *pprev;
8213
9242b5f5
AS
8214 clean_live_states(env, insn_idx, cur);
8215
a8f500af 8216 while (sl) {
dc2a4ebc
AS
8217 states_cnt++;
8218 if (sl->state.insn_idx != insn_idx)
8219 goto next;
2589726d
AS
8220 if (sl->state.branches) {
8221 if (states_maybe_looping(&sl->state, cur) &&
8222 states_equal(env, &sl->state, cur)) {
8223 verbose_linfo(env, insn_idx, "; ");
8224 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
8225 return -EINVAL;
8226 }
8227 /* if the verifier is processing a loop, avoid adding new state
8228 * too often, since different loop iterations have distinct
8229 * states and may not help future pruning.
8230 * This threshold shouldn't be too low, to make sure that
8231 * a loop with a large bound is rejected quickly.
8232 * The most abusive loop will be:
8233 * r1 += 1
8234 * if r1 < 1000000 goto pc-2
8235 * 1M insn_processed limit / 100 == 10k peak states.
8236 * This threshold shouldn't be too high either, since states
8237 * at the end of the loop are likely to be useful in pruning.
8238 */
8239 if (env->jmps_processed - env->prev_jmps_processed < 20 &&
8240 env->insn_processed - env->prev_insn_processed < 100)
8241 add_new_state = false;
8242 goto miss;
8243 }
638f5b90 8244 if (states_equal(env, &sl->state, cur)) {
9f4686c4 8245 sl->hit_cnt++;
f1bca824 8246 /* reached equivalent register/stack state,
dc503a8a
EC
8247 * prune the search.
8248 * Registers read by the continuation are read by us.
8e9cd9ce
EC
8249 * If we have any write marks in env->cur_state, they
8250 * will prevent corresponding reads in the continuation
8251 * from reaching our parent (an explored_state). Our
8252 * own state will get the read marks recorded, but
8253 * they'll be immediately forgotten as we're pruning
8254 * this state and will pop a new one.
f1bca824 8255 */
f4d7e40a 8256 err = propagate_liveness(env, &sl->state, cur);
a3ce685d
AS
8257
8258 /* if previous state reached the exit with precision and
8259 * current state is equivalent to it (except precision marks)
8260 * the precision needs to be propagated back in
8261 * the current state.
8262 */
8263 err = err ? : push_jmp_history(env, cur);
8264 err = err ? : propagate_precision(env, &sl->state);
f4d7e40a
AS
8265 if (err)
8266 return err;
f1bca824 8267 return 1;
dc503a8a 8268 }
2589726d
AS
8269miss:
8270 /* when new state is not going to be added do not increase miss count.
8271 * Otherwise several loop iterations will remove the state
8272 * recorded earlier. The goal of these heuristics is to have
8273 * states from some iterations of the loop (some in the beginning
8274 * and some at the end) to help pruning.
8275 */
8276 if (add_new_state)
8277 sl->miss_cnt++;
9f4686c4
AS
8278 /* heuristic to determine whether this state is beneficial
8279 * to keep checking from state equivalence point of view.
8280 * Higher numbers increase max_states_per_insn and verification time,
8281 * but do not meaningfully decrease insn_processed.
8282 */
8283 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
8284 /* the state is unlikely to be useful. Remove it to
8285 * speed up verification
8286 */
8287 *pprev = sl->next;
8288 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
2589726d
AS
8289 u32 br = sl->state.branches;
8290
8291 WARN_ONCE(br,
8292 "BUG live_done but branches_to_explore %d\n",
8293 br);
9f4686c4
AS
8294 free_verifier_state(&sl->state, false);
8295 kfree(sl);
8296 env->peak_states--;
8297 } else {
8298 /* cannot free this state, since parentage chain may
8299 * walk it later. Add it for free_list instead to
8300 * be freed at the end of verification
8301 */
8302 sl->next = env->free_list;
8303 env->free_list = sl;
8304 }
8305 sl = *pprev;
8306 continue;
8307 }
dc2a4ebc 8308next:
9f4686c4
AS
8309 pprev = &sl->next;
8310 sl = *pprev;
f1bca824
AS
8311 }
8312
06ee7115
AS
8313 if (env->max_states_per_insn < states_cnt)
8314 env->max_states_per_insn = states_cnt;
8315
ceefbc96 8316 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
b5dc0163 8317 return push_jmp_history(env, cur);
ceefbc96 8318
2589726d 8319 if (!add_new_state)
b5dc0163 8320 return push_jmp_history(env, cur);
ceefbc96 8321
2589726d
AS
8322 /* There were no equivalent states, remember the current one.
8323 * Technically the current state is not proven to be safe yet,
f4d7e40a 8324 * but it will either reach the outermost bpf_exit (which means it's safe)
2589726d 8325 * or it will be rejected. When there are no loops the verifier won't be
f4d7e40a 8326 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
2589726d
AS
8327 * again on the way to bpf_exit.
8328 * When looping the sl->state.branches will be > 0 and this state
8329 * will not be considered for equivalence until branches == 0.
f1bca824 8330 */
638f5b90 8331 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
f1bca824
AS
8332 if (!new_sl)
8333 return -ENOMEM;
06ee7115
AS
8334 env->total_states++;
8335 env->peak_states++;
2589726d
AS
8336 env->prev_jmps_processed = env->jmps_processed;
8337 env->prev_insn_processed = env->insn_processed;
f1bca824
AS
8338
8339 /* add new state to the head of linked list */
679c782d
EC
8340 new = &new_sl->state;
8341 err = copy_verifier_state(new, cur);
1969db47 8342 if (err) {
679c782d 8343 free_verifier_state(new, false);
1969db47
AS
8344 kfree(new_sl);
8345 return err;
8346 }
dc2a4ebc 8347 new->insn_idx = insn_idx;
2589726d
AS
8348 WARN_ONCE(new->branches != 1,
8349 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
b5dc0163 8350
2589726d 8351 cur->parent = new;
b5dc0163
AS
8352 cur->first_insn_idx = insn_idx;
8353 clear_jmp_history(cur);
5d839021
AS
8354 new_sl->next = *explored_state(env, insn_idx);
8355 *explored_state(env, insn_idx) = new_sl;
7640ead9
JK
8356 /* connect new state to parentage chain. Current frame needs all
8357 * registers connected. Only r6 - r9 of the callers are alive (pushed
8358 * to the stack implicitly by JITs) so in callers' frames connect just
8359 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
8360 * the state of the call instruction (with WRITTEN set), and r0 comes
8361 * from callee with its full parentage chain, anyway.
8362 */
8e9cd9ce
EC
8363 /* clear write marks in current state: the writes we did are not writes
8364 * our child did, so they don't screen off its reads from us.
8365 * (There are no read marks in current state, because reads always mark
8366 * their parent and current state never has children yet. Only
8367 * explored_states can get read marks.)
8368 */
eea1c227
AS
8369 for (j = 0; j <= cur->curframe; j++) {
8370 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
8371 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
8372 for (i = 0; i < BPF_REG_FP; i++)
8373 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
8374 }
f4d7e40a
AS
8375
8376 /* all stack frames are accessible from callee, clear them all */
8377 for (j = 0; j <= cur->curframe; j++) {
8378 struct bpf_func_state *frame = cur->frame[j];
679c782d 8379 struct bpf_func_state *newframe = new->frame[j];
f4d7e40a 8380
679c782d 8381 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
cc2b14d5 8382 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
679c782d
EC
8383 frame->stack[i].spilled_ptr.parent =
8384 &newframe->stack[i].spilled_ptr;
8385 }
f4d7e40a 8386 }
f1bca824
AS
8387 return 0;
8388}
8389
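The two heuristics used above are easy to check with plain arithmetic (a userspace sketch with invented numbers, not kernel code): a stored state is evicted once miss_cnt > hit_cnt * 3 + 3, so a state that never pruned anything is evicted on its 4th miss while one that pruned twice is evicted only on its 10th; and since a loop adds at most one new state per ~100 processed instructions, the 1M insn_processed budget bounds peak states at roughly 10k.

#include <stdbool.h>
#include <stdio.h>

/* same shape as the eviction test in is_state_visited(); the counters are examples */
static bool should_evict(unsigned int miss_cnt, unsigned int hit_cnt)
{
	return miss_cnt > hit_cnt * 3 + 3;
}

int main(void)
{
	printf("hit=0 miss=4  -> evict=%d\n", should_evict(4, 0));	/* 1 */
	printf("hit=2 miss=10 -> evict=%d\n", should_evict(10, 2));	/* 1, 9 misses were not enough */
	return 0;
}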
c64b7983
JS
8390/* Return true if it's OK to have the same insn return a different type. */
8391static bool reg_type_mismatch_ok(enum bpf_reg_type type)
8392{
8393 switch (type) {
8394 case PTR_TO_CTX:
8395 case PTR_TO_SOCKET:
8396 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
8397 case PTR_TO_SOCK_COMMON:
8398 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
8399 case PTR_TO_TCP_SOCK:
8400 case PTR_TO_TCP_SOCK_OR_NULL:
fada7fdc 8401 case PTR_TO_XDP_SOCK:
2a02759e 8402 case PTR_TO_BTF_ID:
c64b7983
JS
8403 return false;
8404 default:
8405 return true;
8406 }
8407}
8408
8409/* If an instruction was previously used with particular pointer types, then we
8410 * need to be careful to avoid cases such as the below, where it may be ok
8411 * for one branch accessing the pointer, but not ok for the other branch:
8412 *
8413 * R1 = sock_ptr
8414 * goto X;
8415 * ...
8416 * R1 = some_other_valid_ptr;
8417 * goto X;
8418 * ...
8419 * R2 = *(u32 *)(R1 + 0);
8420 */
8421static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
8422{
8423 return src != prev && (!reg_type_mismatch_ok(src) ||
8424 !reg_type_mismatch_ok(prev));
8425}
8426
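A hypothetical instruction sequence (not taken from the kernel or its selftests) showing the shape of program the comment above warns about, written with the BPF_* insn macros from <linux/filter.h>: both branches converge on a single load, once with R1 still holding the ctx pointer from program entry and once with R1 pointing at the stack. That is the situation reg_type_mismatch() guards against; other checks, such as the one on the stack access itself, may of course reject the program first.

#include <linux/filter.h>	/* struct bpf_insn and the BPF_* insn macros */

static void reg_type_mismatch_pattern(void)
{
	struct bpf_insn insns[] = {
		BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),		/* if r2 == 0 goto X (r1 = ctx)  */
		BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),		/* else r1 = fp (stack pointer)  */
		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),	/* X: r2 = *(u32 *)(r1 + 0)      */
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};

	(void)insns;	/* illustrative only; a real loader would pass this to BPF_PROG_LOAD */
}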
58e2af8b 8427static int do_check(struct bpf_verifier_env *env)
17a52670 8428{
6f8a57cc 8429 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
51c39bb1 8430 struct bpf_verifier_state *state = env->cur_state;
17a52670 8431 struct bpf_insn *insns = env->prog->insnsi;
638f5b90 8432 struct bpf_reg_state *regs;
06ee7115 8433 int insn_cnt = env->prog->len;
17a52670 8434 bool do_print_state = false;
b5dc0163 8435 int prev_insn_idx = -1;
17a52670 8436
17a52670
AS
8437 for (;;) {
8438 struct bpf_insn *insn;
8439 u8 class;
8440 int err;
8441
b5dc0163 8442 env->prev_insn_idx = prev_insn_idx;
c08435ec 8443 if (env->insn_idx >= insn_cnt) {
61bd5218 8444 verbose(env, "invalid insn idx %d insn_cnt %d\n",
c08435ec 8445 env->insn_idx, insn_cnt);
17a52670
AS
8446 return -EFAULT;
8447 }
8448
c08435ec 8449 insn = &insns[env->insn_idx];
17a52670
AS
8450 class = BPF_CLASS(insn->code);
8451
06ee7115 8452 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
61bd5218
JK
8453 verbose(env,
8454 "BPF program is too large. Processed %d insn\n",
06ee7115 8455 env->insn_processed);
17a52670
AS
8456 return -E2BIG;
8457 }
8458
c08435ec 8459 err = is_state_visited(env, env->insn_idx);
f1bca824
AS
8460 if (err < 0)
8461 return err;
8462 if (err == 1) {
8463 /* found equivalent state, can prune the search */
06ee7115 8464 if (env->log.level & BPF_LOG_LEVEL) {
f1bca824 8465 if (do_print_state)
979d63d5
DB
8466 verbose(env, "\nfrom %d to %d%s: safe\n",
8467 env->prev_insn_idx, env->insn_idx,
8468 env->cur_state->speculative ?
8469 " (speculative execution)" : "");
f1bca824 8470 else
c08435ec 8471 verbose(env, "%d: safe\n", env->insn_idx);
f1bca824
AS
8472 }
8473 goto process_bpf_exit;
8474 }
8475
c3494801
AS
8476 if (signal_pending(current))
8477 return -EAGAIN;
8478
3c2ce60b
DB
8479 if (need_resched())
8480 cond_resched();
8481
06ee7115
AS
8482 if (env->log.level & BPF_LOG_LEVEL2 ||
8483 (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
8484 if (env->log.level & BPF_LOG_LEVEL2)
c08435ec 8485 verbose(env, "%d:", env->insn_idx);
c5fc9692 8486 else
979d63d5
DB
8487 verbose(env, "\nfrom %d to %d%s:",
8488 env->prev_insn_idx, env->insn_idx,
8489 env->cur_state->speculative ?
8490 " (speculative execution)" : "");
f4d7e40a 8491 print_verifier_state(env, state->frame[state->curframe]);
17a52670
AS
8492 do_print_state = false;
8493 }
8494
06ee7115 8495 if (env->log.level & BPF_LOG_LEVEL) {
7105e828
DB
8496 const struct bpf_insn_cbs cbs = {
8497 .cb_print = verbose,
abe08840 8498 .private_data = env,
7105e828
DB
8499 };
8500
c08435ec
DB
8501 verbose_linfo(env, env->insn_idx, "; ");
8502 verbose(env, "%d: ", env->insn_idx);
abe08840 8503 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
17a52670
AS
8504 }
8505
cae1927c 8506 if (bpf_prog_is_dev_bound(env->prog->aux)) {
c08435ec
DB
8507 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
8508 env->prev_insn_idx);
cae1927c
JK
8509 if (err)
8510 return err;
8511 }
13a27dfc 8512
638f5b90 8513 regs = cur_regs(env);
51c39bb1 8514 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
b5dc0163 8515 prev_insn_idx = env->insn_idx;
fd978bf7 8516
17a52670 8517 if (class == BPF_ALU || class == BPF_ALU64) {
1be7f75d 8518 err = check_alu_op(env, insn);
17a52670
AS
8519 if (err)
8520 return err;
8521
8522 } else if (class == BPF_LDX) {
3df126f3 8523 enum bpf_reg_type *prev_src_type, src_reg_type;
9bac3d6d
AS
8524
8525 /* check for reserved fields is already done */
8526
17a52670 8527 /* check src operand */
dc503a8a 8528 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
8529 if (err)
8530 return err;
8531
dc503a8a 8532 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
8533 if (err)
8534 return err;
8535
725f9dcd
AS
8536 src_reg_type = regs[insn->src_reg].type;
8537
17a52670
AS
8538 /* check that memory (src_reg + off) is readable,
8539 * the state of dst_reg will be updated by this func
8540 */
c08435ec
DB
8541 err = check_mem_access(env, env->insn_idx, insn->src_reg,
8542 insn->off, BPF_SIZE(insn->code),
8543 BPF_READ, insn->dst_reg, false);
17a52670
AS
8544 if (err)
8545 return err;
8546
c08435ec 8547 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
3df126f3
JK
8548
8549 if (*prev_src_type == NOT_INIT) {
9bac3d6d
AS
8550 /* saw a valid insn
8551 * dst_reg = *(u32 *)(src_reg + off)
3df126f3 8552 * save type to validate intersecting paths
9bac3d6d 8553 */
3df126f3 8554 *prev_src_type = src_reg_type;
9bac3d6d 8555
c64b7983 8556 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9bac3d6d
AS
8557 /* Abuser program is trying to use the same insn
8558 * dst_reg = *(u32*) (src_reg + off)
8559 * with different pointer types:
8560 * src_reg == ctx in one branch and
8561 * src_reg == stack|map in some other branch.
8562 * Reject it.
8563 */
61bd5218 8564 verbose(env, "same insn cannot be used with different pointers\n");
9bac3d6d
AS
8565 return -EINVAL;
8566 }
8567
17a52670 8568 } else if (class == BPF_STX) {
3df126f3 8569 enum bpf_reg_type *prev_dst_type, dst_reg_type;
d691f9e8 8570
17a52670 8571 if (BPF_MODE(insn->code) == BPF_XADD) {
c08435ec 8572 err = check_xadd(env, env->insn_idx, insn);
17a52670
AS
8573 if (err)
8574 return err;
c08435ec 8575 env->insn_idx++;
17a52670
AS
8576 continue;
8577 }
8578
17a52670 8579 /* check src1 operand */
dc503a8a 8580 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
8581 if (err)
8582 return err;
8583 /* check src2 operand */
dc503a8a 8584 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
8585 if (err)
8586 return err;
8587
d691f9e8
AS
8588 dst_reg_type = regs[insn->dst_reg].type;
8589
17a52670 8590 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
8591 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
8592 insn->off, BPF_SIZE(insn->code),
8593 BPF_WRITE, insn->src_reg, false);
17a52670
AS
8594 if (err)
8595 return err;
8596
c08435ec 8597 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
3df126f3
JK
8598
8599 if (*prev_dst_type == NOT_INIT) {
8600 *prev_dst_type = dst_reg_type;
c64b7983 8601 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
61bd5218 8602 verbose(env, "same insn cannot be used with different pointers\n");
d691f9e8
AS
8603 return -EINVAL;
8604 }
8605
17a52670
AS
8606 } else if (class == BPF_ST) {
8607 if (BPF_MODE(insn->code) != BPF_MEM ||
8608 insn->src_reg != BPF_REG_0) {
61bd5218 8609 verbose(env, "BPF_ST uses reserved fields\n");
17a52670
AS
8610 return -EINVAL;
8611 }
8612 /* check src operand */
dc503a8a 8613 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
8614 if (err)
8615 return err;
8616
f37a8cb8 8617 if (is_ctx_reg(env, insn->dst_reg)) {
9d2be44a 8618 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
2a159c6f
DB
8619 insn->dst_reg,
8620 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
8621 return -EACCES;
8622 }
8623
17a52670 8624 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
8625 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
8626 insn->off, BPF_SIZE(insn->code),
8627 BPF_WRITE, -1, false);
17a52670
AS
8628 if (err)
8629 return err;
8630
092ed096 8631 } else if (class == BPF_JMP || class == BPF_JMP32) {
17a52670
AS
8632 u8 opcode = BPF_OP(insn->code);
8633
2589726d 8634 env->jmps_processed++;
17a52670
AS
8635 if (opcode == BPF_CALL) {
8636 if (BPF_SRC(insn->code) != BPF_K ||
8637 insn->off != 0 ||
f4d7e40a
AS
8638 (insn->src_reg != BPF_REG_0 &&
8639 insn->src_reg != BPF_PSEUDO_CALL) ||
092ed096
JW
8640 insn->dst_reg != BPF_REG_0 ||
8641 class == BPF_JMP32) {
61bd5218 8642 verbose(env, "BPF_CALL uses reserved fields\n");
17a52670
AS
8643 return -EINVAL;
8644 }
8645
d83525ca
AS
8646 if (env->cur_state->active_spin_lock &&
8647 (insn->src_reg == BPF_PSEUDO_CALL ||
8648 insn->imm != BPF_FUNC_spin_unlock)) {
8649 verbose(env, "function calls are not allowed while holding a lock\n");
8650 return -EINVAL;
8651 }
f4d7e40a 8652 if (insn->src_reg == BPF_PSEUDO_CALL)
c08435ec 8653 err = check_func_call(env, insn, &env->insn_idx);
f4d7e40a 8654 else
c08435ec 8655 err = check_helper_call(env, insn->imm, env->insn_idx);
17a52670
AS
8656 if (err)
8657 return err;
8658
8659 } else if (opcode == BPF_JA) {
8660 if (BPF_SRC(insn->code) != BPF_K ||
8661 insn->imm != 0 ||
8662 insn->src_reg != BPF_REG_0 ||
092ed096
JW
8663 insn->dst_reg != BPF_REG_0 ||
8664 class == BPF_JMP32) {
61bd5218 8665 verbose(env, "BPF_JA uses reserved fields\n");
17a52670
AS
8666 return -EINVAL;
8667 }
8668
c08435ec 8669 env->insn_idx += insn->off + 1;
17a52670
AS
8670 continue;
8671
8672 } else if (opcode == BPF_EXIT) {
8673 if (BPF_SRC(insn->code) != BPF_K ||
8674 insn->imm != 0 ||
8675 insn->src_reg != BPF_REG_0 ||
092ed096
JW
8676 insn->dst_reg != BPF_REG_0 ||
8677 class == BPF_JMP32) {
61bd5218 8678 verbose(env, "BPF_EXIT uses reserved fields\n");
17a52670
AS
8679 return -EINVAL;
8680 }
8681
d83525ca
AS
8682 if (env->cur_state->active_spin_lock) {
8683 verbose(env, "bpf_spin_unlock is missing\n");
8684 return -EINVAL;
8685 }
8686
f4d7e40a
AS
8687 if (state->curframe) {
8688 /* exit from nested function */
c08435ec 8689 err = prepare_func_exit(env, &env->insn_idx);
f4d7e40a
AS
8690 if (err)
8691 return err;
8692 do_print_state = true;
8693 continue;
8694 }
8695
fd978bf7
JS
8696 err = check_reference_leak(env);
8697 if (err)
8698 return err;
8699
390ee7e2
AS
8700 err = check_return_code(env);
8701 if (err)
8702 return err;
f1bca824 8703process_bpf_exit:
2589726d 8704 update_branch_counts(env, env->cur_state);
b5dc0163 8705 err = pop_stack(env, &prev_insn_idx,
6f8a57cc 8706 &env->insn_idx, pop_log);
638f5b90
AS
8707 if (err < 0) {
8708 if (err != -ENOENT)
8709 return err;
17a52670
AS
8710 break;
8711 } else {
8712 do_print_state = true;
8713 continue;
8714 }
8715 } else {
c08435ec 8716 err = check_cond_jmp_op(env, insn, &env->insn_idx);
17a52670
AS
8717 if (err)
8718 return err;
8719 }
8720 } else if (class == BPF_LD) {
8721 u8 mode = BPF_MODE(insn->code);
8722
8723 if (mode == BPF_ABS || mode == BPF_IND) {
ddd872bc
AS
8724 err = check_ld_abs(env, insn);
8725 if (err)
8726 return err;
8727
17a52670
AS
8728 } else if (mode == BPF_IMM) {
8729 err = check_ld_imm(env, insn);
8730 if (err)
8731 return err;
8732
c08435ec 8733 env->insn_idx++;
51c39bb1 8734 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
17a52670 8735 } else {
61bd5218 8736 verbose(env, "invalid BPF_LD mode\n");
17a52670
AS
8737 return -EINVAL;
8738 }
8739 } else {
61bd5218 8740 verbose(env, "unknown insn class %d\n", class);
17a52670
AS
8741 return -EINVAL;
8742 }
8743
c08435ec 8744 env->insn_idx++;
17a52670
AS
8745 }
8746
8747 return 0;
8748}
8749
56f668df
MKL
8750static int check_map_prealloc(struct bpf_map *map)
8751{
8752 return (map->map_type != BPF_MAP_TYPE_HASH &&
bcc6b1b7
MKL
8753 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
8754 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
56f668df
MKL
8755 !(map->map_flags & BPF_F_NO_PREALLOC);
8756}
8757
d83525ca
AS
8758static bool is_tracing_prog_type(enum bpf_prog_type type)
8759{
8760 switch (type) {
8761 case BPF_PROG_TYPE_KPROBE:
8762 case BPF_PROG_TYPE_TRACEPOINT:
8763 case BPF_PROG_TYPE_PERF_EVENT:
8764 case BPF_PROG_TYPE_RAW_TRACEPOINT:
8765 return true;
8766 default:
8767 return false;
8768 }
8769}
8770
94dacdbd
TG
8771static bool is_preallocated_map(struct bpf_map *map)
8772{
8773 if (!check_map_prealloc(map))
8774 return false;
8775 if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
8776 return false;
8777 return true;
8778}
8779
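From the loader's side, "preallocated" simply means the map was created without BPF_F_NO_PREALLOC. A minimal userspace sketch using the raw bpf(2) syscall (key/value sizes and error handling are arbitrary, illustrative choices) shows the two variants the checks below distinguish.

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_hash_map(__u32 flags)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_HASH;
	attr.key_size    = sizeof(__u32);
	attr.value_size  = sizeof(__u64);
	attr.max_entries = 1024;
	attr.map_flags   = flags;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

int main(void)
{
	int prealloc_fd = create_hash_map(0);			/* usable by tracing programs          */
	int runtime_fd  = create_hash_map(BPF_F_NO_PREALLOC);	/* rejected for perf_event programs    */

	return (prealloc_fd < 0 || runtime_fd < 0) ? 1 : 0;
}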
61bd5218
JK
8780static int check_map_prog_compatibility(struct bpf_verifier_env *env,
8781 struct bpf_map *map,
fdc15d38
AS
8782 struct bpf_prog *prog)
8783
8784{
94dacdbd
TG
8785 /*
8786 * Validate that trace type programs use preallocated hash maps.
8787 *
8788 * For programs attached to PERF events this is mandatory as the
8789 * perf NMI can hit any arbitrary code sequence.
8790 *
8791 * All other trace types using non-preallocated hash maps are unsafe as
8792 * well because tracepoints or kprobes can be inside locked regions
8793 * of the memory allocator or at a place where a recursion into the
8794 * memory allocator would see inconsistent state.
8795 *
2ed905c5
TG
8796 * On RT enabled kernels run-time allocation of all trace type
8797 * programs is strictly prohibited due to lock type constraints. On
8798 * !RT kernels it is allowed for backwards compatibility reasons for
8799 * now, but warnings are emitted so developers are made aware of
8800 * the unsafety and can fix their programs before this is enforced.
56f668df 8801 */
94dacdbd
TG
8802 if (is_tracing_prog_type(prog->type) && !is_preallocated_map(map)) {
8803 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
61bd5218 8804 verbose(env, "perf_event programs can only use preallocated hash map\n");
56f668df
MKL
8805 return -EINVAL;
8806 }
2ed905c5
TG
8807 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
8808 verbose(env, "trace type programs can only use preallocated hash map\n");
8809 return -EINVAL;
8810 }
94dacdbd
TG
8811 WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
8812 verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
fdc15d38 8813 }
a3884572 8814
d83525ca
AS
8815 if ((is_tracing_prog_type(prog->type) ||
8816 prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
8817 map_value_has_spin_lock(map)) {
8818 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
8819 return -EINVAL;
8820 }
8821
a3884572 8822 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
09728266 8823 !bpf_offload_prog_map_match(prog, map)) {
a3884572
JK
8824 verbose(env, "offload device mismatch between prog and map\n");
8825 return -EINVAL;
8826 }
8827
85d33df3
MKL
8828 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
8829 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
8830 return -EINVAL;
8831 }
8832
fdc15d38
AS
8833 return 0;
8834}
8835
b741f163
RG
8836static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
8837{
8838 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
8839 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
8840}
8841
0246e64d
AS
8842/* look for pseudo eBPF instructions that access map FDs and
8843 * replace them with actual map pointers
8844 */
58e2af8b 8845static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
0246e64d
AS
8846{
8847 struct bpf_insn *insn = env->prog->insnsi;
8848 int insn_cnt = env->prog->len;
fdc15d38 8849 int i, j, err;
0246e64d 8850
f1f7714e 8851 err = bpf_prog_calc_tag(env->prog);
aafe6ae9
DB
8852 if (err)
8853 return err;
8854
0246e64d 8855 for (i = 0; i < insn_cnt; i++, insn++) {
9bac3d6d 8856 if (BPF_CLASS(insn->code) == BPF_LDX &&
d691f9e8 8857 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
61bd5218 8858 verbose(env, "BPF_LDX uses reserved fields\n");
9bac3d6d
AS
8859 return -EINVAL;
8860 }
8861
d691f9e8
AS
8862 if (BPF_CLASS(insn->code) == BPF_STX &&
8863 ((BPF_MODE(insn->code) != BPF_MEM &&
8864 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
61bd5218 8865 verbose(env, "BPF_STX uses reserved fields\n");
d691f9e8
AS
8866 return -EINVAL;
8867 }
8868
0246e64d 8869 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
d8eca5bb 8870 struct bpf_insn_aux_data *aux;
0246e64d
AS
8871 struct bpf_map *map;
8872 struct fd f;
d8eca5bb 8873 u64 addr;
0246e64d
AS
8874
8875 if (i == insn_cnt - 1 || insn[1].code != 0 ||
8876 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
8877 insn[1].off != 0) {
61bd5218 8878 verbose(env, "invalid bpf_ld_imm64 insn\n");
0246e64d
AS
8879 return -EINVAL;
8880 }
8881
d8eca5bb 8882 if (insn[0].src_reg == 0)
0246e64d
AS
8883 /* valid generic load 64-bit imm */
8884 goto next_insn;
8885
d8eca5bb
DB
8886 /* In final convert_pseudo_ld_imm64() step, this is
8887 * converted into regular 64-bit imm load insn.
8888 */
8889 if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
8890 insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
8891 (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
8892 insn[1].imm != 0)) {
8893 verbose(env,
8894 "unrecognized bpf_ld_imm64 insn\n");
0246e64d
AS
8895 return -EINVAL;
8896 }
8897
20182390 8898 f = fdget(insn[0].imm);
c2101297 8899 map = __bpf_map_get(f);
0246e64d 8900 if (IS_ERR(map)) {
61bd5218 8901 verbose(env, "fd %d is not pointing to valid bpf_map\n",
20182390 8902 insn[0].imm);
0246e64d
AS
8903 return PTR_ERR(map);
8904 }
8905
61bd5218 8906 err = check_map_prog_compatibility(env, map, env->prog);
fdc15d38
AS
8907 if (err) {
8908 fdput(f);
8909 return err;
8910 }
8911
d8eca5bb
DB
8912 aux = &env->insn_aux_data[i];
8913 if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
8914 addr = (unsigned long)map;
8915 } else {
8916 u32 off = insn[1].imm;
8917
8918 if (off >= BPF_MAX_VAR_OFF) {
8919 verbose(env, "direct value offset of %u is not allowed\n", off);
8920 fdput(f);
8921 return -EINVAL;
8922 }
8923
8924 if (!map->ops->map_direct_value_addr) {
8925 verbose(env, "no direct value access support for this map type\n");
8926 fdput(f);
8927 return -EINVAL;
8928 }
8929
8930 err = map->ops->map_direct_value_addr(map, &addr, off);
8931 if (err) {
8932 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
8933 map->value_size, off);
8934 fdput(f);
8935 return err;
8936 }
8937
8938 aux->map_off = off;
8939 addr += off;
8940 }
8941
8942 insn[0].imm = (u32)addr;
8943 insn[1].imm = addr >> 32;
0246e64d
AS
8944
8945 /* check whether we recorded this map already */
d8eca5bb 8946 for (j = 0; j < env->used_map_cnt; j++) {
0246e64d 8947 if (env->used_maps[j] == map) {
d8eca5bb 8948 aux->map_index = j;
0246e64d
AS
8949 fdput(f);
8950 goto next_insn;
8951 }
d8eca5bb 8952 }
0246e64d
AS
8953
8954 if (env->used_map_cnt >= MAX_USED_MAPS) {
8955 fdput(f);
8956 return -E2BIG;
8957 }
8958
0246e64d
AS
8959 /* hold the map. If the program is rejected by verifier,
8960 * the map will be released by release_maps() or it
8961 * will be used by the valid program until it's unloaded
ab7f5bf0 8962 * and all maps are released in free_used_maps()
0246e64d 8963 */
1e0bd5a0 8964 bpf_map_inc(map);
d8eca5bb
DB
8965
8966 aux->map_index = env->used_map_cnt;
92117d84
AS
8967 env->used_maps[env->used_map_cnt++] = map;
8968
b741f163 8969 if (bpf_map_is_cgroup_storage(map) &&
e4730423 8970 bpf_cgroup_storage_assign(env->prog->aux, map)) {
b741f163 8971 verbose(env, "only one cgroup storage of each type is allowed\n");
de9cbbaa
RG
8972 fdput(f);
8973 return -EBUSY;
8974 }
8975
0246e64d
AS
8976 fdput(f);
8977next_insn:
8978 insn++;
8979 i++;
5e581dad
DB
8980 continue;
8981 }
8982
8983 /* Basic sanity check before we invest more work here. */
8984 if (!bpf_opcode_in_insntable(insn->code)) {
8985 verbose(env, "unknown opcode %02x\n", insn->code);
8986 return -EINVAL;
0246e64d
AS
8987 }
8988 }
8989
8990 /* now all pseudo BPF_LD_IMM64 instructions load valid
8991 * 'struct bpf_map *' into a register instead of user map_fd.
8992 * These pointers will be used later by verifier to validate map access.
8993 */
8994 return 0;
8995}
8996
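On the loader side, the pseudo instruction this pass rewrites is typically emitted with the BPF_LD_MAP_FD() macro: a two-slot BPF_LD_IMM64 whose src_reg is BPF_PSEUDO_MAP_FD and whose immediate carries a process-local map fd. A hypothetical fragment (the fd value is invented) is sketched below; after the pass above runs, the two imm halves hold the kernel's struct bpf_map pointer instead of the fd.

#include <linux/filter.h>	/* BPF_LD_MAP_FD(), struct bpf_insn; userspace loaders ship equivalent macros */

#define EXAMPLE_MAP_FD	3	/* hypothetical fd returned by bpf(BPF_MAP_CREATE, ...) */

static void pseudo_map_fd_example(void)
{
	struct bpf_insn insns[] = {
		BPF_LD_MAP_FD(BPF_REG_1, EXAMPLE_MAP_FD),	/* 2-slot ld_imm64, src_reg = BPF_PSEUDO_MAP_FD */
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};

	(void)insns;	/* illustrative only */
}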
8997/* drop refcnt of maps used by the rejected program */
58e2af8b 8998static void release_maps(struct bpf_verifier_env *env)
0246e64d 8999{
a2ea0746
DB
9000 __bpf_free_used_maps(env->prog->aux, env->used_maps,
9001 env->used_map_cnt);
0246e64d
AS
9002}
9003
9004/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
58e2af8b 9005static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
0246e64d
AS
9006{
9007 struct bpf_insn *insn = env->prog->insnsi;
9008 int insn_cnt = env->prog->len;
9009 int i;
9010
9011 for (i = 0; i < insn_cnt; i++, insn++)
9012 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
9013 insn->src_reg = 0;
9014}
9015
8041902d
AS
9016/* single env->prog->insni[off] instruction was replaced with the range
9017 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
9018 * [0, off) and [off, end) to new locations, so the patched range stays zero
9019 */
b325fbca
JW
9020static int adjust_insn_aux_data(struct bpf_verifier_env *env,
9021 struct bpf_prog *new_prog, u32 off, u32 cnt)
8041902d
AS
9022{
9023 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
b325fbca
JW
9024 struct bpf_insn *insn = new_prog->insnsi;
9025 u32 prog_len;
c131187d 9026 int i;
8041902d 9027
b325fbca
JW
9028 /* aux info at OFF always needs adjustment, no matter whether the fast
9029 * path (cnt == 1) is taken or not. There is no guarantee the insn at OFF
9030 * is the original insn of the old prog.
9031 */
9032 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
9033
8041902d
AS
9034 if (cnt == 1)
9035 return 0;
b325fbca 9036 prog_len = new_prog->len;
fad953ce
KC
9037 new_data = vzalloc(array_size(prog_len,
9038 sizeof(struct bpf_insn_aux_data)));
8041902d
AS
9039 if (!new_data)
9040 return -ENOMEM;
9041 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
9042 memcpy(new_data + off + cnt - 1, old_data + off,
9043 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
b325fbca 9044 for (i = off; i < off + cnt - 1; i++) {
51c39bb1 9045 new_data[i].seen = env->pass_cnt;
b325fbca
JW
9046 new_data[i].zext_dst = insn_has_def32(env, insn + i);
9047 }
8041902d
AS
9048 env->insn_aux_data = new_data;
9049 vfree(old_data);
9050 return 0;
9051}
9052
cc8b0b92
AS
9053static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
9054{
9055 int i;
9056
9057 if (len == 1)
9058 return;
4cb3d99c
JW
9059 /* NOTE: fake 'exit' subprog should be updated as well. */
9060 for (i = 0; i <= env->subprog_cnt; i++) {
afd59424 9061 if (env->subprog_info[i].start <= off)
cc8b0b92 9062 continue;
9c8105bd 9063 env->subprog_info[i].start += len - 1;
cc8b0b92
AS
9064 }
9065}
9066
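A quick numeric illustration of the adjustment above (plain userspace sketch; the start offsets are invented): replacing one insn at offset 10 with a 3-insn patch grows the image by len - 1 == 2, so every subprog that starts after offset 10 shifts by 2.

#include <stdio.h>

int main(void)
{
	unsigned int starts[] = { 0, 8, 12, 40 };	/* hypothetical subprog start offsets */
	unsigned int off = 10, len = 3, i;

	for (i = 0; i < sizeof(starts) / sizeof(starts[0]); i++)
		if (starts[i] > off)
			starts[i] += len - 1;

	for (i = 0; i < sizeof(starts) / sizeof(starts[0]); i++)
		printf("%u ", starts[i]);	/* prints: 0 8 14 42 */
	printf("\n");
	return 0;
}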
8041902d
AS
9067static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
9068 const struct bpf_insn *patch, u32 len)
9069{
9070 struct bpf_prog *new_prog;
9071
9072 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
4f73379e
AS
9073 if (IS_ERR(new_prog)) {
9074 if (PTR_ERR(new_prog) == -ERANGE)
9075 verbose(env,
9076 "insn %d cannot be patched due to 16-bit range\n",
9077 env->insn_aux_data[off].orig_idx);
8041902d 9078 return NULL;
4f73379e 9079 }
b325fbca 9080 if (adjust_insn_aux_data(env, new_prog, off, len))
8041902d 9081 return NULL;
cc8b0b92 9082 adjust_subprog_starts(env, off, len);
8041902d
AS
9083 return new_prog;
9084}
9085
52875a04
JK
9086static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
9087 u32 off, u32 cnt)
9088{
9089 int i, j;
9090
9091 /* find first prog starting at or after off (first to remove) */
9092 for (i = 0; i < env->subprog_cnt; i++)
9093 if (env->subprog_info[i].start >= off)
9094 break;
9095 /* find first prog starting at or after off + cnt (first to stay) */
9096 for (j = i; j < env->subprog_cnt; j++)
9097 if (env->subprog_info[j].start >= off + cnt)
9098 break;
9099 /* if j doesn't start exactly at off + cnt, we are just removing
9100 * the front of the previous prog
9101 */
9102 if (env->subprog_info[j].start != off + cnt)
9103 j--;
9104
9105 if (j > i) {
9106 struct bpf_prog_aux *aux = env->prog->aux;
9107 int move;
9108
9109 /* move fake 'exit' subprog as well */
9110 move = env->subprog_cnt + 1 - j;
9111
9112 memmove(env->subprog_info + i,
9113 env->subprog_info + j,
9114 sizeof(*env->subprog_info) * move);
9115 env->subprog_cnt -= j - i;
9116
9117 /* remove func_info */
9118 if (aux->func_info) {
9119 move = aux->func_info_cnt - j;
9120
9121 memmove(aux->func_info + i,
9122 aux->func_info + j,
9123 sizeof(*aux->func_info) * move);
9124 aux->func_info_cnt -= j - i;
9125 /* func_info->insn_off is set after all code rewrites,
9126 * in adjust_btf_func() - no need to adjust
9127 */
9128 }
9129 } else {
9130 /* convert i from "first prog to remove" to "first to adjust" */
9131 if (env->subprog_info[i].start == off)
9132 i++;
9133 }
9134
9135 /* update fake 'exit' subprog as well */
9136 for (; i <= env->subprog_cnt; i++)
9137 env->subprog_info[i].start -= cnt;
9138
9139 return 0;
9140}
9141
9142static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
9143 u32 cnt)
9144{
9145 struct bpf_prog *prog = env->prog;
9146 u32 i, l_off, l_cnt, nr_linfo;
9147 struct bpf_line_info *linfo;
9148
9149 nr_linfo = prog->aux->nr_linfo;
9150 if (!nr_linfo)
9151 return 0;
9152
9153 linfo = prog->aux->linfo;
9154
9155 /* find first line info to remove, count lines to be removed */
9156 for (i = 0; i < nr_linfo; i++)
9157 if (linfo[i].insn_off >= off)
9158 break;
9159
9160 l_off = i;
9161 l_cnt = 0;
9162 for (; i < nr_linfo; i++)
9163 if (linfo[i].insn_off < off + cnt)
9164 l_cnt++;
9165 else
9166 break;
9167
9168 /* If the first live insn doesn't match the first live linfo, it needs to
9169 * "inherit" the last removed linfo. prog is already modified, so prog->len == off
9170 * means there are no live instructions after it (the tail of the program was removed).
9171 */
9172 if (prog->len != off && l_cnt &&
9173 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
9174 l_cnt--;
9175 linfo[--i].insn_off = off + cnt;
9176 }
9177
9178 /* remove the line info which refer to the removed instructions */
9179 if (l_cnt) {
9180 memmove(linfo + l_off, linfo + i,
9181 sizeof(*linfo) * (nr_linfo - i));
9182
9183 prog->aux->nr_linfo -= l_cnt;
9184 nr_linfo = prog->aux->nr_linfo;
9185 }
9186
9187 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
9188 for (i = l_off; i < nr_linfo; i++)
9189 linfo[i].insn_off -= cnt;
9190
9191 /* fix up all subprogs (incl. 'exit') which start >= off */
9192 for (i = 0; i <= env->subprog_cnt; i++)
9193 if (env->subprog_info[i].linfo_idx > l_off) {
9194 /* program may have started in the removed region but
9195 * may not be fully removed
9196 */
9197 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
9198 env->subprog_info[i].linfo_idx -= l_cnt;
9199 else
9200 env->subprog_info[i].linfo_idx = l_off;
9201 }
9202
9203 return 0;
9204}
9205
9206static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
9207{
9208 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
9209 unsigned int orig_prog_len = env->prog->len;
9210 int err;
9211
08ca90af
JK
9212 if (bpf_prog_is_dev_bound(env->prog->aux))
9213 bpf_prog_offload_remove_insns(env, off, cnt);
9214
52875a04
JK
9215 err = bpf_remove_insns(env->prog, off, cnt);
9216 if (err)
9217 return err;
9218
9219 err = adjust_subprog_starts_after_remove(env, off, cnt);
9220 if (err)
9221 return err;
9222
9223 err = bpf_adj_linfo_after_remove(env, off, cnt);
9224 if (err)
9225 return err;
9226
9227 memmove(aux_data + off, aux_data + off + cnt,
9228 sizeof(*aux_data) * (orig_prog_len - off - cnt));
9229
9230 return 0;
9231}
9232
2a5418a1
DB
9233/* The verifier does more data flow analysis than llvm and will not
9234 * explore branches that are dead at run time. Malicious programs can
9235 * have dead code too. Therefore replace all dead at-run-time code
9236 * with 'ja -1'.
9237 *
9238 * Plain nops would not be optimal: if they sat at the end of the
9239 * program and, through another bug, we managed to jump there, we
9240 * would execute beyond program memory. Returning an exception
9241 * code also wouldn't work, since we can have subprogs where the dead
9242 * code could be located.
c131187d
AS
9243 */
9244static void sanitize_dead_code(struct bpf_verifier_env *env)
9245{
9246 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
2a5418a1 9247 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
c131187d
AS
9248 struct bpf_insn *insn = env->prog->insnsi;
9249 const int insn_cnt = env->prog->len;
9250 int i;
9251
9252 for (i = 0; i < insn_cnt; i++) {
9253 if (aux_data[i].seen)
9254 continue;
2a5418a1 9255 memcpy(insn + i, &trap, sizeof(trap));
c131187d
AS
9256 }
9257}
9258
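At the instruction level the rewrite above looks like the pair below (an illustrative before/after, not the kernel's buffers): a dead insn that was never marked as seen is overwritten with 'ja -1', a jump whose target resolves back to the jump itself, so even a stray jump into the dead region can never run past the patched slot.

	/* before: unreachable insn left over after verification */
	BPF_MOV64_IMM(BPF_REG_0, 1),
	/* after: self-targeting jump; off = -1 makes pc point at this insn again */
	BPF_JMP_IMM(BPF_JA, 0, 0, -1),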
e2ae4ca2
JK
9259static bool insn_is_cond_jump(u8 code)
9260{
9261 u8 op;
9262
092ed096
JW
9263 if (BPF_CLASS(code) == BPF_JMP32)
9264 return true;
9265
e2ae4ca2
JK
9266 if (BPF_CLASS(code) != BPF_JMP)
9267 return false;
9268
9269 op = BPF_OP(code);
9270 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
9271}
9272
9273static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
9274{
9275 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
9276 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
9277 struct bpf_insn *insn = env->prog->insnsi;
9278 const int insn_cnt = env->prog->len;
9279 int i;
9280
9281 for (i = 0; i < insn_cnt; i++, insn++) {
9282 if (!insn_is_cond_jump(insn->code))
9283 continue;
9284
9285 if (!aux_data[i + 1].seen)
9286 ja.off = insn->off;
9287 else if (!aux_data[i + 1 + insn->off].seen)
9288 ja.off = 0;
9289 else
9290 continue;
9291
08ca90af
JK
9292 if (bpf_prog_is_dev_bound(env->prog->aux))
9293 bpf_prog_offload_replace_insn(env, i, &ja);
9294
e2ae4ca2
JK
9295 memcpy(insn, &ja, sizeof(ja));
9296 }
9297}
9298
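The two rewrites performed above, shown on hypothetical instructions: when the fall-through insn was never seen, the conditional jump keeps its offset but becomes unconditional; when the jump target was never seen, it becomes an unconditional fall-through (ja +0), which opt_remove_nops() can later delete.

	/* fall-through dead: always take the branch */
	/* before */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
	/* after  */ BPF_JMP_IMM(BPF_JA, 0, 0, 5),

	/* jump target dead: never take the branch */
	/* before */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
	/* after  */ BPF_JMP_IMM(BPF_JA, 0, 0, 0),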
52875a04
JK
9299static int opt_remove_dead_code(struct bpf_verifier_env *env)
9300{
9301 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
9302 int insn_cnt = env->prog->len;
9303 int i, err;
9304
9305 for (i = 0; i < insn_cnt; i++) {
9306 int j;
9307
9308 j = 0;
9309 while (i + j < insn_cnt && !aux_data[i + j].seen)
9310 j++;
9311 if (!j)
9312 continue;
9313
9314 err = verifier_remove_insns(env, i, j);
9315 if (err)
9316 return err;
9317 insn_cnt = env->prog->len;
9318 }
9319
9320 return 0;
9321}
9322
a1b14abc
JK
9323static int opt_remove_nops(struct bpf_verifier_env *env)
9324{
9325 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
9326 struct bpf_insn *insn = env->prog->insnsi;
9327 int insn_cnt = env->prog->len;
9328 int i, err;
9329
9330 for (i = 0; i < insn_cnt; i++) {
9331 if (memcmp(&insn[i], &ja, sizeof(ja)))
9332 continue;
9333
9334 err = verifier_remove_insns(env, i, 1);
9335 if (err)
9336 return err;
9337 insn_cnt--;
9338 i--;
9339 }
9340
9341 return 0;
9342}
9343
d6c2308c
JW
9344static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
9345 const union bpf_attr *attr)
a4b1d3c1 9346{
d6c2308c 9347 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
a4b1d3c1 9348 struct bpf_insn_aux_data *aux = env->insn_aux_data;
d6c2308c 9349 int i, patch_len, delta = 0, len = env->prog->len;
a4b1d3c1 9350 struct bpf_insn *insns = env->prog->insnsi;
a4b1d3c1 9351 struct bpf_prog *new_prog;
d6c2308c 9352 bool rnd_hi32;
a4b1d3c1 9353
d6c2308c 9354 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
a4b1d3c1 9355 zext_patch[1] = BPF_ZEXT_REG(0);
d6c2308c
JW
9356 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
9357 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
9358 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
a4b1d3c1
JW
9359 for (i = 0; i < len; i++) {
9360 int adj_idx = i + delta;
9361 struct bpf_insn insn;
9362
d6c2308c
JW
9363 insn = insns[adj_idx];
9364 if (!aux[adj_idx].zext_dst) {
9365 u8 code, class;
9366 u32 imm_rnd;
9367
9368 if (!rnd_hi32)
9369 continue;
9370
9371 code = insn.code;
9372 class = BPF_CLASS(code);
9373 if (insn_no_def(&insn))
9374 continue;
9375
9376 /* NOTE: arg "reg" (the fourth one) is only used for
9377 * BPF_STX, which has been ruled out by the check above,
9378 * so it is safe to pass NULL here.
9379 */
9380 if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
9381 if (class == BPF_LD &&
9382 BPF_MODE(code) == BPF_IMM)
9383 i++;
9384 continue;
9385 }
9386
9387 /* ctx load could be transformed into wider load. */
9388 if (class == BPF_LDX &&
9389 aux[adj_idx].ptr_type == PTR_TO_CTX)
9390 continue;
9391
9392 imm_rnd = get_random_int();
9393 rnd_hi32_patch[0] = insn;
9394 rnd_hi32_patch[1].imm = imm_rnd;
9395 rnd_hi32_patch[3].dst_reg = insn.dst_reg;
9396 patch = rnd_hi32_patch;
9397 patch_len = 4;
9398 goto apply_patch_buffer;
9399 }
9400
9401 if (!bpf_jit_needs_zext())
a4b1d3c1
JW
9402 continue;
9403
a4b1d3c1
JW
9404 zext_patch[0] = insn;
9405 zext_patch[1].dst_reg = insn.dst_reg;
9406 zext_patch[1].src_reg = insn.dst_reg;
d6c2308c
JW
9407 patch = zext_patch;
9408 patch_len = 2;
9409apply_patch_buffer:
9410 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
a4b1d3c1
JW
9411 if (!new_prog)
9412 return -ENOMEM;
9413 env->prog = new_prog;
9414 insns = new_prog->insnsi;
9415 aux = env->insn_aux_data;
d6c2308c 9416 delta += patch_len - 1;
a4b1d3c1
JW
9417 }
9418
9419 return 0;
9420}
9421
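The zext_patch built above, applied to a hypothetical 32-bit ALU instruction: when bpf_jit_needs_zext() reports that the JIT does not zero-extend 32-bit results by itself, the verifier appends an explicit zero-extending move (BPF_ZEXT_REG is a special mov32 encoding) right after the defining insn.

	/* original insn: w1 += w2, defines only the low 32 bits of r1 */
	BPF_ALU32_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
	/* inserted patch: explicitly zero the upper 32 bits of r1 */
	BPF_ZEXT_REG(BPF_REG_1),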
c64b7983
JS
9422/* convert load instructions that access fields of a context type into a
9423 * sequence of instructions that access fields of the underlying structure:
9424 * struct __sk_buff -> struct sk_buff
9425 * struct bpf_sock_ops -> struct sock
9bac3d6d 9426 */
58e2af8b 9427static int convert_ctx_accesses(struct bpf_verifier_env *env)
9bac3d6d 9428{
00176a34 9429 const struct bpf_verifier_ops *ops = env->ops;
f96da094 9430 int i, cnt, size, ctx_field_size, delta = 0;
3df126f3 9431 const int insn_cnt = env->prog->len;
36bbef52 9432 struct bpf_insn insn_buf[16], *insn;
46f53a65 9433 u32 target_size, size_default, off;
9bac3d6d 9434 struct bpf_prog *new_prog;
d691f9e8 9435 enum bpf_access_type type;
f96da094 9436 bool is_narrower_load;
9bac3d6d 9437
b09928b9
DB
9438 if (ops->gen_prologue || env->seen_direct_write) {
9439 if (!ops->gen_prologue) {
9440 verbose(env, "bpf verifier is misconfigured\n");
9441 return -EINVAL;
9442 }
36bbef52
DB
9443 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
9444 env->prog);
9445 if (cnt >= ARRAY_SIZE(insn_buf)) {
61bd5218 9446 verbose(env, "bpf verifier is misconfigured\n");
36bbef52
DB
9447 return -EINVAL;
9448 } else if (cnt) {
8041902d 9449 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
36bbef52
DB
9450 if (!new_prog)
9451 return -ENOMEM;
8041902d 9452
36bbef52 9453 env->prog = new_prog;
3df126f3 9454 delta += cnt - 1;
36bbef52
DB
9455 }
9456 }
9457
c64b7983 9458 if (bpf_prog_is_dev_bound(env->prog->aux))
9bac3d6d
AS
9459 return 0;
9460
3df126f3 9461 insn = env->prog->insnsi + delta;
36bbef52 9462
9bac3d6d 9463 for (i = 0; i < insn_cnt; i++, insn++) {
c64b7983
JS
9464 bpf_convert_ctx_access_t convert_ctx_access;
9465
62c7989b
DB
9466 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
9467 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
9468 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
ea2e7ce5 9469 insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
d691f9e8 9470 type = BPF_READ;
62c7989b
DB
9471 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
9472 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
9473 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
ea2e7ce5 9474 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
d691f9e8
AS
9475 type = BPF_WRITE;
9476 else
9bac3d6d
AS
9477 continue;
9478
af86ca4e
AS
9479 if (type == BPF_WRITE &&
9480 env->insn_aux_data[i + delta].sanitize_stack_off) {
9481 struct bpf_insn patch[] = {
9482 /* Sanitize suspicious stack slot with zero.
9483 * There are no memory dependencies for this store,
9484 * since it's only using frame pointer and immediate
9485 * constant of zero
9486 */
9487 BPF_ST_MEM(BPF_DW, BPF_REG_FP,
9488 env->insn_aux_data[i + delta].sanitize_stack_off,
9489 0),
9490 /* the original STX instruction will immediately
9491 * overwrite the same stack slot with appropriate value
9492 */
9493 *insn,
9494 };
9495
9496 cnt = ARRAY_SIZE(patch);
9497 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
9498 if (!new_prog)
9499 return -ENOMEM;
9500
9501 delta += cnt - 1;
9502 env->prog = new_prog;
9503 insn = new_prog->insnsi + i + delta;
9504 continue;
9505 }
9506
c64b7983
JS
9507 switch (env->insn_aux_data[i + delta].ptr_type) {
9508 case PTR_TO_CTX:
9509 if (!ops->convert_ctx_access)
9510 continue;
9511 convert_ctx_access = ops->convert_ctx_access;
9512 break;
9513 case PTR_TO_SOCKET:
46f8bc92 9514 case PTR_TO_SOCK_COMMON:
c64b7983
JS
9515 convert_ctx_access = bpf_sock_convert_ctx_access;
9516 break;
655a51e5
MKL
9517 case PTR_TO_TCP_SOCK:
9518 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
9519 break;
fada7fdc
JL
9520 case PTR_TO_XDP_SOCK:
9521 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
9522 break;
2a02759e 9523 case PTR_TO_BTF_ID:
27ae7997
MKL
9524 if (type == BPF_READ) {
9525 insn->code = BPF_LDX | BPF_PROBE_MEM |
9526 BPF_SIZE((insn)->code);
9527 env->prog->aux->num_exentries++;
9528 } else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
2a02759e
AS
9529 verbose(env, "Writes through BTF pointers are not allowed\n");
9530 return -EINVAL;
9531 }
2a02759e 9532 continue;
c64b7983 9533 default:
9bac3d6d 9534 continue;
c64b7983 9535 }
9bac3d6d 9536
31fd8581 9537 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
f96da094 9538 size = BPF_LDST_BYTES(insn);
31fd8581
YS
9539
9540 /* If the read access is a narrower load of the field,
9541 * convert to a 4/8-byte load, to minimize program type specific
9542 * convert_ctx_access changes. If the conversion is successful,
9543 * we will apply the proper mask to the result.
9544 */
f96da094 9545 is_narrower_load = size < ctx_field_size;
46f53a65
AI
9546 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
9547 off = insn->off;
31fd8581 9548 if (is_narrower_load) {
f96da094
DB
9549 u8 size_code;
9550
9551 if (type == BPF_WRITE) {
61bd5218 9552 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
f96da094
DB
9553 return -EINVAL;
9554 }
31fd8581 9555
f96da094 9556 size_code = BPF_H;
31fd8581
YS
9557 if (ctx_field_size == 4)
9558 size_code = BPF_W;
9559 else if (ctx_field_size == 8)
9560 size_code = BPF_DW;
f96da094 9561
bc23105c 9562 insn->off = off & ~(size_default - 1);
31fd8581
YS
9563 insn->code = BPF_LDX | BPF_MEM | size_code;
9564 }
f96da094
DB
9565
9566 target_size = 0;
c64b7983
JS
9567 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
9568 &target_size);
f96da094
DB
9569 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
9570 (ctx_field_size && !target_size)) {
61bd5218 9571 verbose(env, "bpf verifier is misconfigured\n");
9bac3d6d
AS
9572 return -EINVAL;
9573 }
f96da094
DB
9574
9575 if (is_narrower_load && size < target_size) {
d895a0f1
IL
9576 u8 shift = bpf_ctx_narrow_access_offset(
9577 off, size, size_default) * 8;
46f53a65
AI
9578 if (ctx_field_size <= 4) {
9579 if (shift)
9580 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
9581 insn->dst_reg,
9582 shift);
31fd8581 9583 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
f96da094 9584 (1 << size * 8) - 1);
46f53a65
AI
9585 } else {
9586 if (shift)
9587 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
9588 insn->dst_reg,
9589 shift);
31fd8581 9590 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
e2f7fc0a 9591 (1ULL << size * 8) - 1);
46f53a65 9592 }
31fd8581 9593 }
9bac3d6d 9594
8041902d 9595 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9bac3d6d
AS
9596 if (!new_prog)
9597 return -ENOMEM;
9598
3df126f3 9599 delta += cnt - 1;
9bac3d6d
AS
9600
9601 /* keep walking new program and skip insns we just inserted */
9602 env->prog = new_prog;
3df126f3 9603 insn = new_prog->insnsi + i + delta;
9bac3d6d
AS
9604 }
9605
9606 return 0;
9607}
9608
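A worked example of the narrow-load fixup above as a small userspace sketch (little-endian layout assumed; the offsets are invented): a 1-byte read at byte 2 of a 4-byte ctx field is widened to an aligned 4-byte read, then shifted right by 16 bits and masked with 0xff to recover the byte the program asked for.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t off = 2, size = 1, size_default = 4;	/* narrow 1-byte load inside a 4-byte field */
	uint32_t aligned_off = off & ~(size_default - 1);
	uint32_t shift = (off & (size_default - 1)) * 8;	/* little-endian case */
	uint64_t mask = (1ULL << size * 8) - 1;

	printf("load %u bytes at off %u, then (val >> %u) & %#llx\n",
	       size_default, aligned_off, shift, (unsigned long long)mask);
	return 0;
}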
1c2a088a
AS
9609static int jit_subprogs(struct bpf_verifier_env *env)
9610{
9611 struct bpf_prog *prog = env->prog, **func, *tmp;
9612 int i, j, subprog_start, subprog_end = 0, len, subprog;
7105e828 9613 struct bpf_insn *insn;
1c2a088a 9614 void *old_bpf_func;
c454a46b 9615 int err;
1c2a088a 9616
f910cefa 9617 if (env->subprog_cnt <= 1)
1c2a088a
AS
9618 return 0;
9619
7105e828 9620 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1c2a088a
AS
9621 if (insn->code != (BPF_JMP | BPF_CALL) ||
9622 insn->src_reg != BPF_PSEUDO_CALL)
9623 continue;
c7a89784
DB
9624 /* Upon error here we cannot fall back to interpreter but
9625 * need a hard reject of the program. Thus -EFAULT is
9626 * propagated in any case.
9627 */
1c2a088a
AS
9628 subprog = find_subprog(env, i + insn->imm + 1);
9629 if (subprog < 0) {
9630 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
9631 i + insn->imm + 1);
9632 return -EFAULT;
9633 }
9634 /* temporarily remember subprog id inside insn instead of
9635 * aux_data, since next loop will split up all insns into funcs
9636 */
f910cefa 9637 insn->off = subprog;
1c2a088a
AS
9638 /* remember original imm in case JIT fails and fallback
9639 * to interpreter will be needed
9640 */
9641 env->insn_aux_data[i].call_imm = insn->imm;
9642 /* point imm to __bpf_call_base+1 from JITs point of view */
9643 insn->imm = 1;
9644 }
9645
c454a46b
MKL
9646 err = bpf_prog_alloc_jited_linfo(prog);
9647 if (err)
9648 goto out_undo_insn;
9649
9650 err = -ENOMEM;
6396bb22 9651 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
1c2a088a 9652 if (!func)
c7a89784 9653 goto out_undo_insn;
1c2a088a 9654
f910cefa 9655 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a 9656 subprog_start = subprog_end;
4cb3d99c 9657 subprog_end = env->subprog_info[i + 1].start;
1c2a088a
AS
9658
9659 len = subprog_end - subprog_start;
492ecee8
AS
9660 /* BPF_PROG_RUN doesn't call subprogs directly,
9661 * hence main prog stats include the runtime of subprogs.
9662 * subprogs don't have IDs and are not reachable via prog_get_next_id,
9663 * so func[i]->aux->stats will never be accessed and stays NULL
9664 */
9665 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
1c2a088a
AS
9666 if (!func[i])
9667 goto out_free;
9668 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
9669 len * sizeof(struct bpf_insn));
4f74d809 9670 func[i]->type = prog->type;
1c2a088a 9671 func[i]->len = len;
4f74d809
DB
9672 if (bpf_prog_calc_tag(func[i]))
9673 goto out_free;
1c2a088a 9674 func[i]->is_func = 1;
ba64e7d8
YS
9675 func[i]->aux->func_idx = i;
9676 /* the btf and func_info will be freed only at prog->aux */
9677 func[i]->aux->btf = prog->aux->btf;
9678 func[i]->aux->func_info = prog->aux->func_info;
9679
1c2a088a
AS
9680 /* Use bpf_prog_F_tag to indicate functions in stack traces.
9681 * Long term would need debug info to populate names
9682 */
9683 func[i]->aux->name[0] = 'F';
9c8105bd 9684 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1c2a088a 9685 func[i]->jit_requested = 1;
c454a46b
MKL
9686 func[i]->aux->linfo = prog->aux->linfo;
9687 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
9688 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
9689 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
1c2a088a
AS
9690 func[i] = bpf_int_jit_compile(func[i]);
9691 if (!func[i]->jited) {
9692 err = -ENOTSUPP;
9693 goto out_free;
9694 }
9695 cond_resched();
9696 }
9697 /* at this point all bpf functions were successfully JITed
9698 * now populate all bpf_calls with correct addresses and
9699 * run last pass of JIT
9700 */
f910cefa 9701 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
9702 insn = func[i]->insnsi;
9703 for (j = 0; j < func[i]->len; j++, insn++) {
9704 if (insn->code != (BPF_JMP | BPF_CALL) ||
9705 insn->src_reg != BPF_PSEUDO_CALL)
9706 continue;
9707 subprog = insn->off;
0d306c31
PB
9708 insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
9709 __bpf_call_base;
1c2a088a 9710 }
2162fed4
SD
9711
9712 /* we use the aux data to keep a list of the start addresses
9713 * of the JITed images for each function in the program
9714 *
9715 * for some architectures, such as powerpc64, the imm field
9716 * might not be large enough to hold the offset of the start
9717 * address of the callee's JITed image from __bpf_call_base
9718 *
9719 * in such cases, we can lookup the start address of a callee
9720 * by using its subprog id, available from the off field of
9721 * the call instruction, as an index for this list
9722 */
9723 func[i]->aux->func = func;
9724 func[i]->aux->func_cnt = env->subprog_cnt;
1c2a088a 9725 }
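/* Illustrative sketch (not part of the original source): a 64-bit JIT
 * whose imm field cannot hold the displacement from __bpf_call_base can
 * resolve a callee through the list published above, roughly:
 *
 *   struct bpf_prog *callee = prog->aux->func[insn->off];
 *   u64 target = (u64)(unsigned long)callee->bpf_func;
 *
 * since insn->off still carries the callee's subprog index at this point.
 */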
f910cefa 9726 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
9727 old_bpf_func = func[i]->bpf_func;
9728 tmp = bpf_int_jit_compile(func[i]);
9729 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
9730 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
c7a89784 9731 err = -ENOTSUPP;
1c2a088a
AS
9732 goto out_free;
9733 }
9734 cond_resched();
9735 }
9736
9737 /* finally lock prog and jit images for all functions and
9738 * populate kallsyms
9739 */
f910cefa 9740 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
9741 bpf_prog_lock_ro(func[i]);
9742 bpf_prog_kallsyms_add(func[i]);
9743 }
7105e828
DB
9744
9745 /* Last step: make the now-unused interpreter insns from the main
9746 * prog consistent for later dump requests, so they can
9747 * later look the same as if they were interpreted only.
9748 */
9749 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7105e828
DB
9750 if (insn->code != (BPF_JMP | BPF_CALL) ||
9751 insn->src_reg != BPF_PSEUDO_CALL)
9752 continue;
9753 insn->off = env->insn_aux_data[i].call_imm;
9754 subprog = find_subprog(env, i + insn->off + 1);
dbecd738 9755 insn->imm = subprog;
7105e828
DB
9756 }
9757
1c2a088a
AS
9758 prog->jited = 1;
9759 prog->bpf_func = func[0]->bpf_func;
9760 prog->aux->func = func;
f910cefa 9761 prog->aux->func_cnt = env->subprog_cnt;
c454a46b 9762 bpf_prog_free_unused_jited_linfo(prog);
1c2a088a
AS
9763 return 0;
9764out_free:
f910cefa 9765 for (i = 0; i < env->subprog_cnt; i++)
1c2a088a
AS
9766 if (func[i])
9767 bpf_jit_free(func[i]);
9768 kfree(func);
c7a89784 9769out_undo_insn:
1c2a088a
AS
9770 /* cleanup main prog to be interpreted */
9771 prog->jit_requested = 0;
9772 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
9773 if (insn->code != (BPF_JMP | BPF_CALL) ||
9774 insn->src_reg != BPF_PSEUDO_CALL)
9775 continue;
9776 insn->off = 0;
9777 insn->imm = env->insn_aux_data[i].call_imm;
9778 }
c454a46b 9779 bpf_prog_free_jited_linfo(prog);
1c2a088a
AS
9780 return err;
9781}
9782
1ea47e01
AS
9783static int fixup_call_args(struct bpf_verifier_env *env)
9784{
19d28fbd 9785#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
9786 struct bpf_prog *prog = env->prog;
9787 struct bpf_insn *insn = prog->insnsi;
9788 int i, depth;
19d28fbd 9789#endif
e4052d06 9790 int err = 0;
1ea47e01 9791
e4052d06
QM
9792 if (env->prog->jit_requested &&
9793 !bpf_prog_is_dev_bound(env->prog->aux)) {
19d28fbd
DM
9794 err = jit_subprogs(env);
9795 if (err == 0)
1c2a088a 9796 return 0;
c7a89784
DB
9797 if (err == -EFAULT)
9798 return err;
19d28fbd
DM
9799 }
9800#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
9801 for (i = 0; i < prog->len; i++, insn++) {
9802 if (insn->code != (BPF_JMP | BPF_CALL) ||
9803 insn->src_reg != BPF_PSEUDO_CALL)
9804 continue;
9805 depth = get_callee_stack_depth(env, insn, i);
9806 if (depth < 0)
9807 return depth;
9808 bpf_patch_call_args(insn, depth);
9809 }
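/* Illustrative note (not part of the original source): this fallback is
 * only reachable without CONFIG_BPF_JIT_ALWAYS_ON. bpf_patch_call_args()
 * rewrites each remaining pseudo call so the interpreter can dispatch it
 * directly, sizing the callee's frame from the stack depth computed above.
 */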
19d28fbd
DM
9810 err = 0;
9811#endif
9812 return err;
1ea47e01
AS
9813}
9814
79741b3b 9815/* fixup insn->imm field of bpf_call instructions
81ed18ab 9816 * and inline eligible helpers as explicit sequence of BPF instructions
e245c5c6
AS
9817 *
9818 * this function is called after eBPF program passed verification
9819 */
79741b3b 9820static int fixup_bpf_calls(struct bpf_verifier_env *env)
e245c5c6 9821{
79741b3b 9822 struct bpf_prog *prog = env->prog;
d2e4c1e6 9823 bool expect_blinding = bpf_jit_blinding_enabled(prog);
79741b3b 9824 struct bpf_insn *insn = prog->insnsi;
e245c5c6 9825 const struct bpf_func_proto *fn;
79741b3b 9826 const int insn_cnt = prog->len;
09772d92 9827 const struct bpf_map_ops *ops;
c93552c4 9828 struct bpf_insn_aux_data *aux;
81ed18ab
AS
9829 struct bpf_insn insn_buf[16];
9830 struct bpf_prog *new_prog;
9831 struct bpf_map *map_ptr;
d2e4c1e6 9832 int i, ret, cnt, delta = 0;
e245c5c6 9833
79741b3b 9834 for (i = 0; i < insn_cnt; i++, insn++) {
f6b1b3bf
DB
9835 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
9836 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
9837 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
68fda450 9838 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
f6b1b3bf
DB
9839 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
9840 struct bpf_insn mask_and_div[] = {
9841 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
9842 /* Rx div 0 -> 0 */
9843 BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
9844 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
9845 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
9846 *insn,
9847 };
9848 struct bpf_insn mask_and_mod[] = {
9849 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
9850 /* Rx mod 0 -> Rx */
9851 BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
9852 *insn,
9853 };
9854 struct bpf_insn *patchlet;
9855
9856 if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
9857 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
9858 patchlet = mask_and_div + (is64 ? 1 : 0);
9859 cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
9860 } else {
9861 patchlet = mask_and_mod + (is64 ? 1 : 0);
9862 cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
9863 }
9864
9865 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
68fda450
AS
9866 if (!new_prog)
9867 return -ENOMEM;
9868
9869 delta += cnt - 1;
9870 env->prog = prog = new_prog;
9871 insn = new_prog->insnsi + i + delta;
9872 continue;
9873 }
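/* Illustrative semantics of the patch above (not part of the original
 * source): at runtime, register division and modulo now behave as
 *
 *   dst = (src != 0) ? dst / src : 0;    // BPF_DIV
 *   dst = (src != 0) ? dst % src : dst;  // BPF_MOD
 *
 * so a zero divisor can never fault in the interpreter or in JITed code.
 * The leading BPF_MOV32_REG is kept only for the 32-bit forms, where it
 * zero-extends the divisor so the check looks at the low 32 bits only.
 */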
9874
e0cea7ce
DB
9875 if (BPF_CLASS(insn->code) == BPF_LD &&
9876 (BPF_MODE(insn->code) == BPF_ABS ||
9877 BPF_MODE(insn->code) == BPF_IND)) {
9878 cnt = env->ops->gen_ld_abs(insn, insn_buf);
9879 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
9880 verbose(env, "bpf verifier is misconfigured\n");
9881 return -EINVAL;
9882 }
9883
9884 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9885 if (!new_prog)
9886 return -ENOMEM;
9887
9888 delta += cnt - 1;
9889 env->prog = prog = new_prog;
9890 insn = new_prog->insnsi + i + delta;
9891 continue;
9892 }
9893
979d63d5
DB
9894 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
9895 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
9896 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
9897 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
9898 struct bpf_insn insn_buf[16];
9899 struct bpf_insn *patch = &insn_buf[0];
9900 bool issrc, isneg;
9901 u32 off_reg;
9902
9903 aux = &env->insn_aux_data[i + delta];
3612af78
DB
9904 if (!aux->alu_state ||
9905 aux->alu_state == BPF_ALU_NON_POINTER)
979d63d5
DB
9906 continue;
9907
9908 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
9909 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
9910 BPF_ALU_SANITIZE_SRC;
9911
9912 off_reg = issrc ? insn->src_reg : insn->dst_reg;
9913 if (isneg)
9914 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
9915 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
9916 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
9917 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
9918 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
9919 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
9920 if (issrc) {
9921 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
9922 off_reg);
9923 insn->src_reg = BPF_REG_AX;
9924 } else {
9925 *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
9926 BPF_REG_AX);
9927 }
9928 if (isneg)
9929 insn->code = insn->code == code_add ?
9930 code_sub : code_add;
9931 *patch++ = *insn;
9932 if (issrc && isneg)
9933 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
9934 cnt = patch - insn_buf;
9935
9936 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9937 if (!new_prog)
9938 return -ENOMEM;
9939
9940 delta += cnt - 1;
9941 env->prog = prog = new_prog;
9942 insn = new_prog->insnsi + i + delta;
9943 continue;
9944 }
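/* Illustrative sketch (not part of the original source) of what the patch
 * above computes at runtime for the dst-register case, with
 * limit = aux->alu_limit and off the variable offset register:
 *
 *   ax = limit - 1 - off;
 *   ax |= off;        // sign bit set iff off < 0 or off > limit - 1
 *   ax = -ax;
 *   ax s>>= 63;       // ~0 when off is in range (0 if off == 0), else 0
 *   off &= ax;        // out-of-range offsets collapse to 0
 *
 * For the src-register case the masked value is produced in BPF_REG_AX
 * instead. Either way, even a speculatively executed pointer ALU op can
 * only see an offset the verifier accounted for.
 */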
9945
79741b3b
AS
9946 if (insn->code != (BPF_JMP | BPF_CALL))
9947 continue;
cc8b0b92
AS
9948 if (insn->src_reg == BPF_PSEUDO_CALL)
9949 continue;
e245c5c6 9950
79741b3b
AS
9951 if (insn->imm == BPF_FUNC_get_route_realm)
9952 prog->dst_needed = 1;
9953 if (insn->imm == BPF_FUNC_get_prandom_u32)
9954 bpf_user_rnd_init_once();
9802d865
JB
9955 if (insn->imm == BPF_FUNC_override_return)
9956 prog->kprobe_override = 1;
79741b3b 9957 if (insn->imm == BPF_FUNC_tail_call) {
7b9f6da1
DM
9958 /* If we tail call into other programs, we
9959 * cannot make any assumptions since they can
9960 * be replaced dynamically during runtime in
9961 * the program array.
9962 */
9963 prog->cb_access = 1;
80a58d02 9964 env->prog->aux->stack_depth = MAX_BPF_STACK;
e647815a 9965 env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
7b9f6da1 9966
79741b3b
AS
9967 /* mark bpf_tail_call as different opcode to avoid
9968 * conditional branch in the interpreter for every normal
9969 * call and to prevent accidental JITing by JIT compiler
9970 * that doesn't support bpf_tail_call yet
e245c5c6 9971 */
79741b3b 9972 insn->imm = 0;
71189fa9 9973 insn->code = BPF_JMP | BPF_TAIL_CALL;
b2157399 9974
c93552c4 9975 aux = &env->insn_aux_data[i + delta];
cc52d914
DB
9976 if (env->allow_ptr_leaks && !expect_blinding &&
9977 prog->jit_requested &&
d2e4c1e6
DB
9978 !bpf_map_key_poisoned(aux) &&
9979 !bpf_map_ptr_poisoned(aux) &&
9980 !bpf_map_ptr_unpriv(aux)) {
9981 struct bpf_jit_poke_descriptor desc = {
9982 .reason = BPF_POKE_REASON_TAIL_CALL,
9983 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
9984 .tail_call.key = bpf_map_key_immediate(aux),
9985 };
9986
9987 ret = bpf_jit_add_poke_descriptor(prog, &desc);
9988 if (ret < 0) {
9989 verbose(env, "adding tail call poke descriptor failed\n");
9990 return ret;
9991 }
9992
9993 insn->imm = ret + 1;
9994 continue;
9995 }
9996
c93552c4
DB
9997 if (!bpf_map_ptr_unpriv(aux))
9998 continue;
9999
b2157399
AS
10000 /* instead of changing every JIT dealing with tail_call
10001 * emit two extra insns:
10002 * if (index >= max_entries) goto out;
10003 * index &= array->index_mask;
10004 * to avoid out-of-bounds cpu speculation
10005 */
c93552c4 10006 if (bpf_map_ptr_poisoned(aux)) {
40950343 10007 verbose(env, "tail_call abusing map_ptr\n");
b2157399
AS
10008 return -EINVAL;
10009 }
c93552c4 10010
d2e4c1e6 10011 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
b2157399
AS
10012 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
10013 map_ptr->max_entries, 2);
10014 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
10015 container_of(map_ptr,
10016 struct bpf_array,
10017 map)->index_mask);
10018 insn_buf[2] = *insn;
10019 cnt = 3;
10020 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
10021 if (!new_prog)
10022 return -ENOMEM;
10023
10024 delta += cnt - 1;
10025 env->prog = prog = new_prog;
10026 insn = new_prog->insnsi + i + delta;
79741b3b
AS
10027 continue;
10028 }
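/* Illustrative note (not part of the original source): two different
 * treatments of bpf_tail_call() happen above. When the map and key are
 * constant and not poisoned (and the prog is JITed without blinding), a
 * poke descriptor is registered and its index + 1 is stashed in
 * insn->imm (so imm == 0 keeps meaning the generic, indirect tail call),
 * letting the JIT emit a direct, patchable jump to the target prog.
 * Otherwise, for unprivileged map pointers, an explicit bounds check plus
 * index_mask AND is emitted so the CPU cannot speculate past max_entries.
 */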
e245c5c6 10029
89c63074 10030 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
09772d92
DB
10031 * and other inlining handlers are currently limited to 64 bit
10032 * only.
89c63074 10033 */
60b58afc 10034 if (prog->jit_requested && BITS_PER_LONG == 64 &&
09772d92
DB
10035 (insn->imm == BPF_FUNC_map_lookup_elem ||
10036 insn->imm == BPF_FUNC_map_update_elem ||
84430d42
DB
10037 insn->imm == BPF_FUNC_map_delete_elem ||
10038 insn->imm == BPF_FUNC_map_push_elem ||
10039 insn->imm == BPF_FUNC_map_pop_elem ||
10040 insn->imm == BPF_FUNC_map_peek_elem)) {
c93552c4
DB
10041 aux = &env->insn_aux_data[i + delta];
10042 if (bpf_map_ptr_poisoned(aux))
10043 goto patch_call_imm;
10044
d2e4c1e6 10045 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
09772d92
DB
10046 ops = map_ptr->ops;
10047 if (insn->imm == BPF_FUNC_map_lookup_elem &&
10048 ops->map_gen_lookup) {
10049 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
10050 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
10051 verbose(env, "bpf verifier is misconfigured\n");
10052 return -EINVAL;
10053 }
81ed18ab 10054
09772d92
DB
10055 new_prog = bpf_patch_insn_data(env, i + delta,
10056 insn_buf, cnt);
10057 if (!new_prog)
10058 return -ENOMEM;
81ed18ab 10059
09772d92
DB
10060 delta += cnt - 1;
10061 env->prog = prog = new_prog;
10062 insn = new_prog->insnsi + i + delta;
10063 continue;
10064 }
81ed18ab 10065
09772d92
DB
10066 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
10067 (void *(*)(struct bpf_map *map, void *key))NULL));
10068 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
10069 (int (*)(struct bpf_map *map, void *key))NULL));
10070 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
10071 (int (*)(struct bpf_map *map, void *key, void *value,
10072 u64 flags))NULL));
84430d42
DB
10073 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
10074 (int (*)(struct bpf_map *map, void *value,
10075 u64 flags))NULL));
10076 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
10077 (int (*)(struct bpf_map *map, void *value))NULL));
10078 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
10079 (int (*)(struct bpf_map *map, void *value))NULL));
10080
09772d92
DB
10081 switch (insn->imm) {
10082 case BPF_FUNC_map_lookup_elem:
10083 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
10084 __bpf_call_base;
10085 continue;
10086 case BPF_FUNC_map_update_elem:
10087 insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
10088 __bpf_call_base;
10089 continue;
10090 case BPF_FUNC_map_delete_elem:
10091 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
10092 __bpf_call_base;
10093 continue;
84430d42
DB
10094 case BPF_FUNC_map_push_elem:
10095 insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
10096 __bpf_call_base;
10097 continue;
10098 case BPF_FUNC_map_pop_elem:
10099 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
10100 __bpf_call_base;
10101 continue;
10102 case BPF_FUNC_map_peek_elem:
10103 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
10104 __bpf_call_base;
10105 continue;
09772d92 10106 }
81ed18ab 10107
09772d92 10108 goto patch_call_imm;
81ed18ab
AS
10109 }
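/* Illustrative note (not part of the original source): for the map
 * helpers handled above, insn->imm ends up holding the offset of the
 * map's own ops callback (e.g. ops->map_lookup_elem) from
 * __bpf_call_base, so the generated code calls the map implementation
 * directly instead of going through the generic helper wrapper; maps that
 * provide ops->map_gen_lookup additionally have the whole lookup inlined
 * as BPF instructions.
 */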
10110
5576b991
MKL
10111 if (prog->jit_requested && BITS_PER_LONG == 64 &&
10112 insn->imm == BPF_FUNC_jiffies64) {
10113 struct bpf_insn ld_jiffies_addr[2] = {
10114 BPF_LD_IMM64(BPF_REG_0,
10115 (unsigned long)&jiffies),
10116 };
10117
10118 insn_buf[0] = ld_jiffies_addr[0];
10119 insn_buf[1] = ld_jiffies_addr[1];
10120 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
10121 BPF_REG_0, 0);
10122 cnt = 3;
10123
10124 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
10125 cnt);
10126 if (!new_prog)
10127 return -ENOMEM;
10128
10129 delta += cnt - 1;
10130 env->prog = prog = new_prog;
10131 insn = new_prog->insnsi + i + delta;
10132 continue;
10133 }
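/* Illustrative note (not part of the original source): the patch above
 * turns a bpf_jiffies64() helper call into a direct 64-bit load, roughly:
 *
 *   r0 = (u64)&jiffies;        // BPF_LD_IMM64
 *   r0 = *(u64 *)(r0 + 0);     // BPF_LDX_MEM(BPF_DW, ...)
 *
 * avoiding the helper-call overhead on 64-bit JITed programs.
 */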
10134
81ed18ab 10135patch_call_imm:
5e43f899 10136 fn = env->ops->get_func_proto(insn->imm, env->prog);
79741b3b
AS
10137 /* all functions that have a prototype and that the verifier allowed
10138 * programs to call must be real in-kernel functions
10139 */
10140 if (!fn->func) {
61bd5218
JK
10141 verbose(env,
10142 "kernel subsystem misconfigured func %s#%d\n",
79741b3b
AS
10143 func_id_name(insn->imm), insn->imm);
10144 return -EFAULT;
e245c5c6 10145 }
79741b3b 10146 insn->imm = fn->func - __bpf_call_base;
e245c5c6 10147 }
e245c5c6 10148
d2e4c1e6
DB
10149 /* Since poke tab is now finalized, publish aux to tracker. */
10150 for (i = 0; i < prog->aux->size_poke_tab; i++) {
10151 map_ptr = prog->aux->poke_tab[i].tail_call.map;
10152 if (!map_ptr->ops->map_poke_track ||
10153 !map_ptr->ops->map_poke_untrack ||
10154 !map_ptr->ops->map_poke_run) {
10155 verbose(env, "bpf verifier is misconfigured\n");
10156 return -EINVAL;
10157 }
10158
10159 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
10160 if (ret < 0) {
10161 verbose(env, "tracking tail call prog failed\n");
10162 return ret;
10163 }
10164 }
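/* Illustrative note (not part of the original source): map_poke_track()
 * registers this prog with the prog-array map so that, when a tracked
 * slot is later updated or deleted, the map can call map_poke_run() and
 * re-patch the direct tail-call jumps the JIT emitted for it.
 */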
10165
79741b3b
AS
10166 return 0;
10167}
e245c5c6 10168
58e2af8b 10169static void free_states(struct bpf_verifier_env *env)
f1bca824 10170{
58e2af8b 10171 struct bpf_verifier_state_list *sl, *sln;
f1bca824
AS
10172 int i;
10173
9f4686c4
AS
10174 sl = env->free_list;
10175 while (sl) {
10176 sln = sl->next;
10177 free_verifier_state(&sl->state, false);
10178 kfree(sl);
10179 sl = sln;
10180 }
51c39bb1 10181 env->free_list = NULL;
9f4686c4 10182
f1bca824
AS
10183 if (!env->explored_states)
10184 return;
10185
dc2a4ebc 10186 for (i = 0; i < state_htab_size(env); i++) {
f1bca824
AS
10187 sl = env->explored_states[i];
10188
a8f500af
AS
10189 while (sl) {
10190 sln = sl->next;
10191 free_verifier_state(&sl->state, false);
10192 kfree(sl);
10193 sl = sln;
10194 }
51c39bb1 10195 env->explored_states[i] = NULL;
f1bca824 10196 }
51c39bb1 10197}
f1bca824 10198
51c39bb1
AS
10199/* The verifier is using insn_aux_data[] to store temporary data during
10200 * verification and to store information for passes that run after the
10201 * verification like dead code sanitization. do_check_common() for subprogram N
10202 * may analyze many other subprograms. sanitize_insn_aux_data() clears all
10203 * temporary data after do_check_common() finds that subprogram N cannot be
10204 * verified independently. pass_cnt counts the number of times
10205 * do_check_common() was run and insn->aux->seen tells the pass number
10206 * insn_aux_data was touched. These variables are compared to clear temporary
10207 * data from a failed pass. For testing and experiments do_check_common() can be
10208 * run multiple times even when a prior attempt to verify was unsuccessful.
10209 */
10210static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
10211{
10212 struct bpf_insn *insn = env->prog->insnsi;
10213 struct bpf_insn_aux_data *aux;
10214 int i, class;
10215
10216 for (i = 0; i < env->prog->len; i++) {
10217 class = BPF_CLASS(insn[i].code);
10218 if (class != BPF_LDX && class != BPF_STX)
10219 continue;
10220 aux = &env->insn_aux_data[i];
10221 if (aux->seen != env->pass_cnt)
10222 continue;
10223 memset(aux, 0, offsetof(typeof(*aux), orig_idx));
10224 }
f1bca824
AS
10225}
10226
51c39bb1
AS
10227static int do_check_common(struct bpf_verifier_env *env, int subprog)
10228{
6f8a57cc 10229 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
51c39bb1
AS
10230 struct bpf_verifier_state *state;
10231 struct bpf_reg_state *regs;
10232 int ret, i;
10233
10234 env->prev_linfo = NULL;
10235 env->pass_cnt++;
10236
10237 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
10238 if (!state)
10239 return -ENOMEM;
10240 state->curframe = 0;
10241 state->speculative = false;
10242 state->branches = 1;
10243 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
10244 if (!state->frame[0]) {
10245 kfree(state);
10246 return -ENOMEM;
10247 }
10248 env->cur_state = state;
10249 init_func_state(env, state->frame[0],
10250 BPF_MAIN_FUNC /* callsite */,
10251 0 /* frameno */,
10252 subprog);
10253
10254 regs = state->frame[state->curframe]->regs;
be8704ff 10255 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
51c39bb1
AS
10256 ret = btf_prepare_func_args(env, subprog, regs);
10257 if (ret)
10258 goto out;
10259 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
10260 if (regs[i].type == PTR_TO_CTX)
10261 mark_reg_known_zero(env, regs, i);
10262 else if (regs[i].type == SCALAR_VALUE)
10263 mark_reg_unknown(env, regs, i);
10264 }
10265 } else {
10266 /* 1st arg to a function */
10267 regs[BPF_REG_1].type = PTR_TO_CTX;
10268 mark_reg_known_zero(env, regs, BPF_REG_1);
10269 ret = btf_check_func_arg_match(env, subprog, regs);
10270 if (ret == -EFAULT)
10271 /* unlikely verifier bug. abort.
10272 * ret == 0 and ret < 0 are sadly acceptable for
10273 * main() function due to backward compatibility.
10274 * E.g. a socket filter program may be written as:
10275 * int bpf_prog(struct pt_regs *ctx)
10276 * and never dereference that ctx in the program.
10277 * 'struct pt_regs' is a type mismatch for socket
10278 * filter that should be using 'struct __sk_buff'.
10279 */
10280 goto out;
10281 }
10282
10283 ret = do_check(env);
10284out:
f59bbfc2
AS
10285 /* check for NULL is necessary, since cur_state can be freed inside
10286 * do_check() under memory pressure.
10287 */
10288 if (env->cur_state) {
10289 free_verifier_state(env->cur_state, true);
10290 env->cur_state = NULL;
10291 }
6f8a57cc
AN
10292 while (!pop_stack(env, NULL, NULL, false));
10293 if (!ret && pop_log)
10294 bpf_vlog_reset(&env->log, 0);
51c39bb1
AS
10295 free_states(env);
10296 if (ret)
10297 /* clean aux data in case subprog was rejected */
10298 sanitize_insn_aux_data(env);
10299 return ret;
10300}
10301
10302/* Verify all global functions in a BPF program one by one based on their BTF.
10303 * All global functions must pass verification. Otherwise the whole program is rejected.
10304 * Consider:
10305 * int bar(int);
10306 * int foo(int f)
10307 * {
10308 * return bar(f);
10309 * }
10310 * int bar(int b)
10311 * {
10312 * ...
10313 * }
10314 * foo() will be verified first for R1=any_scalar_value. During verification it
10315 * will be assumed that bar() has already been verified successfully and the call to bar()
10316 * from foo() will be checked for type match only. Later bar() will be verified
10317 * independently to check that it's safe for R1=any_scalar_value.
10318 */
10319static int do_check_subprogs(struct bpf_verifier_env *env)
10320{
10321 struct bpf_prog_aux *aux = env->prog->aux;
10322 int i, ret;
10323
10324 if (!aux->func_info)
10325 return 0;
10326
10327 for (i = 1; i < env->subprog_cnt; i++) {
10328 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
10329 continue;
10330 env->insn_idx = env->subprog_info[i].start;
10331 WARN_ON_ONCE(env->insn_idx == 0);
10332 ret = do_check_common(env, i);
10333 if (ret) {
10334 return ret;
10335 } else if (env->log.level & BPF_LOG_LEVEL) {
10336 verbose(env,
10337 "Func#%d is safe for any args that match its prototype\n",
10338 i);
10339 }
10340 }
10341 return 0;
10342}
10343
10344static int do_check_main(struct bpf_verifier_env *env)
10345{
10346 int ret;
10347
10348 env->insn_idx = 0;
10349 ret = do_check_common(env, 0);
10350 if (!ret)
10351 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
10352 return ret;
10353}
10354
10355
06ee7115
AS
10356static void print_verification_stats(struct bpf_verifier_env *env)
10357{
10358 int i;
10359
10360 if (env->log.level & BPF_LOG_STATS) {
10361 verbose(env, "verification time %lld usec\n",
10362 div_u64(env->verification_time, 1000));
10363 verbose(env, "stack depth ");
10364 for (i = 0; i < env->subprog_cnt; i++) {
10365 u32 depth = env->subprog_info[i].stack_depth;
10366
10367 verbose(env, "%d", depth);
10368 if (i + 1 < env->subprog_cnt)
10369 verbose(env, "+");
10370 }
10371 verbose(env, "\n");
10372 }
10373 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
10374 "total_states %d peak_states %d mark_read %d\n",
10375 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
10376 env->max_states_per_insn, env->total_states,
10377 env->peak_states, env->longest_mark_read_walk);
f1bca824
AS
10378}
10379
27ae7997
MKL
10380static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
10381{
10382 const struct btf_type *t, *func_proto;
10383 const struct bpf_struct_ops *st_ops;
10384 const struct btf_member *member;
10385 struct bpf_prog *prog = env->prog;
10386 u32 btf_id, member_idx;
10387 const char *mname;
10388
10389 btf_id = prog->aux->attach_btf_id;
10390 st_ops = bpf_struct_ops_find(btf_id);
10391 if (!st_ops) {
10392 verbose(env, "attach_btf_id %u is not a supported struct\n",
10393 btf_id);
10394 return -ENOTSUPP;
10395 }
10396
10397 t = st_ops->type;
10398 member_idx = prog->expected_attach_type;
10399 if (member_idx >= btf_type_vlen(t)) {
10400 verbose(env, "attach to invalid member idx %u of struct %s\n",
10401 member_idx, st_ops->name);
10402 return -EINVAL;
10403 }
10404
10405 member = &btf_type_member(t)[member_idx];
10406 mname = btf_name_by_offset(btf_vmlinux, member->name_off);
10407 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
10408 NULL);
10409 if (!func_proto) {
10410 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
10411 mname, member_idx, st_ops->name);
10412 return -EINVAL;
10413 }
10414
10415 if (st_ops->check_member) {
10416 int err = st_ops->check_member(t, member);
10417
10418 if (err) {
10419 verbose(env, "attach to unsupported member %s of struct %s\n",
10420 mname, st_ops->name);
10421 return err;
10422 }
10423 }
10424
10425 prog->aux->attach_func_proto = func_proto;
10426 prog->aux->attach_func_name = mname;
10427 env->ops = st_ops->verifier_ops;
10428
10429 return 0;
10430}
6ba43b76
KS
10431#define SECURITY_PREFIX "security_"
10432
10433static int check_attach_modify_return(struct bpf_verifier_env *env)
10434{
10435 struct bpf_prog *prog = env->prog;
10436 unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr;
10437
6ba43b76
KS
10438 /* This is expected to be cleaned up in the future with the KRSI effort
10439 * introducing the LSM_HOOK macro for cleaning up lsm_hooks.h.
10440 */
69191754
KS
10441 if (within_error_injection_list(addr) ||
10442 !strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
10443 sizeof(SECURITY_PREFIX) - 1))
6ba43b76 10444 return 0;
6ba43b76
KS
10445
10446 verbose(env, "fmod_ret attach_btf_id %u (%s) is not modifiable\n",
10447 prog->aux->attach_btf_id, prog->aux->attach_func_name);
10448
10449 return -EINVAL;
10450}
27ae7997 10451
38207291
MKL
10452static int check_attach_btf_id(struct bpf_verifier_env *env)
10453{
10454 struct bpf_prog *prog = env->prog;
be8704ff 10455 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
5b92a28a 10456 struct bpf_prog *tgt_prog = prog->aux->linked_prog;
38207291 10457 u32 btf_id = prog->aux->attach_btf_id;
f1b9509c 10458 const char prefix[] = "btf_trace_";
5b92a28a 10459 int ret = 0, subprog = -1, i;
fec56f58 10460 struct bpf_trampoline *tr;
38207291 10461 const struct btf_type *t;
5b92a28a 10462 bool conservative = true;
38207291 10463 const char *tname;
5b92a28a 10464 struct btf *btf;
fec56f58 10465 long addr;
5b92a28a 10466 u64 key;
38207291 10467
27ae7997
MKL
10468 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
10469 return check_struct_ops_btf_id(env);
10470
9e4e01df
KS
10471 if (prog->type != BPF_PROG_TYPE_TRACING &&
10472 prog->type != BPF_PROG_TYPE_LSM &&
10473 !prog_extension)
f1b9509c 10474 return 0;
38207291 10475
f1b9509c
AS
10476 if (!btf_id) {
10477 verbose(env, "Tracing programs must provide btf_id\n");
10478 return -EINVAL;
10479 }
5b92a28a
AS
10480 btf = bpf_prog_get_target_btf(prog);
10481 if (!btf) {
10482 verbose(env,
10483 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
10484 return -EINVAL;
10485 }
10486 t = btf_type_by_id(btf, btf_id);
f1b9509c
AS
10487 if (!t) {
10488 verbose(env, "attach_btf_id %u is invalid\n", btf_id);
10489 return -EINVAL;
10490 }
5b92a28a 10491 tname = btf_name_by_offset(btf, t->name_off);
f1b9509c
AS
10492 if (!tname) {
10493 verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
10494 return -EINVAL;
10495 }
5b92a28a
AS
10496 if (tgt_prog) {
10497 struct bpf_prog_aux *aux = tgt_prog->aux;
10498
10499 for (i = 0; i < aux->func_info_cnt; i++)
10500 if (aux->func_info[i].type_id == btf_id) {
10501 subprog = i;
10502 break;
10503 }
10504 if (subprog == -1) {
10505 verbose(env, "Subprog %s doesn't exist\n", tname);
10506 return -EINVAL;
10507 }
10508 conservative = aux->func_info_aux[subprog].unreliable;
be8704ff
AS
10509 if (prog_extension) {
10510 if (conservative) {
10511 verbose(env,
10512 "Cannot replace static functions\n");
10513 return -EINVAL;
10514 }
10515 if (!prog->jit_requested) {
10516 verbose(env,
10517 "Extension programs should be JITed\n");
10518 return -EINVAL;
10519 }
10520 env->ops = bpf_verifier_ops[tgt_prog->type];
03f87c0b 10521 prog->expected_attach_type = tgt_prog->expected_attach_type;
be8704ff
AS
10522 }
10523 if (!tgt_prog->jited) {
10524 verbose(env, "Can attach to only JITed progs\n");
10525 return -EINVAL;
10526 }
10527 if (tgt_prog->type == prog->type) {
10528 /* Cannot fentry/fexit another fentry/fexit program.
10529 * Cannot attach program extension to another extension.
10530 * It's ok to attach fentry/fexit to extension program.
10531 */
10532 verbose(env, "Cannot recursively attach\n");
10533 return -EINVAL;
10534 }
10535 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
10536 prog_extension &&
10537 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
10538 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
10539 /* Program extensions can extend all program types
10540 * except fentry/fexit. The reason is the following.
10541 * The fentry/fexit programs are used for performance
10542 * analysis, stats and can be attached to any program
10543 * type except themselves. When an extension program is
10544 * replacing an XDP function it is necessary to allow
10545 * performance analysis of all functions. Both original
10546 * XDP program and its program extension. Hence
10547 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
10548 * allowed. If extending of fentry/fexit was allowed it
10549 * would be possible to create long call chain
10550 * fentry->extension->fentry->extension beyond
10551 * reasonable stack size. Hence extending fentry is not
10552 * allowed.
10553 */
10554 verbose(env, "Cannot extend fentry/fexit\n");
10555 return -EINVAL;
10556 }
5b92a28a
AS
10557 key = ((u64)aux->id) << 32 | btf_id;
10558 } else {
be8704ff
AS
10559 if (prog_extension) {
10560 verbose(env, "Cannot replace kernel functions\n");
10561 return -EINVAL;
10562 }
5b92a28a
AS
10563 key = btf_id;
10564 }
f1b9509c
AS
10565
10566 switch (prog->expected_attach_type) {
10567 case BPF_TRACE_RAW_TP:
5b92a28a
AS
10568 if (tgt_prog) {
10569 verbose(env,
10570 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
10571 return -EINVAL;
10572 }
38207291
MKL
10573 if (!btf_type_is_typedef(t)) {
10574 verbose(env, "attach_btf_id %u is not a typedef\n",
10575 btf_id);
10576 return -EINVAL;
10577 }
f1b9509c 10578 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
38207291
MKL
10579 verbose(env, "attach_btf_id %u points to wrong type name %s\n",
10580 btf_id, tname);
10581 return -EINVAL;
10582 }
10583 tname += sizeof(prefix) - 1;
5b92a28a 10584 t = btf_type_by_id(btf, t->type);
38207291
MKL
10585 if (!btf_type_is_ptr(t))
10586 /* should never happen in valid vmlinux build */
10587 return -EINVAL;
5b92a28a 10588 t = btf_type_by_id(btf, t->type);
38207291
MKL
10589 if (!btf_type_is_func_proto(t))
10590 /* should never happen in valid vmlinux build */
10591 return -EINVAL;
10592
10593 /* remember two read-only pointers that are valid for
10594 * the lifetime of the kernel
10595 */
10596 prog->aux->attach_func_name = tname;
10597 prog->aux->attach_func_proto = t;
10598 prog->aux->attach_btf_trace = true;
f1b9509c 10599 return 0;
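/* Illustrative note (not part of the original source): for
 * BPF_TRACE_RAW_TP the attach_btf_id must name a vmlinux typedef,
 * roughly of the form
 *
 *   typedef void (*btf_trace_<tp_name>)(void *__data, <tp args>...);
 *
 * and the typedef -> pointer -> func_proto chain resolved above is what
 * later gives the program typed access to the tracepoint arguments.
 */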
be8704ff
AS
10600 default:
10601 if (!prog_extension)
10602 return -EINVAL;
10603 /* fallthrough */
ae240823 10604 case BPF_MODIFY_RETURN:
9e4e01df 10605 case BPF_LSM_MAC:
fec56f58
AS
10606 case BPF_TRACE_FENTRY:
10607 case BPF_TRACE_FEXIT:
9e4e01df
KS
10608 prog->aux->attach_func_name = tname;
10609 if (prog->type == BPF_PROG_TYPE_LSM) {
10610 ret = bpf_lsm_verify_prog(&env->log, prog);
10611 if (ret < 0)
10612 return ret;
10613 }
10614
fec56f58
AS
10615 if (!btf_type_is_func(t)) {
10616 verbose(env, "attach_btf_id %u is not a function\n",
10617 btf_id);
10618 return -EINVAL;
10619 }
be8704ff
AS
10620 if (prog_extension &&
10621 btf_check_type_match(env, prog, btf, t))
10622 return -EINVAL;
5b92a28a 10623 t = btf_type_by_id(btf, t->type);
fec56f58
AS
10624 if (!btf_type_is_func_proto(t))
10625 return -EINVAL;
5b92a28a 10626 tr = bpf_trampoline_lookup(key);
fec56f58
AS
10627 if (!tr)
10628 return -ENOMEM;
5b92a28a 10629 /* t is either vmlinux type or another program's type */
fec56f58
AS
10630 prog->aux->attach_func_proto = t;
10631 mutex_lock(&tr->mutex);
10632 if (tr->func.addr) {
10633 prog->aux->trampoline = tr;
10634 goto out;
10635 }
5b92a28a
AS
10636 if (tgt_prog && conservative) {
10637 prog->aux->attach_func_proto = NULL;
10638 t = NULL;
10639 }
10640 ret = btf_distill_func_proto(&env->log, btf, t,
fec56f58
AS
10641 tname, &tr->func.model);
10642 if (ret < 0)
10643 goto out;
5b92a28a 10644 if (tgt_prog) {
e9eeec58
YS
10645 if (subprog == 0)
10646 addr = (long) tgt_prog->bpf_func;
10647 else
10648 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
5b92a28a
AS
10649 } else {
10650 addr = kallsyms_lookup_name(tname);
10651 if (!addr) {
10652 verbose(env,
10653 "The address of function %s cannot be found\n",
10654 tname);
10655 ret = -ENOENT;
10656 goto out;
10657 }
fec56f58
AS
10658 }
10659 tr->func.addr = (void *)addr;
10660 prog->aux->trampoline = tr;
6ba43b76
KS
10661
10662 if (prog->expected_attach_type == BPF_MODIFY_RETURN)
10663 ret = check_attach_modify_return(env);
fec56f58
AS
10664out:
10665 mutex_unlock(&tr->mutex);
10666 if (ret)
10667 bpf_trampoline_put(tr);
10668 return ret;
38207291 10669 }
38207291
MKL
10670}
10671
838e9690
YS
10672int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
10673 union bpf_attr __user *uattr)
51580e79 10674{
06ee7115 10675 u64 start_time = ktime_get_ns();
58e2af8b 10676 struct bpf_verifier_env *env;
b9193c1b 10677 struct bpf_verifier_log *log;
9e4c24e7 10678 int i, len, ret = -EINVAL;
e2ae4ca2 10679 bool is_priv;
51580e79 10680
eba0c929
AB
10681 /* no program is valid */
10682 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
10683 return -EINVAL;
10684
58e2af8b 10685 /* 'struct bpf_verifier_env' can be global, but since it's not small,
cbd35700
AS
10686 * allocate/free it every time bpf_check() is called
10687 */
58e2af8b 10688 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
cbd35700
AS
10689 if (!env)
10690 return -ENOMEM;
61bd5218 10691 log = &env->log;
cbd35700 10692
9e4c24e7 10693 len = (*prog)->len;
fad953ce 10694 env->insn_aux_data =
9e4c24e7 10695 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
3df126f3
JK
10696 ret = -ENOMEM;
10697 if (!env->insn_aux_data)
10698 goto err_free_env;
9e4c24e7
JK
10699 for (i = 0; i < len; i++)
10700 env->insn_aux_data[i].orig_idx = i;
9bac3d6d 10701 env->prog = *prog;
00176a34 10702 env->ops = bpf_verifier_ops[env->prog->type];
45a73c17 10703 is_priv = capable(CAP_SYS_ADMIN);
0246e64d 10704
8580ac94
AS
10705 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
10706 mutex_lock(&bpf_verifier_lock);
10707 if (!btf_vmlinux)
10708 btf_vmlinux = btf_parse_vmlinux();
10709 mutex_unlock(&bpf_verifier_lock);
10710 }
10711
cbd35700 10712 /* grab the mutex to protect few globals used by verifier */
45a73c17
AS
10713 if (!is_priv)
10714 mutex_lock(&bpf_verifier_lock);
cbd35700
AS
10715
10716 if (attr->log_level || attr->log_buf || attr->log_size) {
10717 /* user requested verbose verifier output
10718 * and supplied buffer to store the verification trace
10719 */
e7bf8249
JK
10720 log->level = attr->log_level;
10721 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
10722 log->len_total = attr->log_size;
cbd35700
AS
10723
10724 ret = -EINVAL;
e7bf8249 10725 /* log attributes have to be sane */
7a9f5c65 10726 if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
06ee7115 10727 !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
3df126f3 10728 goto err_unlock;
cbd35700 10729 }
1ad2f583 10730
8580ac94
AS
10731 if (IS_ERR(btf_vmlinux)) {
10732 /* Either gcc or pahole or kernel are broken. */
10733 verbose(env, "in-kernel BTF is malformed\n");
10734 ret = PTR_ERR(btf_vmlinux);
38207291 10735 goto skip_full_check;
8580ac94
AS
10736 }
10737
1ad2f583
DB
10738 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
10739 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
e07b98d9 10740 env->strict_alignment = true;
e9ee9efc
DM
10741 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
10742 env->strict_alignment = false;
cbd35700 10743
e2ae4ca2
JK
10744 env->allow_ptr_leaks = is_priv;
10745
10d274e8
AS
10746 if (is_priv)
10747 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
10748
f4e3ec0d
JK
10749 ret = replace_map_fd_with_map_ptr(env);
10750 if (ret < 0)
10751 goto skip_full_check;
10752
cae1927c 10753 if (bpf_prog_is_dev_bound(env->prog->aux)) {
a40a2632 10754 ret = bpf_prog_offload_verifier_prep(env->prog);
ab3f0063 10755 if (ret)
f4e3ec0d 10756 goto skip_full_check;
ab3f0063
JK
10757 }
10758
dc2a4ebc 10759 env->explored_states = kvcalloc(state_htab_size(env),
58e2af8b 10760 sizeof(struct bpf_verifier_state_list *),
f1bca824
AS
10761 GFP_USER);
10762 ret = -ENOMEM;
10763 if (!env->explored_states)
10764 goto skip_full_check;
10765
d9762e84 10766 ret = check_subprogs(env);
475fb78f
AS
10767 if (ret < 0)
10768 goto skip_full_check;
10769
c454a46b 10770 ret = check_btf_info(env, attr, uattr);
838e9690
YS
10771 if (ret < 0)
10772 goto skip_full_check;
10773
be8704ff
AS
10774 ret = check_attach_btf_id(env);
10775 if (ret)
10776 goto skip_full_check;
10777
d9762e84
MKL
10778 ret = check_cfg(env);
10779 if (ret < 0)
10780 goto skip_full_check;
10781
51c39bb1
AS
10782 ret = do_check_subprogs(env);
10783 ret = ret ?: do_check_main(env);
cbd35700 10784
c941ce9c
QM
10785 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
10786 ret = bpf_prog_offload_finalize(env);
10787
0246e64d 10788skip_full_check:
51c39bb1 10789 kvfree(env->explored_states);
0246e64d 10790
c131187d 10791 if (ret == 0)
9b38c405 10792 ret = check_max_stack_depth(env);
c131187d 10793
9b38c405 10794 /* instruction rewrites happen after this point */
e2ae4ca2
JK
10795 if (is_priv) {
10796 if (ret == 0)
10797 opt_hard_wire_dead_code_branches(env);
52875a04
JK
10798 if (ret == 0)
10799 ret = opt_remove_dead_code(env);
a1b14abc
JK
10800 if (ret == 0)
10801 ret = opt_remove_nops(env);
52875a04
JK
10802 } else {
10803 if (ret == 0)
10804 sanitize_dead_code(env);
e2ae4ca2
JK
10805 }
10806
9bac3d6d
AS
10807 if (ret == 0)
10808 /* program is valid, convert *(u32*)(ctx + off) accesses */
10809 ret = convert_ctx_accesses(env);
10810
e245c5c6 10811 if (ret == 0)
79741b3b 10812 ret = fixup_bpf_calls(env);
e245c5c6 10813
a4b1d3c1
JW
10814 /* do 32-bit optimization after insn patching has done so those patched
10815 * insns could be handled correctly.
10816 */
d6c2308c
JW
10817 if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
10818 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
10819 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
10820 : false;
a4b1d3c1
JW
10821 }
10822
1ea47e01
AS
10823 if (ret == 0)
10824 ret = fixup_call_args(env);
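/* Illustrative summary (not part of the original source): once the checks
 * succeed, the program is rewritten in this order: dead-code handling,
 * ctx access conversion, helper fixups and inlining (fixup_bpf_calls),
 * optional 32-bit zero-extension insertion, and finally bpf-to-bpf call
 * resolution (fixup_call_args), which comes last so that jit_subprogs()
 * sees the final instruction stream.
 */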
10825
06ee7115
AS
10826 env->verification_time = ktime_get_ns() - start_time;
10827 print_verification_stats(env);
10828
a2a7d570 10829 if (log->level && bpf_verifier_log_full(log))
cbd35700 10830 ret = -ENOSPC;
a2a7d570 10831 if (log->level && !log->ubuf) {
cbd35700 10832 ret = -EFAULT;
a2a7d570 10833 goto err_release_maps;
cbd35700
AS
10834 }
10835
0246e64d
AS
10836 if (ret == 0 && env->used_map_cnt) {
10837 /* if program passed verifier, update used_maps in bpf_prog_info */
9bac3d6d
AS
10838 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
10839 sizeof(env->used_maps[0]),
10840 GFP_KERNEL);
0246e64d 10841
9bac3d6d 10842 if (!env->prog->aux->used_maps) {
0246e64d 10843 ret = -ENOMEM;
a2a7d570 10844 goto err_release_maps;
0246e64d
AS
10845 }
10846
9bac3d6d 10847 memcpy(env->prog->aux->used_maps, env->used_maps,
0246e64d 10848 sizeof(env->used_maps[0]) * env->used_map_cnt);
9bac3d6d 10849 env->prog->aux->used_map_cnt = env->used_map_cnt;
0246e64d
AS
10850
10851 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
10852 * bpf_ld_imm64 instructions
10853 */
10854 convert_pseudo_ld_imm64(env);
10855 }
cbd35700 10856
ba64e7d8
YS
10857 if (ret == 0)
10858 adjust_btf_func(env);
10859
a2a7d570 10860err_release_maps:
9bac3d6d 10861 if (!env->prog->aux->used_maps)
0246e64d 10862 /* if we didn't copy map pointers into bpf_prog_info, release
ab7f5bf0 10863 * them now. Otherwise free_used_maps() will release them.
0246e64d
AS
10864 */
10865 release_maps(env);
03f87c0b
THJ
10866
10867 /* extension progs temporarily inherit the attach_type of their targets
10868 for verification purposes, so set it back to zero before returning
10869 */
10870 if (env->prog->type == BPF_PROG_TYPE_EXT)
10871 env->prog->expected_attach_type = 0;
10872
9bac3d6d 10873 *prog = env->prog;
3df126f3 10874err_unlock:
45a73c17
AS
10875 if (!is_priv)
10876 mutex_unlock(&bpf_verifier_lock);
3df126f3
JK
10877 vfree(env->insn_aux_data);
10878err_free_env:
10879 kfree(env);
51580e79
AS
10880 return ret;
10881}