]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - kernel/bpf/verifier.c
bpf: Try harder when allocating memory for large maps
[mirror_ubuntu-hirsute-kernel.git] / kernel / bpf / verifier.c
CommitLineData
51580e79 1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
969bf05e 2 * Copyright (c) 2016 Facebook
fd978bf7 3 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
51580e79
AS
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 */
838e9690 14#include <uapi/linux/btf.h>
51580e79
AS
15#include <linux/kernel.h>
16#include <linux/types.h>
17#include <linux/slab.h>
18#include <linux/bpf.h>
838e9690 19#include <linux/btf.h>
58e2af8b 20#include <linux/bpf_verifier.h>
51580e79
AS
21#include <linux/filter.h>
22#include <net/netlink.h>
23#include <linux/file.h>
24#include <linux/vmalloc.h>
ebb676da 25#include <linux/stringify.h>
cc8b0b92
AS
26#include <linux/bsearch.h>
27#include <linux/sort.h>
c195651e 28#include <linux/perf_event.h>
d9762e84 29#include <linux/ctype.h>
51580e79 30
f4ac7e0b
JK
31#include "disasm.h"
32
00176a34
JK
33static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
34#define BPF_PROG_TYPE(_id, _name) \
35 [_id] = & _name ## _verifier_ops,
36#define BPF_MAP_TYPE(_id, _ops)
37#include <linux/bpf_types.h>
38#undef BPF_PROG_TYPE
39#undef BPF_MAP_TYPE
40};
41
51580e79
AS
42/* bpf_check() is a static code analyzer that walks eBPF program
43 * instruction by instruction and updates register/stack state.
44 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
45 *
46 * The first pass is depth-first-search to check that the program is a DAG.
47 * It rejects the following programs:
48 * - larger than BPF_MAXINSNS insns
49 * - if loop is present (detected via back-edge)
50 * - unreachable insns exist (shouldn't be a forest. program = one function)
51 * - out of bounds or malformed jumps
52 * The second pass is all possible path descent from the 1st insn.
53 * Since it's analyzing all pathes through the program, the length of the
eba38a96 54 * analysis is limited to 64k insn, which may be hit even if total number of
51580e79
AS
55 * insn is less then 4K, but there are too many branches that change stack/regs.
56 * Number of 'branches to be analyzed' is limited to 1k
57 *
58 * On entry to each instruction, each register has a type, and the instruction
59 * changes the types of the registers depending on instruction semantics.
60 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
61 * copied to R1.
62 *
63 * All registers are 64-bit.
64 * R0 - return register
65 * R1-R5 argument passing registers
66 * R6-R9 callee saved registers
67 * R10 - frame pointer read-only
68 *
69 * At the start of BPF program the register R1 contains a pointer to bpf_context
70 * and has type PTR_TO_CTX.
71 *
72 * Verifier tracks arithmetic operations on pointers in case:
73 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
74 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
75 * 1st insn copies R10 (which has FRAME_PTR) type into R1
76 * and 2nd arithmetic instruction is pattern matched to recognize
77 * that it wants to construct a pointer to some element within stack.
78 * So after 2nd insn, the register R1 has type PTR_TO_STACK
79 * (and -20 constant is saved for further stack bounds checking).
80 * Meaning that this reg is a pointer to stack plus known immediate constant.
81 *
f1174f77 82 * Most of the time the registers have SCALAR_VALUE type, which
51580e79 83 * means the register has some value, but it's not a valid pointer.
f1174f77 84 * (like pointer plus pointer becomes SCALAR_VALUE type)
51580e79
AS
85 *
86 * When verifier sees load or store instructions the type of base register
c64b7983
JS
87 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
88 * four pointer types recognized by check_mem_access() function.
51580e79
AS
89 *
90 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
91 * and the range of [ptr, ptr + map's value_size) is accessible.
92 *
93 * registers used to pass values to function calls are checked against
94 * function argument constraints.
95 *
96 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
97 * It means that the register type passed to this function must be
98 * PTR_TO_STACK and it will be used inside the function as
99 * 'pointer to map element key'
100 *
101 * For example the argument constraints for bpf_map_lookup_elem():
102 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
103 * .arg1_type = ARG_CONST_MAP_PTR,
104 * .arg2_type = ARG_PTR_TO_MAP_KEY,
105 *
106 * ret_type says that this function returns 'pointer to map elem value or null'
107 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
108 * 2nd argument should be a pointer to stack, which will be used inside
109 * the helper function as a pointer to map element key.
110 *
111 * On the kernel side the helper function looks like:
112 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
113 * {
114 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
115 * void *key = (void *) (unsigned long) r2;
116 * void *value;
117 *
118 * here kernel can access 'key' and 'map' pointers safely, knowing that
119 * [key, key + map->key_size) bytes are valid and were initialized on
120 * the stack of eBPF program.
121 * }
122 *
123 * Corresponding eBPF program may look like:
124 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
125 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
126 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
127 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
128 * here verifier looks at prototype of map_lookup_elem() and sees:
129 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
130 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
131 *
132 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
133 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
134 * and were initialized prior to this call.
135 * If it's ok, then verifier allows this BPF_CALL insn and looks at
136 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
137 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
138 * returns ether pointer to map value or NULL.
139 *
140 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
141 * insn, the register holding that pointer in the true branch changes state to
142 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
143 * branch. See check_cond_jmp_op().
144 *
145 * After the call R0 is set to return type of the function and registers R1-R5
146 * are set to NOT_INIT to indicate that they are no longer readable.
fd978bf7
JS
147 *
148 * The following reference types represent a potential reference to a kernel
149 * resource which, after first being allocated, must be checked and freed by
150 * the BPF program:
151 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
152 *
153 * When the verifier sees a helper call return a reference type, it allocates a
154 * pointer id for the reference and stores it in the current function state.
155 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
156 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
157 * passes through a NULL-check conditional. For the branch wherein the state is
158 * changed to CONST_IMM, the verifier releases the reference.
6acc9b43
JS
159 *
160 * For each helper function that allocates a reference, such as
161 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
162 * bpf_sk_release(). When a reference type passes into the release function,
163 * the verifier also releases the reference. If any unchecked or unreleased
164 * reference remains at the end of the program, the verifier rejects it.
51580e79
AS
165 */
166
17a52670 167/* verifier_state + insn_idx are pushed to stack when branch is encountered */
58e2af8b 168struct bpf_verifier_stack_elem {
17a52670
AS
169 /* verifer state is 'st'
170 * before processing instruction 'insn_idx'
171 * and after processing instruction 'prev_insn_idx'
172 */
58e2af8b 173 struct bpf_verifier_state st;
17a52670
AS
174 int insn_idx;
175 int prev_insn_idx;
58e2af8b 176 struct bpf_verifier_stack_elem *next;
cbd35700
AS
177};
178
8e17c1b1 179#define BPF_COMPLEXITY_LIMIT_INSNS 131072
07016151 180#define BPF_COMPLEXITY_LIMIT_STACK 1024
ceefbc96 181#define BPF_COMPLEXITY_LIMIT_STATES 64
07016151 182
c93552c4
DB
183#define BPF_MAP_PTR_UNPRIV 1UL
184#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
185 POISON_POINTER_DELTA))
186#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
187
188static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
189{
190 return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
191}
192
193static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
194{
195 return aux->map_state & BPF_MAP_PTR_UNPRIV;
196}
197
198static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
199 const struct bpf_map *map, bool unpriv)
200{
201 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
202 unpriv |= bpf_map_ptr_unpriv(aux);
203 aux->map_state = (unsigned long)map |
204 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
205}
fad73a1a 206
33ff9823
DB
207struct bpf_call_arg_meta {
208 struct bpf_map *map_ptr;
435faee1 209 bool raw_mode;
36bbef52 210 bool pkt_access;
435faee1
DB
211 int regno;
212 int access_size;
849fa506
YS
213 s64 msize_smax_value;
214 u64 msize_umax_value;
1b986589 215 int ref_obj_id;
d83525ca 216 int func_id;
33ff9823
DB
217};
218
cbd35700
AS
219static DEFINE_MUTEX(bpf_verifier_lock);
220
d9762e84
MKL
221static const struct bpf_line_info *
222find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
223{
224 const struct bpf_line_info *linfo;
225 const struct bpf_prog *prog;
226 u32 i, nr_linfo;
227
228 prog = env->prog;
229 nr_linfo = prog->aux->nr_linfo;
230
231 if (!nr_linfo || insn_off >= prog->len)
232 return NULL;
233
234 linfo = prog->aux->linfo;
235 for (i = 1; i < nr_linfo; i++)
236 if (insn_off < linfo[i].insn_off)
237 break;
238
239 return &linfo[i - 1];
240}
241
77d2e05a
MKL
242void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
243 va_list args)
cbd35700 244{
a2a7d570 245 unsigned int n;
cbd35700 246
a2a7d570 247 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
a2a7d570
JK
248
249 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
250 "verifier log line truncated - local buffer too short\n");
251
252 n = min(log->len_total - log->len_used - 1, n);
253 log->kbuf[n] = '\0';
254
255 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
256 log->len_used += n;
257 else
258 log->ubuf = NULL;
cbd35700 259}
abe08840
JO
260
261/* log_level controls verbosity level of eBPF verifier.
262 * bpf_verifier_log_write() is used to dump the verification trace to the log,
263 * so the user can figure out what's wrong with the program
430e68d1 264 */
abe08840
JO
265__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
266 const char *fmt, ...)
267{
268 va_list args;
269
77d2e05a
MKL
270 if (!bpf_verifier_log_needed(&env->log))
271 return;
272
abe08840 273 va_start(args, fmt);
77d2e05a 274 bpf_verifier_vlog(&env->log, fmt, args);
abe08840
JO
275 va_end(args);
276}
277EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
278
279__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
280{
77d2e05a 281 struct bpf_verifier_env *env = private_data;
abe08840
JO
282 va_list args;
283
77d2e05a
MKL
284 if (!bpf_verifier_log_needed(&env->log))
285 return;
286
abe08840 287 va_start(args, fmt);
77d2e05a 288 bpf_verifier_vlog(&env->log, fmt, args);
abe08840
JO
289 va_end(args);
290}
cbd35700 291
d9762e84
MKL
292static const char *ltrim(const char *s)
293{
294 while (isspace(*s))
295 s++;
296
297 return s;
298}
299
300__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
301 u32 insn_off,
302 const char *prefix_fmt, ...)
303{
304 const struct bpf_line_info *linfo;
305
306 if (!bpf_verifier_log_needed(&env->log))
307 return;
308
309 linfo = find_linfo(env, insn_off);
310 if (!linfo || linfo == env->prev_linfo)
311 return;
312
313 if (prefix_fmt) {
314 va_list args;
315
316 va_start(args, prefix_fmt);
317 bpf_verifier_vlog(&env->log, prefix_fmt, args);
318 va_end(args);
319 }
320
321 verbose(env, "%s\n",
322 ltrim(btf_name_by_offset(env->prog->aux->btf,
323 linfo->line_off)));
324
325 env->prev_linfo = linfo;
326}
327
de8f3a83
DB
328static bool type_is_pkt_pointer(enum bpf_reg_type type)
329{
330 return type == PTR_TO_PACKET ||
331 type == PTR_TO_PACKET_META;
332}
333
46f8bc92
MKL
334static bool type_is_sk_pointer(enum bpf_reg_type type)
335{
336 return type == PTR_TO_SOCKET ||
655a51e5
MKL
337 type == PTR_TO_SOCK_COMMON ||
338 type == PTR_TO_TCP_SOCK;
46f8bc92
MKL
339}
340
840b9615
JS
341static bool reg_type_may_be_null(enum bpf_reg_type type)
342{
fd978bf7 343 return type == PTR_TO_MAP_VALUE_OR_NULL ||
46f8bc92 344 type == PTR_TO_SOCKET_OR_NULL ||
655a51e5
MKL
345 type == PTR_TO_SOCK_COMMON_OR_NULL ||
346 type == PTR_TO_TCP_SOCK_OR_NULL;
fd978bf7
JS
347}
348
d83525ca
AS
349static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
350{
351 return reg->type == PTR_TO_MAP_VALUE &&
352 map_value_has_spin_lock(reg->map_ptr);
353}
354
1b986589 355static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
fd978bf7 356{
1b986589 357 return type == ARG_PTR_TO_SOCK_COMMON;
fd978bf7
JS
358}
359
360/* Determine whether the function releases some resources allocated by another
361 * function call. The first reference type argument will be assumed to be
362 * released by release_reference().
363 */
364static bool is_release_function(enum bpf_func_id func_id)
365{
6acc9b43 366 return func_id == BPF_FUNC_sk_release;
840b9615
JS
367}
368
46f8bc92
MKL
369static bool is_acquire_function(enum bpf_func_id func_id)
370{
371 return func_id == BPF_FUNC_sk_lookup_tcp ||
372 func_id == BPF_FUNC_sk_lookup_udp;
373}
374
1b986589
MKL
375static bool is_ptr_cast_function(enum bpf_func_id func_id)
376{
377 return func_id == BPF_FUNC_tcp_sock ||
378 func_id == BPF_FUNC_sk_fullsock;
379}
380
17a52670
AS
381/* string representation of 'enum bpf_reg_type' */
382static const char * const reg_type_str[] = {
383 [NOT_INIT] = "?",
f1174f77 384 [SCALAR_VALUE] = "inv",
17a52670
AS
385 [PTR_TO_CTX] = "ctx",
386 [CONST_PTR_TO_MAP] = "map_ptr",
387 [PTR_TO_MAP_VALUE] = "map_value",
388 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
17a52670 389 [PTR_TO_STACK] = "fp",
969bf05e 390 [PTR_TO_PACKET] = "pkt",
de8f3a83 391 [PTR_TO_PACKET_META] = "pkt_meta",
969bf05e 392 [PTR_TO_PACKET_END] = "pkt_end",
d58e468b 393 [PTR_TO_FLOW_KEYS] = "flow_keys",
c64b7983
JS
394 [PTR_TO_SOCKET] = "sock",
395 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
46f8bc92
MKL
396 [PTR_TO_SOCK_COMMON] = "sock_common",
397 [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
655a51e5
MKL
398 [PTR_TO_TCP_SOCK] = "tcp_sock",
399 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
17a52670
AS
400};
401
8efea21d
EC
402static char slot_type_char[] = {
403 [STACK_INVALID] = '?',
404 [STACK_SPILL] = 'r',
405 [STACK_MISC] = 'm',
406 [STACK_ZERO] = '0',
407};
408
4e92024a
AS
409static void print_liveness(struct bpf_verifier_env *env,
410 enum bpf_reg_liveness live)
411{
9242b5f5 412 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
4e92024a
AS
413 verbose(env, "_");
414 if (live & REG_LIVE_READ)
415 verbose(env, "r");
416 if (live & REG_LIVE_WRITTEN)
417 verbose(env, "w");
9242b5f5
AS
418 if (live & REG_LIVE_DONE)
419 verbose(env, "D");
4e92024a
AS
420}
421
f4d7e40a
AS
422static struct bpf_func_state *func(struct bpf_verifier_env *env,
423 const struct bpf_reg_state *reg)
424{
425 struct bpf_verifier_state *cur = env->cur_state;
426
427 return cur->frame[reg->frameno];
428}
429
61bd5218 430static void print_verifier_state(struct bpf_verifier_env *env,
f4d7e40a 431 const struct bpf_func_state *state)
17a52670 432{
f4d7e40a 433 const struct bpf_reg_state *reg;
17a52670
AS
434 enum bpf_reg_type t;
435 int i;
436
f4d7e40a
AS
437 if (state->frameno)
438 verbose(env, " frame%d:", state->frameno);
17a52670 439 for (i = 0; i < MAX_BPF_REG; i++) {
1a0dc1ac
AS
440 reg = &state->regs[i];
441 t = reg->type;
17a52670
AS
442 if (t == NOT_INIT)
443 continue;
4e92024a
AS
444 verbose(env, " R%d", i);
445 print_liveness(env, reg->live);
446 verbose(env, "=%s", reg_type_str[t]);
f1174f77
EC
447 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
448 tnum_is_const(reg->var_off)) {
449 /* reg->off should be 0 for SCALAR_VALUE */
61bd5218 450 verbose(env, "%lld", reg->var_off.value + reg->off);
f4d7e40a
AS
451 if (t == PTR_TO_STACK)
452 verbose(env, ",call_%d", func(env, reg)->callsite);
f1174f77 453 } else {
1b986589
MKL
454 verbose(env, "(id=%d ref_obj_id=%d", reg->id,
455 reg->ref_obj_id);
f1174f77 456 if (t != SCALAR_VALUE)
61bd5218 457 verbose(env, ",off=%d", reg->off);
de8f3a83 458 if (type_is_pkt_pointer(t))
61bd5218 459 verbose(env, ",r=%d", reg->range);
f1174f77
EC
460 else if (t == CONST_PTR_TO_MAP ||
461 t == PTR_TO_MAP_VALUE ||
462 t == PTR_TO_MAP_VALUE_OR_NULL)
61bd5218 463 verbose(env, ",ks=%d,vs=%d",
f1174f77
EC
464 reg->map_ptr->key_size,
465 reg->map_ptr->value_size);
7d1238f2
EC
466 if (tnum_is_const(reg->var_off)) {
467 /* Typically an immediate SCALAR_VALUE, but
468 * could be a pointer whose offset is too big
469 * for reg->off
470 */
61bd5218 471 verbose(env, ",imm=%llx", reg->var_off.value);
7d1238f2
EC
472 } else {
473 if (reg->smin_value != reg->umin_value &&
474 reg->smin_value != S64_MIN)
61bd5218 475 verbose(env, ",smin_value=%lld",
7d1238f2
EC
476 (long long)reg->smin_value);
477 if (reg->smax_value != reg->umax_value &&
478 reg->smax_value != S64_MAX)
61bd5218 479 verbose(env, ",smax_value=%lld",
7d1238f2
EC
480 (long long)reg->smax_value);
481 if (reg->umin_value != 0)
61bd5218 482 verbose(env, ",umin_value=%llu",
7d1238f2
EC
483 (unsigned long long)reg->umin_value);
484 if (reg->umax_value != U64_MAX)
61bd5218 485 verbose(env, ",umax_value=%llu",
7d1238f2
EC
486 (unsigned long long)reg->umax_value);
487 if (!tnum_is_unknown(reg->var_off)) {
488 char tn_buf[48];
f1174f77 489
7d1238f2 490 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 491 verbose(env, ",var_off=%s", tn_buf);
7d1238f2 492 }
f1174f77 493 }
61bd5218 494 verbose(env, ")");
f1174f77 495 }
17a52670 496 }
638f5b90 497 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
8efea21d
EC
498 char types_buf[BPF_REG_SIZE + 1];
499 bool valid = false;
500 int j;
501
502 for (j = 0; j < BPF_REG_SIZE; j++) {
503 if (state->stack[i].slot_type[j] != STACK_INVALID)
504 valid = true;
505 types_buf[j] = slot_type_char[
506 state->stack[i].slot_type[j]];
507 }
508 types_buf[BPF_REG_SIZE] = 0;
509 if (!valid)
510 continue;
511 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
512 print_liveness(env, state->stack[i].spilled_ptr.live);
513 if (state->stack[i].slot_type[0] == STACK_SPILL)
4e92024a 514 verbose(env, "=%s",
638f5b90 515 reg_type_str[state->stack[i].spilled_ptr.type]);
8efea21d
EC
516 else
517 verbose(env, "=%s", types_buf);
17a52670 518 }
fd978bf7
JS
519 if (state->acquired_refs && state->refs[0].id) {
520 verbose(env, " refs=%d", state->refs[0].id);
521 for (i = 1; i < state->acquired_refs; i++)
522 if (state->refs[i].id)
523 verbose(env, ",%d", state->refs[i].id);
524 }
61bd5218 525 verbose(env, "\n");
17a52670
AS
526}
527
84dbf350
JS
528#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
529static int copy_##NAME##_state(struct bpf_func_state *dst, \
530 const struct bpf_func_state *src) \
531{ \
532 if (!src->FIELD) \
533 return 0; \
534 if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
535 /* internal bug, make state invalid to reject the program */ \
536 memset(dst, 0, sizeof(*dst)); \
537 return -EFAULT; \
538 } \
539 memcpy(dst->FIELD, src->FIELD, \
540 sizeof(*src->FIELD) * (src->COUNT / SIZE)); \
541 return 0; \
638f5b90 542}
fd978bf7
JS
543/* copy_reference_state() */
544COPY_STATE_FN(reference, acquired_refs, refs, 1)
84dbf350
JS
545/* copy_stack_state() */
546COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
547#undef COPY_STATE_FN
548
549#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
550static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
551 bool copy_old) \
552{ \
553 u32 old_size = state->COUNT; \
554 struct bpf_##NAME##_state *new_##FIELD; \
555 int slot = size / SIZE; \
556 \
557 if (size <= old_size || !size) { \
558 if (copy_old) \
559 return 0; \
560 state->COUNT = slot * SIZE; \
561 if (!size && old_size) { \
562 kfree(state->FIELD); \
563 state->FIELD = NULL; \
564 } \
565 return 0; \
566 } \
567 new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
568 GFP_KERNEL); \
569 if (!new_##FIELD) \
570 return -ENOMEM; \
571 if (copy_old) { \
572 if (state->FIELD) \
573 memcpy(new_##FIELD, state->FIELD, \
574 sizeof(*new_##FIELD) * (old_size / SIZE)); \
575 memset(new_##FIELD + old_size / SIZE, 0, \
576 sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
577 } \
578 state->COUNT = slot * SIZE; \
579 kfree(state->FIELD); \
580 state->FIELD = new_##FIELD; \
581 return 0; \
582}
fd978bf7
JS
583/* realloc_reference_state() */
584REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
84dbf350
JS
585/* realloc_stack_state() */
586REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
587#undef REALLOC_STATE_FN
638f5b90
AS
588
589/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
590 * make it consume minimal amount of memory. check_stack_write() access from
f4d7e40a 591 * the program calls into realloc_func_state() to grow the stack size.
84dbf350
JS
592 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
593 * which realloc_stack_state() copies over. It points to previous
594 * bpf_verifier_state which is never reallocated.
638f5b90 595 */
fd978bf7
JS
596static int realloc_func_state(struct bpf_func_state *state, int stack_size,
597 int refs_size, bool copy_old)
638f5b90 598{
fd978bf7
JS
599 int err = realloc_reference_state(state, refs_size, copy_old);
600 if (err)
601 return err;
602 return realloc_stack_state(state, stack_size, copy_old);
603}
604
605/* Acquire a pointer id from the env and update the state->refs to include
606 * this new pointer reference.
607 * On success, returns a valid pointer id to associate with the register
608 * On failure, returns a negative errno.
638f5b90 609 */
fd978bf7 610static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
638f5b90 611{
fd978bf7
JS
612 struct bpf_func_state *state = cur_func(env);
613 int new_ofs = state->acquired_refs;
614 int id, err;
615
616 err = realloc_reference_state(state, state->acquired_refs + 1, true);
617 if (err)
618 return err;
619 id = ++env->id_gen;
620 state->refs[new_ofs].id = id;
621 state->refs[new_ofs].insn_idx = insn_idx;
638f5b90 622
fd978bf7
JS
623 return id;
624}
625
626/* release function corresponding to acquire_reference_state(). Idempotent. */
46f8bc92 627static int release_reference_state(struct bpf_func_state *state, int ptr_id)
fd978bf7
JS
628{
629 int i, last_idx;
630
fd978bf7
JS
631 last_idx = state->acquired_refs - 1;
632 for (i = 0; i < state->acquired_refs; i++) {
633 if (state->refs[i].id == ptr_id) {
634 if (last_idx && i != last_idx)
635 memcpy(&state->refs[i], &state->refs[last_idx],
636 sizeof(*state->refs));
637 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
638 state->acquired_refs--;
638f5b90 639 return 0;
638f5b90 640 }
638f5b90 641 }
46f8bc92 642 return -EINVAL;
fd978bf7
JS
643}
644
645static int transfer_reference_state(struct bpf_func_state *dst,
646 struct bpf_func_state *src)
647{
648 int err = realloc_reference_state(dst, src->acquired_refs, false);
649 if (err)
650 return err;
651 err = copy_reference_state(dst, src);
652 if (err)
653 return err;
638f5b90
AS
654 return 0;
655}
656
f4d7e40a
AS
657static void free_func_state(struct bpf_func_state *state)
658{
5896351e
AS
659 if (!state)
660 return;
fd978bf7 661 kfree(state->refs);
f4d7e40a
AS
662 kfree(state->stack);
663 kfree(state);
664}
665
1969db47
AS
666static void free_verifier_state(struct bpf_verifier_state *state,
667 bool free_self)
638f5b90 668{
f4d7e40a
AS
669 int i;
670
671 for (i = 0; i <= state->curframe; i++) {
672 free_func_state(state->frame[i]);
673 state->frame[i] = NULL;
674 }
1969db47
AS
675 if (free_self)
676 kfree(state);
638f5b90
AS
677}
678
679/* copy verifier state from src to dst growing dst stack space
680 * when necessary to accommodate larger src stack
681 */
f4d7e40a
AS
682static int copy_func_state(struct bpf_func_state *dst,
683 const struct bpf_func_state *src)
638f5b90
AS
684{
685 int err;
686
fd978bf7
JS
687 err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
688 false);
689 if (err)
690 return err;
691 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
692 err = copy_reference_state(dst, src);
638f5b90
AS
693 if (err)
694 return err;
638f5b90
AS
695 return copy_stack_state(dst, src);
696}
697
f4d7e40a
AS
698static int copy_verifier_state(struct bpf_verifier_state *dst_state,
699 const struct bpf_verifier_state *src)
700{
701 struct bpf_func_state *dst;
702 int i, err;
703
704 /* if dst has more stack frames then src frame, free them */
705 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
706 free_func_state(dst_state->frame[i]);
707 dst_state->frame[i] = NULL;
708 }
979d63d5 709 dst_state->speculative = src->speculative;
f4d7e40a 710 dst_state->curframe = src->curframe;
d83525ca 711 dst_state->active_spin_lock = src->active_spin_lock;
f4d7e40a
AS
712 for (i = 0; i <= src->curframe; i++) {
713 dst = dst_state->frame[i];
714 if (!dst) {
715 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
716 if (!dst)
717 return -ENOMEM;
718 dst_state->frame[i] = dst;
719 }
720 err = copy_func_state(dst, src->frame[i]);
721 if (err)
722 return err;
723 }
724 return 0;
725}
726
638f5b90
AS
727static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
728 int *insn_idx)
729{
730 struct bpf_verifier_state *cur = env->cur_state;
731 struct bpf_verifier_stack_elem *elem, *head = env->head;
732 int err;
17a52670
AS
733
734 if (env->head == NULL)
638f5b90 735 return -ENOENT;
17a52670 736
638f5b90
AS
737 if (cur) {
738 err = copy_verifier_state(cur, &head->st);
739 if (err)
740 return err;
741 }
742 if (insn_idx)
743 *insn_idx = head->insn_idx;
17a52670 744 if (prev_insn_idx)
638f5b90
AS
745 *prev_insn_idx = head->prev_insn_idx;
746 elem = head->next;
1969db47 747 free_verifier_state(&head->st, false);
638f5b90 748 kfree(head);
17a52670
AS
749 env->head = elem;
750 env->stack_size--;
638f5b90 751 return 0;
17a52670
AS
752}
753
58e2af8b 754static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
979d63d5
DB
755 int insn_idx, int prev_insn_idx,
756 bool speculative)
17a52670 757{
638f5b90 758 struct bpf_verifier_state *cur = env->cur_state;
58e2af8b 759 struct bpf_verifier_stack_elem *elem;
638f5b90 760 int err;
17a52670 761
638f5b90 762 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
17a52670
AS
763 if (!elem)
764 goto err;
765
17a52670
AS
766 elem->insn_idx = insn_idx;
767 elem->prev_insn_idx = prev_insn_idx;
768 elem->next = env->head;
769 env->head = elem;
770 env->stack_size++;
1969db47
AS
771 err = copy_verifier_state(&elem->st, cur);
772 if (err)
773 goto err;
979d63d5 774 elem->st.speculative |= speculative;
07016151 775 if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
61bd5218 776 verbose(env, "BPF program is too complex\n");
17a52670
AS
777 goto err;
778 }
779 return &elem->st;
780err:
5896351e
AS
781 free_verifier_state(env->cur_state, true);
782 env->cur_state = NULL;
17a52670 783 /* pop all elements and return */
638f5b90 784 while (!pop_stack(env, NULL, NULL));
17a52670
AS
785 return NULL;
786}
787
788#define CALLER_SAVED_REGS 6
789static const int caller_saved[CALLER_SAVED_REGS] = {
790 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
791};
792
f1174f77
EC
793static void __mark_reg_not_init(struct bpf_reg_state *reg);
794
b03c9f9f
EC
795/* Mark the unknown part of a register (variable offset or scalar value) as
796 * known to have the value @imm.
797 */
798static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
799{
a9c676bc
AS
800 /* Clear id, off, and union(map_ptr, range) */
801 memset(((u8 *)reg) + sizeof(reg->type), 0,
802 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
b03c9f9f
EC
803 reg->var_off = tnum_const(imm);
804 reg->smin_value = (s64)imm;
805 reg->smax_value = (s64)imm;
806 reg->umin_value = imm;
807 reg->umax_value = imm;
808}
809
f1174f77
EC
810/* Mark the 'variable offset' part of a register as zero. This should be
811 * used only on registers holding a pointer type.
812 */
813static void __mark_reg_known_zero(struct bpf_reg_state *reg)
a9789ef9 814{
b03c9f9f 815 __mark_reg_known(reg, 0);
f1174f77 816}
a9789ef9 817
cc2b14d5
AS
818static void __mark_reg_const_zero(struct bpf_reg_state *reg)
819{
820 __mark_reg_known(reg, 0);
cc2b14d5
AS
821 reg->type = SCALAR_VALUE;
822}
823
61bd5218
JK
824static void mark_reg_known_zero(struct bpf_verifier_env *env,
825 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
826{
827 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 828 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
f1174f77
EC
829 /* Something bad happened, let's kill all regs */
830 for (regno = 0; regno < MAX_BPF_REG; regno++)
831 __mark_reg_not_init(regs + regno);
832 return;
833 }
834 __mark_reg_known_zero(regs + regno);
835}
836
de8f3a83
DB
837static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
838{
839 return type_is_pkt_pointer(reg->type);
840}
841
842static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
843{
844 return reg_is_pkt_pointer(reg) ||
845 reg->type == PTR_TO_PACKET_END;
846}
847
848/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
849static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
850 enum bpf_reg_type which)
851{
852 /* The register can already have a range from prior markings.
853 * This is fine as long as it hasn't been advanced from its
854 * origin.
855 */
856 return reg->type == which &&
857 reg->id == 0 &&
858 reg->off == 0 &&
859 tnum_equals_const(reg->var_off, 0);
860}
861
b03c9f9f
EC
862/* Attempts to improve min/max values based on var_off information */
863static void __update_reg_bounds(struct bpf_reg_state *reg)
864{
865 /* min signed is max(sign bit) | min(other bits) */
866 reg->smin_value = max_t(s64, reg->smin_value,
867 reg->var_off.value | (reg->var_off.mask & S64_MIN));
868 /* max signed is min(sign bit) | max(other bits) */
869 reg->smax_value = min_t(s64, reg->smax_value,
870 reg->var_off.value | (reg->var_off.mask & S64_MAX));
871 reg->umin_value = max(reg->umin_value, reg->var_off.value);
872 reg->umax_value = min(reg->umax_value,
873 reg->var_off.value | reg->var_off.mask);
874}
875
876/* Uses signed min/max values to inform unsigned, and vice-versa */
877static void __reg_deduce_bounds(struct bpf_reg_state *reg)
878{
879 /* Learn sign from signed bounds.
880 * If we cannot cross the sign boundary, then signed and unsigned bounds
881 * are the same, so combine. This works even in the negative case, e.g.
882 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
883 */
884 if (reg->smin_value >= 0 || reg->smax_value < 0) {
885 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
886 reg->umin_value);
887 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
888 reg->umax_value);
889 return;
890 }
891 /* Learn sign from unsigned bounds. Signed bounds cross the sign
892 * boundary, so we must be careful.
893 */
894 if ((s64)reg->umax_value >= 0) {
895 /* Positive. We can't learn anything from the smin, but smax
896 * is positive, hence safe.
897 */
898 reg->smin_value = reg->umin_value;
899 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
900 reg->umax_value);
901 } else if ((s64)reg->umin_value < 0) {
902 /* Negative. We can't learn anything from the smax, but smin
903 * is negative, hence safe.
904 */
905 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
906 reg->umin_value);
907 reg->smax_value = reg->umax_value;
908 }
909}
910
911/* Attempts to improve var_off based on unsigned min/max information */
912static void __reg_bound_offset(struct bpf_reg_state *reg)
913{
914 reg->var_off = tnum_intersect(reg->var_off,
915 tnum_range(reg->umin_value,
916 reg->umax_value));
917}
918
919/* Reset the min/max bounds of a register */
920static void __mark_reg_unbounded(struct bpf_reg_state *reg)
921{
922 reg->smin_value = S64_MIN;
923 reg->smax_value = S64_MAX;
924 reg->umin_value = 0;
925 reg->umax_value = U64_MAX;
926}
927
f1174f77
EC
928/* Mark a register as having a completely unknown (scalar) value. */
929static void __mark_reg_unknown(struct bpf_reg_state *reg)
930{
a9c676bc
AS
931 /*
932 * Clear type, id, off, and union(map_ptr, range) and
933 * padding between 'type' and union
934 */
935 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
f1174f77 936 reg->type = SCALAR_VALUE;
f1174f77 937 reg->var_off = tnum_unknown;
f4d7e40a 938 reg->frameno = 0;
b03c9f9f 939 __mark_reg_unbounded(reg);
f1174f77
EC
940}
941
61bd5218
JK
942static void mark_reg_unknown(struct bpf_verifier_env *env,
943 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
944{
945 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 946 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
19ceb417
AS
947 /* Something bad happened, let's kill all regs except FP */
948 for (regno = 0; regno < BPF_REG_FP; regno++)
f1174f77
EC
949 __mark_reg_not_init(regs + regno);
950 return;
951 }
952 __mark_reg_unknown(regs + regno);
953}
954
955static void __mark_reg_not_init(struct bpf_reg_state *reg)
956{
957 __mark_reg_unknown(reg);
958 reg->type = NOT_INIT;
959}
960
61bd5218
JK
961static void mark_reg_not_init(struct bpf_verifier_env *env,
962 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
963{
964 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 965 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
19ceb417
AS
966 /* Something bad happened, let's kill all regs except FP */
967 for (regno = 0; regno < BPF_REG_FP; regno++)
f1174f77
EC
968 __mark_reg_not_init(regs + regno);
969 return;
970 }
971 __mark_reg_not_init(regs + regno);
a9789ef9
DB
972}
973
61bd5218 974static void init_reg_state(struct bpf_verifier_env *env,
f4d7e40a 975 struct bpf_func_state *state)
17a52670 976{
f4d7e40a 977 struct bpf_reg_state *regs = state->regs;
17a52670
AS
978 int i;
979
dc503a8a 980 for (i = 0; i < MAX_BPF_REG; i++) {
61bd5218 981 mark_reg_not_init(env, regs, i);
dc503a8a 982 regs[i].live = REG_LIVE_NONE;
679c782d 983 regs[i].parent = NULL;
dc503a8a 984 }
17a52670
AS
985
986 /* frame pointer */
f1174f77 987 regs[BPF_REG_FP].type = PTR_TO_STACK;
61bd5218 988 mark_reg_known_zero(env, regs, BPF_REG_FP);
f4d7e40a 989 regs[BPF_REG_FP].frameno = state->frameno;
17a52670
AS
990
991 /* 1st arg to a function */
992 regs[BPF_REG_1].type = PTR_TO_CTX;
61bd5218 993 mark_reg_known_zero(env, regs, BPF_REG_1);
6760bf2d
DB
994}
995
f4d7e40a
AS
996#define BPF_MAIN_FUNC (-1)
997static void init_func_state(struct bpf_verifier_env *env,
998 struct bpf_func_state *state,
999 int callsite, int frameno, int subprogno)
1000{
1001 state->callsite = callsite;
1002 state->frameno = frameno;
1003 state->subprogno = subprogno;
1004 init_reg_state(env, state);
1005}
1006
17a52670
AS
1007enum reg_arg_type {
1008 SRC_OP, /* register is used as source operand */
1009 DST_OP, /* register is used as destination operand */
1010 DST_OP_NO_MARK /* same as above, check only, don't mark */
1011};
1012
cc8b0b92
AS
1013static int cmp_subprogs(const void *a, const void *b)
1014{
9c8105bd
JW
1015 return ((struct bpf_subprog_info *)a)->start -
1016 ((struct bpf_subprog_info *)b)->start;
cc8b0b92
AS
1017}
1018
1019static int find_subprog(struct bpf_verifier_env *env, int off)
1020{
9c8105bd 1021 struct bpf_subprog_info *p;
cc8b0b92 1022
9c8105bd
JW
1023 p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1024 sizeof(env->subprog_info[0]), cmp_subprogs);
cc8b0b92
AS
1025 if (!p)
1026 return -ENOENT;
9c8105bd 1027 return p - env->subprog_info;
cc8b0b92
AS
1028
1029}
1030
1031static int add_subprog(struct bpf_verifier_env *env, int off)
1032{
1033 int insn_cnt = env->prog->len;
1034 int ret;
1035
1036 if (off >= insn_cnt || off < 0) {
1037 verbose(env, "call to invalid destination\n");
1038 return -EINVAL;
1039 }
1040 ret = find_subprog(env, off);
1041 if (ret >= 0)
1042 return 0;
4cb3d99c 1043 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
cc8b0b92
AS
1044 verbose(env, "too many subprograms\n");
1045 return -E2BIG;
1046 }
9c8105bd
JW
1047 env->subprog_info[env->subprog_cnt++].start = off;
1048 sort(env->subprog_info, env->subprog_cnt,
1049 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
cc8b0b92
AS
1050 return 0;
1051}
1052
1053static int check_subprogs(struct bpf_verifier_env *env)
1054{
1055 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
9c8105bd 1056 struct bpf_subprog_info *subprog = env->subprog_info;
cc8b0b92
AS
1057 struct bpf_insn *insn = env->prog->insnsi;
1058 int insn_cnt = env->prog->len;
1059
f910cefa
JW
1060 /* Add entry function. */
1061 ret = add_subprog(env, 0);
1062 if (ret < 0)
1063 return ret;
1064
cc8b0b92
AS
1065 /* determine subprog starts. The end is one before the next starts */
1066 for (i = 0; i < insn_cnt; i++) {
1067 if (insn[i].code != (BPF_JMP | BPF_CALL))
1068 continue;
1069 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1070 continue;
1071 if (!env->allow_ptr_leaks) {
1072 verbose(env, "function calls to other bpf functions are allowed for root only\n");
1073 return -EPERM;
1074 }
cc8b0b92
AS
1075 ret = add_subprog(env, i + insn[i].imm + 1);
1076 if (ret < 0)
1077 return ret;
1078 }
1079
4cb3d99c
JW
1080 /* Add a fake 'exit' subprog which could simplify subprog iteration
1081 * logic. 'subprog_cnt' should not be increased.
1082 */
1083 subprog[env->subprog_cnt].start = insn_cnt;
1084
cc8b0b92
AS
1085 if (env->log.level > 1)
1086 for (i = 0; i < env->subprog_cnt; i++)
9c8105bd 1087 verbose(env, "func#%d @%d\n", i, subprog[i].start);
cc8b0b92
AS
1088
1089 /* now check that all jumps are within the same subprog */
4cb3d99c
JW
1090 subprog_start = subprog[cur_subprog].start;
1091 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
1092 for (i = 0; i < insn_cnt; i++) {
1093 u8 code = insn[i].code;
1094
092ed096 1095 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
cc8b0b92
AS
1096 goto next;
1097 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1098 goto next;
1099 off = i + insn[i].off + 1;
1100 if (off < subprog_start || off >= subprog_end) {
1101 verbose(env, "jump out of range from insn %d to %d\n", i, off);
1102 return -EINVAL;
1103 }
1104next:
1105 if (i == subprog_end - 1) {
1106 /* to avoid fall-through from one subprog into another
1107 * the last insn of the subprog should be either exit
1108 * or unconditional jump back
1109 */
1110 if (code != (BPF_JMP | BPF_EXIT) &&
1111 code != (BPF_JMP | BPF_JA)) {
1112 verbose(env, "last insn is not an exit or jmp\n");
1113 return -EINVAL;
1114 }
1115 subprog_start = subprog_end;
4cb3d99c
JW
1116 cur_subprog++;
1117 if (cur_subprog < env->subprog_cnt)
9c8105bd 1118 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
1119 }
1120 }
1121 return 0;
1122}
1123
679c782d
EC
1124/* Parentage chain of this register (or stack slot) should take care of all
1125 * issues like callee-saved registers, stack slot allocation time, etc.
1126 */
f4d7e40a 1127static int mark_reg_read(struct bpf_verifier_env *env,
679c782d
EC
1128 const struct bpf_reg_state *state,
1129 struct bpf_reg_state *parent)
f4d7e40a
AS
1130{
1131 bool writes = parent == state->parent; /* Observe write marks */
dc503a8a
EC
1132
1133 while (parent) {
1134 /* if read wasn't screened by an earlier write ... */
679c782d 1135 if (writes && state->live & REG_LIVE_WRITTEN)
dc503a8a 1136 break;
9242b5f5
AS
1137 if (parent->live & REG_LIVE_DONE) {
1138 verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1139 reg_type_str[parent->type],
1140 parent->var_off.value, parent->off);
1141 return -EFAULT;
1142 }
dc503a8a 1143 /* ... then we depend on parent's value */
679c782d 1144 parent->live |= REG_LIVE_READ;
dc503a8a
EC
1145 state = parent;
1146 parent = state->parent;
f4d7e40a 1147 writes = true;
dc503a8a 1148 }
f4d7e40a 1149 return 0;
dc503a8a
EC
1150}
1151
1152static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
17a52670
AS
1153 enum reg_arg_type t)
1154{
f4d7e40a
AS
1155 struct bpf_verifier_state *vstate = env->cur_state;
1156 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1157 struct bpf_reg_state *regs = state->regs;
dc503a8a 1158
17a52670 1159 if (regno >= MAX_BPF_REG) {
61bd5218 1160 verbose(env, "R%d is invalid\n", regno);
17a52670
AS
1161 return -EINVAL;
1162 }
1163
1164 if (t == SRC_OP) {
1165 /* check whether register used as source operand can be read */
1166 if (regs[regno].type == NOT_INIT) {
61bd5218 1167 verbose(env, "R%d !read_ok\n", regno);
17a52670
AS
1168 return -EACCES;
1169 }
679c782d
EC
1170 /* We don't need to worry about FP liveness because it's read-only */
1171 if (regno != BPF_REG_FP)
1172 return mark_reg_read(env, &regs[regno],
1173 regs[regno].parent);
17a52670
AS
1174 } else {
1175 /* check whether register used as dest operand can be written to */
1176 if (regno == BPF_REG_FP) {
61bd5218 1177 verbose(env, "frame pointer is read only\n");
17a52670
AS
1178 return -EACCES;
1179 }
dc503a8a 1180 regs[regno].live |= REG_LIVE_WRITTEN;
17a52670 1181 if (t == DST_OP)
61bd5218 1182 mark_reg_unknown(env, regs, regno);
17a52670
AS
1183 }
1184 return 0;
1185}
1186
1be7f75d
AS
1187static bool is_spillable_regtype(enum bpf_reg_type type)
1188{
1189 switch (type) {
1190 case PTR_TO_MAP_VALUE:
1191 case PTR_TO_MAP_VALUE_OR_NULL:
1192 case PTR_TO_STACK:
1193 case PTR_TO_CTX:
969bf05e 1194 case PTR_TO_PACKET:
de8f3a83 1195 case PTR_TO_PACKET_META:
969bf05e 1196 case PTR_TO_PACKET_END:
d58e468b 1197 case PTR_TO_FLOW_KEYS:
1be7f75d 1198 case CONST_PTR_TO_MAP:
c64b7983
JS
1199 case PTR_TO_SOCKET:
1200 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
1201 case PTR_TO_SOCK_COMMON:
1202 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
1203 case PTR_TO_TCP_SOCK:
1204 case PTR_TO_TCP_SOCK_OR_NULL:
1be7f75d
AS
1205 return true;
1206 default:
1207 return false;
1208 }
1209}
1210
cc2b14d5
AS
1211/* Does this register contain a constant zero? */
1212static bool register_is_null(struct bpf_reg_state *reg)
1213{
1214 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1215}
1216
17a52670
AS
1217/* check_stack_read/write functions track spill/fill of registers,
1218 * stack boundary and alignment are checked in check_mem_access()
1219 */
61bd5218 1220static int check_stack_write(struct bpf_verifier_env *env,
f4d7e40a 1221 struct bpf_func_state *state, /* func where register points to */
af86ca4e 1222 int off, int size, int value_regno, int insn_idx)
17a52670 1223{
f4d7e40a 1224 struct bpf_func_state *cur; /* state of the current function */
638f5b90 1225 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
f4d7e40a 1226 enum bpf_reg_type type;
638f5b90 1227
f4d7e40a 1228 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
fd978bf7 1229 state->acquired_refs, true);
638f5b90
AS
1230 if (err)
1231 return err;
9c399760
AS
1232 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
1233 * so it's aligned access and [off, off + size) are within stack limits
1234 */
638f5b90
AS
1235 if (!env->allow_ptr_leaks &&
1236 state->stack[spi].slot_type[0] == STACK_SPILL &&
1237 size != BPF_REG_SIZE) {
1238 verbose(env, "attempt to corrupt spilled pointer on stack\n");
1239 return -EACCES;
1240 }
17a52670 1241
f4d7e40a 1242 cur = env->cur_state->frame[env->cur_state->curframe];
17a52670 1243 if (value_regno >= 0 &&
f4d7e40a 1244 is_spillable_regtype((type = cur->regs[value_regno].type))) {
17a52670
AS
1245
1246 /* register containing pointer is being spilled into stack */
9c399760 1247 if (size != BPF_REG_SIZE) {
61bd5218 1248 verbose(env, "invalid size of register spill\n");
17a52670
AS
1249 return -EACCES;
1250 }
1251
f4d7e40a
AS
1252 if (state != cur && type == PTR_TO_STACK) {
1253 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1254 return -EINVAL;
1255 }
1256
17a52670 1257 /* save register state */
f4d7e40a 1258 state->stack[spi].spilled_ptr = cur->regs[value_regno];
638f5b90 1259 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
17a52670 1260
af86ca4e
AS
1261 for (i = 0; i < BPF_REG_SIZE; i++) {
1262 if (state->stack[spi].slot_type[i] == STACK_MISC &&
1263 !env->allow_ptr_leaks) {
1264 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
1265 int soff = (-spi - 1) * BPF_REG_SIZE;
1266
1267 /* detected reuse of integer stack slot with a pointer
1268 * which means either llvm is reusing stack slot or
1269 * an attacker is trying to exploit CVE-2018-3639
1270 * (speculative store bypass)
1271 * Have to sanitize that slot with preemptive
1272 * store of zero.
1273 */
1274 if (*poff && *poff != soff) {
1275 /* disallow programs where single insn stores
1276 * into two different stack slots, since verifier
1277 * cannot sanitize them
1278 */
1279 verbose(env,
1280 "insn %d cannot access two stack slots fp%d and fp%d",
1281 insn_idx, *poff, soff);
1282 return -EINVAL;
1283 }
1284 *poff = soff;
1285 }
638f5b90 1286 state->stack[spi].slot_type[i] = STACK_SPILL;
af86ca4e 1287 }
9c399760 1288 } else {
cc2b14d5
AS
1289 u8 type = STACK_MISC;
1290
679c782d
EC
1291 /* regular write of data into stack destroys any spilled ptr */
1292 state->stack[spi].spilled_ptr.type = NOT_INIT;
0bae2d4d
JW
1293 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
1294 if (state->stack[spi].slot_type[0] == STACK_SPILL)
1295 for (i = 0; i < BPF_REG_SIZE; i++)
1296 state->stack[spi].slot_type[i] = STACK_MISC;
9c399760 1297
cc2b14d5
AS
1298 /* only mark the slot as written if all 8 bytes were written
1299 * otherwise read propagation may incorrectly stop too soon
1300 * when stack slots are partially written.
1301 * This heuristic means that read propagation will be
1302 * conservative, since it will add reg_live_read marks
1303 * to stack slots all the way to first state when programs
1304 * writes+reads less than 8 bytes
1305 */
1306 if (size == BPF_REG_SIZE)
1307 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1308
1309 /* when we zero initialize stack slots mark them as such */
1310 if (value_regno >= 0 &&
1311 register_is_null(&cur->regs[value_regno]))
1312 type = STACK_ZERO;
1313
0bae2d4d 1314 /* Mark slots affected by this stack write. */
9c399760 1315 for (i = 0; i < size; i++)
638f5b90 1316 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
cc2b14d5 1317 type;
17a52670
AS
1318 }
1319 return 0;
1320}
1321
61bd5218 1322static int check_stack_read(struct bpf_verifier_env *env,
f4d7e40a
AS
1323 struct bpf_func_state *reg_state /* func where register points to */,
1324 int off, int size, int value_regno)
17a52670 1325{
f4d7e40a
AS
1326 struct bpf_verifier_state *vstate = env->cur_state;
1327 struct bpf_func_state *state = vstate->frame[vstate->curframe];
638f5b90
AS
1328 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1329 u8 *stype;
17a52670 1330
f4d7e40a 1331 if (reg_state->allocated_stack <= slot) {
638f5b90
AS
1332 verbose(env, "invalid read from stack off %d+0 size %d\n",
1333 off, size);
1334 return -EACCES;
1335 }
f4d7e40a 1336 stype = reg_state->stack[spi].slot_type;
17a52670 1337
638f5b90 1338 if (stype[0] == STACK_SPILL) {
9c399760 1339 if (size != BPF_REG_SIZE) {
61bd5218 1340 verbose(env, "invalid size of register spill\n");
17a52670
AS
1341 return -EACCES;
1342 }
9c399760 1343 for (i = 1; i < BPF_REG_SIZE; i++) {
638f5b90 1344 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
61bd5218 1345 verbose(env, "corrupted spill memory\n");
17a52670
AS
1346 return -EACCES;
1347 }
1348 }
1349
dc503a8a 1350 if (value_regno >= 0) {
17a52670 1351 /* restore register state from stack */
f4d7e40a 1352 state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
2f18f62e
AS
1353 /* mark reg as written since spilled pointer state likely
1354 * has its liveness marks cleared by is_state_visited()
1355 * which resets stack/reg liveness for state transitions
1356 */
1357 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
dc503a8a 1358 }
679c782d
EC
1359 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1360 reg_state->stack[spi].spilled_ptr.parent);
17a52670
AS
1361 return 0;
1362 } else {
cc2b14d5
AS
1363 int zeros = 0;
1364
17a52670 1365 for (i = 0; i < size; i++) {
cc2b14d5
AS
1366 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
1367 continue;
1368 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1369 zeros++;
1370 continue;
17a52670 1371 }
cc2b14d5
AS
1372 verbose(env, "invalid read from stack off %d+%d size %d\n",
1373 off, i, size);
1374 return -EACCES;
1375 }
679c782d
EC
1376 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1377 reg_state->stack[spi].spilled_ptr.parent);
cc2b14d5
AS
1378 if (value_regno >= 0) {
1379 if (zeros == size) {
1380 /* any size read into register is zero extended,
1381 * so the whole register == const_zero
1382 */
1383 __mark_reg_const_zero(&state->regs[value_regno]);
1384 } else {
1385 /* have read misc data from the stack */
1386 mark_reg_unknown(env, state->regs, value_regno);
1387 }
1388 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
17a52670 1389 }
17a52670
AS
1390 return 0;
1391 }
1392}
1393
e4298d25
DB
1394static int check_stack_access(struct bpf_verifier_env *env,
1395 const struct bpf_reg_state *reg,
1396 int off, int size)
1397{
1398 /* Stack accesses must be at a fixed offset, so that we
1399 * can determine what type of data were returned. See
1400 * check_stack_read().
1401 */
1402 if (!tnum_is_const(reg->var_off)) {
1403 char tn_buf[48];
1404
1405 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1406 verbose(env, "variable stack access var_off=%s off=%d size=%d",
1407 tn_buf, off, size);
1408 return -EACCES;
1409 }
1410
1411 if (off >= 0 || off < -MAX_BPF_STACK) {
1412 verbose(env, "invalid stack off=%d size=%d\n", off, size);
1413 return -EACCES;
1414 }
1415
1416 return 0;
1417}
1418
17a52670 1419/* check read/write into map element returned by bpf_map_lookup_elem() */
f1174f77 1420static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 1421 int size, bool zero_size_allowed)
17a52670 1422{
638f5b90
AS
1423 struct bpf_reg_state *regs = cur_regs(env);
1424 struct bpf_map *map = regs[regno].map_ptr;
17a52670 1425
9fd29c08
YS
1426 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1427 off + size > map->value_size) {
61bd5218 1428 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
17a52670
AS
1429 map->value_size, off, size);
1430 return -EACCES;
1431 }
1432 return 0;
1433}
1434
f1174f77
EC
1435/* check read/write into a map element with possible variable offset */
1436static int check_map_access(struct bpf_verifier_env *env, u32 regno,
9fd29c08 1437 int off, int size, bool zero_size_allowed)
dbcfe5f7 1438{
f4d7e40a
AS
1439 struct bpf_verifier_state *vstate = env->cur_state;
1440 struct bpf_func_state *state = vstate->frame[vstate->curframe];
dbcfe5f7
GB
1441 struct bpf_reg_state *reg = &state->regs[regno];
1442 int err;
1443
f1174f77
EC
1444 /* We may have adjusted the register to this map value, so we
1445 * need to try adding each of min_value and max_value to off
1446 * to make sure our theoretical access will be safe.
dbcfe5f7 1447 */
61bd5218
JK
1448 if (env->log.level)
1449 print_verifier_state(env, state);
b7137c4e 1450
dbcfe5f7
GB
1451 /* The minimum value is only important with signed
1452 * comparisons where we can't assume the floor of a
1453 * value is 0. If we are using signed variables for our
1454 * index'es we need to make sure that whatever we use
1455 * will have a set floor within our range.
1456 */
b7137c4e
DB
1457 if (reg->smin_value < 0 &&
1458 (reg->smin_value == S64_MIN ||
1459 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
1460 reg->smin_value + off < 0)) {
61bd5218 1461 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
dbcfe5f7
GB
1462 regno);
1463 return -EACCES;
1464 }
9fd29c08
YS
1465 err = __check_map_access(env, regno, reg->smin_value + off, size,
1466 zero_size_allowed);
dbcfe5f7 1467 if (err) {
61bd5218
JK
1468 verbose(env, "R%d min value is outside of the array range\n",
1469 regno);
dbcfe5f7
GB
1470 return err;
1471 }
1472
b03c9f9f
EC
1473 /* If we haven't set a max value then we need to bail since we can't be
1474 * sure we won't do bad things.
1475 * If reg->umax_value + off could overflow, treat that as unbounded too.
dbcfe5f7 1476 */
b03c9f9f 1477 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
61bd5218 1478 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
dbcfe5f7
GB
1479 regno);
1480 return -EACCES;
1481 }
9fd29c08
YS
1482 err = __check_map_access(env, regno, reg->umax_value + off, size,
1483 zero_size_allowed);
f1174f77 1484 if (err)
61bd5218
JK
1485 verbose(env, "R%d max value is outside of the array range\n",
1486 regno);
d83525ca
AS
1487
1488 if (map_value_has_spin_lock(reg->map_ptr)) {
1489 u32 lock = reg->map_ptr->spin_lock_off;
1490
1491 /* if any part of struct bpf_spin_lock can be touched by
1492 * load/store reject this program.
1493 * To check that [x1, x2) overlaps with [y1, y2)
1494 * it is sufficient to check x1 < y2 && y1 < x2.
1495 */
1496 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
1497 lock < reg->umax_value + off + size) {
1498 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
1499 return -EACCES;
1500 }
1501 }
f1174f77 1502 return err;
dbcfe5f7
GB
1503}
1504
969bf05e
AS
1505#define MAX_PACKET_OFF 0xffff
1506
58e2af8b 1507static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3a0af8fd
TG
1508 const struct bpf_call_arg_meta *meta,
1509 enum bpf_access_type t)
4acf6c0b 1510{
36bbef52 1511 switch (env->prog->type) {
5d66fa7d 1512 /* Program types only with direct read access go here! */
3a0af8fd
TG
1513 case BPF_PROG_TYPE_LWT_IN:
1514 case BPF_PROG_TYPE_LWT_OUT:
004d4b27 1515 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2dbb9b9e 1516 case BPF_PROG_TYPE_SK_REUSEPORT:
5d66fa7d 1517 case BPF_PROG_TYPE_FLOW_DISSECTOR:
d5563d36 1518 case BPF_PROG_TYPE_CGROUP_SKB:
3a0af8fd
TG
1519 if (t == BPF_WRITE)
1520 return false;
7e57fbb2 1521 /* fallthrough */
5d66fa7d
DB
1522
1523 /* Program types with direct read + write access go here! */
36bbef52
DB
1524 case BPF_PROG_TYPE_SCHED_CLS:
1525 case BPF_PROG_TYPE_SCHED_ACT:
4acf6c0b 1526 case BPF_PROG_TYPE_XDP:
3a0af8fd 1527 case BPF_PROG_TYPE_LWT_XMIT:
8a31db56 1528 case BPF_PROG_TYPE_SK_SKB:
4f738adb 1529 case BPF_PROG_TYPE_SK_MSG:
36bbef52
DB
1530 if (meta)
1531 return meta->pkt_access;
1532
1533 env->seen_direct_write = true;
4acf6c0b
BB
1534 return true;
1535 default:
1536 return false;
1537 }
1538}
1539
f1174f77 1540static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
9fd29c08 1541 int off, int size, bool zero_size_allowed)
969bf05e 1542{
638f5b90 1543 struct bpf_reg_state *regs = cur_regs(env);
58e2af8b 1544 struct bpf_reg_state *reg = &regs[regno];
969bf05e 1545
9fd29c08
YS
1546 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1547 (u64)off + size > reg->range) {
61bd5218 1548 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
d91b28ed 1549 off, size, regno, reg->id, reg->off, reg->range);
969bf05e
AS
1550 return -EACCES;
1551 }
1552 return 0;
1553}
1554
f1174f77 1555static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 1556 int size, bool zero_size_allowed)
f1174f77 1557{
638f5b90 1558 struct bpf_reg_state *regs = cur_regs(env);
f1174f77
EC
1559 struct bpf_reg_state *reg = &regs[regno];
1560 int err;
1561
1562 /* We may have added a variable offset to the packet pointer; but any
1563 * reg->range we have comes after that. We are only checking the fixed
1564 * offset.
1565 */
1566
1567 /* We don't allow negative numbers, because we aren't tracking enough
1568 * detail to prove they're safe.
1569 */
b03c9f9f 1570 if (reg->smin_value < 0) {
61bd5218 1571 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
f1174f77
EC
1572 regno);
1573 return -EACCES;
1574 }
9fd29c08 1575 err = __check_packet_access(env, regno, off, size, zero_size_allowed);
f1174f77 1576 if (err) {
61bd5218 1577 verbose(env, "R%d offset is outside of the packet\n", regno);
f1174f77
EC
1578 return err;
1579 }
e647815a
JW
1580
1581 /* __check_packet_access has made sure "off + size - 1" is within u16.
1582 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
1583 * otherwise find_good_pkt_pointers would have refused to set range info
1584 * that __check_packet_access would have rejected this pkt access.
1585 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
1586 */
1587 env->prog->aux->max_pkt_offset =
1588 max_t(u32, env->prog->aux->max_pkt_offset,
1589 off + reg->umax_value + size - 1);
1590
f1174f77
EC
1591 return err;
1592}
1593
1594/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
31fd8581 1595static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
19de99f7 1596 enum bpf_access_type t, enum bpf_reg_type *reg_type)
17a52670 1597{
f96da094
DB
1598 struct bpf_insn_access_aux info = {
1599 .reg_type = *reg_type,
1600 };
31fd8581 1601
4f9218aa 1602 if (env->ops->is_valid_access &&
5e43f899 1603 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
f96da094
DB
1604 /* A non zero info.ctx_field_size indicates that this field is a
1605 * candidate for later verifier transformation to load the whole
1606 * field and then apply a mask when accessed with a narrower
1607 * access than actual ctx access size. A zero info.ctx_field_size
1608 * will only allow for whole field access and rejects any other
1609 * type of narrower access.
31fd8581 1610 */
23994631 1611 *reg_type = info.reg_type;
31fd8581 1612
4f9218aa 1613 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
32bbe007
AS
1614 /* remember the offset of last byte accessed in ctx */
1615 if (env->prog->aux->max_ctx_offset < off + size)
1616 env->prog->aux->max_ctx_offset = off + size;
17a52670 1617 return 0;
32bbe007 1618 }
17a52670 1619
61bd5218 1620 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
17a52670
AS
1621 return -EACCES;
1622}
1623
d58e468b
PP
1624static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
1625 int size)
1626{
1627 if (size < 0 || off < 0 ||
1628 (u64)off + size > sizeof(struct bpf_flow_keys)) {
1629 verbose(env, "invalid access to flow keys off=%d size=%d\n",
1630 off, size);
1631 return -EACCES;
1632 }
1633 return 0;
1634}
1635
5f456649
MKL
1636static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
1637 u32 regno, int off, int size,
1638 enum bpf_access_type t)
c64b7983
JS
1639{
1640 struct bpf_reg_state *regs = cur_regs(env);
1641 struct bpf_reg_state *reg = &regs[regno];
5f456649 1642 struct bpf_insn_access_aux info = {};
46f8bc92 1643 bool valid;
c64b7983
JS
1644
1645 if (reg->smin_value < 0) {
1646 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1647 regno);
1648 return -EACCES;
1649 }
1650
46f8bc92
MKL
1651 switch (reg->type) {
1652 case PTR_TO_SOCK_COMMON:
1653 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
1654 break;
1655 case PTR_TO_SOCKET:
1656 valid = bpf_sock_is_valid_access(off, size, t, &info);
1657 break;
655a51e5
MKL
1658 case PTR_TO_TCP_SOCK:
1659 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
1660 break;
46f8bc92
MKL
1661 default:
1662 valid = false;
c64b7983
JS
1663 }
1664
5f456649 1665
46f8bc92
MKL
1666 if (valid) {
1667 env->insn_aux_data[insn_idx].ctx_field_size =
1668 info.ctx_field_size;
1669 return 0;
1670 }
1671
1672 verbose(env, "R%d invalid %s access off=%d size=%d\n",
1673 regno, reg_type_str[reg->type], off, size);
1674
1675 return -EACCES;
c64b7983
JS
1676}
1677
4cabc5b1
DB
1678static bool __is_pointer_value(bool allow_ptr_leaks,
1679 const struct bpf_reg_state *reg)
1be7f75d 1680{
4cabc5b1 1681 if (allow_ptr_leaks)
1be7f75d
AS
1682 return false;
1683
f1174f77 1684 return reg->type != SCALAR_VALUE;
1be7f75d
AS
1685}
1686
2a159c6f
DB
1687static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
1688{
1689 return cur_regs(env) + regno;
1690}
1691
4cabc5b1
DB
1692static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1693{
2a159c6f 1694 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4cabc5b1
DB
1695}
1696
f37a8cb8
DB
1697static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1698{
2a159c6f 1699 const struct bpf_reg_state *reg = reg_state(env, regno);
f37a8cb8 1700
46f8bc92
MKL
1701 return reg->type == PTR_TO_CTX;
1702}
1703
1704static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
1705{
1706 const struct bpf_reg_state *reg = reg_state(env, regno);
1707
1708 return type_is_sk_pointer(reg->type);
f37a8cb8
DB
1709}
1710
ca369602
DB
1711static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1712{
2a159c6f 1713 const struct bpf_reg_state *reg = reg_state(env, regno);
ca369602
DB
1714
1715 return type_is_pkt_pointer(reg->type);
1716}
1717
4b5defde
DB
1718static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
1719{
1720 const struct bpf_reg_state *reg = reg_state(env, regno);
1721
1722 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
1723 return reg->type == PTR_TO_FLOW_KEYS;
1724}
1725
61bd5218
JK
1726static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1727 const struct bpf_reg_state *reg,
d1174416 1728 int off, int size, bool strict)
969bf05e 1729{
f1174f77 1730 struct tnum reg_off;
e07b98d9 1731 int ip_align;
d1174416
DM
1732
1733 /* Byte size accesses are always allowed. */
1734 if (!strict || size == 1)
1735 return 0;
1736
e4eda884
DM
1737 /* For platforms that do not have a Kconfig enabling
1738 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1739 * NET_IP_ALIGN is universally set to '2'. And on platforms
1740 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1741 * to this code only in strict mode where we want to emulate
1742 * the NET_IP_ALIGN==2 checking. Therefore use an
1743 * unconditional IP align value of '2'.
e07b98d9 1744 */
e4eda884 1745 ip_align = 2;
f1174f77
EC
1746
1747 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1748 if (!tnum_is_aligned(reg_off, size)) {
1749 char tn_buf[48];
1750
1751 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218
JK
1752 verbose(env,
1753 "misaligned packet access off %d+%s+%d+%d size %d\n",
f1174f77 1754 ip_align, tn_buf, reg->off, off, size);
969bf05e
AS
1755 return -EACCES;
1756 }
79adffcd 1757
969bf05e
AS
1758 return 0;
1759}
1760
61bd5218
JK
1761static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1762 const struct bpf_reg_state *reg,
f1174f77
EC
1763 const char *pointer_desc,
1764 int off, int size, bool strict)
79adffcd 1765{
f1174f77
EC
1766 struct tnum reg_off;
1767
1768 /* Byte size accesses are always allowed. */
1769 if (!strict || size == 1)
1770 return 0;
1771
1772 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1773 if (!tnum_is_aligned(reg_off, size)) {
1774 char tn_buf[48];
1775
1776 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 1777 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
f1174f77 1778 pointer_desc, tn_buf, reg->off, off, size);
79adffcd
DB
1779 return -EACCES;
1780 }
1781
969bf05e
AS
1782 return 0;
1783}
1784
e07b98d9 1785static int check_ptr_alignment(struct bpf_verifier_env *env,
ca369602
DB
1786 const struct bpf_reg_state *reg, int off,
1787 int size, bool strict_alignment_once)
79adffcd 1788{
ca369602 1789 bool strict = env->strict_alignment || strict_alignment_once;
f1174f77 1790 const char *pointer_desc = "";
d1174416 1791
79adffcd
DB
1792 switch (reg->type) {
1793 case PTR_TO_PACKET:
de8f3a83
DB
1794 case PTR_TO_PACKET_META:
1795 /* Special case, because of NET_IP_ALIGN. Given metadata sits
1796 * right in front, treat it the very same way.
1797 */
61bd5218 1798 return check_pkt_ptr_alignment(env, reg, off, size, strict);
d58e468b
PP
1799 case PTR_TO_FLOW_KEYS:
1800 pointer_desc = "flow keys ";
1801 break;
f1174f77
EC
1802 case PTR_TO_MAP_VALUE:
1803 pointer_desc = "value ";
1804 break;
1805 case PTR_TO_CTX:
1806 pointer_desc = "context ";
1807 break;
1808 case PTR_TO_STACK:
1809 pointer_desc = "stack ";
a5ec6ae1
JH
1810 /* The stack spill tracking logic in check_stack_write()
1811 * and check_stack_read() relies on stack accesses being
1812 * aligned.
1813 */
1814 strict = true;
f1174f77 1815 break;
c64b7983
JS
1816 case PTR_TO_SOCKET:
1817 pointer_desc = "sock ";
1818 break;
46f8bc92
MKL
1819 case PTR_TO_SOCK_COMMON:
1820 pointer_desc = "sock_common ";
1821 break;
655a51e5
MKL
1822 case PTR_TO_TCP_SOCK:
1823 pointer_desc = "tcp_sock ";
1824 break;
79adffcd 1825 default:
f1174f77 1826 break;
79adffcd 1827 }
61bd5218
JK
1828 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1829 strict);
79adffcd
DB
1830}
1831
f4d7e40a
AS
1832static int update_stack_depth(struct bpf_verifier_env *env,
1833 const struct bpf_func_state *func,
1834 int off)
1835{
9c8105bd 1836 u16 stack = env->subprog_info[func->subprogno].stack_depth;
f4d7e40a
AS
1837
1838 if (stack >= -off)
1839 return 0;
1840
1841 /* update known max for given subprogram */
9c8105bd 1842 env->subprog_info[func->subprogno].stack_depth = -off;
70a87ffe
AS
1843 return 0;
1844}
f4d7e40a 1845
70a87ffe
AS
1846/* starting from main bpf function walk all instructions of the function
1847 * and recursively walk all callees that given function can call.
1848 * Ignore jump and exit insns.
1849 * Since recursion is prevented by check_cfg() this algorithm
1850 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
1851 */
1852static int check_max_stack_depth(struct bpf_verifier_env *env)
1853{
9c8105bd
JW
1854 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
1855 struct bpf_subprog_info *subprog = env->subprog_info;
70a87ffe 1856 struct bpf_insn *insn = env->prog->insnsi;
70a87ffe
AS
1857 int ret_insn[MAX_CALL_FRAMES];
1858 int ret_prog[MAX_CALL_FRAMES];
f4d7e40a 1859
70a87ffe
AS
1860process_func:
1861 /* round up to 32-bytes, since this is granularity
1862 * of interpreter stack size
1863 */
9c8105bd 1864 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe 1865 if (depth > MAX_BPF_STACK) {
f4d7e40a 1866 verbose(env, "combined stack size of %d calls is %d. Too large\n",
70a87ffe 1867 frame + 1, depth);
f4d7e40a
AS
1868 return -EACCES;
1869 }
70a87ffe 1870continue_func:
4cb3d99c 1871 subprog_end = subprog[idx + 1].start;
70a87ffe
AS
1872 for (; i < subprog_end; i++) {
1873 if (insn[i].code != (BPF_JMP | BPF_CALL))
1874 continue;
1875 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1876 continue;
1877 /* remember insn and function to return to */
1878 ret_insn[frame] = i + 1;
9c8105bd 1879 ret_prog[frame] = idx;
70a87ffe
AS
1880
1881 /* find the callee */
1882 i = i + insn[i].imm + 1;
9c8105bd
JW
1883 idx = find_subprog(env, i);
1884 if (idx < 0) {
70a87ffe
AS
1885 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1886 i);
1887 return -EFAULT;
1888 }
70a87ffe
AS
1889 frame++;
1890 if (frame >= MAX_CALL_FRAMES) {
1891 WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1892 return -EFAULT;
1893 }
1894 goto process_func;
1895 }
1896 /* end of for() loop means the last insn of the 'subprog'
1897 * was reached. Doesn't matter whether it was JA or EXIT
1898 */
1899 if (frame == 0)
1900 return 0;
9c8105bd 1901 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe
AS
1902 frame--;
1903 i = ret_insn[frame];
9c8105bd 1904 idx = ret_prog[frame];
70a87ffe 1905 goto continue_func;
f4d7e40a
AS
1906}
1907
19d28fbd 1908#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
1909static int get_callee_stack_depth(struct bpf_verifier_env *env,
1910 const struct bpf_insn *insn, int idx)
1911{
1912 int start = idx + insn->imm + 1, subprog;
1913
1914 subprog = find_subprog(env, start);
1915 if (subprog < 0) {
1916 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1917 start);
1918 return -EFAULT;
1919 }
9c8105bd 1920 return env->subprog_info[subprog].stack_depth;
1ea47e01 1921}
19d28fbd 1922#endif
1ea47e01 1923
58990d1f
DB
1924static int check_ctx_reg(struct bpf_verifier_env *env,
1925 const struct bpf_reg_state *reg, int regno)
1926{
1927 /* Access to ctx or passing it to a helper is only allowed in
1928 * its original, unmodified form.
1929 */
1930
1931 if (reg->off) {
1932 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
1933 regno, reg->off);
1934 return -EACCES;
1935 }
1936
1937 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1938 char tn_buf[48];
1939
1940 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1941 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
1942 return -EACCES;
1943 }
1944
1945 return 0;
1946}
1947
0c17d1d2
JH
1948/* truncate register to smaller size (in bytes)
1949 * must be called with size < BPF_REG_SIZE
1950 */
1951static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1952{
1953 u64 mask;
1954
1955 /* clear high bits in bit representation */
1956 reg->var_off = tnum_cast(reg->var_off, size);
1957
1958 /* fix arithmetic bounds */
1959 mask = ((u64)1 << (size * 8)) - 1;
1960 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1961 reg->umin_value &= mask;
1962 reg->umax_value &= mask;
1963 } else {
1964 reg->umin_value = 0;
1965 reg->umax_value = mask;
1966 }
1967 reg->smin_value = reg->umin_value;
1968 reg->smax_value = reg->umax_value;
1969}
1970
17a52670
AS
1971/* check whether memory at (regno + off) is accessible for t = (read | write)
1972 * if t==write, value_regno is a register which value is stored into memory
1973 * if t==read, value_regno is a register which will receive the value from memory
1974 * if t==write && value_regno==-1, some unknown value is stored into memory
1975 * if t==read && value_regno==-1, don't care what we read from memory
1976 */
ca369602
DB
1977static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1978 int off, int bpf_size, enum bpf_access_type t,
1979 int value_regno, bool strict_alignment_once)
17a52670 1980{
638f5b90
AS
1981 struct bpf_reg_state *regs = cur_regs(env);
1982 struct bpf_reg_state *reg = regs + regno;
f4d7e40a 1983 struct bpf_func_state *state;
17a52670
AS
1984 int size, err = 0;
1985
1986 size = bpf_size_to_bytes(bpf_size);
1987 if (size < 0)
1988 return size;
1989
f1174f77 1990 /* alignment checks will add in reg->off themselves */
ca369602 1991 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
969bf05e
AS
1992 if (err)
1993 return err;
17a52670 1994
f1174f77
EC
1995 /* for access checks, reg->off is just part of off */
1996 off += reg->off;
1997
1998 if (reg->type == PTR_TO_MAP_VALUE) {
1be7f75d
AS
1999 if (t == BPF_WRITE && value_regno >= 0 &&
2000 is_pointer_value(env, value_regno)) {
61bd5218 2001 verbose(env, "R%d leaks addr into map\n", value_regno);
1be7f75d
AS
2002 return -EACCES;
2003 }
48461135 2004
9fd29c08 2005 err = check_map_access(env, regno, off, size, false);
17a52670 2006 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 2007 mark_reg_unknown(env, regs, value_regno);
17a52670 2008
1a0dc1ac 2009 } else if (reg->type == PTR_TO_CTX) {
f1174f77 2010 enum bpf_reg_type reg_type = SCALAR_VALUE;
19de99f7 2011
1be7f75d
AS
2012 if (t == BPF_WRITE && value_regno >= 0 &&
2013 is_pointer_value(env, value_regno)) {
61bd5218 2014 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1be7f75d
AS
2015 return -EACCES;
2016 }
f1174f77 2017
58990d1f
DB
2018 err = check_ctx_reg(env, reg, regno);
2019 if (err < 0)
2020 return err;
2021
31fd8581 2022 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
969bf05e 2023 if (!err && t == BPF_READ && value_regno >= 0) {
f1174f77 2024 /* ctx access returns either a scalar, or a
de8f3a83
DB
2025 * PTR_TO_PACKET[_META,_END]. In the latter
2026 * case, we know the offset is zero.
f1174f77 2027 */
46f8bc92 2028 if (reg_type == SCALAR_VALUE) {
638f5b90 2029 mark_reg_unknown(env, regs, value_regno);
46f8bc92 2030 } else {
638f5b90 2031 mark_reg_known_zero(env, regs,
61bd5218 2032 value_regno);
46f8bc92
MKL
2033 if (reg_type_may_be_null(reg_type))
2034 regs[value_regno].id = ++env->id_gen;
2035 }
638f5b90 2036 regs[value_regno].type = reg_type;
969bf05e 2037 }
17a52670 2038
f1174f77 2039 } else if (reg->type == PTR_TO_STACK) {
f1174f77 2040 off += reg->var_off.value;
e4298d25
DB
2041 err = check_stack_access(env, reg, off, size);
2042 if (err)
2043 return err;
8726679a 2044
f4d7e40a
AS
2045 state = func(env, reg);
2046 err = update_stack_depth(env, state, off);
2047 if (err)
2048 return err;
8726679a 2049
638f5b90 2050 if (t == BPF_WRITE)
61bd5218 2051 err = check_stack_write(env, state, off, size,
af86ca4e 2052 value_regno, insn_idx);
638f5b90 2053 else
61bd5218
JK
2054 err = check_stack_read(env, state, off, size,
2055 value_regno);
de8f3a83 2056 } else if (reg_is_pkt_pointer(reg)) {
3a0af8fd 2057 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
61bd5218 2058 verbose(env, "cannot write into packet\n");
969bf05e
AS
2059 return -EACCES;
2060 }
4acf6c0b
BB
2061 if (t == BPF_WRITE && value_regno >= 0 &&
2062 is_pointer_value(env, value_regno)) {
61bd5218
JK
2063 verbose(env, "R%d leaks addr into packet\n",
2064 value_regno);
4acf6c0b
BB
2065 return -EACCES;
2066 }
9fd29c08 2067 err = check_packet_access(env, regno, off, size, false);
969bf05e 2068 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 2069 mark_reg_unknown(env, regs, value_regno);
d58e468b
PP
2070 } else if (reg->type == PTR_TO_FLOW_KEYS) {
2071 if (t == BPF_WRITE && value_regno >= 0 &&
2072 is_pointer_value(env, value_regno)) {
2073 verbose(env, "R%d leaks addr into flow keys\n",
2074 value_regno);
2075 return -EACCES;
2076 }
2077
2078 err = check_flow_keys_access(env, off, size);
2079 if (!err && t == BPF_READ && value_regno >= 0)
2080 mark_reg_unknown(env, regs, value_regno);
46f8bc92 2081 } else if (type_is_sk_pointer(reg->type)) {
c64b7983 2082 if (t == BPF_WRITE) {
46f8bc92
MKL
2083 verbose(env, "R%d cannot write into %s\n",
2084 regno, reg_type_str[reg->type]);
c64b7983
JS
2085 return -EACCES;
2086 }
5f456649 2087 err = check_sock_access(env, insn_idx, regno, off, size, t);
c64b7983
JS
2088 if (!err && value_regno >= 0)
2089 mark_reg_unknown(env, regs, value_regno);
17a52670 2090 } else {
61bd5218
JK
2091 verbose(env, "R%d invalid mem access '%s'\n", regno,
2092 reg_type_str[reg->type]);
17a52670
AS
2093 return -EACCES;
2094 }
969bf05e 2095
f1174f77 2096 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
638f5b90 2097 regs[value_regno].type == SCALAR_VALUE) {
f1174f77 2098 /* b/h/w load zero-extends, mark upper bits as known 0 */
0c17d1d2 2099 coerce_reg_to_size(&regs[value_regno], size);
969bf05e 2100 }
17a52670
AS
2101 return err;
2102}
2103
31fd8581 2104static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
17a52670 2105{
17a52670
AS
2106 int err;
2107
2108 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
2109 insn->imm != 0) {
61bd5218 2110 verbose(env, "BPF_XADD uses reserved fields\n");
17a52670
AS
2111 return -EINVAL;
2112 }
2113
2114 /* check src1 operand */
dc503a8a 2115 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
2116 if (err)
2117 return err;
2118
2119 /* check src2 operand */
dc503a8a 2120 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
2121 if (err)
2122 return err;
2123
6bdf6abc 2124 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 2125 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6bdf6abc
DB
2126 return -EACCES;
2127 }
2128
ca369602 2129 if (is_ctx_reg(env, insn->dst_reg) ||
4b5defde 2130 is_pkt_reg(env, insn->dst_reg) ||
46f8bc92
MKL
2131 is_flow_key_reg(env, insn->dst_reg) ||
2132 is_sk_reg(env, insn->dst_reg)) {
ca369602 2133 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2a159c6f
DB
2134 insn->dst_reg,
2135 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
2136 return -EACCES;
2137 }
2138
17a52670 2139 /* check whether atomic_add can read the memory */
31fd8581 2140 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 2141 BPF_SIZE(insn->code), BPF_READ, -1, true);
17a52670
AS
2142 if (err)
2143 return err;
2144
2145 /* check whether atomic_add can write into the same memory */
31fd8581 2146 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 2147 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
17a52670
AS
2148}
2149
2150/* when register 'regno' is passed into function that will read 'access_size'
2151 * bytes from that pointer, make sure that it's within stack boundary
f1174f77
EC
2152 * and all elements of stack are initialized.
2153 * Unlike most pointer bounds-checking functions, this one doesn't take an
2154 * 'off' argument, so it has to add in reg->off itself.
17a52670 2155 */
58e2af8b 2156static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
435faee1
DB
2157 int access_size, bool zero_size_allowed,
2158 struct bpf_call_arg_meta *meta)
17a52670 2159{
2a159c6f 2160 struct bpf_reg_state *reg = reg_state(env, regno);
f4d7e40a 2161 struct bpf_func_state *state = func(env, reg);
638f5b90 2162 int off, i, slot, spi;
17a52670 2163
914cb781 2164 if (reg->type != PTR_TO_STACK) {
f1174f77 2165 /* Allow zero-byte read from NULL, regardless of pointer type */
8e2fe1d9 2166 if (zero_size_allowed && access_size == 0 &&
914cb781 2167 register_is_null(reg))
8e2fe1d9
DB
2168 return 0;
2169
61bd5218 2170 verbose(env, "R%d type=%s expected=%s\n", regno,
914cb781 2171 reg_type_str[reg->type],
8e2fe1d9 2172 reg_type_str[PTR_TO_STACK]);
17a52670 2173 return -EACCES;
8e2fe1d9 2174 }
17a52670 2175
f1174f77 2176 /* Only allow fixed-offset stack reads */
914cb781 2177 if (!tnum_is_const(reg->var_off)) {
f1174f77
EC
2178 char tn_buf[48];
2179
914cb781 2180 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 2181 verbose(env, "invalid variable stack read R%d var_off=%s\n",
f1174f77 2182 regno, tn_buf);
ea25f914 2183 return -EACCES;
f1174f77 2184 }
914cb781 2185 off = reg->off + reg->var_off.value;
17a52670 2186 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
9fd29c08 2187 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
61bd5218 2188 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
17a52670
AS
2189 regno, off, access_size);
2190 return -EACCES;
2191 }
2192
435faee1
DB
2193 if (meta && meta->raw_mode) {
2194 meta->access_size = access_size;
2195 meta->regno = regno;
2196 return 0;
2197 }
2198
17a52670 2199 for (i = 0; i < access_size; i++) {
cc2b14d5
AS
2200 u8 *stype;
2201
638f5b90
AS
2202 slot = -(off + i) - 1;
2203 spi = slot / BPF_REG_SIZE;
cc2b14d5
AS
2204 if (state->allocated_stack <= slot)
2205 goto err;
2206 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2207 if (*stype == STACK_MISC)
2208 goto mark;
2209 if (*stype == STACK_ZERO) {
2210 /* helper can write anything into the stack */
2211 *stype = STACK_MISC;
2212 goto mark;
17a52670 2213 }
cc2b14d5
AS
2214err:
2215 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
2216 off, i, access_size);
2217 return -EACCES;
2218mark:
2219 /* reading any byte out of 8-byte 'spill_slot' will cause
2220 * the whole slot to be marked as 'read'
2221 */
679c782d
EC
2222 mark_reg_read(env, &state->stack[spi].spilled_ptr,
2223 state->stack[spi].spilled_ptr.parent);
17a52670 2224 }
f4d7e40a 2225 return update_stack_depth(env, state, off);
17a52670
AS
2226}
2227
06c1c049
GB
2228static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
2229 int access_size, bool zero_size_allowed,
2230 struct bpf_call_arg_meta *meta)
2231{
638f5b90 2232 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
06c1c049 2233
f1174f77 2234 switch (reg->type) {
06c1c049 2235 case PTR_TO_PACKET:
de8f3a83 2236 case PTR_TO_PACKET_META:
9fd29c08
YS
2237 return check_packet_access(env, regno, reg->off, access_size,
2238 zero_size_allowed);
06c1c049 2239 case PTR_TO_MAP_VALUE:
9fd29c08
YS
2240 return check_map_access(env, regno, reg->off, access_size,
2241 zero_size_allowed);
f1174f77 2242 default: /* scalar_value|ptr_to_stack or invalid ptr */
06c1c049
GB
2243 return check_stack_boundary(env, regno, access_size,
2244 zero_size_allowed, meta);
2245 }
2246}
2247
d83525ca
AS
2248/* Implementation details:
2249 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
2250 * Two bpf_map_lookups (even with the same key) will have different reg->id.
2251 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
2252 * value_or_null->value transition, since the verifier only cares about
2253 * the range of access to valid map value pointer and doesn't care about actual
2254 * address of the map element.
2255 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
2256 * reg->id > 0 after value_or_null->value transition. By doing so
2257 * two bpf_map_lookups will be considered two different pointers that
2258 * point to different bpf_spin_locks.
2259 * The verifier allows taking only one bpf_spin_lock at a time to avoid
2260 * dead-locks.
2261 * Since only one bpf_spin_lock is allowed the checks are simpler than
2262 * reg_is_refcounted() logic. The verifier needs to remember only
2263 * one spin_lock instead of array of acquired_refs.
2264 * cur_state->active_spin_lock remembers which map value element got locked
2265 * and clears it after bpf_spin_unlock.
2266 */
2267static int process_spin_lock(struct bpf_verifier_env *env, int regno,
2268 bool is_lock)
2269{
2270 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
2271 struct bpf_verifier_state *cur = env->cur_state;
2272 bool is_const = tnum_is_const(reg->var_off);
2273 struct bpf_map *map = reg->map_ptr;
2274 u64 val = reg->var_off.value;
2275
2276 if (reg->type != PTR_TO_MAP_VALUE) {
2277 verbose(env, "R%d is not a pointer to map_value\n", regno);
2278 return -EINVAL;
2279 }
2280 if (!is_const) {
2281 verbose(env,
2282 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
2283 regno);
2284 return -EINVAL;
2285 }
2286 if (!map->btf) {
2287 verbose(env,
2288 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
2289 map->name);
2290 return -EINVAL;
2291 }
2292 if (!map_value_has_spin_lock(map)) {
2293 if (map->spin_lock_off == -E2BIG)
2294 verbose(env,
2295 "map '%s' has more than one 'struct bpf_spin_lock'\n",
2296 map->name);
2297 else if (map->spin_lock_off == -ENOENT)
2298 verbose(env,
2299 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
2300 map->name);
2301 else
2302 verbose(env,
2303 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
2304 map->name);
2305 return -EINVAL;
2306 }
2307 if (map->spin_lock_off != val + reg->off) {
2308 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
2309 val + reg->off);
2310 return -EINVAL;
2311 }
2312 if (is_lock) {
2313 if (cur->active_spin_lock) {
2314 verbose(env,
2315 "Locking two bpf_spin_locks are not allowed\n");
2316 return -EINVAL;
2317 }
2318 cur->active_spin_lock = reg->id;
2319 } else {
2320 if (!cur->active_spin_lock) {
2321 verbose(env, "bpf_spin_unlock without taking a lock\n");
2322 return -EINVAL;
2323 }
2324 if (cur->active_spin_lock != reg->id) {
2325 verbose(env, "bpf_spin_unlock of different lock\n");
2326 return -EINVAL;
2327 }
2328 cur->active_spin_lock = 0;
2329 }
2330 return 0;
2331}
2332
90133415
DB
2333static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
2334{
2335 return type == ARG_PTR_TO_MEM ||
2336 type == ARG_PTR_TO_MEM_OR_NULL ||
2337 type == ARG_PTR_TO_UNINIT_MEM;
2338}
2339
2340static bool arg_type_is_mem_size(enum bpf_arg_type type)
2341{
2342 return type == ARG_CONST_SIZE ||
2343 type == ARG_CONST_SIZE_OR_ZERO;
2344}
2345
58e2af8b 2346static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
33ff9823
DB
2347 enum bpf_arg_type arg_type,
2348 struct bpf_call_arg_meta *meta)
17a52670 2349{
638f5b90 2350 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6841de8b 2351 enum bpf_reg_type expected_type, type = reg->type;
17a52670
AS
2352 int err = 0;
2353
80f1d68c 2354 if (arg_type == ARG_DONTCARE)
17a52670
AS
2355 return 0;
2356
dc503a8a
EC
2357 err = check_reg_arg(env, regno, SRC_OP);
2358 if (err)
2359 return err;
17a52670 2360
1be7f75d
AS
2361 if (arg_type == ARG_ANYTHING) {
2362 if (is_pointer_value(env, regno)) {
61bd5218
JK
2363 verbose(env, "R%d leaks addr into helper function\n",
2364 regno);
1be7f75d
AS
2365 return -EACCES;
2366 }
80f1d68c 2367 return 0;
1be7f75d 2368 }
80f1d68c 2369
de8f3a83 2370 if (type_is_pkt_pointer(type) &&
3a0af8fd 2371 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
61bd5218 2372 verbose(env, "helper access to the packet is not allowed\n");
6841de8b
AS
2373 return -EACCES;
2374 }
2375
8e2fe1d9 2376 if (arg_type == ARG_PTR_TO_MAP_KEY ||
2ea864c5
MV
2377 arg_type == ARG_PTR_TO_MAP_VALUE ||
2378 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670 2379 expected_type = PTR_TO_STACK;
d71962f3 2380 if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
de8f3a83 2381 type != expected_type)
6841de8b 2382 goto err_type;
39f19ebb
AS
2383 } else if (arg_type == ARG_CONST_SIZE ||
2384 arg_type == ARG_CONST_SIZE_OR_ZERO) {
f1174f77
EC
2385 expected_type = SCALAR_VALUE;
2386 if (type != expected_type)
6841de8b 2387 goto err_type;
17a52670
AS
2388 } else if (arg_type == ARG_CONST_MAP_PTR) {
2389 expected_type = CONST_PTR_TO_MAP;
6841de8b
AS
2390 if (type != expected_type)
2391 goto err_type;
608cd71a
AS
2392 } else if (arg_type == ARG_PTR_TO_CTX) {
2393 expected_type = PTR_TO_CTX;
6841de8b
AS
2394 if (type != expected_type)
2395 goto err_type;
58990d1f
DB
2396 err = check_ctx_reg(env, reg, regno);
2397 if (err < 0)
2398 return err;
46f8bc92
MKL
2399 } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
2400 expected_type = PTR_TO_SOCK_COMMON;
2401 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
2402 if (!type_is_sk_pointer(type))
2403 goto err_type;
1b986589
MKL
2404 if (reg->ref_obj_id) {
2405 if (meta->ref_obj_id) {
2406 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
2407 regno, reg->ref_obj_id,
2408 meta->ref_obj_id);
2409 return -EFAULT;
2410 }
2411 meta->ref_obj_id = reg->ref_obj_id;
fd978bf7 2412 }
d83525ca
AS
2413 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
2414 if (meta->func_id == BPF_FUNC_spin_lock) {
2415 if (process_spin_lock(env, regno, true))
2416 return -EACCES;
2417 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
2418 if (process_spin_lock(env, regno, false))
2419 return -EACCES;
2420 } else {
2421 verbose(env, "verifier internal error\n");
2422 return -EFAULT;
2423 }
90133415 2424 } else if (arg_type_is_mem_ptr(arg_type)) {
8e2fe1d9
DB
2425 expected_type = PTR_TO_STACK;
2426 /* One exception here. In case function allows for NULL to be
f1174f77 2427 * passed in as argument, it's a SCALAR_VALUE type. Final test
8e2fe1d9
DB
2428 * happens during stack boundary checking.
2429 */
914cb781 2430 if (register_is_null(reg) &&
db1ac496 2431 arg_type == ARG_PTR_TO_MEM_OR_NULL)
6841de8b 2432 /* final test in check_stack_boundary() */;
de8f3a83
DB
2433 else if (!type_is_pkt_pointer(type) &&
2434 type != PTR_TO_MAP_VALUE &&
f1174f77 2435 type != expected_type)
6841de8b 2436 goto err_type;
39f19ebb 2437 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
17a52670 2438 } else {
61bd5218 2439 verbose(env, "unsupported arg_type %d\n", arg_type);
17a52670
AS
2440 return -EFAULT;
2441 }
2442
17a52670
AS
2443 if (arg_type == ARG_CONST_MAP_PTR) {
2444 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
33ff9823 2445 meta->map_ptr = reg->map_ptr;
17a52670
AS
2446 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
2447 /* bpf_map_xxx(..., map_ptr, ..., key) call:
2448 * check that [key, key + map->key_size) are within
2449 * stack limits and initialized
2450 */
33ff9823 2451 if (!meta->map_ptr) {
17a52670
AS
2452 /* in function declaration map_ptr must come before
2453 * map_key, so that it's verified and known before
2454 * we have to check map_key here. Otherwise it means
2455 * that kernel subsystem misconfigured verifier
2456 */
61bd5218 2457 verbose(env, "invalid map_ptr to access map->key\n");
17a52670
AS
2458 return -EACCES;
2459 }
d71962f3
PC
2460 err = check_helper_mem_access(env, regno,
2461 meta->map_ptr->key_size, false,
2462 NULL);
2ea864c5
MV
2463 } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
2464 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670
AS
2465 /* bpf_map_xxx(..., map_ptr, ..., value) call:
2466 * check [value, value + map->value_size) validity
2467 */
33ff9823 2468 if (!meta->map_ptr) {
17a52670 2469 /* kernel subsystem misconfigured verifier */
61bd5218 2470 verbose(env, "invalid map_ptr to access map->value\n");
17a52670
AS
2471 return -EACCES;
2472 }
2ea864c5 2473 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
d71962f3
PC
2474 err = check_helper_mem_access(env, regno,
2475 meta->map_ptr->value_size, false,
2ea864c5 2476 meta);
90133415 2477 } else if (arg_type_is_mem_size(arg_type)) {
39f19ebb 2478 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
17a52670 2479
849fa506
YS
2480 /* remember the mem_size which may be used later
2481 * to refine return values.
2482 */
2483 meta->msize_smax_value = reg->smax_value;
2484 meta->msize_umax_value = reg->umax_value;
2485
f1174f77
EC
2486 /* The register is SCALAR_VALUE; the access check
2487 * happens using its boundaries.
06c1c049 2488 */
f1174f77 2489 if (!tnum_is_const(reg->var_off))
06c1c049
GB
2490 /* For unprivileged variable accesses, disable raw
2491 * mode so that the program is required to
2492 * initialize all the memory that the helper could
2493 * just partially fill up.
2494 */
2495 meta = NULL;
2496
b03c9f9f 2497 if (reg->smin_value < 0) {
61bd5218 2498 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
f1174f77
EC
2499 regno);
2500 return -EACCES;
2501 }
06c1c049 2502
b03c9f9f 2503 if (reg->umin_value == 0) {
f1174f77
EC
2504 err = check_helper_mem_access(env, regno - 1, 0,
2505 zero_size_allowed,
2506 meta);
06c1c049
GB
2507 if (err)
2508 return err;
06c1c049 2509 }
f1174f77 2510
b03c9f9f 2511 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
61bd5218 2512 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
f1174f77
EC
2513 regno);
2514 return -EACCES;
2515 }
2516 err = check_helper_mem_access(env, regno - 1,
b03c9f9f 2517 reg->umax_value,
f1174f77 2518 zero_size_allowed, meta);
17a52670
AS
2519 }
2520
2521 return err;
6841de8b 2522err_type:
61bd5218 2523 verbose(env, "R%d type=%s expected=%s\n", regno,
6841de8b
AS
2524 reg_type_str[type], reg_type_str[expected_type]);
2525 return -EACCES;
17a52670
AS
2526}
2527
61bd5218
JK
2528static int check_map_func_compatibility(struct bpf_verifier_env *env,
2529 struct bpf_map *map, int func_id)
35578d79 2530{
35578d79
KX
2531 if (!map)
2532 return 0;
2533
6aff67c8
AS
2534 /* We need a two way check, first is from map perspective ... */
2535 switch (map->map_type) {
2536 case BPF_MAP_TYPE_PROG_ARRAY:
2537 if (func_id != BPF_FUNC_tail_call)
2538 goto error;
2539 break;
2540 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2541 if (func_id != BPF_FUNC_perf_event_read &&
908432ca
YS
2542 func_id != BPF_FUNC_perf_event_output &&
2543 func_id != BPF_FUNC_perf_event_read_value)
6aff67c8
AS
2544 goto error;
2545 break;
2546 case BPF_MAP_TYPE_STACK_TRACE:
2547 if (func_id != BPF_FUNC_get_stackid)
2548 goto error;
2549 break;
4ed8ec52 2550 case BPF_MAP_TYPE_CGROUP_ARRAY:
60747ef4 2551 if (func_id != BPF_FUNC_skb_under_cgroup &&
60d20f91 2552 func_id != BPF_FUNC_current_task_under_cgroup)
4a482f34
MKL
2553 goto error;
2554 break;
cd339431 2555 case BPF_MAP_TYPE_CGROUP_STORAGE:
b741f163 2556 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
cd339431
RG
2557 if (func_id != BPF_FUNC_get_local_storage)
2558 goto error;
2559 break;
546ac1ff
JF
2560 /* devmap returns a pointer to a live net_device ifindex that we cannot
2561 * allow to be modified from bpf side. So do not allow lookup elements
2562 * for now.
2563 */
2564 case BPF_MAP_TYPE_DEVMAP:
2ddf71e2 2565 if (func_id != BPF_FUNC_redirect_map)
546ac1ff
JF
2566 goto error;
2567 break;
fbfc504a
BT
2568 /* Restrict bpf side of cpumap and xskmap, open when use-cases
2569 * appear.
2570 */
6710e112 2571 case BPF_MAP_TYPE_CPUMAP:
fbfc504a 2572 case BPF_MAP_TYPE_XSKMAP:
6710e112
JDB
2573 if (func_id != BPF_FUNC_redirect_map)
2574 goto error;
2575 break;
56f668df 2576 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
bcc6b1b7 2577 case BPF_MAP_TYPE_HASH_OF_MAPS:
56f668df
MKL
2578 if (func_id != BPF_FUNC_map_lookup_elem)
2579 goto error;
16a43625 2580 break;
174a79ff
JF
2581 case BPF_MAP_TYPE_SOCKMAP:
2582 if (func_id != BPF_FUNC_sk_redirect_map &&
2583 func_id != BPF_FUNC_sock_map_update &&
4f738adb
JF
2584 func_id != BPF_FUNC_map_delete_elem &&
2585 func_id != BPF_FUNC_msg_redirect_map)
174a79ff
JF
2586 goto error;
2587 break;
81110384
JF
2588 case BPF_MAP_TYPE_SOCKHASH:
2589 if (func_id != BPF_FUNC_sk_redirect_hash &&
2590 func_id != BPF_FUNC_sock_hash_update &&
2591 func_id != BPF_FUNC_map_delete_elem &&
2592 func_id != BPF_FUNC_msg_redirect_hash)
2593 goto error;
2594 break;
2dbb9b9e
MKL
2595 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
2596 if (func_id != BPF_FUNC_sk_select_reuseport)
2597 goto error;
2598 break;
f1a2e44a
MV
2599 case BPF_MAP_TYPE_QUEUE:
2600 case BPF_MAP_TYPE_STACK:
2601 if (func_id != BPF_FUNC_map_peek_elem &&
2602 func_id != BPF_FUNC_map_pop_elem &&
2603 func_id != BPF_FUNC_map_push_elem)
2604 goto error;
2605 break;
6aff67c8
AS
2606 default:
2607 break;
2608 }
2609
2610 /* ... and second from the function itself. */
2611 switch (func_id) {
2612 case BPF_FUNC_tail_call:
2613 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2614 goto error;
f910cefa 2615 if (env->subprog_cnt > 1) {
f4d7e40a
AS
2616 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2617 return -EINVAL;
2618 }
6aff67c8
AS
2619 break;
2620 case BPF_FUNC_perf_event_read:
2621 case BPF_FUNC_perf_event_output:
908432ca 2622 case BPF_FUNC_perf_event_read_value:
6aff67c8
AS
2623 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2624 goto error;
2625 break;
2626 case BPF_FUNC_get_stackid:
2627 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2628 goto error;
2629 break;
60d20f91 2630 case BPF_FUNC_current_task_under_cgroup:
747ea55e 2631 case BPF_FUNC_skb_under_cgroup:
4a482f34
MKL
2632 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2633 goto error;
2634 break;
97f91a7c 2635 case BPF_FUNC_redirect_map:
9c270af3 2636 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
fbfc504a
BT
2637 map->map_type != BPF_MAP_TYPE_CPUMAP &&
2638 map->map_type != BPF_MAP_TYPE_XSKMAP)
97f91a7c
JF
2639 goto error;
2640 break;
174a79ff 2641 case BPF_FUNC_sk_redirect_map:
4f738adb 2642 case BPF_FUNC_msg_redirect_map:
81110384 2643 case BPF_FUNC_sock_map_update:
174a79ff
JF
2644 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2645 goto error;
2646 break;
81110384
JF
2647 case BPF_FUNC_sk_redirect_hash:
2648 case BPF_FUNC_msg_redirect_hash:
2649 case BPF_FUNC_sock_hash_update:
2650 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
174a79ff
JF
2651 goto error;
2652 break;
cd339431 2653 case BPF_FUNC_get_local_storage:
b741f163
RG
2654 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
2655 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
cd339431
RG
2656 goto error;
2657 break;
2dbb9b9e
MKL
2658 case BPF_FUNC_sk_select_reuseport:
2659 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
2660 goto error;
2661 break;
f1a2e44a
MV
2662 case BPF_FUNC_map_peek_elem:
2663 case BPF_FUNC_map_pop_elem:
2664 case BPF_FUNC_map_push_elem:
2665 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
2666 map->map_type != BPF_MAP_TYPE_STACK)
2667 goto error;
2668 break;
6aff67c8
AS
2669 default:
2670 break;
35578d79
KX
2671 }
2672
2673 return 0;
6aff67c8 2674error:
61bd5218 2675 verbose(env, "cannot pass map_type %d into func %s#%d\n",
ebb676da 2676 map->map_type, func_id_name(func_id), func_id);
6aff67c8 2677 return -EINVAL;
35578d79
KX
2678}
2679
90133415 2680static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
435faee1
DB
2681{
2682 int count = 0;
2683
39f19ebb 2684 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2685 count++;
39f19ebb 2686 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2687 count++;
39f19ebb 2688 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2689 count++;
39f19ebb 2690 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2691 count++;
39f19ebb 2692 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
435faee1
DB
2693 count++;
2694
90133415
DB
2695 /* We only support one arg being in raw mode at the moment,
2696 * which is sufficient for the helper functions we have
2697 * right now.
2698 */
2699 return count <= 1;
2700}
2701
2702static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
2703 enum bpf_arg_type arg_next)
2704{
2705 return (arg_type_is_mem_ptr(arg_curr) &&
2706 !arg_type_is_mem_size(arg_next)) ||
2707 (!arg_type_is_mem_ptr(arg_curr) &&
2708 arg_type_is_mem_size(arg_next));
2709}
2710
2711static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2712{
2713 /* bpf_xxx(..., buf, len) call will access 'len'
2714 * bytes from memory 'buf'. Both arg types need
2715 * to be paired, so make sure there's no buggy
2716 * helper function specification.
2717 */
2718 if (arg_type_is_mem_size(fn->arg1_type) ||
2719 arg_type_is_mem_ptr(fn->arg5_type) ||
2720 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
2721 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
2722 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
2723 check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
2724 return false;
2725
2726 return true;
2727}
2728
1b986589 2729static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
fd978bf7
JS
2730{
2731 int count = 0;
2732
1b986589 2733 if (arg_type_may_be_refcounted(fn->arg1_type))
fd978bf7 2734 count++;
1b986589 2735 if (arg_type_may_be_refcounted(fn->arg2_type))
fd978bf7 2736 count++;
1b986589 2737 if (arg_type_may_be_refcounted(fn->arg3_type))
fd978bf7 2738 count++;
1b986589 2739 if (arg_type_may_be_refcounted(fn->arg4_type))
fd978bf7 2740 count++;
1b986589 2741 if (arg_type_may_be_refcounted(fn->arg5_type))
fd978bf7
JS
2742 count++;
2743
1b986589
MKL
2744 /* A reference acquiring function cannot acquire
2745 * another refcounted ptr.
2746 */
2747 if (is_acquire_function(func_id) && count)
2748 return false;
2749
fd978bf7
JS
2750 /* We only support one arg being unreferenced at the moment,
2751 * which is sufficient for the helper functions we have right now.
2752 */
2753 return count <= 1;
2754}
2755
1b986589 2756static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
90133415
DB
2757{
2758 return check_raw_mode_ok(fn) &&
fd978bf7 2759 check_arg_pair_ok(fn) &&
1b986589 2760 check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
435faee1
DB
2761}
2762
de8f3a83
DB
2763/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2764 * are now invalid, so turn them into unknown SCALAR_VALUE.
f1174f77 2765 */
f4d7e40a
AS
2766static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2767 struct bpf_func_state *state)
969bf05e 2768{
58e2af8b 2769 struct bpf_reg_state *regs = state->regs, *reg;
969bf05e
AS
2770 int i;
2771
2772 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 2773 if (reg_is_pkt_pointer_any(&regs[i]))
61bd5218 2774 mark_reg_unknown(env, regs, i);
969bf05e 2775
f3709f69
JS
2776 bpf_for_each_spilled_reg(i, state, reg) {
2777 if (!reg)
969bf05e 2778 continue;
de8f3a83
DB
2779 if (reg_is_pkt_pointer_any(reg))
2780 __mark_reg_unknown(reg);
969bf05e
AS
2781 }
2782}
2783
f4d7e40a
AS
2784static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2785{
2786 struct bpf_verifier_state *vstate = env->cur_state;
2787 int i;
2788
2789 for (i = 0; i <= vstate->curframe; i++)
2790 __clear_all_pkt_pointers(env, vstate->frame[i]);
2791}
2792
fd978bf7 2793static void release_reg_references(struct bpf_verifier_env *env,
1b986589
MKL
2794 struct bpf_func_state *state,
2795 int ref_obj_id)
fd978bf7
JS
2796{
2797 struct bpf_reg_state *regs = state->regs, *reg;
2798 int i;
2799
2800 for (i = 0; i < MAX_BPF_REG; i++)
1b986589 2801 if (regs[i].ref_obj_id == ref_obj_id)
fd978bf7
JS
2802 mark_reg_unknown(env, regs, i);
2803
2804 bpf_for_each_spilled_reg(i, state, reg) {
2805 if (!reg)
2806 continue;
1b986589 2807 if (reg->ref_obj_id == ref_obj_id)
fd978bf7
JS
2808 __mark_reg_unknown(reg);
2809 }
2810}
2811
2812/* The pointer with the specified id has released its reference to kernel
2813 * resources. Identify all copies of the same pointer and clear the reference.
2814 */
2815static int release_reference(struct bpf_verifier_env *env,
1b986589 2816 int ref_obj_id)
fd978bf7
JS
2817{
2818 struct bpf_verifier_state *vstate = env->cur_state;
1b986589 2819 int err;
fd978bf7
JS
2820 int i;
2821
1b986589
MKL
2822 err = release_reference_state(cur_func(env), ref_obj_id);
2823 if (err)
2824 return err;
2825
fd978bf7 2826 for (i = 0; i <= vstate->curframe; i++)
1b986589 2827 release_reg_references(env, vstate->frame[i], ref_obj_id);
fd978bf7 2828
1b986589 2829 return 0;
fd978bf7
JS
2830}
2831
f4d7e40a
AS
2832static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2833 int *insn_idx)
2834{
2835 struct bpf_verifier_state *state = env->cur_state;
2836 struct bpf_func_state *caller, *callee;
fd978bf7 2837 int i, err, subprog, target_insn;
f4d7e40a 2838
aada9ce6 2839 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
f4d7e40a 2840 verbose(env, "the call stack of %d frames is too deep\n",
aada9ce6 2841 state->curframe + 2);
f4d7e40a
AS
2842 return -E2BIG;
2843 }
2844
2845 target_insn = *insn_idx + insn->imm;
2846 subprog = find_subprog(env, target_insn + 1);
2847 if (subprog < 0) {
2848 verbose(env, "verifier bug. No program starts at insn %d\n",
2849 target_insn + 1);
2850 return -EFAULT;
2851 }
2852
2853 caller = state->frame[state->curframe];
2854 if (state->frame[state->curframe + 1]) {
2855 verbose(env, "verifier bug. Frame %d already allocated\n",
2856 state->curframe + 1);
2857 return -EFAULT;
2858 }
2859
2860 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2861 if (!callee)
2862 return -ENOMEM;
2863 state->frame[state->curframe + 1] = callee;
2864
2865 /* callee cannot access r0, r6 - r9 for reading and has to write
2866 * into its own stack before reading from it.
2867 * callee can read/write into caller's stack
2868 */
2869 init_func_state(env, callee,
2870 /* remember the callsite, it will be used by bpf_exit */
2871 *insn_idx /* callsite */,
2872 state->curframe + 1 /* frameno within this callchain */,
f910cefa 2873 subprog /* subprog number within this prog */);
f4d7e40a 2874
fd978bf7
JS
2875 /* Transfer references to the callee */
2876 err = transfer_reference_state(callee, caller);
2877 if (err)
2878 return err;
2879
679c782d
EC
2880 /* copy r1 - r5 args that callee can access. The copy includes parent
2881 * pointers, which connects us up to the liveness chain
2882 */
f4d7e40a
AS
2883 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2884 callee->regs[i] = caller->regs[i];
2885
679c782d 2886 /* after the call registers r0 - r5 were scratched */
f4d7e40a
AS
2887 for (i = 0; i < CALLER_SAVED_REGS; i++) {
2888 mark_reg_not_init(env, caller->regs, caller_saved[i]);
2889 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2890 }
2891
2892 /* only increment it after check_reg_arg() finished */
2893 state->curframe++;
2894
2895 /* and go analyze first insn of the callee */
2896 *insn_idx = target_insn;
2897
2898 if (env->log.level) {
2899 verbose(env, "caller:\n");
2900 print_verifier_state(env, caller);
2901 verbose(env, "callee:\n");
2902 print_verifier_state(env, callee);
2903 }
2904 return 0;
2905}
2906
2907static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2908{
2909 struct bpf_verifier_state *state = env->cur_state;
2910 struct bpf_func_state *caller, *callee;
2911 struct bpf_reg_state *r0;
fd978bf7 2912 int err;
f4d7e40a
AS
2913
2914 callee = state->frame[state->curframe];
2915 r0 = &callee->regs[BPF_REG_0];
2916 if (r0->type == PTR_TO_STACK) {
2917 /* technically it's ok to return caller's stack pointer
2918 * (or caller's caller's pointer) back to the caller,
2919 * since these pointers are valid. Only current stack
2920 * pointer will be invalid as soon as function exits,
2921 * but let's be conservative
2922 */
2923 verbose(env, "cannot return stack pointer to the caller\n");
2924 return -EINVAL;
2925 }
2926
2927 state->curframe--;
2928 caller = state->frame[state->curframe];
2929 /* return to the caller whatever r0 had in the callee */
2930 caller->regs[BPF_REG_0] = *r0;
2931
fd978bf7
JS
2932 /* Transfer references to the caller */
2933 err = transfer_reference_state(caller, callee);
2934 if (err)
2935 return err;
2936
f4d7e40a
AS
2937 *insn_idx = callee->callsite + 1;
2938 if (env->log.level) {
2939 verbose(env, "returning from callee:\n");
2940 print_verifier_state(env, callee);
2941 verbose(env, "to caller at %d:\n", *insn_idx);
2942 print_verifier_state(env, caller);
2943 }
2944 /* clear everything in the callee */
2945 free_func_state(callee);
2946 state->frame[state->curframe + 1] = NULL;
2947 return 0;
2948}
2949
849fa506
YS
2950static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
2951 int func_id,
2952 struct bpf_call_arg_meta *meta)
2953{
2954 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
2955
2956 if (ret_type != RET_INTEGER ||
2957 (func_id != BPF_FUNC_get_stack &&
2958 func_id != BPF_FUNC_probe_read_str))
2959 return;
2960
2961 ret_reg->smax_value = meta->msize_smax_value;
2962 ret_reg->umax_value = meta->msize_umax_value;
2963 __reg_deduce_bounds(ret_reg);
2964 __reg_bound_offset(ret_reg);
2965}
2966
c93552c4
DB
2967static int
2968record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
2969 int func_id, int insn_idx)
2970{
2971 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
2972
2973 if (func_id != BPF_FUNC_tail_call &&
09772d92
DB
2974 func_id != BPF_FUNC_map_lookup_elem &&
2975 func_id != BPF_FUNC_map_update_elem &&
f1a2e44a
MV
2976 func_id != BPF_FUNC_map_delete_elem &&
2977 func_id != BPF_FUNC_map_push_elem &&
2978 func_id != BPF_FUNC_map_pop_elem &&
2979 func_id != BPF_FUNC_map_peek_elem)
c93552c4 2980 return 0;
09772d92 2981
c93552c4
DB
2982 if (meta->map_ptr == NULL) {
2983 verbose(env, "kernel subsystem misconfigured verifier\n");
2984 return -EINVAL;
2985 }
2986
2987 if (!BPF_MAP_PTR(aux->map_state))
2988 bpf_map_ptr_store(aux, meta->map_ptr,
2989 meta->map_ptr->unpriv_array);
2990 else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
2991 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
2992 meta->map_ptr->unpriv_array);
2993 return 0;
2994}
2995
fd978bf7
JS
2996static int check_reference_leak(struct bpf_verifier_env *env)
2997{
2998 struct bpf_func_state *state = cur_func(env);
2999 int i;
3000
3001 for (i = 0; i < state->acquired_refs; i++) {
3002 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
3003 state->refs[i].id, state->refs[i].insn_idx);
3004 }
3005 return state->acquired_refs ? -EINVAL : 0;
3006}
3007
f4d7e40a 3008static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
17a52670 3009{
17a52670 3010 const struct bpf_func_proto *fn = NULL;
638f5b90 3011 struct bpf_reg_state *regs;
33ff9823 3012 struct bpf_call_arg_meta meta;
969bf05e 3013 bool changes_data;
17a52670
AS
3014 int i, err;
3015
3016 /* find function prototype */
3017 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
61bd5218
JK
3018 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
3019 func_id);
17a52670
AS
3020 return -EINVAL;
3021 }
3022
00176a34 3023 if (env->ops->get_func_proto)
5e43f899 3024 fn = env->ops->get_func_proto(func_id, env->prog);
17a52670 3025 if (!fn) {
61bd5218
JK
3026 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
3027 func_id);
17a52670
AS
3028 return -EINVAL;
3029 }
3030
3031 /* eBPF programs must be GPL compatible to use GPL-ed functions */
24701ece 3032 if (!env->prog->gpl_compatible && fn->gpl_only) {
3fe2867c 3033 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
17a52670
AS
3034 return -EINVAL;
3035 }
3036
04514d13 3037 /* With LD_ABS/IND some JITs save/restore skb from r1. */
17bedab2 3038 changes_data = bpf_helper_changes_pkt_data(fn->func);
04514d13
DB
3039 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
3040 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
3041 func_id_name(func_id), func_id);
3042 return -EINVAL;
3043 }
969bf05e 3044
33ff9823 3045 memset(&meta, 0, sizeof(meta));
36bbef52 3046 meta.pkt_access = fn->pkt_access;
33ff9823 3047
1b986589 3048 err = check_func_proto(fn, func_id);
435faee1 3049 if (err) {
61bd5218 3050 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
ebb676da 3051 func_id_name(func_id), func_id);
435faee1
DB
3052 return err;
3053 }
3054
d83525ca 3055 meta.func_id = func_id;
17a52670 3056 /* check args */
33ff9823 3057 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
17a52670
AS
3058 if (err)
3059 return err;
33ff9823 3060 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
17a52670
AS
3061 if (err)
3062 return err;
33ff9823 3063 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
17a52670
AS
3064 if (err)
3065 return err;
33ff9823 3066 err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
17a52670
AS
3067 if (err)
3068 return err;
33ff9823 3069 err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
17a52670
AS
3070 if (err)
3071 return err;
3072
c93552c4
DB
3073 err = record_func_map(env, &meta, func_id, insn_idx);
3074 if (err)
3075 return err;
3076
435faee1
DB
3077 /* Mark slots with STACK_MISC in case of raw mode, stack offset
3078 * is inferred from register state.
3079 */
3080 for (i = 0; i < meta.access_size; i++) {
ca369602
DB
3081 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
3082 BPF_WRITE, -1, false);
435faee1
DB
3083 if (err)
3084 return err;
3085 }
3086
fd978bf7
JS
3087 if (func_id == BPF_FUNC_tail_call) {
3088 err = check_reference_leak(env);
3089 if (err) {
3090 verbose(env, "tail_call would lead to reference leak\n");
3091 return err;
3092 }
3093 } else if (is_release_function(func_id)) {
1b986589 3094 err = release_reference(env, meta.ref_obj_id);
46f8bc92
MKL
3095 if (err) {
3096 verbose(env, "func %s#%d reference has not been acquired before\n",
3097 func_id_name(func_id), func_id);
fd978bf7 3098 return err;
46f8bc92 3099 }
fd978bf7
JS
3100 }
3101
638f5b90 3102 regs = cur_regs(env);
cd339431
RG
3103
3104 /* check that flags argument in get_local_storage(map, flags) is 0,
3105 * this is required because get_local_storage() can't return an error.
3106 */
3107 if (func_id == BPF_FUNC_get_local_storage &&
3108 !register_is_null(&regs[BPF_REG_2])) {
3109 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
3110 return -EINVAL;
3111 }
3112
17a52670 3113 /* reset caller saved regs */
dc503a8a 3114 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 3115 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
3116 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3117 }
17a52670 3118
dc503a8a 3119 /* update return register (already marked as written above) */
17a52670 3120 if (fn->ret_type == RET_INTEGER) {
f1174f77 3121 /* sets type to SCALAR_VALUE */
61bd5218 3122 mark_reg_unknown(env, regs, BPF_REG_0);
17a52670
AS
3123 } else if (fn->ret_type == RET_VOID) {
3124 regs[BPF_REG_0].type = NOT_INIT;
3e6a4b3e
RG
3125 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
3126 fn->ret_type == RET_PTR_TO_MAP_VALUE) {
f1174f77 3127 /* There is no offset yet applied, variable or fixed */
61bd5218 3128 mark_reg_known_zero(env, regs, BPF_REG_0);
17a52670
AS
3129 /* remember map_ptr, so that check_map_access()
3130 * can check 'value_size' boundary of memory access
3131 * to map element returned from bpf_map_lookup_elem()
3132 */
33ff9823 3133 if (meta.map_ptr == NULL) {
61bd5218
JK
3134 verbose(env,
3135 "kernel subsystem misconfigured verifier\n");
17a52670
AS
3136 return -EINVAL;
3137 }
33ff9823 3138 regs[BPF_REG_0].map_ptr = meta.map_ptr;
4d31f301
DB
3139 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
3140 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
e16d2f1a
AS
3141 if (map_value_has_spin_lock(meta.map_ptr))
3142 regs[BPF_REG_0].id = ++env->id_gen;
4d31f301
DB
3143 } else {
3144 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
3145 regs[BPF_REG_0].id = ++env->id_gen;
3146 }
c64b7983
JS
3147 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
3148 mark_reg_known_zero(env, regs, BPF_REG_0);
3149 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
46f8bc92
MKL
3150 if (is_acquire_function(func_id)) {
3151 int id = acquire_reference_state(env, insn_idx);
3152
3153 if (id < 0)
3154 return id;
1b986589 3155 /* For mark_ptr_or_null_reg() */
46f8bc92 3156 regs[BPF_REG_0].id = id;
1b986589
MKL
3157 /* For release_reference() */
3158 regs[BPF_REG_0].ref_obj_id = id;
46f8bc92
MKL
3159 } else {
3160 /* For mark_ptr_or_null_reg() */
3161 regs[BPF_REG_0].id = ++env->id_gen;
3162 }
655a51e5
MKL
3163 } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
3164 mark_reg_known_zero(env, regs, BPF_REG_0);
3165 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
3166 regs[BPF_REG_0].id = ++env->id_gen;
17a52670 3167 } else {
61bd5218 3168 verbose(env, "unknown return type %d of func %s#%d\n",
ebb676da 3169 fn->ret_type, func_id_name(func_id), func_id);
17a52670
AS
3170 return -EINVAL;
3171 }
04fd61ab 3172
1b986589
MKL
3173 if (is_ptr_cast_function(func_id))
3174 /* For release_reference() */
3175 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
3176
849fa506
YS
3177 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
3178
61bd5218 3179 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
35578d79
KX
3180 if (err)
3181 return err;
04fd61ab 3182
c195651e
YS
3183 if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
3184 const char *err_str;
3185
3186#ifdef CONFIG_PERF_EVENTS
3187 err = get_callchain_buffers(sysctl_perf_event_max_stack);
3188 err_str = "cannot get callchain buffer for func %s#%d\n";
3189#else
3190 err = -ENOTSUPP;
3191 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
3192#endif
3193 if (err) {
3194 verbose(env, err_str, func_id_name(func_id), func_id);
3195 return err;
3196 }
3197
3198 env->prog->has_callchain_buf = true;
3199 }
3200
969bf05e
AS
3201 if (changes_data)
3202 clear_all_pkt_pointers(env);
3203 return 0;
3204}
3205
b03c9f9f
EC
3206static bool signed_add_overflows(s64 a, s64 b)
3207{
3208 /* Do the add in u64, where overflow is well-defined */
3209 s64 res = (s64)((u64)a + (u64)b);
3210
3211 if (b < 0)
3212 return res > a;
3213 return res < a;
3214}
3215
3216static bool signed_sub_overflows(s64 a, s64 b)
3217{
3218 /* Do the sub in u64, where overflow is well-defined */
3219 s64 res = (s64)((u64)a - (u64)b);
3220
3221 if (b < 0)
3222 return res < a;
3223 return res > a;
969bf05e
AS
3224}
3225
bb7f0f98
AS
3226static bool check_reg_sane_offset(struct bpf_verifier_env *env,
3227 const struct bpf_reg_state *reg,
3228 enum bpf_reg_type type)
3229{
3230 bool known = tnum_is_const(reg->var_off);
3231 s64 val = reg->var_off.value;
3232 s64 smin = reg->smin_value;
3233
3234 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
3235 verbose(env, "math between %s pointer and %lld is not allowed\n",
3236 reg_type_str[type], val);
3237 return false;
3238 }
3239
3240 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
3241 verbose(env, "%s pointer offset %d is not allowed\n",
3242 reg_type_str[type], reg->off);
3243 return false;
3244 }
3245
3246 if (smin == S64_MIN) {
3247 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
3248 reg_type_str[type]);
3249 return false;
3250 }
3251
3252 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
3253 verbose(env, "value %lld makes %s pointer be out of bounds\n",
3254 smin, reg_type_str[type]);
3255 return false;
3256 }
3257
3258 return true;
3259}
3260
979d63d5
DB
3261static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
3262{
3263 return &env->insn_aux_data[env->insn_idx];
3264}
3265
3266static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
3267 u32 *ptr_limit, u8 opcode, bool off_is_neg)
3268{
3269 bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
3270 (opcode == BPF_SUB && !off_is_neg);
3271 u32 off;
3272
3273 switch (ptr_reg->type) {
3274 case PTR_TO_STACK:
3275 off = ptr_reg->off + ptr_reg->var_off.value;
3276 if (mask_to_left)
3277 *ptr_limit = MAX_BPF_STACK + off;
3278 else
3279 *ptr_limit = -off;
3280 return 0;
3281 case PTR_TO_MAP_VALUE:
3282 if (mask_to_left) {
3283 *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
3284 } else {
3285 off = ptr_reg->smin_value + ptr_reg->off;
3286 *ptr_limit = ptr_reg->map_ptr->value_size - off;
3287 }
3288 return 0;
3289 default:
3290 return -EINVAL;
3291 }
3292}
3293
d3bd7413
DB
3294static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
3295 const struct bpf_insn *insn)
3296{
3297 return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
3298}
3299
3300static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
3301 u32 alu_state, u32 alu_limit)
3302{
3303 /* If we arrived here from different branches with different
3304 * state or limits to sanitize, then this won't work.
3305 */
3306 if (aux->alu_state &&
3307 (aux->alu_state != alu_state ||
3308 aux->alu_limit != alu_limit))
3309 return -EACCES;
3310
3311 /* Corresponding fixup done in fixup_bpf_calls(). */
3312 aux->alu_state = alu_state;
3313 aux->alu_limit = alu_limit;
3314 return 0;
3315}
3316
3317static int sanitize_val_alu(struct bpf_verifier_env *env,
3318 struct bpf_insn *insn)
3319{
3320 struct bpf_insn_aux_data *aux = cur_aux(env);
3321
3322 if (can_skip_alu_sanitation(env, insn))
3323 return 0;
3324
3325 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
3326}
3327
979d63d5
DB
3328static int sanitize_ptr_alu(struct bpf_verifier_env *env,
3329 struct bpf_insn *insn,
3330 const struct bpf_reg_state *ptr_reg,
3331 struct bpf_reg_state *dst_reg,
3332 bool off_is_neg)
3333{
3334 struct bpf_verifier_state *vstate = env->cur_state;
3335 struct bpf_insn_aux_data *aux = cur_aux(env);
3336 bool ptr_is_dst_reg = ptr_reg == dst_reg;
3337 u8 opcode = BPF_OP(insn->code);
3338 u32 alu_state, alu_limit;
3339 struct bpf_reg_state tmp;
3340 bool ret;
3341
d3bd7413 3342 if (can_skip_alu_sanitation(env, insn))
979d63d5
DB
3343 return 0;
3344
3345 /* We already marked aux for masking from non-speculative
3346 * paths, thus we got here in the first place. We only care
3347 * to explore bad access from here.
3348 */
3349 if (vstate->speculative)
3350 goto do_sim;
3351
3352 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
3353 alu_state |= ptr_is_dst_reg ?
3354 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
3355
3356 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
3357 return 0;
d3bd7413 3358 if (update_alu_sanitation_state(aux, alu_state, alu_limit))
979d63d5 3359 return -EACCES;
979d63d5
DB
3360do_sim:
3361 /* Simulate and find potential out-of-bounds access under
3362 * speculative execution from truncation as a result of
3363 * masking when off was not within expected range. If off
3364 * sits in dst, then we temporarily need to move ptr there
3365 * to simulate dst (== 0) +/-= ptr. Needed, for example,
3366 * for cases where we use K-based arithmetic in one direction
3367 * and truncated reg-based in the other in order to explore
3368 * bad access.
3369 */
3370 if (!ptr_is_dst_reg) {
3371 tmp = *dst_reg;
3372 *dst_reg = *ptr_reg;
3373 }
3374 ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
3375 if (!ptr_is_dst_reg)
3376 *dst_reg = tmp;
3377 return !ret ? -EFAULT : 0;
3378}
3379
f1174f77 3380/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
f1174f77
EC
3381 * Caller should also handle BPF_MOV case separately.
3382 * If we return -EACCES, caller may want to try again treating pointer as a
3383 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
3384 */
3385static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3386 struct bpf_insn *insn,
3387 const struct bpf_reg_state *ptr_reg,
3388 const struct bpf_reg_state *off_reg)
969bf05e 3389{
f4d7e40a
AS
3390 struct bpf_verifier_state *vstate = env->cur_state;
3391 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3392 struct bpf_reg_state *regs = state->regs, *dst_reg;
f1174f77 3393 bool known = tnum_is_const(off_reg->var_off);
b03c9f9f
EC
3394 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
3395 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
3396 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
3397 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
9d7eceed 3398 u32 dst = insn->dst_reg, src = insn->src_reg;
969bf05e 3399 u8 opcode = BPF_OP(insn->code);
979d63d5 3400 int ret;
969bf05e 3401
f1174f77 3402 dst_reg = &regs[dst];
969bf05e 3403
6f16101e
DB
3404 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
3405 smin_val > smax_val || umin_val > umax_val) {
3406 /* Taint dst register if offset had invalid bounds derived from
3407 * e.g. dead branches.
3408 */
3409 __mark_reg_unknown(dst_reg);
3410 return 0;
f1174f77
EC
3411 }
3412
3413 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3414 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
82abbf8d
AS
3415 verbose(env,
3416 "R%d 32-bit pointer arithmetic prohibited\n",
3417 dst);
f1174f77 3418 return -EACCES;
969bf05e
AS
3419 }
3420
aad2eeaf
JS
3421 switch (ptr_reg->type) {
3422 case PTR_TO_MAP_VALUE_OR_NULL:
3423 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
3424 dst, reg_type_str[ptr_reg->type]);
f1174f77 3425 return -EACCES;
aad2eeaf
JS
3426 case CONST_PTR_TO_MAP:
3427 case PTR_TO_PACKET_END:
c64b7983
JS
3428 case PTR_TO_SOCKET:
3429 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
3430 case PTR_TO_SOCK_COMMON:
3431 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
3432 case PTR_TO_TCP_SOCK:
3433 case PTR_TO_TCP_SOCK_OR_NULL:
aad2eeaf
JS
3434 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3435 dst, reg_type_str[ptr_reg->type]);
f1174f77 3436 return -EACCES;
9d7eceed
DB
3437 case PTR_TO_MAP_VALUE:
3438 if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
3439 verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
3440 off_reg == dst_reg ? dst : src);
3441 return -EACCES;
3442 }
3443 /* fall-through */
aad2eeaf
JS
3444 default:
3445 break;
f1174f77
EC
3446 }
3447
3448 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
3449 * The id may be overwritten later if we create a new variable offset.
969bf05e 3450 */
f1174f77
EC
3451 dst_reg->type = ptr_reg->type;
3452 dst_reg->id = ptr_reg->id;
969bf05e 3453
bb7f0f98
AS
3454 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
3455 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
3456 return -EINVAL;
3457
f1174f77
EC
3458 switch (opcode) {
3459 case BPF_ADD:
979d63d5
DB
3460 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3461 if (ret < 0) {
3462 verbose(env, "R%d tried to add from different maps or paths\n", dst);
3463 return ret;
3464 }
f1174f77
EC
3465 /* We can take a fixed offset as long as it doesn't overflow
3466 * the s32 'off' field
969bf05e 3467 */
b03c9f9f
EC
3468 if (known && (ptr_reg->off + smin_val ==
3469 (s64)(s32)(ptr_reg->off + smin_val))) {
f1174f77 3470 /* pointer += K. Accumulate it into fixed offset */
b03c9f9f
EC
3471 dst_reg->smin_value = smin_ptr;
3472 dst_reg->smax_value = smax_ptr;
3473 dst_reg->umin_value = umin_ptr;
3474 dst_reg->umax_value = umax_ptr;
f1174f77 3475 dst_reg->var_off = ptr_reg->var_off;
b03c9f9f 3476 dst_reg->off = ptr_reg->off + smin_val;
0962590e 3477 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3478 break;
3479 }
f1174f77
EC
3480 /* A new variable offset is created. Note that off_reg->off
3481 * == 0, since it's a scalar.
3482 * dst_reg gets the pointer type and since some positive
3483 * integer value was added to the pointer, give it a new 'id'
3484 * if it's a PTR_TO_PACKET.
3485 * this creates a new 'base' pointer, off_reg (variable) gets
3486 * added into the variable offset, and we copy the fixed offset
3487 * from ptr_reg.
969bf05e 3488 */
b03c9f9f
EC
3489 if (signed_add_overflows(smin_ptr, smin_val) ||
3490 signed_add_overflows(smax_ptr, smax_val)) {
3491 dst_reg->smin_value = S64_MIN;
3492 dst_reg->smax_value = S64_MAX;
3493 } else {
3494 dst_reg->smin_value = smin_ptr + smin_val;
3495 dst_reg->smax_value = smax_ptr + smax_val;
3496 }
3497 if (umin_ptr + umin_val < umin_ptr ||
3498 umax_ptr + umax_val < umax_ptr) {
3499 dst_reg->umin_value = 0;
3500 dst_reg->umax_value = U64_MAX;
3501 } else {
3502 dst_reg->umin_value = umin_ptr + umin_val;
3503 dst_reg->umax_value = umax_ptr + umax_val;
3504 }
f1174f77
EC
3505 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
3506 dst_reg->off = ptr_reg->off;
0962590e 3507 dst_reg->raw = ptr_reg->raw;
de8f3a83 3508 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3509 dst_reg->id = ++env->id_gen;
3510 /* something was added to pkt_ptr, set range to zero */
0962590e 3511 dst_reg->raw = 0;
f1174f77
EC
3512 }
3513 break;
3514 case BPF_SUB:
979d63d5
DB
3515 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3516 if (ret < 0) {
3517 verbose(env, "R%d tried to sub from different maps or paths\n", dst);
3518 return ret;
3519 }
f1174f77
EC
3520 if (dst_reg == off_reg) {
3521 /* scalar -= pointer. Creates an unknown scalar */
82abbf8d
AS
3522 verbose(env, "R%d tried to subtract pointer from scalar\n",
3523 dst);
f1174f77
EC
3524 return -EACCES;
3525 }
3526 /* We don't allow subtraction from FP, because (according to
3527 * test_verifier.c test "invalid fp arithmetic", JITs might not
3528 * be able to deal with it.
969bf05e 3529 */
f1174f77 3530 if (ptr_reg->type == PTR_TO_STACK) {
82abbf8d
AS
3531 verbose(env, "R%d subtraction from stack pointer prohibited\n",
3532 dst);
f1174f77
EC
3533 return -EACCES;
3534 }
b03c9f9f
EC
3535 if (known && (ptr_reg->off - smin_val ==
3536 (s64)(s32)(ptr_reg->off - smin_val))) {
f1174f77 3537 /* pointer -= K. Subtract it from fixed offset */
b03c9f9f
EC
3538 dst_reg->smin_value = smin_ptr;
3539 dst_reg->smax_value = smax_ptr;
3540 dst_reg->umin_value = umin_ptr;
3541 dst_reg->umax_value = umax_ptr;
f1174f77
EC
3542 dst_reg->var_off = ptr_reg->var_off;
3543 dst_reg->id = ptr_reg->id;
b03c9f9f 3544 dst_reg->off = ptr_reg->off - smin_val;
0962590e 3545 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3546 break;
3547 }
f1174f77
EC
3548 /* A new variable offset is created. If the subtrahend is known
3549 * nonnegative, then any reg->range we had before is still good.
969bf05e 3550 */
b03c9f9f
EC
3551 if (signed_sub_overflows(smin_ptr, smax_val) ||
3552 signed_sub_overflows(smax_ptr, smin_val)) {
3553 /* Overflow possible, we know nothing */
3554 dst_reg->smin_value = S64_MIN;
3555 dst_reg->smax_value = S64_MAX;
3556 } else {
3557 dst_reg->smin_value = smin_ptr - smax_val;
3558 dst_reg->smax_value = smax_ptr - smin_val;
3559 }
3560 if (umin_ptr < umax_val) {
3561 /* Overflow possible, we know nothing */
3562 dst_reg->umin_value = 0;
3563 dst_reg->umax_value = U64_MAX;
3564 } else {
3565 /* Cannot overflow (as long as bounds are consistent) */
3566 dst_reg->umin_value = umin_ptr - umax_val;
3567 dst_reg->umax_value = umax_ptr - umin_val;
3568 }
f1174f77
EC
3569 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
3570 dst_reg->off = ptr_reg->off;
0962590e 3571 dst_reg->raw = ptr_reg->raw;
de8f3a83 3572 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3573 dst_reg->id = ++env->id_gen;
3574 /* something was added to pkt_ptr, set range to zero */
b03c9f9f 3575 if (smin_val < 0)
0962590e 3576 dst_reg->raw = 0;
43188702 3577 }
f1174f77
EC
3578 break;
3579 case BPF_AND:
3580 case BPF_OR:
3581 case BPF_XOR:
82abbf8d
AS
3582 /* bitwise ops on pointers are troublesome, prohibit. */
3583 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
3584 dst, bpf_alu_string[opcode >> 4]);
f1174f77
EC
3585 return -EACCES;
3586 default:
3587 /* other operators (e.g. MUL,LSH) produce non-pointer results */
82abbf8d
AS
3588 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
3589 dst, bpf_alu_string[opcode >> 4]);
f1174f77 3590 return -EACCES;
43188702
JF
3591 }
3592
bb7f0f98
AS
3593 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
3594 return -EINVAL;
3595
b03c9f9f
EC
3596 __update_reg_bounds(dst_reg);
3597 __reg_deduce_bounds(dst_reg);
3598 __reg_bound_offset(dst_reg);
0d6303db
DB
3599
3600 /* For unprivileged we require that resulting offset must be in bounds
3601 * in order to be able to sanitize access later on.
3602 */
e4298d25
DB
3603 if (!env->allow_ptr_leaks) {
3604 if (dst_reg->type == PTR_TO_MAP_VALUE &&
3605 check_map_access(env, dst, dst_reg->off, 1, false)) {
3606 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
3607 "prohibited for !root\n", dst);
3608 return -EACCES;
3609 } else if (dst_reg->type == PTR_TO_STACK &&
3610 check_stack_access(env, dst_reg, dst_reg->off +
3611 dst_reg->var_off.value, 1)) {
3612 verbose(env, "R%d stack pointer arithmetic goes out of range, "
3613 "prohibited for !root\n", dst);
3614 return -EACCES;
3615 }
0d6303db
DB
3616 }
3617
43188702
JF
3618 return 0;
3619}
3620
468f6eaf
JH
3621/* WARNING: This function does calculations on 64-bit values, but the actual
3622 * execution may occur on 32-bit values. Therefore, things like bitshifts
3623 * need extra checks in the 32-bit case.
3624 */
f1174f77
EC
3625static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3626 struct bpf_insn *insn,
3627 struct bpf_reg_state *dst_reg,
3628 struct bpf_reg_state src_reg)
969bf05e 3629{
638f5b90 3630 struct bpf_reg_state *regs = cur_regs(env);
48461135 3631 u8 opcode = BPF_OP(insn->code);
f1174f77 3632 bool src_known, dst_known;
b03c9f9f
EC
3633 s64 smin_val, smax_val;
3634 u64 umin_val, umax_val;
468f6eaf 3635 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
d3bd7413
DB
3636 u32 dst = insn->dst_reg;
3637 int ret;
48461135 3638
b799207e
JH
3639 if (insn_bitness == 32) {
3640 /* Relevant for 32-bit RSH: Information can propagate towards
3641 * LSB, so it isn't sufficient to only truncate the output to
3642 * 32 bits.
3643 */
3644 coerce_reg_to_size(dst_reg, 4);
3645 coerce_reg_to_size(&src_reg, 4);
3646 }
3647
b03c9f9f
EC
3648 smin_val = src_reg.smin_value;
3649 smax_val = src_reg.smax_value;
3650 umin_val = src_reg.umin_value;
3651 umax_val = src_reg.umax_value;
f1174f77
EC
3652 src_known = tnum_is_const(src_reg.var_off);
3653 dst_known = tnum_is_const(dst_reg->var_off);
f23cc643 3654
6f16101e
DB
3655 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
3656 smin_val > smax_val || umin_val > umax_val) {
3657 /* Taint dst register if offset had invalid bounds derived from
3658 * e.g. dead branches.
3659 */
3660 __mark_reg_unknown(dst_reg);
3661 return 0;
3662 }
3663
bb7f0f98
AS
3664 if (!src_known &&
3665 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
3666 __mark_reg_unknown(dst_reg);
3667 return 0;
3668 }
3669
48461135
JB
3670 switch (opcode) {
3671 case BPF_ADD:
d3bd7413
DB
3672 ret = sanitize_val_alu(env, insn);
3673 if (ret < 0) {
3674 verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
3675 return ret;
3676 }
b03c9f9f
EC
3677 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
3678 signed_add_overflows(dst_reg->smax_value, smax_val)) {
3679 dst_reg->smin_value = S64_MIN;
3680 dst_reg->smax_value = S64_MAX;
3681 } else {
3682 dst_reg->smin_value += smin_val;
3683 dst_reg->smax_value += smax_val;
3684 }
3685 if (dst_reg->umin_value + umin_val < umin_val ||
3686 dst_reg->umax_value + umax_val < umax_val) {
3687 dst_reg->umin_value = 0;
3688 dst_reg->umax_value = U64_MAX;
3689 } else {
3690 dst_reg->umin_value += umin_val;
3691 dst_reg->umax_value += umax_val;
3692 }
f1174f77 3693 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
48461135
JB
3694 break;
3695 case BPF_SUB:
d3bd7413
DB
3696 ret = sanitize_val_alu(env, insn);
3697 if (ret < 0) {
3698 verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
3699 return ret;
3700 }
b03c9f9f
EC
3701 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
3702 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
3703 /* Overflow possible, we know nothing */
3704 dst_reg->smin_value = S64_MIN;
3705 dst_reg->smax_value = S64_MAX;
3706 } else {
3707 dst_reg->smin_value -= smax_val;
3708 dst_reg->smax_value -= smin_val;
3709 }
3710 if (dst_reg->umin_value < umax_val) {
3711 /* Overflow possible, we know nothing */
3712 dst_reg->umin_value = 0;
3713 dst_reg->umax_value = U64_MAX;
3714 } else {
3715 /* Cannot overflow (as long as bounds are consistent) */
3716 dst_reg->umin_value -= umax_val;
3717 dst_reg->umax_value -= umin_val;
3718 }
f1174f77 3719 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
48461135
JB
3720 break;
3721 case BPF_MUL:
b03c9f9f
EC
3722 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
3723 if (smin_val < 0 || dst_reg->smin_value < 0) {
f1174f77 3724 /* Ain't nobody got time to multiply that sign */
b03c9f9f
EC
3725 __mark_reg_unbounded(dst_reg);
3726 __update_reg_bounds(dst_reg);
f1174f77
EC
3727 break;
3728 }
b03c9f9f
EC
3729 /* Both values are positive, so we can work with unsigned and
3730 * copy the result to signed (unless it exceeds S64_MAX).
f1174f77 3731 */
b03c9f9f
EC
3732 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
3733 /* Potential overflow, we know nothing */
3734 __mark_reg_unbounded(dst_reg);
3735 /* (except what we can learn from the var_off) */
3736 __update_reg_bounds(dst_reg);
3737 break;
3738 }
3739 dst_reg->umin_value *= umin_val;
3740 dst_reg->umax_value *= umax_val;
3741 if (dst_reg->umax_value > S64_MAX) {
3742 /* Overflow possible, we know nothing */
3743 dst_reg->smin_value = S64_MIN;
3744 dst_reg->smax_value = S64_MAX;
3745 } else {
3746 dst_reg->smin_value = dst_reg->umin_value;
3747 dst_reg->smax_value = dst_reg->umax_value;
3748 }
48461135
JB
3749 break;
3750 case BPF_AND:
f1174f77 3751 if (src_known && dst_known) {
b03c9f9f
EC
3752 __mark_reg_known(dst_reg, dst_reg->var_off.value &
3753 src_reg.var_off.value);
f1174f77
EC
3754 break;
3755 }
b03c9f9f
EC
3756 /* We get our minimum from the var_off, since that's inherently
3757 * bitwise. Our maximum is the minimum of the operands' maxima.
f23cc643 3758 */
f1174f77 3759 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3760 dst_reg->umin_value = dst_reg->var_off.value;
3761 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
3762 if (dst_reg->smin_value < 0 || smin_val < 0) {
3763 /* Lose signed bounds when ANDing negative numbers,
3764 * ain't nobody got time for that.
3765 */
3766 dst_reg->smin_value = S64_MIN;
3767 dst_reg->smax_value = S64_MAX;
3768 } else {
3769 /* ANDing two positives gives a positive, so safe to
3770 * cast result into s64.
3771 */
3772 dst_reg->smin_value = dst_reg->umin_value;
3773 dst_reg->smax_value = dst_reg->umax_value;
3774 }
3775 /* We may learn something more from the var_off */
3776 __update_reg_bounds(dst_reg);
f1174f77
EC
3777 break;
3778 case BPF_OR:
3779 if (src_known && dst_known) {
b03c9f9f
EC
3780 __mark_reg_known(dst_reg, dst_reg->var_off.value |
3781 src_reg.var_off.value);
f1174f77
EC
3782 break;
3783 }
b03c9f9f
EC
3784 /* We get our maximum from the var_off, and our minimum is the
3785 * maximum of the operands' minima
f1174f77
EC
3786 */
3787 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3788 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
3789 dst_reg->umax_value = dst_reg->var_off.value |
3790 dst_reg->var_off.mask;
3791 if (dst_reg->smin_value < 0 || smin_val < 0) {
3792 /* Lose signed bounds when ORing negative numbers,
3793 * ain't nobody got time for that.
3794 */
3795 dst_reg->smin_value = S64_MIN;
3796 dst_reg->smax_value = S64_MAX;
f1174f77 3797 } else {
b03c9f9f
EC
3798 /* ORing two positives gives a positive, so safe to
3799 * cast result into s64.
3800 */
3801 dst_reg->smin_value = dst_reg->umin_value;
3802 dst_reg->smax_value = dst_reg->umax_value;
f1174f77 3803 }
b03c9f9f
EC
3804 /* We may learn something more from the var_off */
3805 __update_reg_bounds(dst_reg);
48461135
JB
3806 break;
3807 case BPF_LSH:
468f6eaf
JH
3808 if (umax_val >= insn_bitness) {
3809 /* Shifts greater than 31 or 63 are undefined.
3810 * This includes shifts by a negative number.
b03c9f9f 3811 */
61bd5218 3812 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3813 break;
3814 }
b03c9f9f
EC
3815 /* We lose all sign bit information (except what we can pick
3816 * up from var_off)
48461135 3817 */
b03c9f9f
EC
3818 dst_reg->smin_value = S64_MIN;
3819 dst_reg->smax_value = S64_MAX;
3820 /* If we might shift our top bit out, then we know nothing */
3821 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
3822 dst_reg->umin_value = 0;
3823 dst_reg->umax_value = U64_MAX;
d1174416 3824 } else {
b03c9f9f
EC
3825 dst_reg->umin_value <<= umin_val;
3826 dst_reg->umax_value <<= umax_val;
d1174416 3827 }
afbe1a5b 3828 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3829 /* We may learn something more from the var_off */
3830 __update_reg_bounds(dst_reg);
48461135
JB
3831 break;
3832 case BPF_RSH:
468f6eaf
JH
3833 if (umax_val >= insn_bitness) {
3834 /* Shifts greater than 31 or 63 are undefined.
3835 * This includes shifts by a negative number.
b03c9f9f 3836 */
61bd5218 3837 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3838 break;
3839 }
4374f256
EC
3840 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
3841 * be negative, then either:
3842 * 1) src_reg might be zero, so the sign bit of the result is
3843 * unknown, so we lose our signed bounds
3844 * 2) it's known negative, thus the unsigned bounds capture the
3845 * signed bounds
3846 * 3) the signed bounds cross zero, so they tell us nothing
3847 * about the result
3848 * If the value in dst_reg is known nonnegative, then again the
3849 * unsigned bounts capture the signed bounds.
3850 * Thus, in all cases it suffices to blow away our signed bounds
3851 * and rely on inferring new ones from the unsigned bounds and
3852 * var_off of the result.
3853 */
3854 dst_reg->smin_value = S64_MIN;
3855 dst_reg->smax_value = S64_MAX;
afbe1a5b 3856 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3857 dst_reg->umin_value >>= umax_val;
3858 dst_reg->umax_value >>= umin_val;
3859 /* We may learn something more from the var_off */
3860 __update_reg_bounds(dst_reg);
48461135 3861 break;
9cbe1f5a
YS
3862 case BPF_ARSH:
3863 if (umax_val >= insn_bitness) {
3864 /* Shifts greater than 31 or 63 are undefined.
3865 * This includes shifts by a negative number.
3866 */
3867 mark_reg_unknown(env, regs, insn->dst_reg);
3868 break;
3869 }
3870
3871 /* Upon reaching here, src_known is true and
3872 * umax_val is equal to umin_val.
3873 */
3874 dst_reg->smin_value >>= umin_val;
3875 dst_reg->smax_value >>= umin_val;
3876 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
3877
3878 /* blow away the dst_reg umin_value/umax_value and rely on
3879 * dst_reg var_off to refine the result.
3880 */
3881 dst_reg->umin_value = 0;
3882 dst_reg->umax_value = U64_MAX;
3883 __update_reg_bounds(dst_reg);
3884 break;
48461135 3885 default:
61bd5218 3886 mark_reg_unknown(env, regs, insn->dst_reg);
48461135
JB
3887 break;
3888 }
3889
468f6eaf
JH
3890 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3891 /* 32-bit ALU ops are (32,32)->32 */
3892 coerce_reg_to_size(dst_reg, 4);
468f6eaf
JH
3893 }
3894
b03c9f9f
EC
3895 __reg_deduce_bounds(dst_reg);
3896 __reg_bound_offset(dst_reg);
f1174f77
EC
3897 return 0;
3898}
3899
3900/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
3901 * and var_off.
3902 */
3903static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
3904 struct bpf_insn *insn)
3905{
f4d7e40a
AS
3906 struct bpf_verifier_state *vstate = env->cur_state;
3907 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3908 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
f1174f77
EC
3909 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
3910 u8 opcode = BPF_OP(insn->code);
f1174f77
EC
3911
3912 dst_reg = &regs[insn->dst_reg];
f1174f77
EC
3913 src_reg = NULL;
3914 if (dst_reg->type != SCALAR_VALUE)
3915 ptr_reg = dst_reg;
3916 if (BPF_SRC(insn->code) == BPF_X) {
3917 src_reg = &regs[insn->src_reg];
f1174f77
EC
3918 if (src_reg->type != SCALAR_VALUE) {
3919 if (dst_reg->type != SCALAR_VALUE) {
3920 /* Combining two pointers by any ALU op yields
82abbf8d
AS
3921 * an arbitrary scalar. Disallow all math except
3922 * pointer subtraction
f1174f77 3923 */
dd066823 3924 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
82abbf8d
AS
3925 mark_reg_unknown(env, regs, insn->dst_reg);
3926 return 0;
f1174f77 3927 }
82abbf8d
AS
3928 verbose(env, "R%d pointer %s pointer prohibited\n",
3929 insn->dst_reg,
3930 bpf_alu_string[opcode >> 4]);
3931 return -EACCES;
f1174f77
EC
3932 } else {
3933 /* scalar += pointer
3934 * This is legal, but we have to reverse our
3935 * src/dest handling in computing the range
3936 */
82abbf8d
AS
3937 return adjust_ptr_min_max_vals(env, insn,
3938 src_reg, dst_reg);
f1174f77
EC
3939 }
3940 } else if (ptr_reg) {
3941 /* pointer += scalar */
82abbf8d
AS
3942 return adjust_ptr_min_max_vals(env, insn,
3943 dst_reg, src_reg);
f1174f77
EC
3944 }
3945 } else {
3946 /* Pretend the src is a reg with a known value, since we only
3947 * need to be able to read from this state.
3948 */
3949 off_reg.type = SCALAR_VALUE;
b03c9f9f 3950 __mark_reg_known(&off_reg, insn->imm);
f1174f77 3951 src_reg = &off_reg;
82abbf8d
AS
3952 if (ptr_reg) /* pointer += K */
3953 return adjust_ptr_min_max_vals(env, insn,
3954 ptr_reg, src_reg);
f1174f77
EC
3955 }
3956
3957 /* Got here implies adding two SCALAR_VALUEs */
3958 if (WARN_ON_ONCE(ptr_reg)) {
f4d7e40a 3959 print_verifier_state(env, state);
61bd5218 3960 verbose(env, "verifier internal error: unexpected ptr_reg\n");
f1174f77
EC
3961 return -EINVAL;
3962 }
3963 if (WARN_ON(!src_reg)) {
f4d7e40a 3964 print_verifier_state(env, state);
61bd5218 3965 verbose(env, "verifier internal error: no src_reg\n");
f1174f77
EC
3966 return -EINVAL;
3967 }
3968 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
48461135
JB
3969}
3970
17a52670 3971/* check validity of 32-bit and 64-bit arithmetic operations */
58e2af8b 3972static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 3973{
638f5b90 3974 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
3975 u8 opcode = BPF_OP(insn->code);
3976 int err;
3977
3978 if (opcode == BPF_END || opcode == BPF_NEG) {
3979 if (opcode == BPF_NEG) {
3980 if (BPF_SRC(insn->code) != 0 ||
3981 insn->src_reg != BPF_REG_0 ||
3982 insn->off != 0 || insn->imm != 0) {
61bd5218 3983 verbose(env, "BPF_NEG uses reserved fields\n");
17a52670
AS
3984 return -EINVAL;
3985 }
3986 } else {
3987 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
e67b8a68
EC
3988 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
3989 BPF_CLASS(insn->code) == BPF_ALU64) {
61bd5218 3990 verbose(env, "BPF_END uses reserved fields\n");
17a52670
AS
3991 return -EINVAL;
3992 }
3993 }
3994
3995 /* check src operand */
dc503a8a 3996 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
3997 if (err)
3998 return err;
3999
1be7f75d 4000 if (is_pointer_value(env, insn->dst_reg)) {
61bd5218 4001 verbose(env, "R%d pointer arithmetic prohibited\n",
1be7f75d
AS
4002 insn->dst_reg);
4003 return -EACCES;
4004 }
4005
17a52670 4006 /* check dest operand */
dc503a8a 4007 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
4008 if (err)
4009 return err;
4010
4011 } else if (opcode == BPF_MOV) {
4012
4013 if (BPF_SRC(insn->code) == BPF_X) {
4014 if (insn->imm != 0 || insn->off != 0) {
61bd5218 4015 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
4016 return -EINVAL;
4017 }
4018
4019 /* check src operand */
dc503a8a 4020 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4021 if (err)
4022 return err;
4023 } else {
4024 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 4025 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
4026 return -EINVAL;
4027 }
4028 }
4029
fbeb1603
AF
4030 /* check dest operand, mark as required later */
4031 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
4032 if (err)
4033 return err;
4034
4035 if (BPF_SRC(insn->code) == BPF_X) {
e434b8cd
JW
4036 struct bpf_reg_state *src_reg = regs + insn->src_reg;
4037 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
4038
17a52670
AS
4039 if (BPF_CLASS(insn->code) == BPF_ALU64) {
4040 /* case: R1 = R2
4041 * copy register state to dest reg
4042 */
e434b8cd
JW
4043 *dst_reg = *src_reg;
4044 dst_reg->live |= REG_LIVE_WRITTEN;
17a52670 4045 } else {
f1174f77 4046 /* R1 = (u32) R2 */
1be7f75d 4047 if (is_pointer_value(env, insn->src_reg)) {
61bd5218
JK
4048 verbose(env,
4049 "R%d partial copy of pointer\n",
1be7f75d
AS
4050 insn->src_reg);
4051 return -EACCES;
e434b8cd
JW
4052 } else if (src_reg->type == SCALAR_VALUE) {
4053 *dst_reg = *src_reg;
4054 dst_reg->live |= REG_LIVE_WRITTEN;
4055 } else {
4056 mark_reg_unknown(env, regs,
4057 insn->dst_reg);
1be7f75d 4058 }
e434b8cd 4059 coerce_reg_to_size(dst_reg, 4);
17a52670
AS
4060 }
4061 } else {
4062 /* case: R = imm
4063 * remember the value we stored into this reg
4064 */
fbeb1603
AF
4065 /* clear any state __mark_reg_known doesn't set */
4066 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77 4067 regs[insn->dst_reg].type = SCALAR_VALUE;
95a762e2
JH
4068 if (BPF_CLASS(insn->code) == BPF_ALU64) {
4069 __mark_reg_known(regs + insn->dst_reg,
4070 insn->imm);
4071 } else {
4072 __mark_reg_known(regs + insn->dst_reg,
4073 (u32)insn->imm);
4074 }
17a52670
AS
4075 }
4076
4077 } else if (opcode > BPF_END) {
61bd5218 4078 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17a52670
AS
4079 return -EINVAL;
4080
4081 } else { /* all other ALU ops: and, sub, xor, add, ... */
4082
17a52670
AS
4083 if (BPF_SRC(insn->code) == BPF_X) {
4084 if (insn->imm != 0 || insn->off != 0) {
61bd5218 4085 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
4086 return -EINVAL;
4087 }
4088 /* check src1 operand */
dc503a8a 4089 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4090 if (err)
4091 return err;
4092 } else {
4093 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 4094 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
4095 return -EINVAL;
4096 }
4097 }
4098
4099 /* check src2 operand */
dc503a8a 4100 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
4101 if (err)
4102 return err;
4103
4104 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
4105 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
61bd5218 4106 verbose(env, "div by zero\n");
17a52670
AS
4107 return -EINVAL;
4108 }
4109
229394e8
RV
4110 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
4111 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
4112 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
4113
4114 if (insn->imm < 0 || insn->imm >= size) {
61bd5218 4115 verbose(env, "invalid shift %d\n", insn->imm);
229394e8
RV
4116 return -EINVAL;
4117 }
4118 }
4119
1a0dc1ac 4120 /* check dest operand */
dc503a8a 4121 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
1a0dc1ac
AS
4122 if (err)
4123 return err;
4124
f1174f77 4125 return adjust_reg_min_max_vals(env, insn);
17a52670
AS
4126 }
4127
4128 return 0;
4129}
4130
f4d7e40a 4131static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
de8f3a83 4132 struct bpf_reg_state *dst_reg,
f8ddadc4 4133 enum bpf_reg_type type,
fb2a311a 4134 bool range_right_open)
969bf05e 4135{
f4d7e40a 4136 struct bpf_func_state *state = vstate->frame[vstate->curframe];
58e2af8b 4137 struct bpf_reg_state *regs = state->regs, *reg;
fb2a311a 4138 u16 new_range;
f4d7e40a 4139 int i, j;
2d2be8ca 4140
fb2a311a
DB
4141 if (dst_reg->off < 0 ||
4142 (dst_reg->off == 0 && range_right_open))
f1174f77
EC
4143 /* This doesn't give us any range */
4144 return;
4145
b03c9f9f
EC
4146 if (dst_reg->umax_value > MAX_PACKET_OFF ||
4147 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
f1174f77
EC
4148 /* Risk of overflow. For instance, ptr + (1<<63) may be less
4149 * than pkt_end, but that's because it's also less than pkt.
4150 */
4151 return;
4152
fb2a311a
DB
4153 new_range = dst_reg->off;
4154 if (range_right_open)
4155 new_range--;
4156
4157 /* Examples for register markings:
2d2be8ca 4158 *
fb2a311a 4159 * pkt_data in dst register:
2d2be8ca
DB
4160 *
4161 * r2 = r3;
4162 * r2 += 8;
4163 * if (r2 > pkt_end) goto <handle exception>
4164 * <access okay>
4165 *
b4e432f1
DB
4166 * r2 = r3;
4167 * r2 += 8;
4168 * if (r2 < pkt_end) goto <access okay>
4169 * <handle exception>
4170 *
2d2be8ca
DB
4171 * Where:
4172 * r2 == dst_reg, pkt_end == src_reg
4173 * r2=pkt(id=n,off=8,r=0)
4174 * r3=pkt(id=n,off=0,r=0)
4175 *
fb2a311a 4176 * pkt_data in src register:
2d2be8ca
DB
4177 *
4178 * r2 = r3;
4179 * r2 += 8;
4180 * if (pkt_end >= r2) goto <access okay>
4181 * <handle exception>
4182 *
b4e432f1
DB
4183 * r2 = r3;
4184 * r2 += 8;
4185 * if (pkt_end <= r2) goto <handle exception>
4186 * <access okay>
4187 *
2d2be8ca
DB
4188 * Where:
4189 * pkt_end == dst_reg, r2 == src_reg
4190 * r2=pkt(id=n,off=8,r=0)
4191 * r3=pkt(id=n,off=0,r=0)
4192 *
4193 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
fb2a311a
DB
4194 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
4195 * and [r3, r3 + 8-1) respectively is safe to access depending on
4196 * the check.
969bf05e 4197 */
2d2be8ca 4198
f1174f77
EC
4199 /* If our ids match, then we must have the same max_value. And we
4200 * don't care about the other reg's fixed offset, since if it's too big
4201 * the range won't allow anything.
4202 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
4203 */
969bf05e 4204 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 4205 if (regs[i].type == type && regs[i].id == dst_reg->id)
b1977682 4206 /* keep the maximum range already checked */
fb2a311a 4207 regs[i].range = max(regs[i].range, new_range);
969bf05e 4208
f4d7e40a
AS
4209 for (j = 0; j <= vstate->curframe; j++) {
4210 state = vstate->frame[j];
f3709f69
JS
4211 bpf_for_each_spilled_reg(i, state, reg) {
4212 if (!reg)
f4d7e40a 4213 continue;
f4d7e40a
AS
4214 if (reg->type == type && reg->id == dst_reg->id)
4215 reg->range = max(reg->range, new_range);
4216 }
969bf05e
AS
4217 }
4218}
4219
4f7b3e82
AS
4220/* compute branch direction of the expression "if (reg opcode val) goto target;"
4221 * and return:
4222 * 1 - branch will be taken and "goto target" will be executed
4223 * 0 - branch will not be taken and fall-through to next insn
4224 * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
4225 */
092ed096
JW
4226static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
4227 bool is_jmp32)
4f7b3e82 4228{
092ed096 4229 struct bpf_reg_state reg_lo;
a72dafaf
JW
4230 s64 sval;
4231
4f7b3e82
AS
4232 if (__is_pointer_value(false, reg))
4233 return -1;
4234
092ed096
JW
4235 if (is_jmp32) {
4236 reg_lo = *reg;
4237 reg = &reg_lo;
4238 /* For JMP32, only low 32 bits are compared, coerce_reg_to_size
4239 * could truncate high bits and update umin/umax according to
4240 * information of low bits.
4241 */
4242 coerce_reg_to_size(reg, 4);
4243 /* smin/smax need special handling. For example, after coerce,
4244 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
4245 * used as operand to JMP32. It is a negative number from s32's
4246 * point of view, while it is a positive number when seen as
4247 * s64. The smin/smax are kept as s64, therefore, when used with
4248 * JMP32, they need to be transformed into s32, then sign
4249 * extended back to s64.
4250 *
4251 * Also, smin/smax were copied from umin/umax. If umin/umax has
4252 * different sign bit, then min/max relationship doesn't
4253 * maintain after casting into s32, for this case, set smin/smax
4254 * to safest range.
4255 */
4256 if ((reg->umax_value ^ reg->umin_value) &
4257 (1ULL << 31)) {
4258 reg->smin_value = S32_MIN;
4259 reg->smax_value = S32_MAX;
4260 }
4261 reg->smin_value = (s64)(s32)reg->smin_value;
4262 reg->smax_value = (s64)(s32)reg->smax_value;
4263
4264 val = (u32)val;
4265 sval = (s64)(s32)val;
4266 } else {
4267 sval = (s64)val;
4268 }
a72dafaf 4269
4f7b3e82
AS
4270 switch (opcode) {
4271 case BPF_JEQ:
4272 if (tnum_is_const(reg->var_off))
4273 return !!tnum_equals_const(reg->var_off, val);
4274 break;
4275 case BPF_JNE:
4276 if (tnum_is_const(reg->var_off))
4277 return !tnum_equals_const(reg->var_off, val);
4278 break;
960ea056
JK
4279 case BPF_JSET:
4280 if ((~reg->var_off.mask & reg->var_off.value) & val)
4281 return 1;
4282 if (!((reg->var_off.mask | reg->var_off.value) & val))
4283 return 0;
4284 break;
4f7b3e82
AS
4285 case BPF_JGT:
4286 if (reg->umin_value > val)
4287 return 1;
4288 else if (reg->umax_value <= val)
4289 return 0;
4290 break;
4291 case BPF_JSGT:
a72dafaf 4292 if (reg->smin_value > sval)
4f7b3e82 4293 return 1;
a72dafaf 4294 else if (reg->smax_value < sval)
4f7b3e82
AS
4295 return 0;
4296 break;
4297 case BPF_JLT:
4298 if (reg->umax_value < val)
4299 return 1;
4300 else if (reg->umin_value >= val)
4301 return 0;
4302 break;
4303 case BPF_JSLT:
a72dafaf 4304 if (reg->smax_value < sval)
4f7b3e82 4305 return 1;
a72dafaf 4306 else if (reg->smin_value >= sval)
4f7b3e82
AS
4307 return 0;
4308 break;
4309 case BPF_JGE:
4310 if (reg->umin_value >= val)
4311 return 1;
4312 else if (reg->umax_value < val)
4313 return 0;
4314 break;
4315 case BPF_JSGE:
a72dafaf 4316 if (reg->smin_value >= sval)
4f7b3e82 4317 return 1;
a72dafaf 4318 else if (reg->smax_value < sval)
4f7b3e82
AS
4319 return 0;
4320 break;
4321 case BPF_JLE:
4322 if (reg->umax_value <= val)
4323 return 1;
4324 else if (reg->umin_value > val)
4325 return 0;
4326 break;
4327 case BPF_JSLE:
a72dafaf 4328 if (reg->smax_value <= sval)
4f7b3e82 4329 return 1;
a72dafaf 4330 else if (reg->smin_value > sval)
4f7b3e82
AS
4331 return 0;
4332 break;
4333 }
4334
4335 return -1;
4336}
4337
092ed096
JW
4338/* Generate min value of the high 32-bit from TNUM info. */
4339static u64 gen_hi_min(struct tnum var)
4340{
4341 return var.value & ~0xffffffffULL;
4342}
4343
4344/* Generate max value of the high 32-bit from TNUM info. */
4345static u64 gen_hi_max(struct tnum var)
4346{
4347 return (var.value | var.mask) & ~0xffffffffULL;
4348}
4349
4350/* Return true if VAL is compared with a s64 sign extended from s32, and they
4351 * are with the same signedness.
4352 */
4353static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
4354{
4355 return ((s32)sval >= 0 &&
4356 reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
4357 ((s32)sval < 0 &&
4358 reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
4359}
4360
48461135
JB
4361/* Adjusts the register min/max values in the case that the dst_reg is the
4362 * variable register that we are working on, and src_reg is a constant or we're
4363 * simply doing a BPF_K check.
f1174f77 4364 * In JEQ/JNE cases we also adjust the var_off values.
48461135
JB
4365 */
4366static void reg_set_min_max(struct bpf_reg_state *true_reg,
4367 struct bpf_reg_state *false_reg, u64 val,
092ed096 4368 u8 opcode, bool is_jmp32)
48461135 4369{
a72dafaf
JW
4370 s64 sval;
4371
f1174f77
EC
4372 /* If the dst_reg is a pointer, we can't learn anything about its
4373 * variable offset from the compare (unless src_reg were a pointer into
4374 * the same object, but we don't bother with that.
4375 * Since false_reg and true_reg have the same type by construction, we
4376 * only need to check one of them for pointerness.
4377 */
4378 if (__is_pointer_value(false, false_reg))
4379 return;
4cabc5b1 4380
092ed096
JW
4381 val = is_jmp32 ? (u32)val : val;
4382 sval = is_jmp32 ? (s64)(s32)val : (s64)val;
a72dafaf 4383
48461135
JB
4384 switch (opcode) {
4385 case BPF_JEQ:
48461135 4386 case BPF_JNE:
a72dafaf
JW
4387 {
4388 struct bpf_reg_state *reg =
4389 opcode == BPF_JEQ ? true_reg : false_reg;
4390
4391 /* For BPF_JEQ, if this is false we know nothing Jon Snow, but
4392 * if it is true we know the value for sure. Likewise for
4393 * BPF_JNE.
48461135 4394 */
092ed096
JW
4395 if (is_jmp32) {
4396 u64 old_v = reg->var_off.value;
4397 u64 hi_mask = ~0xffffffffULL;
4398
4399 reg->var_off.value = (old_v & hi_mask) | val;
4400 reg->var_off.mask &= hi_mask;
4401 } else {
4402 __mark_reg_known(reg, val);
4403 }
48461135 4404 break;
a72dafaf 4405 }
960ea056
JK
4406 case BPF_JSET:
4407 false_reg->var_off = tnum_and(false_reg->var_off,
4408 tnum_const(~val));
4409 if (is_power_of_2(val))
4410 true_reg->var_off = tnum_or(true_reg->var_off,
4411 tnum_const(val));
4412 break;
48461135 4413 case BPF_JGE:
a72dafaf
JW
4414 case BPF_JGT:
4415 {
4416 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
4417 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
4418
092ed096
JW
4419 if (is_jmp32) {
4420 false_umax += gen_hi_max(false_reg->var_off);
4421 true_umin += gen_hi_min(true_reg->var_off);
4422 }
a72dafaf
JW
4423 false_reg->umax_value = min(false_reg->umax_value, false_umax);
4424 true_reg->umin_value = max(true_reg->umin_value, true_umin);
b03c9f9f 4425 break;
a72dafaf 4426 }
48461135 4427 case BPF_JSGE:
a72dafaf
JW
4428 case BPF_JSGT:
4429 {
4430 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
4431 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
4432
092ed096
JW
4433 /* If the full s64 was not sign-extended from s32 then don't
4434 * deduct further info.
4435 */
4436 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4437 break;
a72dafaf
JW
4438 false_reg->smax_value = min(false_reg->smax_value, false_smax);
4439 true_reg->smin_value = max(true_reg->smin_value, true_smin);
48461135 4440 break;
a72dafaf 4441 }
b4e432f1 4442 case BPF_JLE:
a72dafaf
JW
4443 case BPF_JLT:
4444 {
4445 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
4446 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
4447
092ed096
JW
4448 if (is_jmp32) {
4449 false_umin += gen_hi_min(false_reg->var_off);
4450 true_umax += gen_hi_max(true_reg->var_off);
4451 }
a72dafaf
JW
4452 false_reg->umin_value = max(false_reg->umin_value, false_umin);
4453 true_reg->umax_value = min(true_reg->umax_value, true_umax);
b4e432f1 4454 break;
a72dafaf 4455 }
b4e432f1 4456 case BPF_JSLE:
a72dafaf
JW
4457 case BPF_JSLT:
4458 {
4459 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
4460 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
4461
092ed096
JW
4462 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4463 break;
a72dafaf
JW
4464 false_reg->smin_value = max(false_reg->smin_value, false_smin);
4465 true_reg->smax_value = min(true_reg->smax_value, true_smax);
b4e432f1 4466 break;
a72dafaf 4467 }
48461135
JB
4468 default:
4469 break;
4470 }
4471
b03c9f9f
EC
4472 __reg_deduce_bounds(false_reg);
4473 __reg_deduce_bounds(true_reg);
4474 /* We might have learned some bits from the bounds. */
4475 __reg_bound_offset(false_reg);
4476 __reg_bound_offset(true_reg);
4477 /* Intersecting with the old var_off might have improved our bounds
4478 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4479 * then new var_off is (0; 0x7f...fc) which improves our umax.
4480 */
4481 __update_reg_bounds(false_reg);
4482 __update_reg_bounds(true_reg);
48461135
JB
4483}
4484
f1174f77
EC
4485/* Same as above, but for the case that dst_reg holds a constant and src_reg is
4486 * the variable reg.
48461135
JB
4487 */
4488static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
4489 struct bpf_reg_state *false_reg, u64 val,
092ed096 4490 u8 opcode, bool is_jmp32)
48461135 4491{
a72dafaf
JW
4492 s64 sval;
4493
f1174f77
EC
4494 if (__is_pointer_value(false, false_reg))
4495 return;
4cabc5b1 4496
092ed096
JW
4497 val = is_jmp32 ? (u32)val : val;
4498 sval = is_jmp32 ? (s64)(s32)val : (s64)val;
a72dafaf 4499
48461135
JB
4500 switch (opcode) {
4501 case BPF_JEQ:
48461135 4502 case BPF_JNE:
a72dafaf
JW
4503 {
4504 struct bpf_reg_state *reg =
4505 opcode == BPF_JEQ ? true_reg : false_reg;
4506
092ed096
JW
4507 if (is_jmp32) {
4508 u64 old_v = reg->var_off.value;
4509 u64 hi_mask = ~0xffffffffULL;
4510
4511 reg->var_off.value = (old_v & hi_mask) | val;
4512 reg->var_off.mask &= hi_mask;
4513 } else {
4514 __mark_reg_known(reg, val);
4515 }
48461135 4516 break;
a72dafaf 4517 }
960ea056
JK
4518 case BPF_JSET:
4519 false_reg->var_off = tnum_and(false_reg->var_off,
4520 tnum_const(~val));
4521 if (is_power_of_2(val))
4522 true_reg->var_off = tnum_or(true_reg->var_off,
4523 tnum_const(val));
4524 break;
48461135 4525 case BPF_JGE:
a72dafaf
JW
4526 case BPF_JGT:
4527 {
4528 u64 false_umin = opcode == BPF_JGT ? val : val + 1;
4529 u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
4530
092ed096
JW
4531 if (is_jmp32) {
4532 false_umin += gen_hi_min(false_reg->var_off);
4533 true_umax += gen_hi_max(true_reg->var_off);
4534 }
a72dafaf
JW
4535 false_reg->umin_value = max(false_reg->umin_value, false_umin);
4536 true_reg->umax_value = min(true_reg->umax_value, true_umax);
b03c9f9f 4537 break;
a72dafaf 4538 }
48461135 4539 case BPF_JSGE:
a72dafaf
JW
4540 case BPF_JSGT:
4541 {
4542 s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1;
4543 s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
4544
092ed096
JW
4545 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4546 break;
a72dafaf
JW
4547 false_reg->smin_value = max(false_reg->smin_value, false_smin);
4548 true_reg->smax_value = min(true_reg->smax_value, true_smax);
48461135 4549 break;
a72dafaf 4550 }
b4e432f1 4551 case BPF_JLE:
a72dafaf
JW
4552 case BPF_JLT:
4553 {
4554 u64 false_umax = opcode == BPF_JLT ? val : val - 1;
4555 u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
4556
092ed096
JW
4557 if (is_jmp32) {
4558 false_umax += gen_hi_max(false_reg->var_off);
4559 true_umin += gen_hi_min(true_reg->var_off);
4560 }
a72dafaf
JW
4561 false_reg->umax_value = min(false_reg->umax_value, false_umax);
4562 true_reg->umin_value = max(true_reg->umin_value, true_umin);
b4e432f1 4563 break;
a72dafaf 4564 }
b4e432f1 4565 case BPF_JSLE:
a72dafaf
JW
4566 case BPF_JSLT:
4567 {
4568 s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1;
4569 s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
4570
092ed096
JW
4571 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4572 break;
a72dafaf
JW
4573 false_reg->smax_value = min(false_reg->smax_value, false_smax);
4574 true_reg->smin_value = max(true_reg->smin_value, true_smin);
b4e432f1 4575 break;
a72dafaf 4576 }
48461135
JB
4577 default:
4578 break;
4579 }
4580
b03c9f9f
EC
4581 __reg_deduce_bounds(false_reg);
4582 __reg_deduce_bounds(true_reg);
4583 /* We might have learned some bits from the bounds. */
4584 __reg_bound_offset(false_reg);
4585 __reg_bound_offset(true_reg);
4586 /* Intersecting with the old var_off might have improved our bounds
4587 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4588 * then new var_off is (0; 0x7f...fc) which improves our umax.
4589 */
4590 __update_reg_bounds(false_reg);
4591 __update_reg_bounds(true_reg);
f1174f77
EC
4592}
4593
4594/* Regs are known to be equal, so intersect their min/max/var_off */
4595static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
4596 struct bpf_reg_state *dst_reg)
4597{
b03c9f9f
EC
4598 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
4599 dst_reg->umin_value);
4600 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
4601 dst_reg->umax_value);
4602 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
4603 dst_reg->smin_value);
4604 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
4605 dst_reg->smax_value);
f1174f77
EC
4606 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
4607 dst_reg->var_off);
b03c9f9f
EC
4608 /* We might have learned new bounds from the var_off. */
4609 __update_reg_bounds(src_reg);
4610 __update_reg_bounds(dst_reg);
4611 /* We might have learned something about the sign bit. */
4612 __reg_deduce_bounds(src_reg);
4613 __reg_deduce_bounds(dst_reg);
4614 /* We might have learned some bits from the bounds. */
4615 __reg_bound_offset(src_reg);
4616 __reg_bound_offset(dst_reg);
4617 /* Intersecting with the old var_off might have improved our bounds
4618 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4619 * then new var_off is (0; 0x7f...fc) which improves our umax.
4620 */
4621 __update_reg_bounds(src_reg);
4622 __update_reg_bounds(dst_reg);
f1174f77
EC
4623}
4624
4625static void reg_combine_min_max(struct bpf_reg_state *true_src,
4626 struct bpf_reg_state *true_dst,
4627 struct bpf_reg_state *false_src,
4628 struct bpf_reg_state *false_dst,
4629 u8 opcode)
4630{
4631 switch (opcode) {
4632 case BPF_JEQ:
4633 __reg_combine_min_max(true_src, true_dst);
4634 break;
4635 case BPF_JNE:
4636 __reg_combine_min_max(false_src, false_dst);
b03c9f9f 4637 break;
4cabc5b1 4638 }
48461135
JB
4639}
4640
fd978bf7
JS
4641static void mark_ptr_or_null_reg(struct bpf_func_state *state,
4642 struct bpf_reg_state *reg, u32 id,
840b9615 4643 bool is_null)
57a09bf0 4644{
840b9615 4645 if (reg_type_may_be_null(reg->type) && reg->id == id) {
f1174f77
EC
4646 /* Old offset (both fixed and variable parts) should
4647 * have been known-zero, because we don't allow pointer
4648 * arithmetic on pointers that might be NULL.
4649 */
b03c9f9f
EC
4650 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
4651 !tnum_equals_const(reg->var_off, 0) ||
f1174f77 4652 reg->off)) {
b03c9f9f
EC
4653 __mark_reg_known_zero(reg);
4654 reg->off = 0;
f1174f77
EC
4655 }
4656 if (is_null) {
4657 reg->type = SCALAR_VALUE;
840b9615
JS
4658 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
4659 if (reg->map_ptr->inner_map_meta) {
4660 reg->type = CONST_PTR_TO_MAP;
4661 reg->map_ptr = reg->map_ptr->inner_map_meta;
4662 } else {
4663 reg->type = PTR_TO_MAP_VALUE;
4664 }
c64b7983
JS
4665 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
4666 reg->type = PTR_TO_SOCKET;
46f8bc92
MKL
4667 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
4668 reg->type = PTR_TO_SOCK_COMMON;
655a51e5
MKL
4669 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
4670 reg->type = PTR_TO_TCP_SOCK;
56f668df 4671 }
1b986589
MKL
4672 if (is_null) {
4673 /* We don't need id and ref_obj_id from this point
4674 * onwards anymore, thus we should better reset it,
4675 * so that state pruning has chances to take effect.
4676 */
4677 reg->id = 0;
4678 reg->ref_obj_id = 0;
4679 } else if (!reg_may_point_to_spin_lock(reg)) {
4680 /* For not-NULL ptr, reg->ref_obj_id will be reset
4681 * in release_reg_references().
4682 *
4683 * reg->id is still used by spin_lock ptr. Other
4684 * than spin_lock ptr type, reg->id can be reset.
fd978bf7
JS
4685 */
4686 reg->id = 0;
56f668df 4687 }
57a09bf0
TG
4688 }
4689}
4690
4691/* The logic is similar to find_good_pkt_pointers(), both could eventually
4692 * be folded together at some point.
4693 */
840b9615
JS
4694static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
4695 bool is_null)
57a09bf0 4696{
f4d7e40a 4697 struct bpf_func_state *state = vstate->frame[vstate->curframe];
f3709f69 4698 struct bpf_reg_state *reg, *regs = state->regs;
1b986589 4699 u32 ref_obj_id = regs[regno].ref_obj_id;
a08dd0da 4700 u32 id = regs[regno].id;
f4d7e40a 4701 int i, j;
57a09bf0 4702
1b986589
MKL
4703 if (ref_obj_id && ref_obj_id == id && is_null)
4704 /* regs[regno] is in the " == NULL" branch.
4705 * No one could have freed the reference state before
4706 * doing the NULL check.
4707 */
4708 WARN_ON_ONCE(release_reference_state(state, id));
fd978bf7 4709
57a09bf0 4710 for (i = 0; i < MAX_BPF_REG; i++)
fd978bf7 4711 mark_ptr_or_null_reg(state, &regs[i], id, is_null);
57a09bf0 4712
f4d7e40a
AS
4713 for (j = 0; j <= vstate->curframe; j++) {
4714 state = vstate->frame[j];
f3709f69
JS
4715 bpf_for_each_spilled_reg(i, state, reg) {
4716 if (!reg)
f4d7e40a 4717 continue;
fd978bf7 4718 mark_ptr_or_null_reg(state, reg, id, is_null);
f4d7e40a 4719 }
57a09bf0
TG
4720 }
4721}
4722
5beca081
DB
4723static bool try_match_pkt_pointers(const struct bpf_insn *insn,
4724 struct bpf_reg_state *dst_reg,
4725 struct bpf_reg_state *src_reg,
4726 struct bpf_verifier_state *this_branch,
4727 struct bpf_verifier_state *other_branch)
4728{
4729 if (BPF_SRC(insn->code) != BPF_X)
4730 return false;
4731
092ed096
JW
4732 /* Pointers are always 64-bit. */
4733 if (BPF_CLASS(insn->code) == BPF_JMP32)
4734 return false;
4735
5beca081
DB
4736 switch (BPF_OP(insn->code)) {
4737 case BPF_JGT:
4738 if ((dst_reg->type == PTR_TO_PACKET &&
4739 src_reg->type == PTR_TO_PACKET_END) ||
4740 (dst_reg->type == PTR_TO_PACKET_META &&
4741 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4742 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
4743 find_good_pkt_pointers(this_branch, dst_reg,
4744 dst_reg->type, false);
4745 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4746 src_reg->type == PTR_TO_PACKET) ||
4747 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4748 src_reg->type == PTR_TO_PACKET_META)) {
4749 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
4750 find_good_pkt_pointers(other_branch, src_reg,
4751 src_reg->type, true);
4752 } else {
4753 return false;
4754 }
4755 break;
4756 case BPF_JLT:
4757 if ((dst_reg->type == PTR_TO_PACKET &&
4758 src_reg->type == PTR_TO_PACKET_END) ||
4759 (dst_reg->type == PTR_TO_PACKET_META &&
4760 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4761 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
4762 find_good_pkt_pointers(other_branch, dst_reg,
4763 dst_reg->type, true);
4764 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4765 src_reg->type == PTR_TO_PACKET) ||
4766 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4767 src_reg->type == PTR_TO_PACKET_META)) {
4768 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
4769 find_good_pkt_pointers(this_branch, src_reg,
4770 src_reg->type, false);
4771 } else {
4772 return false;
4773 }
4774 break;
4775 case BPF_JGE:
4776 if ((dst_reg->type == PTR_TO_PACKET &&
4777 src_reg->type == PTR_TO_PACKET_END) ||
4778 (dst_reg->type == PTR_TO_PACKET_META &&
4779 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4780 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
4781 find_good_pkt_pointers(this_branch, dst_reg,
4782 dst_reg->type, true);
4783 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4784 src_reg->type == PTR_TO_PACKET) ||
4785 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4786 src_reg->type == PTR_TO_PACKET_META)) {
4787 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
4788 find_good_pkt_pointers(other_branch, src_reg,
4789 src_reg->type, false);
4790 } else {
4791 return false;
4792 }
4793 break;
4794 case BPF_JLE:
4795 if ((dst_reg->type == PTR_TO_PACKET &&
4796 src_reg->type == PTR_TO_PACKET_END) ||
4797 (dst_reg->type == PTR_TO_PACKET_META &&
4798 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4799 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
4800 find_good_pkt_pointers(other_branch, dst_reg,
4801 dst_reg->type, false);
4802 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4803 src_reg->type == PTR_TO_PACKET) ||
4804 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4805 src_reg->type == PTR_TO_PACKET_META)) {
4806 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
4807 find_good_pkt_pointers(this_branch, src_reg,
4808 src_reg->type, true);
4809 } else {
4810 return false;
4811 }
4812 break;
4813 default:
4814 return false;
4815 }
4816
4817 return true;
4818}
4819
58e2af8b 4820static int check_cond_jmp_op(struct bpf_verifier_env *env,
17a52670
AS
4821 struct bpf_insn *insn, int *insn_idx)
4822{
f4d7e40a
AS
4823 struct bpf_verifier_state *this_branch = env->cur_state;
4824 struct bpf_verifier_state *other_branch;
4825 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
4826 struct bpf_reg_state *dst_reg, *other_branch_regs;
17a52670 4827 u8 opcode = BPF_OP(insn->code);
092ed096 4828 bool is_jmp32;
17a52670
AS
4829 int err;
4830
092ed096
JW
4831 /* Only conditional jumps are expected to reach here. */
4832 if (opcode == BPF_JA || opcode > BPF_JSLE) {
4833 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
17a52670
AS
4834 return -EINVAL;
4835 }
4836
4837 if (BPF_SRC(insn->code) == BPF_X) {
4838 if (insn->imm != 0) {
092ed096 4839 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
4840 return -EINVAL;
4841 }
4842
4843 /* check src1 operand */
dc503a8a 4844 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4845 if (err)
4846 return err;
1be7f75d
AS
4847
4848 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 4849 verbose(env, "R%d pointer comparison prohibited\n",
1be7f75d
AS
4850 insn->src_reg);
4851 return -EACCES;
4852 }
17a52670
AS
4853 } else {
4854 if (insn->src_reg != BPF_REG_0) {
092ed096 4855 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
4856 return -EINVAL;
4857 }
4858 }
4859
4860 /* check src2 operand */
dc503a8a 4861 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
4862 if (err)
4863 return err;
4864
1a0dc1ac 4865 dst_reg = &regs[insn->dst_reg];
092ed096 4866 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
1a0dc1ac 4867
4f7b3e82 4868 if (BPF_SRC(insn->code) == BPF_K) {
092ed096
JW
4869 int pred = is_branch_taken(dst_reg, insn->imm, opcode,
4870 is_jmp32);
4f7b3e82
AS
4871
4872 if (pred == 1) {
4873 /* only follow the goto, ignore fall-through */
17a52670
AS
4874 *insn_idx += insn->off;
4875 return 0;
4f7b3e82
AS
4876 } else if (pred == 0) {
4877 /* only follow fall-through branch, since
17a52670
AS
4878 * that's where the program will go
4879 */
4880 return 0;
4881 }
4882 }
4883
979d63d5
DB
4884 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
4885 false);
17a52670
AS
4886 if (!other_branch)
4887 return -EFAULT;
f4d7e40a 4888 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
17a52670 4889
48461135
JB
4890 /* detect if we are comparing against a constant value so we can adjust
4891 * our min/max values for our dst register.
f1174f77
EC
4892 * this is only legit if both are scalars (or pointers to the same
4893 * object, I suppose, but we don't support that right now), because
4894 * otherwise the different base pointers mean the offsets aren't
4895 * comparable.
48461135
JB
4896 */
4897 if (BPF_SRC(insn->code) == BPF_X) {
092ed096
JW
4898 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
4899 struct bpf_reg_state lo_reg0 = *dst_reg;
4900 struct bpf_reg_state lo_reg1 = *src_reg;
4901 struct bpf_reg_state *src_lo, *dst_lo;
4902
4903 dst_lo = &lo_reg0;
4904 src_lo = &lo_reg1;
4905 coerce_reg_to_size(dst_lo, 4);
4906 coerce_reg_to_size(src_lo, 4);
4907
f1174f77 4908 if (dst_reg->type == SCALAR_VALUE &&
092ed096
JW
4909 src_reg->type == SCALAR_VALUE) {
4910 if (tnum_is_const(src_reg->var_off) ||
4911 (is_jmp32 && tnum_is_const(src_lo->var_off)))
f4d7e40a 4912 reg_set_min_max(&other_branch_regs[insn->dst_reg],
092ed096
JW
4913 dst_reg,
4914 is_jmp32
4915 ? src_lo->var_off.value
4916 : src_reg->var_off.value,
4917 opcode, is_jmp32);
4918 else if (tnum_is_const(dst_reg->var_off) ||
4919 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
f4d7e40a 4920 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
092ed096
JW
4921 src_reg,
4922 is_jmp32
4923 ? dst_lo->var_off.value
4924 : dst_reg->var_off.value,
4925 opcode, is_jmp32);
4926 else if (!is_jmp32 &&
4927 (opcode == BPF_JEQ || opcode == BPF_JNE))
f1174f77 4928 /* Comparing for equality, we can combine knowledge */
f4d7e40a
AS
4929 reg_combine_min_max(&other_branch_regs[insn->src_reg],
4930 &other_branch_regs[insn->dst_reg],
092ed096 4931 src_reg, dst_reg, opcode);
f1174f77
EC
4932 }
4933 } else if (dst_reg->type == SCALAR_VALUE) {
f4d7e40a 4934 reg_set_min_max(&other_branch_regs[insn->dst_reg],
092ed096 4935 dst_reg, insn->imm, opcode, is_jmp32);
48461135
JB
4936 }
4937
092ed096
JW
4938 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
4939 * NOTE: these optimizations below are related with pointer comparison
4940 * which will never be JMP32.
4941 */
4942 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
1a0dc1ac 4943 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
840b9615
JS
4944 reg_type_may_be_null(dst_reg->type)) {
4945 /* Mark all identical registers in each branch as either
57a09bf0
TG
4946 * safe or unknown depending R == 0 or R != 0 conditional.
4947 */
840b9615
JS
4948 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
4949 opcode == BPF_JNE);
4950 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
4951 opcode == BPF_JEQ);
5beca081
DB
4952 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
4953 this_branch, other_branch) &&
4954 is_pointer_value(env, insn->dst_reg)) {
61bd5218
JK
4955 verbose(env, "R%d pointer comparison prohibited\n",
4956 insn->dst_reg);
1be7f75d 4957 return -EACCES;
17a52670 4958 }
61bd5218 4959 if (env->log.level)
f4d7e40a 4960 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
17a52670
AS
4961 return 0;
4962}
4963
0246e64d
AS
4964/* return the map pointer stored inside BPF_LD_IMM64 instruction */
4965static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
4966{
4967 u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
4968
4969 return (struct bpf_map *) (unsigned long) imm64;
4970}
4971
17a52670 4972/* verify BPF_LD_IMM64 instruction */
58e2af8b 4973static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 4974{
638f5b90 4975 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
4976 int err;
4977
4978 if (BPF_SIZE(insn->code) != BPF_DW) {
61bd5218 4979 verbose(env, "invalid BPF_LD_IMM insn\n");
17a52670
AS
4980 return -EINVAL;
4981 }
4982 if (insn->off != 0) {
61bd5218 4983 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
17a52670
AS
4984 return -EINVAL;
4985 }
4986
dc503a8a 4987 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
4988 if (err)
4989 return err;
4990
6b173873 4991 if (insn->src_reg == 0) {
6b173873
JK
4992 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
4993
f1174f77 4994 regs[insn->dst_reg].type = SCALAR_VALUE;
b03c9f9f 4995 __mark_reg_known(&regs[insn->dst_reg], imm);
17a52670 4996 return 0;
6b173873 4997 }
17a52670
AS
4998
4999 /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
5000 BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
5001
5002 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
5003 regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
5004 return 0;
5005}
5006
96be4325
DB
5007static bool may_access_skb(enum bpf_prog_type type)
5008{
5009 switch (type) {
5010 case BPF_PROG_TYPE_SOCKET_FILTER:
5011 case BPF_PROG_TYPE_SCHED_CLS:
94caee8c 5012 case BPF_PROG_TYPE_SCHED_ACT:
96be4325
DB
5013 return true;
5014 default:
5015 return false;
5016 }
5017}
5018
ddd872bc
AS
5019/* verify safety of LD_ABS|LD_IND instructions:
5020 * - they can only appear in the programs where ctx == skb
5021 * - since they are wrappers of function calls, they scratch R1-R5 registers,
5022 * preserve R6-R9, and store return value into R0
5023 *
5024 * Implicit input:
5025 * ctx == skb == R6 == CTX
5026 *
5027 * Explicit input:
5028 * SRC == any register
5029 * IMM == 32-bit immediate
5030 *
5031 * Output:
5032 * R0 - 8/16/32-bit skb data converted to cpu endianness
5033 */
58e2af8b 5034static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
ddd872bc 5035{
638f5b90 5036 struct bpf_reg_state *regs = cur_regs(env);
ddd872bc 5037 u8 mode = BPF_MODE(insn->code);
ddd872bc
AS
5038 int i, err;
5039
24701ece 5040 if (!may_access_skb(env->prog->type)) {
61bd5218 5041 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
ddd872bc
AS
5042 return -EINVAL;
5043 }
5044
e0cea7ce
DB
5045 if (!env->ops->gen_ld_abs) {
5046 verbose(env, "bpf verifier is misconfigured\n");
5047 return -EINVAL;
5048 }
5049
f910cefa 5050 if (env->subprog_cnt > 1) {
f4d7e40a
AS
5051 /* when program has LD_ABS insn JITs and interpreter assume
5052 * that r1 == ctx == skb which is not the case for callees
5053 * that can have arbitrary arguments. It's problematic
5054 * for main prog as well since JITs would need to analyze
5055 * all functions in order to make proper register save/restore
5056 * decisions in the main prog. Hence disallow LD_ABS with calls
5057 */
5058 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
5059 return -EINVAL;
5060 }
5061
ddd872bc 5062 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
d82bccc6 5063 BPF_SIZE(insn->code) == BPF_DW ||
ddd872bc 5064 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
61bd5218 5065 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
ddd872bc
AS
5066 return -EINVAL;
5067 }
5068
5069 /* check whether implicit source operand (register R6) is readable */
dc503a8a 5070 err = check_reg_arg(env, BPF_REG_6, SRC_OP);
ddd872bc
AS
5071 if (err)
5072 return err;
5073
fd978bf7
JS
5074 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
5075 * gen_ld_abs() may terminate the program at runtime, leading to
5076 * reference leak.
5077 */
5078 err = check_reference_leak(env);
5079 if (err) {
5080 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
5081 return err;
5082 }
5083
d83525ca
AS
5084 if (env->cur_state->active_spin_lock) {
5085 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
5086 return -EINVAL;
5087 }
5088
ddd872bc 5089 if (regs[BPF_REG_6].type != PTR_TO_CTX) {
61bd5218
JK
5090 verbose(env,
5091 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
ddd872bc
AS
5092 return -EINVAL;
5093 }
5094
5095 if (mode == BPF_IND) {
5096 /* check explicit source operand */
dc503a8a 5097 err = check_reg_arg(env, insn->src_reg, SRC_OP);
ddd872bc
AS
5098 if (err)
5099 return err;
5100 }
5101
5102 /* reset caller saved regs to unreadable */
dc503a8a 5103 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 5104 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
5105 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5106 }
ddd872bc
AS
5107
5108 /* mark destination R0 register as readable, since it contains
dc503a8a
EC
5109 * the value fetched from the packet.
5110 * Already marked as written above.
ddd872bc 5111 */
61bd5218 5112 mark_reg_unknown(env, regs, BPF_REG_0);
ddd872bc
AS
5113 return 0;
5114}
5115
390ee7e2
AS
5116static int check_return_code(struct bpf_verifier_env *env)
5117{
5118 struct bpf_reg_state *reg;
5119 struct tnum range = tnum_range(0, 1);
5120
5121 switch (env->prog->type) {
5122 case BPF_PROG_TYPE_CGROUP_SKB:
5123 case BPF_PROG_TYPE_CGROUP_SOCK:
4fbac77d 5124 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
390ee7e2 5125 case BPF_PROG_TYPE_SOCK_OPS:
ebc614f6 5126 case BPF_PROG_TYPE_CGROUP_DEVICE:
390ee7e2
AS
5127 break;
5128 default:
5129 return 0;
5130 }
5131
638f5b90 5132 reg = cur_regs(env) + BPF_REG_0;
390ee7e2 5133 if (reg->type != SCALAR_VALUE) {
61bd5218 5134 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
390ee7e2
AS
5135 reg_type_str[reg->type]);
5136 return -EINVAL;
5137 }
5138
5139 if (!tnum_in(range, reg->var_off)) {
61bd5218 5140 verbose(env, "At program exit the register R0 ");
390ee7e2
AS
5141 if (!tnum_is_unknown(reg->var_off)) {
5142 char tn_buf[48];
5143
5144 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 5145 verbose(env, "has value %s", tn_buf);
390ee7e2 5146 } else {
61bd5218 5147 verbose(env, "has unknown scalar value");
390ee7e2 5148 }
61bd5218 5149 verbose(env, " should have been 0 or 1\n");
390ee7e2
AS
5150 return -EINVAL;
5151 }
5152 return 0;
5153}
5154
475fb78f
AS
5155/* non-recursive DFS pseudo code
5156 * 1 procedure DFS-iterative(G,v):
5157 * 2 label v as discovered
5158 * 3 let S be a stack
5159 * 4 S.push(v)
5160 * 5 while S is not empty
5161 * 6 t <- S.pop()
5162 * 7 if t is what we're looking for:
5163 * 8 return t
5164 * 9 for all edges e in G.adjacentEdges(t) do
5165 * 10 if edge e is already labelled
5166 * 11 continue with the next edge
5167 * 12 w <- G.adjacentVertex(t,e)
5168 * 13 if vertex w is not discovered and not explored
5169 * 14 label e as tree-edge
5170 * 15 label w as discovered
5171 * 16 S.push(w)
5172 * 17 continue at 5
5173 * 18 else if vertex w is discovered
5174 * 19 label e as back-edge
5175 * 20 else
5176 * 21 // vertex w is explored
5177 * 22 label e as forward- or cross-edge
5178 * 23 label t as explored
5179 * 24 S.pop()
5180 *
5181 * convention:
5182 * 0x10 - discovered
5183 * 0x11 - discovered and fall-through edge labelled
5184 * 0x12 - discovered and fall-through and branch edges labelled
5185 * 0x20 - explored
5186 */
5187
5188enum {
5189 DISCOVERED = 0x10,
5190 EXPLORED = 0x20,
5191 FALLTHROUGH = 1,
5192 BRANCH = 2,
5193};
5194
58e2af8b 5195#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
f1bca824 5196
475fb78f
AS
5197static int *insn_stack; /* stack of insns to process */
5198static int cur_stack; /* current stack index */
5199static int *insn_state;
5200
5201/* t, w, e - match pseudo-code above:
5202 * t - index of current instruction
5203 * w - next instruction
5204 * e - edge
5205 */
58e2af8b 5206static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
475fb78f
AS
5207{
5208 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
5209 return 0;
5210
5211 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
5212 return 0;
5213
5214 if (w < 0 || w >= env->prog->len) {
d9762e84 5215 verbose_linfo(env, t, "%d: ", t);
61bd5218 5216 verbose(env, "jump out of range from insn %d to %d\n", t, w);
475fb78f
AS
5217 return -EINVAL;
5218 }
5219
f1bca824
AS
5220 if (e == BRANCH)
5221 /* mark branch target for state pruning */
5222 env->explored_states[w] = STATE_LIST_MARK;
5223
475fb78f
AS
5224 if (insn_state[w] == 0) {
5225 /* tree-edge */
5226 insn_state[t] = DISCOVERED | e;
5227 insn_state[w] = DISCOVERED;
5228 if (cur_stack >= env->prog->len)
5229 return -E2BIG;
5230 insn_stack[cur_stack++] = w;
5231 return 1;
5232 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
d9762e84
MKL
5233 verbose_linfo(env, t, "%d: ", t);
5234 verbose_linfo(env, w, "%d: ", w);
61bd5218 5235 verbose(env, "back-edge from insn %d to %d\n", t, w);
475fb78f
AS
5236 return -EINVAL;
5237 } else if (insn_state[w] == EXPLORED) {
5238 /* forward- or cross-edge */
5239 insn_state[t] = DISCOVERED | e;
5240 } else {
61bd5218 5241 verbose(env, "insn state internal bug\n");
475fb78f
AS
5242 return -EFAULT;
5243 }
5244 return 0;
5245}
5246
5247/* non-recursive depth-first-search to detect loops in BPF program
5248 * loop == back-edge in directed graph
5249 */
58e2af8b 5250static int check_cfg(struct bpf_verifier_env *env)
475fb78f
AS
5251{
5252 struct bpf_insn *insns = env->prog->insnsi;
5253 int insn_cnt = env->prog->len;
5254 int ret = 0;
5255 int i, t;
5256
5257 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5258 if (!insn_state)
5259 return -ENOMEM;
5260
5261 insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5262 if (!insn_stack) {
5263 kfree(insn_state);
5264 return -ENOMEM;
5265 }
5266
5267 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
5268 insn_stack[0] = 0; /* 0 is the first instruction */
5269 cur_stack = 1;
5270
5271peek_stack:
5272 if (cur_stack == 0)
5273 goto check_state;
5274 t = insn_stack[cur_stack - 1];
5275
092ed096
JW
5276 if (BPF_CLASS(insns[t].code) == BPF_JMP ||
5277 BPF_CLASS(insns[t].code) == BPF_JMP32) {
475fb78f
AS
5278 u8 opcode = BPF_OP(insns[t].code);
5279
5280 if (opcode == BPF_EXIT) {
5281 goto mark_explored;
5282 } else if (opcode == BPF_CALL) {
5283 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5284 if (ret == 1)
5285 goto peek_stack;
5286 else if (ret < 0)
5287 goto err_free;
07016151
DB
5288 if (t + 1 < insn_cnt)
5289 env->explored_states[t + 1] = STATE_LIST_MARK;
cc8b0b92
AS
5290 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
5291 env->explored_states[t] = STATE_LIST_MARK;
5292 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
5293 if (ret == 1)
5294 goto peek_stack;
5295 else if (ret < 0)
5296 goto err_free;
5297 }
475fb78f
AS
5298 } else if (opcode == BPF_JA) {
5299 if (BPF_SRC(insns[t].code) != BPF_K) {
5300 ret = -EINVAL;
5301 goto err_free;
5302 }
5303 /* unconditional jump with single edge */
5304 ret = push_insn(t, t + insns[t].off + 1,
5305 FALLTHROUGH, env);
5306 if (ret == 1)
5307 goto peek_stack;
5308 else if (ret < 0)
5309 goto err_free;
f1bca824
AS
5310 /* tell verifier to check for equivalent states
5311 * after every call and jump
5312 */
c3de6317
AS
5313 if (t + 1 < insn_cnt)
5314 env->explored_states[t + 1] = STATE_LIST_MARK;
475fb78f
AS
5315 } else {
5316 /* conditional jump with two edges */
3c2ce60b 5317 env->explored_states[t] = STATE_LIST_MARK;
475fb78f
AS
5318 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5319 if (ret == 1)
5320 goto peek_stack;
5321 else if (ret < 0)
5322 goto err_free;
5323
5324 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
5325 if (ret == 1)
5326 goto peek_stack;
5327 else if (ret < 0)
5328 goto err_free;
5329 }
5330 } else {
5331 /* all other non-branch instructions with single
5332 * fall-through edge
5333 */
5334 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5335 if (ret == 1)
5336 goto peek_stack;
5337 else if (ret < 0)
5338 goto err_free;
5339 }
5340
5341mark_explored:
5342 insn_state[t] = EXPLORED;
5343 if (cur_stack-- <= 0) {
61bd5218 5344 verbose(env, "pop stack internal bug\n");
475fb78f
AS
5345 ret = -EFAULT;
5346 goto err_free;
5347 }
5348 goto peek_stack;
5349
5350check_state:
5351 for (i = 0; i < insn_cnt; i++) {
5352 if (insn_state[i] != EXPLORED) {
61bd5218 5353 verbose(env, "unreachable insn %d\n", i);
475fb78f
AS
5354 ret = -EINVAL;
5355 goto err_free;
5356 }
5357 }
5358 ret = 0; /* cfg looks good */
5359
5360err_free:
5361 kfree(insn_state);
5362 kfree(insn_stack);
5363 return ret;
5364}
5365
838e9690
YS
5366/* The minimum supported BTF func info size */
5367#define MIN_BPF_FUNCINFO_SIZE 8
5368#define MAX_FUNCINFO_REC_SIZE 252
5369
c454a46b
MKL
5370static int check_btf_func(struct bpf_verifier_env *env,
5371 const union bpf_attr *attr,
5372 union bpf_attr __user *uattr)
838e9690 5373{
d0b2818e 5374 u32 i, nfuncs, urec_size, min_size;
838e9690 5375 u32 krec_size = sizeof(struct bpf_func_info);
c454a46b 5376 struct bpf_func_info *krecord;
838e9690 5377 const struct btf_type *type;
c454a46b
MKL
5378 struct bpf_prog *prog;
5379 const struct btf *btf;
838e9690 5380 void __user *urecord;
d0b2818e 5381 u32 prev_offset = 0;
838e9690
YS
5382 int ret = 0;
5383
5384 nfuncs = attr->func_info_cnt;
5385 if (!nfuncs)
5386 return 0;
5387
5388 if (nfuncs != env->subprog_cnt) {
5389 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
5390 return -EINVAL;
5391 }
5392
5393 urec_size = attr->func_info_rec_size;
5394 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
5395 urec_size > MAX_FUNCINFO_REC_SIZE ||
5396 urec_size % sizeof(u32)) {
5397 verbose(env, "invalid func info rec size %u\n", urec_size);
5398 return -EINVAL;
5399 }
5400
c454a46b
MKL
5401 prog = env->prog;
5402 btf = prog->aux->btf;
838e9690
YS
5403
5404 urecord = u64_to_user_ptr(attr->func_info);
5405 min_size = min_t(u32, krec_size, urec_size);
5406
ba64e7d8 5407 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
c454a46b
MKL
5408 if (!krecord)
5409 return -ENOMEM;
ba64e7d8 5410
838e9690
YS
5411 for (i = 0; i < nfuncs; i++) {
5412 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
5413 if (ret) {
5414 if (ret == -E2BIG) {
5415 verbose(env, "nonzero tailing record in func info");
5416 /* set the size kernel expects so loader can zero
5417 * out the rest of the record.
5418 */
5419 if (put_user(min_size, &uattr->func_info_rec_size))
5420 ret = -EFAULT;
5421 }
c454a46b 5422 goto err_free;
838e9690
YS
5423 }
5424
ba64e7d8 5425 if (copy_from_user(&krecord[i], urecord, min_size)) {
838e9690 5426 ret = -EFAULT;
c454a46b 5427 goto err_free;
838e9690
YS
5428 }
5429
d30d42e0 5430 /* check insn_off */
838e9690 5431 if (i == 0) {
d30d42e0 5432 if (krecord[i].insn_off) {
838e9690 5433 verbose(env,
d30d42e0
MKL
5434 "nonzero insn_off %u for the first func info record",
5435 krecord[i].insn_off);
838e9690 5436 ret = -EINVAL;
c454a46b 5437 goto err_free;
838e9690 5438 }
d30d42e0 5439 } else if (krecord[i].insn_off <= prev_offset) {
838e9690
YS
5440 verbose(env,
5441 "same or smaller insn offset (%u) than previous func info record (%u)",
d30d42e0 5442 krecord[i].insn_off, prev_offset);
838e9690 5443 ret = -EINVAL;
c454a46b 5444 goto err_free;
838e9690
YS
5445 }
5446
d30d42e0 5447 if (env->subprog_info[i].start != krecord[i].insn_off) {
838e9690
YS
5448 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
5449 ret = -EINVAL;
c454a46b 5450 goto err_free;
838e9690
YS
5451 }
5452
5453 /* check type_id */
ba64e7d8 5454 type = btf_type_by_id(btf, krecord[i].type_id);
838e9690
YS
5455 if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
5456 verbose(env, "invalid type id %d in func info",
ba64e7d8 5457 krecord[i].type_id);
838e9690 5458 ret = -EINVAL;
c454a46b 5459 goto err_free;
838e9690
YS
5460 }
5461
d30d42e0 5462 prev_offset = krecord[i].insn_off;
838e9690
YS
5463 urecord += urec_size;
5464 }
5465
ba64e7d8
YS
5466 prog->aux->func_info = krecord;
5467 prog->aux->func_info_cnt = nfuncs;
838e9690
YS
5468 return 0;
5469
c454a46b 5470err_free:
ba64e7d8 5471 kvfree(krecord);
838e9690
YS
5472 return ret;
5473}
5474
ba64e7d8
YS
5475static void adjust_btf_func(struct bpf_verifier_env *env)
5476{
5477 int i;
5478
5479 if (!env->prog->aux->func_info)
5480 return;
5481
5482 for (i = 0; i < env->subprog_cnt; i++)
d30d42e0 5483 env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
ba64e7d8
YS
5484}
5485
c454a46b
MKL
5486#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
5487 sizeof(((struct bpf_line_info *)(0))->line_col))
5488#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
5489
5490static int check_btf_line(struct bpf_verifier_env *env,
5491 const union bpf_attr *attr,
5492 union bpf_attr __user *uattr)
5493{
5494 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
5495 struct bpf_subprog_info *sub;
5496 struct bpf_line_info *linfo;
5497 struct bpf_prog *prog;
5498 const struct btf *btf;
5499 void __user *ulinfo;
5500 int err;
5501
5502 nr_linfo = attr->line_info_cnt;
5503 if (!nr_linfo)
5504 return 0;
5505
5506 rec_size = attr->line_info_rec_size;
5507 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
5508 rec_size > MAX_LINEINFO_REC_SIZE ||
5509 rec_size & (sizeof(u32) - 1))
5510 return -EINVAL;
5511
5512 /* Need to zero it in case the userspace may
5513 * pass in a smaller bpf_line_info object.
5514 */
5515 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
5516 GFP_KERNEL | __GFP_NOWARN);
5517 if (!linfo)
5518 return -ENOMEM;
5519
5520 prog = env->prog;
5521 btf = prog->aux->btf;
5522
5523 s = 0;
5524 sub = env->subprog_info;
5525 ulinfo = u64_to_user_ptr(attr->line_info);
5526 expected_size = sizeof(struct bpf_line_info);
5527 ncopy = min_t(u32, expected_size, rec_size);
5528 for (i = 0; i < nr_linfo; i++) {
5529 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
5530 if (err) {
5531 if (err == -E2BIG) {
5532 verbose(env, "nonzero tailing record in line_info");
5533 if (put_user(expected_size,
5534 &uattr->line_info_rec_size))
5535 err = -EFAULT;
5536 }
5537 goto err_free;
5538 }
5539
5540 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
5541 err = -EFAULT;
5542 goto err_free;
5543 }
5544
5545 /*
5546 * Check insn_off to ensure
5547 * 1) strictly increasing AND
5548 * 2) bounded by prog->len
5549 *
5550 * The linfo[0].insn_off == 0 check logically falls into
5551 * the later "missing bpf_line_info for func..." case
5552 * because the first linfo[0].insn_off must be the
5553 * first sub also and the first sub must have
5554 * subprog_info[0].start == 0.
5555 */
5556 if ((i && linfo[i].insn_off <= prev_offset) ||
5557 linfo[i].insn_off >= prog->len) {
5558 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
5559 i, linfo[i].insn_off, prev_offset,
5560 prog->len);
5561 err = -EINVAL;
5562 goto err_free;
5563 }
5564
fdbaa0be
MKL
5565 if (!prog->insnsi[linfo[i].insn_off].code) {
5566 verbose(env,
5567 "Invalid insn code at line_info[%u].insn_off\n",
5568 i);
5569 err = -EINVAL;
5570 goto err_free;
5571 }
5572
23127b33
MKL
5573 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
5574 !btf_name_by_offset(btf, linfo[i].file_name_off)) {
c454a46b
MKL
5575 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
5576 err = -EINVAL;
5577 goto err_free;
5578 }
5579
5580 if (s != env->subprog_cnt) {
5581 if (linfo[i].insn_off == sub[s].start) {
5582 sub[s].linfo_idx = i;
5583 s++;
5584 } else if (sub[s].start < linfo[i].insn_off) {
5585 verbose(env, "missing bpf_line_info for func#%u\n", s);
5586 err = -EINVAL;
5587 goto err_free;
5588 }
5589 }
5590
5591 prev_offset = linfo[i].insn_off;
5592 ulinfo += rec_size;
5593 }
5594
5595 if (s != env->subprog_cnt) {
5596 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
5597 env->subprog_cnt - s, s);
5598 err = -EINVAL;
5599 goto err_free;
5600 }
5601
5602 prog->aux->linfo = linfo;
5603 prog->aux->nr_linfo = nr_linfo;
5604
5605 return 0;
5606
5607err_free:
5608 kvfree(linfo);
5609 return err;
5610}
5611
5612static int check_btf_info(struct bpf_verifier_env *env,
5613 const union bpf_attr *attr,
5614 union bpf_attr __user *uattr)
5615{
5616 struct btf *btf;
5617 int err;
5618
5619 if (!attr->func_info_cnt && !attr->line_info_cnt)
5620 return 0;
5621
5622 btf = btf_get_by_fd(attr->prog_btf_fd);
5623 if (IS_ERR(btf))
5624 return PTR_ERR(btf);
5625 env->prog->aux->btf = btf;
5626
5627 err = check_btf_func(env, attr, uattr);
5628 if (err)
5629 return err;
5630
5631 err = check_btf_line(env, attr, uattr);
5632 if (err)
5633 return err;
5634
5635 return 0;
ba64e7d8
YS
5636}
5637
f1174f77
EC
5638/* check %cur's range satisfies %old's */
5639static bool range_within(struct bpf_reg_state *old,
5640 struct bpf_reg_state *cur)
5641{
b03c9f9f
EC
5642 return old->umin_value <= cur->umin_value &&
5643 old->umax_value >= cur->umax_value &&
5644 old->smin_value <= cur->smin_value &&
5645 old->smax_value >= cur->smax_value;
f1174f77
EC
5646}
5647
5648/* Maximum number of register states that can exist at once */
5649#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
5650struct idpair {
5651 u32 old;
5652 u32 cur;
5653};
5654
5655/* If in the old state two registers had the same id, then they need to have
5656 * the same id in the new state as well. But that id could be different from
5657 * the old state, so we need to track the mapping from old to new ids.
5658 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
5659 * regs with old id 5 must also have new id 9 for the new state to be safe. But
5660 * regs with a different old id could still have new id 9, we don't care about
5661 * that.
5662 * So we look through our idmap to see if this old id has been seen before. If
5663 * so, we require the new id to match; otherwise, we add the id pair to the map.
969bf05e 5664 */
f1174f77 5665static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
969bf05e 5666{
f1174f77 5667 unsigned int i;
969bf05e 5668
f1174f77
EC
5669 for (i = 0; i < ID_MAP_SIZE; i++) {
5670 if (!idmap[i].old) {
5671 /* Reached an empty slot; haven't seen this id before */
5672 idmap[i].old = old_id;
5673 idmap[i].cur = cur_id;
5674 return true;
5675 }
5676 if (idmap[i].old == old_id)
5677 return idmap[i].cur == cur_id;
5678 }
5679 /* We ran out of idmap slots, which should be impossible */
5680 WARN_ON_ONCE(1);
5681 return false;
5682}
5683
9242b5f5
AS
5684static void clean_func_state(struct bpf_verifier_env *env,
5685 struct bpf_func_state *st)
5686{
5687 enum bpf_reg_liveness live;
5688 int i, j;
5689
5690 for (i = 0; i < BPF_REG_FP; i++) {
5691 live = st->regs[i].live;
5692 /* liveness must not touch this register anymore */
5693 st->regs[i].live |= REG_LIVE_DONE;
5694 if (!(live & REG_LIVE_READ))
5695 /* since the register is unused, clear its state
5696 * to make further comparison simpler
5697 */
5698 __mark_reg_not_init(&st->regs[i]);
5699 }
5700
5701 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
5702 live = st->stack[i].spilled_ptr.live;
5703 /* liveness must not touch this stack slot anymore */
5704 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
5705 if (!(live & REG_LIVE_READ)) {
5706 __mark_reg_not_init(&st->stack[i].spilled_ptr);
5707 for (j = 0; j < BPF_REG_SIZE; j++)
5708 st->stack[i].slot_type[j] = STACK_INVALID;
5709 }
5710 }
5711}
5712
5713static void clean_verifier_state(struct bpf_verifier_env *env,
5714 struct bpf_verifier_state *st)
5715{
5716 int i;
5717
5718 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
5719 /* all regs in this state in all frames were already marked */
5720 return;
5721
5722 for (i = 0; i <= st->curframe; i++)
5723 clean_func_state(env, st->frame[i]);
5724}
5725
5726/* the parentage chains form a tree.
5727 * the verifier states are added to state lists at given insn and
5728 * pushed into state stack for future exploration.
5729 * when the verifier reaches bpf_exit insn some of the verifer states
5730 * stored in the state lists have their final liveness state already,
5731 * but a lot of states will get revised from liveness point of view when
5732 * the verifier explores other branches.
5733 * Example:
5734 * 1: r0 = 1
5735 * 2: if r1 == 100 goto pc+1
5736 * 3: r0 = 2
5737 * 4: exit
5738 * when the verifier reaches exit insn the register r0 in the state list of
5739 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
5740 * of insn 2 and goes exploring further. At the insn 4 it will walk the
5741 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
5742 *
5743 * Since the verifier pushes the branch states as it sees them while exploring
5744 * the program the condition of walking the branch instruction for the second
5745 * time means that all states below this branch were already explored and
5746 * their final liveness markes are already propagated.
5747 * Hence when the verifier completes the search of state list in is_state_visited()
5748 * we can call this clean_live_states() function to mark all liveness states
5749 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
5750 * will not be used.
5751 * This function also clears the registers and stack for states that !READ
5752 * to simplify state merging.
5753 *
5754 * Important note here that walking the same branch instruction in the callee
5755 * doesn't meant that the states are DONE. The verifier has to compare
5756 * the callsites
5757 */
5758static void clean_live_states(struct bpf_verifier_env *env, int insn,
5759 struct bpf_verifier_state *cur)
5760{
5761 struct bpf_verifier_state_list *sl;
5762 int i;
5763
5764 sl = env->explored_states[insn];
5765 if (!sl)
5766 return;
5767
5768 while (sl != STATE_LIST_MARK) {
5769 if (sl->state.curframe != cur->curframe)
5770 goto next;
5771 for (i = 0; i <= cur->curframe; i++)
5772 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
5773 goto next;
5774 clean_verifier_state(env, &sl->state);
5775next:
5776 sl = sl->next;
5777 }
5778}
5779
f1174f77 5780/* Returns true if (rold safe implies rcur safe) */
1b688a19
EC
5781static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
5782 struct idpair *idmap)
f1174f77 5783{
f4d7e40a
AS
5784 bool equal;
5785
dc503a8a
EC
5786 if (!(rold->live & REG_LIVE_READ))
5787 /* explored state didn't use this */
5788 return true;
5789
679c782d 5790 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
f4d7e40a
AS
5791
5792 if (rold->type == PTR_TO_STACK)
5793 /* two stack pointers are equal only if they're pointing to
5794 * the same stack frame, since fp-8 in foo != fp-8 in bar
5795 */
5796 return equal && rold->frameno == rcur->frameno;
5797
5798 if (equal)
969bf05e
AS
5799 return true;
5800
f1174f77
EC
5801 if (rold->type == NOT_INIT)
5802 /* explored state can't have used this */
969bf05e 5803 return true;
f1174f77
EC
5804 if (rcur->type == NOT_INIT)
5805 return false;
5806 switch (rold->type) {
5807 case SCALAR_VALUE:
5808 if (rcur->type == SCALAR_VALUE) {
5809 /* new val must satisfy old val knowledge */
5810 return range_within(rold, rcur) &&
5811 tnum_in(rold->var_off, rcur->var_off);
5812 } else {
179d1c56
JH
5813 /* We're trying to use a pointer in place of a scalar.
5814 * Even if the scalar was unbounded, this could lead to
5815 * pointer leaks because scalars are allowed to leak
5816 * while pointers are not. We could make this safe in
5817 * special cases if root is calling us, but it's
5818 * probably not worth the hassle.
f1174f77 5819 */
179d1c56 5820 return false;
f1174f77
EC
5821 }
5822 case PTR_TO_MAP_VALUE:
1b688a19
EC
5823 /* If the new min/max/var_off satisfy the old ones and
5824 * everything else matches, we are OK.
d83525ca
AS
5825 * 'id' is not compared, since it's only used for maps with
5826 * bpf_spin_lock inside map element and in such cases if
5827 * the rest of the prog is valid for one map element then
5828 * it's valid for all map elements regardless of the key
5829 * used in bpf_map_lookup()
1b688a19
EC
5830 */
5831 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
5832 range_within(rold, rcur) &&
5833 tnum_in(rold->var_off, rcur->var_off);
f1174f77
EC
5834 case PTR_TO_MAP_VALUE_OR_NULL:
5835 /* a PTR_TO_MAP_VALUE could be safe to use as a
5836 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
5837 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
5838 * checked, doing so could have affected others with the same
5839 * id, and we can't check for that because we lost the id when
5840 * we converted to a PTR_TO_MAP_VALUE.
5841 */
5842 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
5843 return false;
5844 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
5845 return false;
5846 /* Check our ids match any regs they're supposed to */
5847 return check_ids(rold->id, rcur->id, idmap);
de8f3a83 5848 case PTR_TO_PACKET_META:
f1174f77 5849 case PTR_TO_PACKET:
de8f3a83 5850 if (rcur->type != rold->type)
f1174f77
EC
5851 return false;
5852 /* We must have at least as much range as the old ptr
5853 * did, so that any accesses which were safe before are
5854 * still safe. This is true even if old range < old off,
5855 * since someone could have accessed through (ptr - k), or
5856 * even done ptr -= k in a register, to get a safe access.
5857 */
5858 if (rold->range > rcur->range)
5859 return false;
5860 /* If the offsets don't match, we can't trust our alignment;
5861 * nor can we be sure that we won't fall out of range.
5862 */
5863 if (rold->off != rcur->off)
5864 return false;
5865 /* id relations must be preserved */
5866 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
5867 return false;
5868 /* new val must satisfy old val knowledge */
5869 return range_within(rold, rcur) &&
5870 tnum_in(rold->var_off, rcur->var_off);
5871 case PTR_TO_CTX:
5872 case CONST_PTR_TO_MAP:
f1174f77 5873 case PTR_TO_PACKET_END:
d58e468b 5874 case PTR_TO_FLOW_KEYS:
c64b7983
JS
5875 case PTR_TO_SOCKET:
5876 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
5877 case PTR_TO_SOCK_COMMON:
5878 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
5879 case PTR_TO_TCP_SOCK:
5880 case PTR_TO_TCP_SOCK_OR_NULL:
f1174f77
EC
5881 /* Only valid matches are exact, which memcmp() above
5882 * would have accepted
5883 */
5884 default:
5885 /* Don't know what's going on, just say it's not safe */
5886 return false;
5887 }
969bf05e 5888
f1174f77
EC
5889 /* Shouldn't get here; if we do, say it's not safe */
5890 WARN_ON_ONCE(1);
969bf05e
AS
5891 return false;
5892}
5893
f4d7e40a
AS
5894static bool stacksafe(struct bpf_func_state *old,
5895 struct bpf_func_state *cur,
638f5b90
AS
5896 struct idpair *idmap)
5897{
5898 int i, spi;
5899
638f5b90
AS
5900 /* walk slots of the explored stack and ignore any additional
5901 * slots in the current stack, since explored(safe) state
5902 * didn't use them
5903 */
5904 for (i = 0; i < old->allocated_stack; i++) {
5905 spi = i / BPF_REG_SIZE;
5906
b233920c
AS
5907 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
5908 i += BPF_REG_SIZE - 1;
cc2b14d5 5909 /* explored state didn't use this */
fd05e57b 5910 continue;
b233920c 5911 }
cc2b14d5 5912
638f5b90
AS
5913 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
5914 continue;
19e2dbb7
AS
5915
5916 /* explored stack has more populated slots than current stack
5917 * and these slots were used
5918 */
5919 if (i >= cur->allocated_stack)
5920 return false;
5921
cc2b14d5
AS
5922 /* if old state was safe with misc data in the stack
5923 * it will be safe with zero-initialized stack.
5924 * The opposite is not true
5925 */
5926 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
5927 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
5928 continue;
638f5b90
AS
5929 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
5930 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
5931 /* Ex: old explored (safe) state has STACK_SPILL in
5932 * this stack slot, but current has has STACK_MISC ->
5933 * this verifier states are not equivalent,
5934 * return false to continue verification of this path
5935 */
5936 return false;
5937 if (i % BPF_REG_SIZE)
5938 continue;
5939 if (old->stack[spi].slot_type[0] != STACK_SPILL)
5940 continue;
5941 if (!regsafe(&old->stack[spi].spilled_ptr,
5942 &cur->stack[spi].spilled_ptr,
5943 idmap))
5944 /* when explored and current stack slot are both storing
5945 * spilled registers, check that stored pointers types
5946 * are the same as well.
5947 * Ex: explored safe path could have stored
5948 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
5949 * but current path has stored:
5950 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
5951 * such verifier states are not equivalent.
5952 * return false to continue verification of this path
5953 */
5954 return false;
5955 }
5956 return true;
5957}
5958
fd978bf7
JS
5959static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
5960{
5961 if (old->acquired_refs != cur->acquired_refs)
5962 return false;
5963 return !memcmp(old->refs, cur->refs,
5964 sizeof(*old->refs) * old->acquired_refs);
5965}
5966
f1bca824
AS
5967/* compare two verifier states
5968 *
5969 * all states stored in state_list are known to be valid, since
5970 * verifier reached 'bpf_exit' instruction through them
5971 *
5972 * this function is called when verifier exploring different branches of
5973 * execution popped from the state stack. If it sees an old state that has
5974 * more strict register state and more strict stack state then this execution
5975 * branch doesn't need to be explored further, since verifier already
5976 * concluded that more strict state leads to valid finish.
5977 *
5978 * Therefore two states are equivalent if register state is more conservative
5979 * and explored stack state is more conservative than the current one.
5980 * Example:
5981 * explored current
5982 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
5983 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
5984 *
5985 * In other words if current stack state (one being explored) has more
5986 * valid slots than old one that already passed validation, it means
5987 * the verifier can stop exploring and conclude that current state is valid too
5988 *
5989 * Similarly with registers. If explored state has register type as invalid
5990 * whereas register type in current state is meaningful, it means that
5991 * the current state will reach 'bpf_exit' instruction safely
5992 */
f4d7e40a
AS
5993static bool func_states_equal(struct bpf_func_state *old,
5994 struct bpf_func_state *cur)
f1bca824 5995{
f1174f77
EC
5996 struct idpair *idmap;
5997 bool ret = false;
f1bca824
AS
5998 int i;
5999
f1174f77
EC
6000 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
6001 /* If we failed to allocate the idmap, just say it's not safe */
6002 if (!idmap)
1a0dc1ac 6003 return false;
f1174f77
EC
6004
6005 for (i = 0; i < MAX_BPF_REG; i++) {
1b688a19 6006 if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
f1174f77 6007 goto out_free;
f1bca824
AS
6008 }
6009
638f5b90
AS
6010 if (!stacksafe(old, cur, idmap))
6011 goto out_free;
fd978bf7
JS
6012
6013 if (!refsafe(old, cur))
6014 goto out_free;
f1174f77
EC
6015 ret = true;
6016out_free:
6017 kfree(idmap);
6018 return ret;
f1bca824
AS
6019}
6020
f4d7e40a
AS
6021static bool states_equal(struct bpf_verifier_env *env,
6022 struct bpf_verifier_state *old,
6023 struct bpf_verifier_state *cur)
6024{
6025 int i;
6026
6027 if (old->curframe != cur->curframe)
6028 return false;
6029
979d63d5
DB
6030 /* Verification state from speculative execution simulation
6031 * must never prune a non-speculative execution one.
6032 */
6033 if (old->speculative && !cur->speculative)
6034 return false;
6035
d83525ca
AS
6036 if (old->active_spin_lock != cur->active_spin_lock)
6037 return false;
6038
f4d7e40a
AS
6039 /* for states to be equal callsites have to be the same
6040 * and all frame states need to be equivalent
6041 */
6042 for (i = 0; i <= old->curframe; i++) {
6043 if (old->frame[i]->callsite != cur->frame[i]->callsite)
6044 return false;
6045 if (!func_states_equal(old->frame[i], cur->frame[i]))
6046 return false;
6047 }
6048 return true;
6049}
6050
8e9cd9ce 6051/* A write screens off any subsequent reads; but write marks come from the
f4d7e40a
AS
6052 * straight-line code between a state and its parent. When we arrive at an
6053 * equivalent state (jump target or such) we didn't arrive by the straight-line
6054 * code, so read marks in the state must propagate to the parent regardless
6055 * of the state's write marks. That's what 'parent == state->parent' comparison
679c782d 6056 * in mark_reg_read() is for.
8e9cd9ce 6057 */
f4d7e40a
AS
6058static int propagate_liveness(struct bpf_verifier_env *env,
6059 const struct bpf_verifier_state *vstate,
6060 struct bpf_verifier_state *vparent)
dc503a8a 6061{
f4d7e40a
AS
6062 int i, frame, err = 0;
6063 struct bpf_func_state *state, *parent;
dc503a8a 6064
f4d7e40a
AS
6065 if (vparent->curframe != vstate->curframe) {
6066 WARN(1, "propagate_live: parent frame %d current frame %d\n",
6067 vparent->curframe, vstate->curframe);
6068 return -EFAULT;
6069 }
dc503a8a
EC
6070 /* Propagate read liveness of registers... */
6071 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
6072 /* We don't need to worry about FP liveness because it's read-only */
6073 for (i = 0; i < BPF_REG_FP; i++) {
f4d7e40a 6074 if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
63f45f84 6075 continue;
f4d7e40a 6076 if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
679c782d
EC
6077 err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
6078 &vparent->frame[vstate->curframe]->regs[i]);
f4d7e40a
AS
6079 if (err)
6080 return err;
dc503a8a
EC
6081 }
6082 }
f4d7e40a 6083
dc503a8a 6084 /* ... and stack slots */
f4d7e40a
AS
6085 for (frame = 0; frame <= vstate->curframe; frame++) {
6086 state = vstate->frame[frame];
6087 parent = vparent->frame[frame];
6088 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
6089 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
f4d7e40a
AS
6090 if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
6091 continue;
6092 if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
679c782d
EC
6093 mark_reg_read(env, &state->stack[i].spilled_ptr,
6094 &parent->stack[i].spilled_ptr);
dc503a8a
EC
6095 }
6096 }
f4d7e40a 6097 return err;
dc503a8a
EC
6098}
6099
58e2af8b 6100static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
f1bca824 6101{
58e2af8b
JK
6102 struct bpf_verifier_state_list *new_sl;
6103 struct bpf_verifier_state_list *sl;
679c782d 6104 struct bpf_verifier_state *cur = env->cur_state, *new;
ceefbc96 6105 int i, j, err, states_cnt = 0;
f1bca824
AS
6106
6107 sl = env->explored_states[insn_idx];
6108 if (!sl)
6109 /* this 'insn_idx' instruction wasn't marked, so we will not
6110 * be doing state search here
6111 */
6112 return 0;
6113
9242b5f5
AS
6114 clean_live_states(env, insn_idx, cur);
6115
f1bca824 6116 while (sl != STATE_LIST_MARK) {
638f5b90 6117 if (states_equal(env, &sl->state, cur)) {
f1bca824 6118 /* reached equivalent register/stack state,
dc503a8a
EC
6119 * prune the search.
6120 * Registers read by the continuation are read by us.
8e9cd9ce
EC
6121 * If we have any write marks in env->cur_state, they
6122 * will prevent corresponding reads in the continuation
6123 * from reaching our parent (an explored_state). Our
6124 * own state will get the read marks recorded, but
6125 * they'll be immediately forgotten as we're pruning
6126 * this state and will pop a new one.
f1bca824 6127 */
f4d7e40a
AS
6128 err = propagate_liveness(env, &sl->state, cur);
6129 if (err)
6130 return err;
f1bca824 6131 return 1;
dc503a8a 6132 }
f1bca824 6133 sl = sl->next;
ceefbc96 6134 states_cnt++;
f1bca824
AS
6135 }
6136
ceefbc96
AS
6137 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
6138 return 0;
6139
f1bca824
AS
6140 /* there were no equivalent states, remember current one.
6141 * technically the current state is not proven to be safe yet,
f4d7e40a
AS
6142 * but it will either reach outer most bpf_exit (which means it's safe)
6143 * or it will be rejected. Since there are no loops, we won't be
6144 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
6145 * again on the way to bpf_exit
f1bca824 6146 */
638f5b90 6147 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
f1bca824
AS
6148 if (!new_sl)
6149 return -ENOMEM;
6150
6151 /* add new state to the head of linked list */
679c782d
EC
6152 new = &new_sl->state;
6153 err = copy_verifier_state(new, cur);
1969db47 6154 if (err) {
679c782d 6155 free_verifier_state(new, false);
1969db47
AS
6156 kfree(new_sl);
6157 return err;
6158 }
f1bca824
AS
6159 new_sl->next = env->explored_states[insn_idx];
6160 env->explored_states[insn_idx] = new_sl;
7640ead9
JK
6161 /* connect new state to parentage chain. Current frame needs all
6162 * registers connected. Only r6 - r9 of the callers are alive (pushed
6163 * to the stack implicitly by JITs) so in callers' frames connect just
6164 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
6165 * the state of the call instruction (with WRITTEN set), and r0 comes
6166 * from callee with its full parentage chain, anyway.
6167 */
6168 for (j = 0; j <= cur->curframe; j++)
6169 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
6170 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
8e9cd9ce
EC
6171 /* clear write marks in current state: the writes we did are not writes
6172 * our child did, so they don't screen off its reads from us.
6173 * (There are no read marks in current state, because reads always mark
6174 * their parent and current state never has children yet. Only
6175 * explored_states can get read marks.)
6176 */
dc503a8a 6177 for (i = 0; i < BPF_REG_FP; i++)
f4d7e40a
AS
6178 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
6179
6180 /* all stack frames are accessible from callee, clear them all */
6181 for (j = 0; j <= cur->curframe; j++) {
6182 struct bpf_func_state *frame = cur->frame[j];
679c782d 6183 struct bpf_func_state *newframe = new->frame[j];
f4d7e40a 6184
679c782d 6185 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
cc2b14d5 6186 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
679c782d
EC
6187 frame->stack[i].spilled_ptr.parent =
6188 &newframe->stack[i].spilled_ptr;
6189 }
f4d7e40a 6190 }
f1bca824
AS
6191 return 0;
6192}
6193
c64b7983
JS
6194/* Return true if it's OK to have the same insn return a different type. */
6195static bool reg_type_mismatch_ok(enum bpf_reg_type type)
6196{
6197 switch (type) {
6198 case PTR_TO_CTX:
6199 case PTR_TO_SOCKET:
6200 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
6201 case PTR_TO_SOCK_COMMON:
6202 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
6203 case PTR_TO_TCP_SOCK:
6204 case PTR_TO_TCP_SOCK_OR_NULL:
c64b7983
JS
6205 return false;
6206 default:
6207 return true;
6208 }
6209}
6210
6211/* If an instruction was previously used with particular pointer types, then we
6212 * need to be careful to avoid cases such as the below, where it may be ok
6213 * for one branch accessing the pointer, but not ok for the other branch:
6214 *
6215 * R1 = sock_ptr
6216 * goto X;
6217 * ...
6218 * R1 = some_other_valid_ptr;
6219 * goto X;
6220 * ...
6221 * R2 = *(u32 *)(R1 + 0);
6222 */
6223static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
6224{
6225 return src != prev && (!reg_type_mismatch_ok(src) ||
6226 !reg_type_mismatch_ok(prev));
6227}
6228
58e2af8b 6229static int do_check(struct bpf_verifier_env *env)
17a52670 6230{
638f5b90 6231 struct bpf_verifier_state *state;
17a52670 6232 struct bpf_insn *insns = env->prog->insnsi;
638f5b90 6233 struct bpf_reg_state *regs;
f4d7e40a 6234 int insn_cnt = env->prog->len, i;
17a52670
AS
6235 int insn_processed = 0;
6236 bool do_print_state = false;
6237
d9762e84
MKL
6238 env->prev_linfo = NULL;
6239
638f5b90
AS
6240 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
6241 if (!state)
6242 return -ENOMEM;
f4d7e40a 6243 state->curframe = 0;
979d63d5 6244 state->speculative = false;
f4d7e40a
AS
6245 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
6246 if (!state->frame[0]) {
6247 kfree(state);
6248 return -ENOMEM;
6249 }
6250 env->cur_state = state;
6251 init_func_state(env, state->frame[0],
6252 BPF_MAIN_FUNC /* callsite */,
6253 0 /* frameno */,
6254 0 /* subprogno, zero == main subprog */);
c08435ec 6255
17a52670
AS
6256 for (;;) {
6257 struct bpf_insn *insn;
6258 u8 class;
6259 int err;
6260
c08435ec 6261 if (env->insn_idx >= insn_cnt) {
61bd5218 6262 verbose(env, "invalid insn idx %d insn_cnt %d\n",
c08435ec 6263 env->insn_idx, insn_cnt);
17a52670
AS
6264 return -EFAULT;
6265 }
6266
c08435ec 6267 insn = &insns[env->insn_idx];
17a52670
AS
6268 class = BPF_CLASS(insn->code);
6269
07016151 6270 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
61bd5218
JK
6271 verbose(env,
6272 "BPF program is too large. Processed %d insn\n",
17a52670
AS
6273 insn_processed);
6274 return -E2BIG;
6275 }
6276
c08435ec 6277 err = is_state_visited(env, env->insn_idx);
f1bca824
AS
6278 if (err < 0)
6279 return err;
6280 if (err == 1) {
6281 /* found equivalent state, can prune the search */
61bd5218 6282 if (env->log.level) {
f1bca824 6283 if (do_print_state)
979d63d5
DB
6284 verbose(env, "\nfrom %d to %d%s: safe\n",
6285 env->prev_insn_idx, env->insn_idx,
6286 env->cur_state->speculative ?
6287 " (speculative execution)" : "");
f1bca824 6288 else
c08435ec 6289 verbose(env, "%d: safe\n", env->insn_idx);
f1bca824
AS
6290 }
6291 goto process_bpf_exit;
6292 }
6293
c3494801
AS
6294 if (signal_pending(current))
6295 return -EAGAIN;
6296
3c2ce60b
DB
6297 if (need_resched())
6298 cond_resched();
6299
61bd5218
JK
6300 if (env->log.level > 1 || (env->log.level && do_print_state)) {
6301 if (env->log.level > 1)
c08435ec 6302 verbose(env, "%d:", env->insn_idx);
c5fc9692 6303 else
979d63d5
DB
6304 verbose(env, "\nfrom %d to %d%s:",
6305 env->prev_insn_idx, env->insn_idx,
6306 env->cur_state->speculative ?
6307 " (speculative execution)" : "");
f4d7e40a 6308 print_verifier_state(env, state->frame[state->curframe]);
17a52670
AS
6309 do_print_state = false;
6310 }
6311
61bd5218 6312 if (env->log.level) {
7105e828
DB
6313 const struct bpf_insn_cbs cbs = {
6314 .cb_print = verbose,
abe08840 6315 .private_data = env,
7105e828
DB
6316 };
6317
c08435ec
DB
6318 verbose_linfo(env, env->insn_idx, "; ");
6319 verbose(env, "%d: ", env->insn_idx);
abe08840 6320 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
17a52670
AS
6321 }
6322
cae1927c 6323 if (bpf_prog_is_dev_bound(env->prog->aux)) {
c08435ec
DB
6324 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
6325 env->prev_insn_idx);
cae1927c
JK
6326 if (err)
6327 return err;
6328 }
13a27dfc 6329
638f5b90 6330 regs = cur_regs(env);
c08435ec 6331 env->insn_aux_data[env->insn_idx].seen = true;
fd978bf7 6332
17a52670 6333 if (class == BPF_ALU || class == BPF_ALU64) {
1be7f75d 6334 err = check_alu_op(env, insn);
17a52670
AS
6335 if (err)
6336 return err;
6337
6338 } else if (class == BPF_LDX) {
3df126f3 6339 enum bpf_reg_type *prev_src_type, src_reg_type;
9bac3d6d
AS
6340
6341 /* check for reserved fields is already done */
6342
17a52670 6343 /* check src operand */
dc503a8a 6344 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
6345 if (err)
6346 return err;
6347
dc503a8a 6348 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
6349 if (err)
6350 return err;
6351
725f9dcd
AS
6352 src_reg_type = regs[insn->src_reg].type;
6353
17a52670
AS
6354 /* check that memory (src_reg + off) is readable,
6355 * the state of dst_reg will be updated by this func
6356 */
c08435ec
DB
6357 err = check_mem_access(env, env->insn_idx, insn->src_reg,
6358 insn->off, BPF_SIZE(insn->code),
6359 BPF_READ, insn->dst_reg, false);
17a52670
AS
6360 if (err)
6361 return err;
6362
c08435ec 6363 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
3df126f3
JK
6364
6365 if (*prev_src_type == NOT_INIT) {
9bac3d6d
AS
6366 /* saw a valid insn
6367 * dst_reg = *(u32 *)(src_reg + off)
3df126f3 6368 * save type to validate intersecting paths
9bac3d6d 6369 */
3df126f3 6370 *prev_src_type = src_reg_type;
9bac3d6d 6371
c64b7983 6372 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9bac3d6d
AS
6373 /* ABuser program is trying to use the same insn
6374 * dst_reg = *(u32*) (src_reg + off)
6375 * with different pointer types:
6376 * src_reg == ctx in one branch and
6377 * src_reg == stack|map in some other branch.
6378 * Reject it.
6379 */
61bd5218 6380 verbose(env, "same insn cannot be used with different pointers\n");
9bac3d6d
AS
6381 return -EINVAL;
6382 }
6383
17a52670 6384 } else if (class == BPF_STX) {
3df126f3 6385 enum bpf_reg_type *prev_dst_type, dst_reg_type;
d691f9e8 6386
17a52670 6387 if (BPF_MODE(insn->code) == BPF_XADD) {
c08435ec 6388 err = check_xadd(env, env->insn_idx, insn);
17a52670
AS
6389 if (err)
6390 return err;
c08435ec 6391 env->insn_idx++;
17a52670
AS
6392 continue;
6393 }
6394
17a52670 6395 /* check src1 operand */
dc503a8a 6396 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
6397 if (err)
6398 return err;
6399 /* check src2 operand */
dc503a8a 6400 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
6401 if (err)
6402 return err;
6403
d691f9e8
AS
6404 dst_reg_type = regs[insn->dst_reg].type;
6405
17a52670 6406 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
6407 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
6408 insn->off, BPF_SIZE(insn->code),
6409 BPF_WRITE, insn->src_reg, false);
17a52670
AS
6410 if (err)
6411 return err;
6412
c08435ec 6413 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
3df126f3
JK
6414
6415 if (*prev_dst_type == NOT_INIT) {
6416 *prev_dst_type = dst_reg_type;
c64b7983 6417 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
61bd5218 6418 verbose(env, "same insn cannot be used with different pointers\n");
d691f9e8
AS
6419 return -EINVAL;
6420 }
6421
17a52670
AS
6422 } else if (class == BPF_ST) {
6423 if (BPF_MODE(insn->code) != BPF_MEM ||
6424 insn->src_reg != BPF_REG_0) {
61bd5218 6425 verbose(env, "BPF_ST uses reserved fields\n");
17a52670
AS
6426 return -EINVAL;
6427 }
6428 /* check src operand */
dc503a8a 6429 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
6430 if (err)
6431 return err;
6432
f37a8cb8 6433 if (is_ctx_reg(env, insn->dst_reg)) {
9d2be44a 6434 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
2a159c6f
DB
6435 insn->dst_reg,
6436 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
6437 return -EACCES;
6438 }
6439
17a52670 6440 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
6441 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
6442 insn->off, BPF_SIZE(insn->code),
6443 BPF_WRITE, -1, false);
17a52670
AS
6444 if (err)
6445 return err;
6446
092ed096 6447 } else if (class == BPF_JMP || class == BPF_JMP32) {
17a52670
AS
6448 u8 opcode = BPF_OP(insn->code);
6449
6450 if (opcode == BPF_CALL) {
6451 if (BPF_SRC(insn->code) != BPF_K ||
6452 insn->off != 0 ||
f4d7e40a
AS
6453 (insn->src_reg != BPF_REG_0 &&
6454 insn->src_reg != BPF_PSEUDO_CALL) ||
092ed096
JW
6455 insn->dst_reg != BPF_REG_0 ||
6456 class == BPF_JMP32) {
61bd5218 6457 verbose(env, "BPF_CALL uses reserved fields\n");
17a52670
AS
6458 return -EINVAL;
6459 }
6460
d83525ca
AS
6461 if (env->cur_state->active_spin_lock &&
6462 (insn->src_reg == BPF_PSEUDO_CALL ||
6463 insn->imm != BPF_FUNC_spin_unlock)) {
6464 verbose(env, "function calls are not allowed while holding a lock\n");
6465 return -EINVAL;
6466 }
f4d7e40a 6467 if (insn->src_reg == BPF_PSEUDO_CALL)
c08435ec 6468 err = check_func_call(env, insn, &env->insn_idx);
f4d7e40a 6469 else
c08435ec 6470 err = check_helper_call(env, insn->imm, env->insn_idx);
17a52670
AS
6471 if (err)
6472 return err;
6473
6474 } else if (opcode == BPF_JA) {
6475 if (BPF_SRC(insn->code) != BPF_K ||
6476 insn->imm != 0 ||
6477 insn->src_reg != BPF_REG_0 ||
092ed096
JW
6478 insn->dst_reg != BPF_REG_0 ||
6479 class == BPF_JMP32) {
61bd5218 6480 verbose(env, "BPF_JA uses reserved fields\n");
17a52670
AS
6481 return -EINVAL;
6482 }
6483
c08435ec 6484 env->insn_idx += insn->off + 1;
17a52670
AS
6485 continue;
6486
6487 } else if (opcode == BPF_EXIT) {
6488 if (BPF_SRC(insn->code) != BPF_K ||
6489 insn->imm != 0 ||
6490 insn->src_reg != BPF_REG_0 ||
092ed096
JW
6491 insn->dst_reg != BPF_REG_0 ||
6492 class == BPF_JMP32) {
61bd5218 6493 verbose(env, "BPF_EXIT uses reserved fields\n");
17a52670
AS
6494 return -EINVAL;
6495 }
6496
d83525ca
AS
6497 if (env->cur_state->active_spin_lock) {
6498 verbose(env, "bpf_spin_unlock is missing\n");
6499 return -EINVAL;
6500 }
6501
f4d7e40a
AS
6502 if (state->curframe) {
6503 /* exit from nested function */
c08435ec
DB
6504 env->prev_insn_idx = env->insn_idx;
6505 err = prepare_func_exit(env, &env->insn_idx);
f4d7e40a
AS
6506 if (err)
6507 return err;
6508 do_print_state = true;
6509 continue;
6510 }
6511
fd978bf7
JS
6512 err = check_reference_leak(env);
6513 if (err)
6514 return err;
6515
17a52670
AS
6516 /* eBPF calling convetion is such that R0 is used
6517 * to return the value from eBPF program.
6518 * Make sure that it's readable at this time
6519 * of bpf_exit, which means that program wrote
6520 * something into it earlier
6521 */
dc503a8a 6522 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
17a52670
AS
6523 if (err)
6524 return err;
6525
1be7f75d 6526 if (is_pointer_value(env, BPF_REG_0)) {
61bd5218 6527 verbose(env, "R0 leaks addr as return value\n");
1be7f75d
AS
6528 return -EACCES;
6529 }
6530
390ee7e2
AS
6531 err = check_return_code(env);
6532 if (err)
6533 return err;
f1bca824 6534process_bpf_exit:
c08435ec
DB
6535 err = pop_stack(env, &env->prev_insn_idx,
6536 &env->insn_idx);
638f5b90
AS
6537 if (err < 0) {
6538 if (err != -ENOENT)
6539 return err;
17a52670
AS
6540 break;
6541 } else {
6542 do_print_state = true;
6543 continue;
6544 }
6545 } else {
c08435ec 6546 err = check_cond_jmp_op(env, insn, &env->insn_idx);
17a52670
AS
6547 if (err)
6548 return err;
6549 }
6550 } else if (class == BPF_LD) {
6551 u8 mode = BPF_MODE(insn->code);
6552
6553 if (mode == BPF_ABS || mode == BPF_IND) {
ddd872bc
AS
6554 err = check_ld_abs(env, insn);
6555 if (err)
6556 return err;
6557
17a52670
AS
6558 } else if (mode == BPF_IMM) {
6559 err = check_ld_imm(env, insn);
6560 if (err)
6561 return err;
6562
c08435ec
DB
6563 env->insn_idx++;
6564 env->insn_aux_data[env->insn_idx].seen = true;
17a52670 6565 } else {
61bd5218 6566 verbose(env, "invalid BPF_LD mode\n");
17a52670
AS
6567 return -EINVAL;
6568 }
6569 } else {
61bd5218 6570 verbose(env, "unknown insn class %d\n", class);
17a52670
AS
6571 return -EINVAL;
6572 }
6573
c08435ec 6574 env->insn_idx++;
17a52670
AS
6575 }
6576
4bd95f4b
DB
6577 verbose(env, "processed %d insns (limit %d), stack depth ",
6578 insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
f910cefa 6579 for (i = 0; i < env->subprog_cnt; i++) {
9c8105bd 6580 u32 depth = env->subprog_info[i].stack_depth;
f4d7e40a
AS
6581
6582 verbose(env, "%d", depth);
f910cefa 6583 if (i + 1 < env->subprog_cnt)
f4d7e40a
AS
6584 verbose(env, "+");
6585 }
6586 verbose(env, "\n");
9c8105bd 6587 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
17a52670
AS
6588 return 0;
6589}
6590
56f668df
MKL
6591static int check_map_prealloc(struct bpf_map *map)
6592{
6593 return (map->map_type != BPF_MAP_TYPE_HASH &&
bcc6b1b7
MKL
6594 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
6595 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
56f668df
MKL
6596 !(map->map_flags & BPF_F_NO_PREALLOC);
6597}
6598
d83525ca
AS
6599static bool is_tracing_prog_type(enum bpf_prog_type type)
6600{
6601 switch (type) {
6602 case BPF_PROG_TYPE_KPROBE:
6603 case BPF_PROG_TYPE_TRACEPOINT:
6604 case BPF_PROG_TYPE_PERF_EVENT:
6605 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6606 return true;
6607 default:
6608 return false;
6609 }
6610}
6611
61bd5218
JK
6612static int check_map_prog_compatibility(struct bpf_verifier_env *env,
6613 struct bpf_map *map,
fdc15d38
AS
6614 struct bpf_prog *prog)
6615
6616{
56f668df
MKL
6617 /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
6618 * preallocated hash maps, since doing memory allocation
6619 * in overflow_handler can crash depending on where nmi got
6620 * triggered.
6621 */
6622 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
6623 if (!check_map_prealloc(map)) {
61bd5218 6624 verbose(env, "perf_event programs can only use preallocated hash map\n");
56f668df
MKL
6625 return -EINVAL;
6626 }
6627 if (map->inner_map_meta &&
6628 !check_map_prealloc(map->inner_map_meta)) {
61bd5218 6629 verbose(env, "perf_event programs can only use preallocated inner hash map\n");
56f668df
MKL
6630 return -EINVAL;
6631 }
fdc15d38 6632 }
a3884572 6633
d83525ca
AS
6634 if ((is_tracing_prog_type(prog->type) ||
6635 prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
6636 map_value_has_spin_lock(map)) {
6637 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
6638 return -EINVAL;
6639 }
6640
a3884572 6641 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
09728266 6642 !bpf_offload_prog_map_match(prog, map)) {
a3884572
JK
6643 verbose(env, "offload device mismatch between prog and map\n");
6644 return -EINVAL;
6645 }
6646
fdc15d38
AS
6647 return 0;
6648}
6649
b741f163
RG
6650static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
6651{
6652 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
6653 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
6654}
6655
0246e64d
AS
6656/* look for pseudo eBPF instructions that access map FDs and
6657 * replace them with actual map pointers
6658 */
58e2af8b 6659static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
0246e64d
AS
6660{
6661 struct bpf_insn *insn = env->prog->insnsi;
6662 int insn_cnt = env->prog->len;
fdc15d38 6663 int i, j, err;
0246e64d 6664
f1f7714e 6665 err = bpf_prog_calc_tag(env->prog);
aafe6ae9
DB
6666 if (err)
6667 return err;
6668
0246e64d 6669 for (i = 0; i < insn_cnt; i++, insn++) {
9bac3d6d 6670 if (BPF_CLASS(insn->code) == BPF_LDX &&
d691f9e8 6671 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
61bd5218 6672 verbose(env, "BPF_LDX uses reserved fields\n");
9bac3d6d
AS
6673 return -EINVAL;
6674 }
6675
d691f9e8
AS
6676 if (BPF_CLASS(insn->code) == BPF_STX &&
6677 ((BPF_MODE(insn->code) != BPF_MEM &&
6678 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
61bd5218 6679 verbose(env, "BPF_STX uses reserved fields\n");
d691f9e8
AS
6680 return -EINVAL;
6681 }
6682
0246e64d
AS
6683 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
6684 struct bpf_map *map;
6685 struct fd f;
6686
6687 if (i == insn_cnt - 1 || insn[1].code != 0 ||
6688 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
6689 insn[1].off != 0) {
61bd5218 6690 verbose(env, "invalid bpf_ld_imm64 insn\n");
0246e64d
AS
6691 return -EINVAL;
6692 }
6693
6694 if (insn->src_reg == 0)
6695 /* valid generic load 64-bit imm */
6696 goto next_insn;
6697
20182390
DB
6698 if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
6699 insn[1].imm != 0) {
6700 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
0246e64d
AS
6701 return -EINVAL;
6702 }
6703
20182390 6704 f = fdget(insn[0].imm);
c2101297 6705 map = __bpf_map_get(f);
0246e64d 6706 if (IS_ERR(map)) {
61bd5218 6707 verbose(env, "fd %d is not pointing to valid bpf_map\n",
20182390 6708 insn[0].imm);
0246e64d
AS
6709 return PTR_ERR(map);
6710 }
6711
61bd5218 6712 err = check_map_prog_compatibility(env, map, env->prog);
fdc15d38
AS
6713 if (err) {
6714 fdput(f);
6715 return err;
6716 }
6717
0246e64d
AS
6718 /* store map pointer inside BPF_LD_IMM64 instruction */
6719 insn[0].imm = (u32) (unsigned long) map;
6720 insn[1].imm = ((u64) (unsigned long) map) >> 32;
6721
6722 /* check whether we recorded this map already */
6723 for (j = 0; j < env->used_map_cnt; j++)
6724 if (env->used_maps[j] == map) {
6725 fdput(f);
6726 goto next_insn;
6727 }
6728
6729 if (env->used_map_cnt >= MAX_USED_MAPS) {
6730 fdput(f);
6731 return -E2BIG;
6732 }
6733
0246e64d
AS
6734 /* hold the map. If the program is rejected by verifier,
6735 * the map will be released by release_maps() or it
6736 * will be used by the valid program until it's unloaded
ab7f5bf0 6737 * and all maps are released in free_used_maps()
0246e64d 6738 */
92117d84
AS
6739 map = bpf_map_inc(map, false);
6740 if (IS_ERR(map)) {
6741 fdput(f);
6742 return PTR_ERR(map);
6743 }
6744 env->used_maps[env->used_map_cnt++] = map;
6745
b741f163 6746 if (bpf_map_is_cgroup_storage(map) &&
de9cbbaa 6747 bpf_cgroup_storage_assign(env->prog, map)) {
b741f163 6748 verbose(env, "only one cgroup storage of each type is allowed\n");
de9cbbaa
RG
6749 fdput(f);
6750 return -EBUSY;
6751 }
6752
0246e64d
AS
6753 fdput(f);
6754next_insn:
6755 insn++;
6756 i++;
5e581dad
DB
6757 continue;
6758 }
6759
6760 /* Basic sanity check before we invest more work here. */
6761 if (!bpf_opcode_in_insntable(insn->code)) {
6762 verbose(env, "unknown opcode %02x\n", insn->code);
6763 return -EINVAL;
0246e64d
AS
6764 }
6765 }
6766
6767 /* now all pseudo BPF_LD_IMM64 instructions load valid
6768 * 'struct bpf_map *' into a register instead of user map_fd.
6769 * These pointers will be used later by verifier to validate map access.
6770 */
6771 return 0;
6772}
6773
6774/* drop refcnt of maps used by the rejected program */
58e2af8b 6775static void release_maps(struct bpf_verifier_env *env)
0246e64d 6776{
8bad74f9 6777 enum bpf_cgroup_storage_type stype;
0246e64d
AS
6778 int i;
6779
8bad74f9
RG
6780 for_each_cgroup_storage_type(stype) {
6781 if (!env->prog->aux->cgroup_storage[stype])
6782 continue;
de9cbbaa 6783 bpf_cgroup_storage_release(env->prog,
8bad74f9
RG
6784 env->prog->aux->cgroup_storage[stype]);
6785 }
de9cbbaa 6786
0246e64d
AS
6787 for (i = 0; i < env->used_map_cnt; i++)
6788 bpf_map_put(env->used_maps[i]);
6789}
6790
6791/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
58e2af8b 6792static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
0246e64d
AS
6793{
6794 struct bpf_insn *insn = env->prog->insnsi;
6795 int insn_cnt = env->prog->len;
6796 int i;
6797
6798 for (i = 0; i < insn_cnt; i++, insn++)
6799 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
6800 insn->src_reg = 0;
6801}
6802
8041902d
AS
6803/* single env->prog->insni[off] instruction was replaced with the range
6804 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
6805 * [0, off) and [off, end) to new locations, so the patched range stays zero
6806 */
6807static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
6808 u32 off, u32 cnt)
6809{
6810 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
c131187d 6811 int i;
8041902d
AS
6812
6813 if (cnt == 1)
6814 return 0;
fad953ce
KC
6815 new_data = vzalloc(array_size(prog_len,
6816 sizeof(struct bpf_insn_aux_data)));
8041902d
AS
6817 if (!new_data)
6818 return -ENOMEM;
6819 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
6820 memcpy(new_data + off + cnt - 1, old_data + off,
6821 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
c131187d
AS
6822 for (i = off; i < off + cnt - 1; i++)
6823 new_data[i].seen = true;
8041902d
AS
6824 env->insn_aux_data = new_data;
6825 vfree(old_data);
6826 return 0;
6827}
6828
cc8b0b92
AS
6829static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
6830{
6831 int i;
6832
6833 if (len == 1)
6834 return;
4cb3d99c
JW
6835 /* NOTE: fake 'exit' subprog should be updated as well. */
6836 for (i = 0; i <= env->subprog_cnt; i++) {
afd59424 6837 if (env->subprog_info[i].start <= off)
cc8b0b92 6838 continue;
9c8105bd 6839 env->subprog_info[i].start += len - 1;
cc8b0b92
AS
6840 }
6841}
6842
8041902d
AS
6843static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
6844 const struct bpf_insn *patch, u32 len)
6845{
6846 struct bpf_prog *new_prog;
6847
6848 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
6849 if (!new_prog)
6850 return NULL;
6851 if (adjust_insn_aux_data(env, new_prog->len, off, len))
6852 return NULL;
cc8b0b92 6853 adjust_subprog_starts(env, off, len);
8041902d
AS
6854 return new_prog;
6855}
6856
52875a04
JK
6857static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
6858 u32 off, u32 cnt)
6859{
6860 int i, j;
6861
6862 /* find first prog starting at or after off (first to remove) */
6863 for (i = 0; i < env->subprog_cnt; i++)
6864 if (env->subprog_info[i].start >= off)
6865 break;
6866 /* find first prog starting at or after off + cnt (first to stay) */
6867 for (j = i; j < env->subprog_cnt; j++)
6868 if (env->subprog_info[j].start >= off + cnt)
6869 break;
6870 /* if j doesn't start exactly at off + cnt, we are just removing
6871 * the front of previous prog
6872 */
6873 if (env->subprog_info[j].start != off + cnt)
6874 j--;
6875
6876 if (j > i) {
6877 struct bpf_prog_aux *aux = env->prog->aux;
6878 int move;
6879
6880 /* move fake 'exit' subprog as well */
6881 move = env->subprog_cnt + 1 - j;
6882
6883 memmove(env->subprog_info + i,
6884 env->subprog_info + j,
6885 sizeof(*env->subprog_info) * move);
6886 env->subprog_cnt -= j - i;
6887
6888 /* remove func_info */
6889 if (aux->func_info) {
6890 move = aux->func_info_cnt - j;
6891
6892 memmove(aux->func_info + i,
6893 aux->func_info + j,
6894 sizeof(*aux->func_info) * move);
6895 aux->func_info_cnt -= j - i;
6896 /* func_info->insn_off is set after all code rewrites,
6897 * in adjust_btf_func() - no need to adjust
6898 */
6899 }
6900 } else {
6901 /* convert i from "first prog to remove" to "first to adjust" */
6902 if (env->subprog_info[i].start == off)
6903 i++;
6904 }
6905
6906 /* update fake 'exit' subprog as well */
6907 for (; i <= env->subprog_cnt; i++)
6908 env->subprog_info[i].start -= cnt;
6909
6910 return 0;
6911}
6912
6913static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
6914 u32 cnt)
6915{
6916 struct bpf_prog *prog = env->prog;
6917 u32 i, l_off, l_cnt, nr_linfo;
6918 struct bpf_line_info *linfo;
6919
6920 nr_linfo = prog->aux->nr_linfo;
6921 if (!nr_linfo)
6922 return 0;
6923
6924 linfo = prog->aux->linfo;
6925
6926 /* find first line info to remove, count lines to be removed */
6927 for (i = 0; i < nr_linfo; i++)
6928 if (linfo[i].insn_off >= off)
6929 break;
6930
6931 l_off = i;
6932 l_cnt = 0;
6933 for (; i < nr_linfo; i++)
6934 if (linfo[i].insn_off < off + cnt)
6935 l_cnt++;
6936 else
6937 break;
6938
6939 /* First live insn doesn't match first live linfo, it needs to "inherit"
6940 * last removed linfo. prog is already modified, so prog->len == off
6941 * means no live instructions after (tail of the program was removed).
6942 */
6943 if (prog->len != off && l_cnt &&
6944 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
6945 l_cnt--;
6946 linfo[--i].insn_off = off + cnt;
6947 }
6948
6949 /* remove the line info which refer to the removed instructions */
6950 if (l_cnt) {
6951 memmove(linfo + l_off, linfo + i,
6952 sizeof(*linfo) * (nr_linfo - i));
6953
6954 prog->aux->nr_linfo -= l_cnt;
6955 nr_linfo = prog->aux->nr_linfo;
6956 }
6957
6958 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
6959 for (i = l_off; i < nr_linfo; i++)
6960 linfo[i].insn_off -= cnt;
6961
6962 /* fix up all subprogs (incl. 'exit') which start >= off */
6963 for (i = 0; i <= env->subprog_cnt; i++)
6964 if (env->subprog_info[i].linfo_idx > l_off) {
6965 /* program may have started in the removed region but
6966 * may not be fully removed
6967 */
6968 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
6969 env->subprog_info[i].linfo_idx -= l_cnt;
6970 else
6971 env->subprog_info[i].linfo_idx = l_off;
6972 }
6973
6974 return 0;
6975}
6976
6977static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
6978{
6979 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
6980 unsigned int orig_prog_len = env->prog->len;
6981 int err;
6982
08ca90af
JK
6983 if (bpf_prog_is_dev_bound(env->prog->aux))
6984 bpf_prog_offload_remove_insns(env, off, cnt);
6985
52875a04
JK
6986 err = bpf_remove_insns(env->prog, off, cnt);
6987 if (err)
6988 return err;
6989
6990 err = adjust_subprog_starts_after_remove(env, off, cnt);
6991 if (err)
6992 return err;
6993
6994 err = bpf_adj_linfo_after_remove(env, off, cnt);
6995 if (err)
6996 return err;
6997
6998 memmove(aux_data + off, aux_data + off + cnt,
6999 sizeof(*aux_data) * (orig_prog_len - off - cnt));
7000
7001 return 0;
7002}
7003
2a5418a1
DB
7004/* The verifier does more data flow analysis than llvm and will not
7005 * explore branches that are dead at run time. Malicious programs can
7006 * have dead code too. Therefore replace all dead at-run-time code
7007 * with 'ja -1'.
7008 *
7009 * Just nops are not optimal, e.g. if they would sit at the end of the
7010 * program and through another bug we would manage to jump there, then
7011 * we'd execute beyond program memory otherwise. Returning exception
7012 * code also wouldn't work since we can have subprogs where the dead
7013 * code could be located.
c131187d
AS
7014 */
7015static void sanitize_dead_code(struct bpf_verifier_env *env)
7016{
7017 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
2a5418a1 7018 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
c131187d
AS
7019 struct bpf_insn *insn = env->prog->insnsi;
7020 const int insn_cnt = env->prog->len;
7021 int i;
7022
7023 for (i = 0; i < insn_cnt; i++) {
7024 if (aux_data[i].seen)
7025 continue;
2a5418a1 7026 memcpy(insn + i, &trap, sizeof(trap));
c131187d
AS
7027 }
7028}
7029
e2ae4ca2
JK
7030static bool insn_is_cond_jump(u8 code)
7031{
7032 u8 op;
7033
092ed096
JW
7034 if (BPF_CLASS(code) == BPF_JMP32)
7035 return true;
7036
e2ae4ca2
JK
7037 if (BPF_CLASS(code) != BPF_JMP)
7038 return false;
7039
7040 op = BPF_OP(code);
7041 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
7042}
7043
7044static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
7045{
7046 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7047 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
7048 struct bpf_insn *insn = env->prog->insnsi;
7049 const int insn_cnt = env->prog->len;
7050 int i;
7051
7052 for (i = 0; i < insn_cnt; i++, insn++) {
7053 if (!insn_is_cond_jump(insn->code))
7054 continue;
7055
7056 if (!aux_data[i + 1].seen)
7057 ja.off = insn->off;
7058 else if (!aux_data[i + 1 + insn->off].seen)
7059 ja.off = 0;
7060 else
7061 continue;
7062
08ca90af
JK
7063 if (bpf_prog_is_dev_bound(env->prog->aux))
7064 bpf_prog_offload_replace_insn(env, i, &ja);
7065
e2ae4ca2
JK
7066 memcpy(insn, &ja, sizeof(ja));
7067 }
7068}
7069
52875a04
JK
7070static int opt_remove_dead_code(struct bpf_verifier_env *env)
7071{
7072 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7073 int insn_cnt = env->prog->len;
7074 int i, err;
7075
7076 for (i = 0; i < insn_cnt; i++) {
7077 int j;
7078
7079 j = 0;
7080 while (i + j < insn_cnt && !aux_data[i + j].seen)
7081 j++;
7082 if (!j)
7083 continue;
7084
7085 err = verifier_remove_insns(env, i, j);
7086 if (err)
7087 return err;
7088 insn_cnt = env->prog->len;
7089 }
7090
7091 return 0;
7092}
7093
a1b14abc
JK
7094static int opt_remove_nops(struct bpf_verifier_env *env)
7095{
7096 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
7097 struct bpf_insn *insn = env->prog->insnsi;
7098 int insn_cnt = env->prog->len;
7099 int i, err;
7100
7101 for (i = 0; i < insn_cnt; i++) {
7102 if (memcmp(&insn[i], &ja, sizeof(ja)))
7103 continue;
7104
7105 err = verifier_remove_insns(env, i, 1);
7106 if (err)
7107 return err;
7108 insn_cnt--;
7109 i--;
7110 }
7111
7112 return 0;
7113}
7114
c64b7983
JS
7115/* convert load instructions that access fields of a context type into a
7116 * sequence of instructions that access fields of the underlying structure:
7117 * struct __sk_buff -> struct sk_buff
7118 * struct bpf_sock_ops -> struct sock
9bac3d6d 7119 */
58e2af8b 7120static int convert_ctx_accesses(struct bpf_verifier_env *env)
9bac3d6d 7121{
00176a34 7122 const struct bpf_verifier_ops *ops = env->ops;
f96da094 7123 int i, cnt, size, ctx_field_size, delta = 0;
3df126f3 7124 const int insn_cnt = env->prog->len;
36bbef52 7125 struct bpf_insn insn_buf[16], *insn;
46f53a65 7126 u32 target_size, size_default, off;
9bac3d6d 7127 struct bpf_prog *new_prog;
d691f9e8 7128 enum bpf_access_type type;
f96da094 7129 bool is_narrower_load;
9bac3d6d 7130
b09928b9
DB
7131 if (ops->gen_prologue || env->seen_direct_write) {
7132 if (!ops->gen_prologue) {
7133 verbose(env, "bpf verifier is misconfigured\n");
7134 return -EINVAL;
7135 }
36bbef52
DB
7136 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
7137 env->prog);
7138 if (cnt >= ARRAY_SIZE(insn_buf)) {
61bd5218 7139 verbose(env, "bpf verifier is misconfigured\n");
36bbef52
DB
7140 return -EINVAL;
7141 } else if (cnt) {
8041902d 7142 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
36bbef52
DB
7143 if (!new_prog)
7144 return -ENOMEM;
8041902d 7145
36bbef52 7146 env->prog = new_prog;
3df126f3 7147 delta += cnt - 1;
36bbef52
DB
7148 }
7149 }
7150
c64b7983 7151 if (bpf_prog_is_dev_bound(env->prog->aux))
9bac3d6d
AS
7152 return 0;
7153
3df126f3 7154 insn = env->prog->insnsi + delta;
36bbef52 7155
9bac3d6d 7156 for (i = 0; i < insn_cnt; i++, insn++) {
c64b7983
JS
7157 bpf_convert_ctx_access_t convert_ctx_access;
7158
62c7989b
DB
7159 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
7160 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
7161 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
ea2e7ce5 7162 insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
d691f9e8 7163 type = BPF_READ;
62c7989b
DB
7164 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
7165 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
7166 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
ea2e7ce5 7167 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
d691f9e8
AS
7168 type = BPF_WRITE;
7169 else
9bac3d6d
AS
7170 continue;
7171
af86ca4e
AS
7172 if (type == BPF_WRITE &&
7173 env->insn_aux_data[i + delta].sanitize_stack_off) {
7174 struct bpf_insn patch[] = {
7175 /* Sanitize suspicious stack slot with zero.
7176 * There are no memory dependencies for this store,
7177 * since it's only using frame pointer and immediate
7178 * constant of zero
7179 */
7180 BPF_ST_MEM(BPF_DW, BPF_REG_FP,
7181 env->insn_aux_data[i + delta].sanitize_stack_off,
7182 0),
7183 /* the original STX instruction will immediately
7184 * overwrite the same stack slot with appropriate value
7185 */
7186 *insn,
7187 };
7188
7189 cnt = ARRAY_SIZE(patch);
7190 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
7191 if (!new_prog)
7192 return -ENOMEM;
7193
7194 delta += cnt - 1;
7195 env->prog = new_prog;
7196 insn = new_prog->insnsi + i + delta;
7197 continue;
7198 }
7199
c64b7983
JS
7200 switch (env->insn_aux_data[i + delta].ptr_type) {
7201 case PTR_TO_CTX:
7202 if (!ops->convert_ctx_access)
7203 continue;
7204 convert_ctx_access = ops->convert_ctx_access;
7205 break;
7206 case PTR_TO_SOCKET:
46f8bc92 7207 case PTR_TO_SOCK_COMMON:
c64b7983
JS
7208 convert_ctx_access = bpf_sock_convert_ctx_access;
7209 break;
655a51e5
MKL
7210 case PTR_TO_TCP_SOCK:
7211 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
7212 break;
c64b7983 7213 default:
9bac3d6d 7214 continue;
c64b7983 7215 }
9bac3d6d 7216
31fd8581 7217 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
f96da094 7218 size = BPF_LDST_BYTES(insn);
31fd8581
YS
7219
7220 /* If the read access is a narrower load of the field,
7221 * convert to a 4/8-byte load, to minimum program type specific
7222 * convert_ctx_access changes. If conversion is successful,
7223 * we will apply proper mask to the result.
7224 */
f96da094 7225 is_narrower_load = size < ctx_field_size;
46f53a65
AI
7226 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
7227 off = insn->off;
31fd8581 7228 if (is_narrower_load) {
f96da094
DB
7229 u8 size_code;
7230
7231 if (type == BPF_WRITE) {
61bd5218 7232 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
f96da094
DB
7233 return -EINVAL;
7234 }
31fd8581 7235
f96da094 7236 size_code = BPF_H;
31fd8581
YS
7237 if (ctx_field_size == 4)
7238 size_code = BPF_W;
7239 else if (ctx_field_size == 8)
7240 size_code = BPF_DW;
f96da094 7241
bc23105c 7242 insn->off = off & ~(size_default - 1);
31fd8581
YS
7243 insn->code = BPF_LDX | BPF_MEM | size_code;
7244 }
f96da094
DB
7245
7246 target_size = 0;
c64b7983
JS
7247 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
7248 &target_size);
f96da094
DB
7249 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
7250 (ctx_field_size && !target_size)) {
61bd5218 7251 verbose(env, "bpf verifier is misconfigured\n");
9bac3d6d
AS
7252 return -EINVAL;
7253 }
f96da094
DB
7254
7255 if (is_narrower_load && size < target_size) {
46f53a65
AI
7256 u8 shift = (off & (size_default - 1)) * 8;
7257
7258 if (ctx_field_size <= 4) {
7259 if (shift)
7260 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
7261 insn->dst_reg,
7262 shift);
31fd8581 7263 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
f96da094 7264 (1 << size * 8) - 1);
46f53a65
AI
7265 } else {
7266 if (shift)
7267 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
7268 insn->dst_reg,
7269 shift);
31fd8581 7270 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
f96da094 7271 (1 << size * 8) - 1);
46f53a65 7272 }
31fd8581 7273 }
9bac3d6d 7274
8041902d 7275 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9bac3d6d
AS
7276 if (!new_prog)
7277 return -ENOMEM;
7278
3df126f3 7279 delta += cnt - 1;
9bac3d6d
AS
7280
7281 /* keep walking new program and skip insns we just inserted */
7282 env->prog = new_prog;
3df126f3 7283 insn = new_prog->insnsi + i + delta;
9bac3d6d
AS
7284 }
7285
7286 return 0;
7287}
7288
1c2a088a
AS
7289static int jit_subprogs(struct bpf_verifier_env *env)
7290{
7291 struct bpf_prog *prog = env->prog, **func, *tmp;
7292 int i, j, subprog_start, subprog_end = 0, len, subprog;
7105e828 7293 struct bpf_insn *insn;
1c2a088a 7294 void *old_bpf_func;
c454a46b 7295 int err;
1c2a088a 7296
f910cefa 7297 if (env->subprog_cnt <= 1)
1c2a088a
AS
7298 return 0;
7299
7105e828 7300 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1c2a088a
AS
7301 if (insn->code != (BPF_JMP | BPF_CALL) ||
7302 insn->src_reg != BPF_PSEUDO_CALL)
7303 continue;
c7a89784
DB
7304 /* Upon error here we cannot fall back to interpreter but
7305 * need a hard reject of the program. Thus -EFAULT is
7306 * propagated in any case.
7307 */
1c2a088a
AS
7308 subprog = find_subprog(env, i + insn->imm + 1);
7309 if (subprog < 0) {
7310 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
7311 i + insn->imm + 1);
7312 return -EFAULT;
7313 }
7314 /* temporarily remember subprog id inside insn instead of
7315 * aux_data, since next loop will split up all insns into funcs
7316 */
f910cefa 7317 insn->off = subprog;
1c2a088a
AS
7318 /* remember original imm in case JIT fails and fallback
7319 * to interpreter will be needed
7320 */
7321 env->insn_aux_data[i].call_imm = insn->imm;
7322 /* point imm to __bpf_call_base+1 from JITs point of view */
7323 insn->imm = 1;
7324 }
7325
c454a46b
MKL
7326 err = bpf_prog_alloc_jited_linfo(prog);
7327 if (err)
7328 goto out_undo_insn;
7329
7330 err = -ENOMEM;
6396bb22 7331 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
1c2a088a 7332 if (!func)
c7a89784 7333 goto out_undo_insn;
1c2a088a 7334
f910cefa 7335 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a 7336 subprog_start = subprog_end;
4cb3d99c 7337 subprog_end = env->subprog_info[i + 1].start;
1c2a088a
AS
7338
7339 len = subprog_end - subprog_start;
492ecee8
AS
7340 /* BPF_PROG_RUN doesn't call subprogs directly,
7341 * hence main prog stats include the runtime of subprogs.
7342 * subprogs don't have IDs and not reachable via prog_get_next_id
7343 * func[i]->aux->stats will never be accessed and stays NULL
7344 */
7345 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
1c2a088a
AS
7346 if (!func[i])
7347 goto out_free;
7348 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
7349 len * sizeof(struct bpf_insn));
4f74d809 7350 func[i]->type = prog->type;
1c2a088a 7351 func[i]->len = len;
4f74d809
DB
7352 if (bpf_prog_calc_tag(func[i]))
7353 goto out_free;
1c2a088a 7354 func[i]->is_func = 1;
ba64e7d8
YS
7355 func[i]->aux->func_idx = i;
7356 /* the btf and func_info will be freed only at prog->aux */
7357 func[i]->aux->btf = prog->aux->btf;
7358 func[i]->aux->func_info = prog->aux->func_info;
7359
1c2a088a
AS
7360 /* Use bpf_prog_F_tag to indicate functions in stack traces.
7361 * Long term would need debug info to populate names
7362 */
7363 func[i]->aux->name[0] = 'F';
9c8105bd 7364 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1c2a088a 7365 func[i]->jit_requested = 1;
c454a46b
MKL
7366 func[i]->aux->linfo = prog->aux->linfo;
7367 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
7368 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
7369 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
1c2a088a
AS
7370 func[i] = bpf_int_jit_compile(func[i]);
7371 if (!func[i]->jited) {
7372 err = -ENOTSUPP;
7373 goto out_free;
7374 }
7375 cond_resched();
7376 }
7377 /* at this point all bpf functions were successfully JITed
7378 * now populate all bpf_calls with correct addresses and
7379 * run last pass of JIT
7380 */
f910cefa 7381 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
7382 insn = func[i]->insnsi;
7383 for (j = 0; j < func[i]->len; j++, insn++) {
7384 if (insn->code != (BPF_JMP | BPF_CALL) ||
7385 insn->src_reg != BPF_PSEUDO_CALL)
7386 continue;
7387 subprog = insn->off;
1c2a088a
AS
7388 insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
7389 func[subprog]->bpf_func -
7390 __bpf_call_base;
7391 }
2162fed4
SD
7392
7393 /* we use the aux data to keep a list of the start addresses
7394 * of the JITed images for each function in the program
7395 *
7396 * for some architectures, such as powerpc64, the imm field
7397 * might not be large enough to hold the offset of the start
7398 * address of the callee's JITed image from __bpf_call_base
7399 *
7400 * in such cases, we can lookup the start address of a callee
7401 * by using its subprog id, available from the off field of
7402 * the call instruction, as an index for this list
7403 */
7404 func[i]->aux->func = func;
7405 func[i]->aux->func_cnt = env->subprog_cnt;
1c2a088a 7406 }
f910cefa 7407 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
7408 old_bpf_func = func[i]->bpf_func;
7409 tmp = bpf_int_jit_compile(func[i]);
7410 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
7411 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
c7a89784 7412 err = -ENOTSUPP;
1c2a088a
AS
7413 goto out_free;
7414 }
7415 cond_resched();
7416 }
7417
7418 /* finally lock prog and jit images for all functions and
7419 * populate kallsysm
7420 */
f910cefa 7421 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
7422 bpf_prog_lock_ro(func[i]);
7423 bpf_prog_kallsyms_add(func[i]);
7424 }
7105e828
DB
7425
7426 /* Last step: make now unused interpreter insns from main
7427 * prog consistent for later dump requests, so they can
7428 * later look the same as if they were interpreted only.
7429 */
7430 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7105e828
DB
7431 if (insn->code != (BPF_JMP | BPF_CALL) ||
7432 insn->src_reg != BPF_PSEUDO_CALL)
7433 continue;
7434 insn->off = env->insn_aux_data[i].call_imm;
7435 subprog = find_subprog(env, i + insn->off + 1);
dbecd738 7436 insn->imm = subprog;
7105e828
DB
7437 }
7438
1c2a088a
AS
7439 prog->jited = 1;
7440 prog->bpf_func = func[0]->bpf_func;
7441 prog->aux->func = func;
f910cefa 7442 prog->aux->func_cnt = env->subprog_cnt;
c454a46b 7443 bpf_prog_free_unused_jited_linfo(prog);
1c2a088a
AS
7444 return 0;
7445out_free:
f910cefa 7446 for (i = 0; i < env->subprog_cnt; i++)
1c2a088a
AS
7447 if (func[i])
7448 bpf_jit_free(func[i]);
7449 kfree(func);
c7a89784 7450out_undo_insn:
1c2a088a
AS
7451 /* cleanup main prog to be interpreted */
7452 prog->jit_requested = 0;
7453 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7454 if (insn->code != (BPF_JMP | BPF_CALL) ||
7455 insn->src_reg != BPF_PSEUDO_CALL)
7456 continue;
7457 insn->off = 0;
7458 insn->imm = env->insn_aux_data[i].call_imm;
7459 }
c454a46b 7460 bpf_prog_free_jited_linfo(prog);
1c2a088a
AS
7461 return err;
7462}
7463
1ea47e01
AS
7464static int fixup_call_args(struct bpf_verifier_env *env)
7465{
19d28fbd 7466#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
7467 struct bpf_prog *prog = env->prog;
7468 struct bpf_insn *insn = prog->insnsi;
7469 int i, depth;
19d28fbd 7470#endif
e4052d06 7471 int err = 0;
1ea47e01 7472
e4052d06
QM
7473 if (env->prog->jit_requested &&
7474 !bpf_prog_is_dev_bound(env->prog->aux)) {
19d28fbd
DM
7475 err = jit_subprogs(env);
7476 if (err == 0)
1c2a088a 7477 return 0;
c7a89784
DB
7478 if (err == -EFAULT)
7479 return err;
19d28fbd
DM
7480 }
7481#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
7482 for (i = 0; i < prog->len; i++, insn++) {
7483 if (insn->code != (BPF_JMP | BPF_CALL) ||
7484 insn->src_reg != BPF_PSEUDO_CALL)
7485 continue;
7486 depth = get_callee_stack_depth(env, insn, i);
7487 if (depth < 0)
7488 return depth;
7489 bpf_patch_call_args(insn, depth);
7490 }
19d28fbd
DM
7491 err = 0;
7492#endif
7493 return err;
1ea47e01
AS
7494}
7495
79741b3b 7496/* fixup insn->imm field of bpf_call instructions
81ed18ab 7497 * and inline eligible helpers as explicit sequence of BPF instructions
e245c5c6
AS
7498 *
7499 * this function is called after eBPF program passed verification
7500 */
79741b3b 7501static int fixup_bpf_calls(struct bpf_verifier_env *env)
e245c5c6 7502{
79741b3b
AS
7503 struct bpf_prog *prog = env->prog;
7504 struct bpf_insn *insn = prog->insnsi;
e245c5c6 7505 const struct bpf_func_proto *fn;
79741b3b 7506 const int insn_cnt = prog->len;
09772d92 7507 const struct bpf_map_ops *ops;
c93552c4 7508 struct bpf_insn_aux_data *aux;
81ed18ab
AS
7509 struct bpf_insn insn_buf[16];
7510 struct bpf_prog *new_prog;
7511 struct bpf_map *map_ptr;
7512 int i, cnt, delta = 0;
e245c5c6 7513
79741b3b 7514 for (i = 0; i < insn_cnt; i++, insn++) {
f6b1b3bf
DB
7515 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
7516 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
7517 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
68fda450 7518 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
f6b1b3bf
DB
7519 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
7520 struct bpf_insn mask_and_div[] = {
7521 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
7522 /* Rx div 0 -> 0 */
7523 BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
7524 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
7525 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
7526 *insn,
7527 };
7528 struct bpf_insn mask_and_mod[] = {
7529 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
7530 /* Rx mod 0 -> Rx */
7531 BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
7532 *insn,
7533 };
7534 struct bpf_insn *patchlet;
7535
7536 if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
7537 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
7538 patchlet = mask_and_div + (is64 ? 1 : 0);
7539 cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
7540 } else {
7541 patchlet = mask_and_mod + (is64 ? 1 : 0);
7542 cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
7543 }
7544
7545 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
68fda450
AS
7546 if (!new_prog)
7547 return -ENOMEM;
7548
7549 delta += cnt - 1;
7550 env->prog = prog = new_prog;
7551 insn = new_prog->insnsi + i + delta;
7552 continue;
7553 }
7554
e0cea7ce
DB
7555 if (BPF_CLASS(insn->code) == BPF_LD &&
7556 (BPF_MODE(insn->code) == BPF_ABS ||
7557 BPF_MODE(insn->code) == BPF_IND)) {
7558 cnt = env->ops->gen_ld_abs(insn, insn_buf);
7559 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
7560 verbose(env, "bpf verifier is misconfigured\n");
7561 return -EINVAL;
7562 }
7563
7564 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7565 if (!new_prog)
7566 return -ENOMEM;
7567
7568 delta += cnt - 1;
7569 env->prog = prog = new_prog;
7570 insn = new_prog->insnsi + i + delta;
7571 continue;
7572 }
7573
979d63d5
DB
7574 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
7575 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
7576 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
7577 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
7578 struct bpf_insn insn_buf[16];
7579 struct bpf_insn *patch = &insn_buf[0];
7580 bool issrc, isneg;
7581 u32 off_reg;
7582
7583 aux = &env->insn_aux_data[i + delta];
3612af78
DB
7584 if (!aux->alu_state ||
7585 aux->alu_state == BPF_ALU_NON_POINTER)
979d63d5
DB
7586 continue;
7587
7588 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
7589 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
7590 BPF_ALU_SANITIZE_SRC;
7591
7592 off_reg = issrc ? insn->src_reg : insn->dst_reg;
7593 if (isneg)
7594 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
7595 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
7596 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
7597 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
7598 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
7599 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
7600 if (issrc) {
7601 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
7602 off_reg);
7603 insn->src_reg = BPF_REG_AX;
7604 } else {
7605 *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
7606 BPF_REG_AX);
7607 }
7608 if (isneg)
7609 insn->code = insn->code == code_add ?
7610 code_sub : code_add;
7611 *patch++ = *insn;
7612 if (issrc && isneg)
7613 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
7614 cnt = patch - insn_buf;
7615
7616 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7617 if (!new_prog)
7618 return -ENOMEM;
7619
7620 delta += cnt - 1;
7621 env->prog = prog = new_prog;
7622 insn = new_prog->insnsi + i + delta;
7623 continue;
7624 }
7625
79741b3b
AS
7626 if (insn->code != (BPF_JMP | BPF_CALL))
7627 continue;
cc8b0b92
AS
7628 if (insn->src_reg == BPF_PSEUDO_CALL)
7629 continue;
e245c5c6 7630
79741b3b
AS
7631 if (insn->imm == BPF_FUNC_get_route_realm)
7632 prog->dst_needed = 1;
7633 if (insn->imm == BPF_FUNC_get_prandom_u32)
7634 bpf_user_rnd_init_once();
9802d865
JB
7635 if (insn->imm == BPF_FUNC_override_return)
7636 prog->kprobe_override = 1;
79741b3b 7637 if (insn->imm == BPF_FUNC_tail_call) {
7b9f6da1
DM
7638 /* If we tail call into other programs, we
7639 * cannot make any assumptions since they can
7640 * be replaced dynamically during runtime in
7641 * the program array.
7642 */
7643 prog->cb_access = 1;
80a58d02 7644 env->prog->aux->stack_depth = MAX_BPF_STACK;
e647815a 7645 env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
7b9f6da1 7646
79741b3b
AS
7647 /* mark bpf_tail_call as different opcode to avoid
7648 * conditional branch in the interpeter for every normal
7649 * call and to prevent accidental JITing by JIT compiler
7650 * that doesn't support bpf_tail_call yet
e245c5c6 7651 */
79741b3b 7652 insn->imm = 0;
71189fa9 7653 insn->code = BPF_JMP | BPF_TAIL_CALL;
b2157399 7654
c93552c4
DB
7655 aux = &env->insn_aux_data[i + delta];
7656 if (!bpf_map_ptr_unpriv(aux))
7657 continue;
7658
b2157399
AS
7659 /* instead of changing every JIT dealing with tail_call
7660 * emit two extra insns:
7661 * if (index >= max_entries) goto out;
7662 * index &= array->index_mask;
7663 * to avoid out-of-bounds cpu speculation
7664 */
c93552c4 7665 if (bpf_map_ptr_poisoned(aux)) {
40950343 7666 verbose(env, "tail_call abusing map_ptr\n");
b2157399
AS
7667 return -EINVAL;
7668 }
c93552c4
DB
7669
7670 map_ptr = BPF_MAP_PTR(aux->map_state);
b2157399
AS
7671 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
7672 map_ptr->max_entries, 2);
7673 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
7674 container_of(map_ptr,
7675 struct bpf_array,
7676 map)->index_mask);
7677 insn_buf[2] = *insn;
7678 cnt = 3;
7679 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7680 if (!new_prog)
7681 return -ENOMEM;
7682
7683 delta += cnt - 1;
7684 env->prog = prog = new_prog;
7685 insn = new_prog->insnsi + i + delta;
79741b3b
AS
7686 continue;
7687 }
e245c5c6 7688
89c63074 7689 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
09772d92
DB
7690 * and other inlining handlers are currently limited to 64 bit
7691 * only.
89c63074 7692 */
60b58afc 7693 if (prog->jit_requested && BITS_PER_LONG == 64 &&
09772d92
DB
7694 (insn->imm == BPF_FUNC_map_lookup_elem ||
7695 insn->imm == BPF_FUNC_map_update_elem ||
84430d42
DB
7696 insn->imm == BPF_FUNC_map_delete_elem ||
7697 insn->imm == BPF_FUNC_map_push_elem ||
7698 insn->imm == BPF_FUNC_map_pop_elem ||
7699 insn->imm == BPF_FUNC_map_peek_elem)) {
c93552c4
DB
7700 aux = &env->insn_aux_data[i + delta];
7701 if (bpf_map_ptr_poisoned(aux))
7702 goto patch_call_imm;
7703
7704 map_ptr = BPF_MAP_PTR(aux->map_state);
09772d92
DB
7705 ops = map_ptr->ops;
7706 if (insn->imm == BPF_FUNC_map_lookup_elem &&
7707 ops->map_gen_lookup) {
7708 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
7709 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
7710 verbose(env, "bpf verifier is misconfigured\n");
7711 return -EINVAL;
7712 }
81ed18ab 7713
09772d92
DB
7714 new_prog = bpf_patch_insn_data(env, i + delta,
7715 insn_buf, cnt);
7716 if (!new_prog)
7717 return -ENOMEM;
81ed18ab 7718
09772d92
DB
7719 delta += cnt - 1;
7720 env->prog = prog = new_prog;
7721 insn = new_prog->insnsi + i + delta;
7722 continue;
7723 }
81ed18ab 7724
09772d92
DB
7725 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
7726 (void *(*)(struct bpf_map *map, void *key))NULL));
7727 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
7728 (int (*)(struct bpf_map *map, void *key))NULL));
7729 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
7730 (int (*)(struct bpf_map *map, void *key, void *value,
7731 u64 flags))NULL));
84430d42
DB
7732 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
7733 (int (*)(struct bpf_map *map, void *value,
7734 u64 flags))NULL));
7735 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
7736 (int (*)(struct bpf_map *map, void *value))NULL));
7737 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
7738 (int (*)(struct bpf_map *map, void *value))NULL));
7739
09772d92
DB
7740 switch (insn->imm) {
7741 case BPF_FUNC_map_lookup_elem:
7742 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
7743 __bpf_call_base;
7744 continue;
7745 case BPF_FUNC_map_update_elem:
7746 insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
7747 __bpf_call_base;
7748 continue;
7749 case BPF_FUNC_map_delete_elem:
7750 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
7751 __bpf_call_base;
7752 continue;
84430d42
DB
7753 case BPF_FUNC_map_push_elem:
7754 insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
7755 __bpf_call_base;
7756 continue;
7757 case BPF_FUNC_map_pop_elem:
7758 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
7759 __bpf_call_base;
7760 continue;
7761 case BPF_FUNC_map_peek_elem:
7762 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
7763 __bpf_call_base;
7764 continue;
09772d92 7765 }
81ed18ab 7766
09772d92 7767 goto patch_call_imm;
81ed18ab
AS
7768 }
7769
7770patch_call_imm:
5e43f899 7771 fn = env->ops->get_func_proto(insn->imm, env->prog);
79741b3b
AS
7772 /* all functions that have prototype and verifier allowed
7773 * programs to call them, must be real in-kernel functions
7774 */
7775 if (!fn->func) {
61bd5218
JK
7776 verbose(env,
7777 "kernel subsystem misconfigured func %s#%d\n",
79741b3b
AS
7778 func_id_name(insn->imm), insn->imm);
7779 return -EFAULT;
e245c5c6 7780 }
79741b3b 7781 insn->imm = fn->func - __bpf_call_base;
e245c5c6 7782 }
e245c5c6 7783
79741b3b
AS
7784 return 0;
7785}
e245c5c6 7786
58e2af8b 7787static void free_states(struct bpf_verifier_env *env)
f1bca824 7788{
58e2af8b 7789 struct bpf_verifier_state_list *sl, *sln;
f1bca824
AS
7790 int i;
7791
7792 if (!env->explored_states)
7793 return;
7794
7795 for (i = 0; i < env->prog->len; i++) {
7796 sl = env->explored_states[i];
7797
7798 if (sl)
7799 while (sl != STATE_LIST_MARK) {
7800 sln = sl->next;
1969db47 7801 free_verifier_state(&sl->state, false);
f1bca824
AS
7802 kfree(sl);
7803 sl = sln;
7804 }
7805 }
7806
7807 kfree(env->explored_states);
7808}
7809
838e9690
YS
7810int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
7811 union bpf_attr __user *uattr)
51580e79 7812{
58e2af8b 7813 struct bpf_verifier_env *env;
b9193c1b 7814 struct bpf_verifier_log *log;
9e4c24e7 7815 int i, len, ret = -EINVAL;
e2ae4ca2 7816 bool is_priv;
51580e79 7817
eba0c929
AB
7818 /* no program is valid */
7819 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
7820 return -EINVAL;
7821
58e2af8b 7822 /* 'struct bpf_verifier_env' can be global, but since it's not small,
cbd35700
AS
7823 * allocate/free it every time bpf_check() is called
7824 */
58e2af8b 7825 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
cbd35700
AS
7826 if (!env)
7827 return -ENOMEM;
61bd5218 7828 log = &env->log;
cbd35700 7829
9e4c24e7 7830 len = (*prog)->len;
fad953ce 7831 env->insn_aux_data =
9e4c24e7 7832 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
3df126f3
JK
7833 ret = -ENOMEM;
7834 if (!env->insn_aux_data)
7835 goto err_free_env;
9e4c24e7
JK
7836 for (i = 0; i < len; i++)
7837 env->insn_aux_data[i].orig_idx = i;
9bac3d6d 7838 env->prog = *prog;
00176a34 7839 env->ops = bpf_verifier_ops[env->prog->type];
0246e64d 7840
cbd35700
AS
7841 /* grab the mutex to protect few globals used by verifier */
7842 mutex_lock(&bpf_verifier_lock);
7843
7844 if (attr->log_level || attr->log_buf || attr->log_size) {
7845 /* user requested verbose verifier output
7846 * and supplied buffer to store the verification trace
7847 */
e7bf8249
JK
7848 log->level = attr->log_level;
7849 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
7850 log->len_total = attr->log_size;
cbd35700
AS
7851
7852 ret = -EINVAL;
e7bf8249
JK
7853 /* log attributes have to be sane */
7854 if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
7855 !log->level || !log->ubuf)
3df126f3 7856 goto err_unlock;
cbd35700 7857 }
1ad2f583
DB
7858
7859 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
7860 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
e07b98d9 7861 env->strict_alignment = true;
e9ee9efc
DM
7862 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
7863 env->strict_alignment = false;
cbd35700 7864
e2ae4ca2
JK
7865 is_priv = capable(CAP_SYS_ADMIN);
7866 env->allow_ptr_leaks = is_priv;
7867
f4e3ec0d
JK
7868 ret = replace_map_fd_with_map_ptr(env);
7869 if (ret < 0)
7870 goto skip_full_check;
7871
cae1927c 7872 if (bpf_prog_is_dev_bound(env->prog->aux)) {
a40a2632 7873 ret = bpf_prog_offload_verifier_prep(env->prog);
ab3f0063 7874 if (ret)
f4e3ec0d 7875 goto skip_full_check;
ab3f0063
JK
7876 }
7877
9bac3d6d 7878 env->explored_states = kcalloc(env->prog->len,
58e2af8b 7879 sizeof(struct bpf_verifier_state_list *),
f1bca824
AS
7880 GFP_USER);
7881 ret = -ENOMEM;
7882 if (!env->explored_states)
7883 goto skip_full_check;
7884
d9762e84 7885 ret = check_subprogs(env);
475fb78f
AS
7886 if (ret < 0)
7887 goto skip_full_check;
7888
c454a46b 7889 ret = check_btf_info(env, attr, uattr);
838e9690
YS
7890 if (ret < 0)
7891 goto skip_full_check;
7892
d9762e84
MKL
7893 ret = check_cfg(env);
7894 if (ret < 0)
7895 goto skip_full_check;
7896
17a52670 7897 ret = do_check(env);
8c01c4f8
CG
7898 if (env->cur_state) {
7899 free_verifier_state(env->cur_state, true);
7900 env->cur_state = NULL;
7901 }
cbd35700 7902
c941ce9c
QM
7903 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
7904 ret = bpf_prog_offload_finalize(env);
7905
0246e64d 7906skip_full_check:
638f5b90 7907 while (!pop_stack(env, NULL, NULL));
f1bca824 7908 free_states(env);
0246e64d 7909
c131187d 7910 if (ret == 0)
9b38c405 7911 ret = check_max_stack_depth(env);
c131187d 7912
9b38c405 7913 /* instruction rewrites happen after this point */
e2ae4ca2
JK
7914 if (is_priv) {
7915 if (ret == 0)
7916 opt_hard_wire_dead_code_branches(env);
52875a04
JK
7917 if (ret == 0)
7918 ret = opt_remove_dead_code(env);
a1b14abc
JK
7919 if (ret == 0)
7920 ret = opt_remove_nops(env);
52875a04
JK
7921 } else {
7922 if (ret == 0)
7923 sanitize_dead_code(env);
e2ae4ca2
JK
7924 }
7925
9bac3d6d
AS
7926 if (ret == 0)
7927 /* program is valid, convert *(u32*)(ctx + off) accesses */
7928 ret = convert_ctx_accesses(env);
7929
e245c5c6 7930 if (ret == 0)
79741b3b 7931 ret = fixup_bpf_calls(env);
e245c5c6 7932
1ea47e01
AS
7933 if (ret == 0)
7934 ret = fixup_call_args(env);
7935
a2a7d570 7936 if (log->level && bpf_verifier_log_full(log))
cbd35700 7937 ret = -ENOSPC;
a2a7d570 7938 if (log->level && !log->ubuf) {
cbd35700 7939 ret = -EFAULT;
a2a7d570 7940 goto err_release_maps;
cbd35700
AS
7941 }
7942
0246e64d
AS
7943 if (ret == 0 && env->used_map_cnt) {
7944 /* if program passed verifier, update used_maps in bpf_prog_info */
9bac3d6d
AS
7945 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
7946 sizeof(env->used_maps[0]),
7947 GFP_KERNEL);
0246e64d 7948
9bac3d6d 7949 if (!env->prog->aux->used_maps) {
0246e64d 7950 ret = -ENOMEM;
a2a7d570 7951 goto err_release_maps;
0246e64d
AS
7952 }
7953
9bac3d6d 7954 memcpy(env->prog->aux->used_maps, env->used_maps,
0246e64d 7955 sizeof(env->used_maps[0]) * env->used_map_cnt);
9bac3d6d 7956 env->prog->aux->used_map_cnt = env->used_map_cnt;
0246e64d
AS
7957
7958 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
7959 * bpf_ld_imm64 instructions
7960 */
7961 convert_pseudo_ld_imm64(env);
7962 }
cbd35700 7963
ba64e7d8
YS
7964 if (ret == 0)
7965 adjust_btf_func(env);
7966
a2a7d570 7967err_release_maps:
9bac3d6d 7968 if (!env->prog->aux->used_maps)
0246e64d 7969 /* if we didn't copy map pointers into bpf_prog_info, release
ab7f5bf0 7970 * them now. Otherwise free_used_maps() will release them.
0246e64d
AS
7971 */
7972 release_maps(env);
9bac3d6d 7973 *prog = env->prog;
3df126f3 7974err_unlock:
cbd35700 7975 mutex_unlock(&bpf_verifier_lock);
3df126f3
JK
7976 vfree(env->insn_aux_data);
7977err_free_env:
7978 kfree(env);
51580e79
AS
7979 return ret;
7980}