kernel/bpf/verifier.c

   1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   2  * Copyright (c) 2016 Facebook
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of version 2 of the GNU General Public
   6  * License as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful, but
   9  * WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11  * General Public License for more details.
  12  */
  13 #include <linux/kernel.h>
  14 #include <linux/types.h>
  15 #include <linux/slab.h>
  16 #include <linux/bpf.h>
  17 #include <linux/bpf_verifier.h>
  18 #include <linux/filter.h>
  19 #include <net/netlink.h>
  20 #include <linux/file.h>
  21 #include <linux/vmalloc.h>
  22 #include <linux/stringify.h>
  23 #include <linux/bsearch.h>
  24 #include <linux/sort.h>
  25
  26 #include "disasm.h"
  27
  28 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  29 #define BPF_PROG_TYPE(_id, _name) \
  30         [_id] = & _name ## _verifier_ops,
  31 #define BPF_MAP_TYPE(_id, _ops)
  32 #include <linux/bpf_types.h>
  33 #undef BPF_PROG_TYPE
  34 #undef BPF_MAP_TYPE
  35 };
  36
  37 /* bpf_check() is a static code analyzer that walks eBPF program
  38  * instruction by instruction and updates register/stack state.
  39  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
  40  *
  41  * The first pass is depth-first-search to check that the program is a DAG.
  42  * It rejects the following programs:
  43  * - larger than BPF_MAXINSNS insns
  44  * - if loop is present (detected via back-edge)
  45  * - unreachable insns exist (shouldn't be a forest. program = one function)
  46  * - out of bounds or malformed jumps
  47  * The second pass is all possible path descent from the 1st insn.
  48  * Since it's analyzing all pathes through the program, the length of the
  49  * analysis is limited to 64k insn, which may be hit even if total number of
  50  * insn is less then 4K, but there are too many branches that change stack/regs.
  51  * Number of 'branches to be analyzed' is limited to 1k
  52  *
  53  * On entry to each instruction, each register has a type, and the instruction
  54  * changes the types of the registers depending on instruction semantics.
  55  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
  56  * copied to R1.
  57  *
  58  * All registers are 64-bit.
  59  * R0 - return register
  60  * R1-R5 argument passing registers
  61  * R6-R9 callee saved registers
  62  * R10 - frame pointer read-only
  63  *
  64  * At the start of BPF program the register R1 contains a pointer to bpf_context
  65  * and has type PTR_TO_CTX.
  66  *
  67  * Verifier tracks arithmetic operations on pointers in case:
  68  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
  69  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
  70  * 1st insn copies R10 (which has FRAME_PTR) type into R1
  71  * and 2nd arithmetic instruction is pattern matched to recognize
  72  * that it wants to construct a pointer to some element within stack.
  73  * So after 2nd insn, the register R1 has type PTR_TO_STACK
  74  * (and -20 constant is saved for further stack bounds checking).
  75  * Meaning that this reg is a pointer to stack plus known immediate constant.
  76  *
  77  * Most of the time the registers have SCALAR_VALUE type, which
  78  * means the register has some value, but it's not a valid pointer.
  79  * (like pointer plus pointer becomes SCALAR_VALUE type)
  80  *
  81  * When verifier sees load or store instructions the type of base register
  82  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
  83  * types recognized by check_mem_access() function.
  84  *
  85  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  86  * and the range of [ptr, ptr + map's value_size) is accessible.
  87  *
  88  * registers used to pass values to function calls are checked against
  89  * function argument constraints.
  90  *
  91  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
  92  * It means that the register type passed to this function must be
  93  * PTR_TO_STACK and it will be used inside the function as
  94  * 'pointer to map element key'
  95  *
  96  * For example the argument constraints for bpf_map_lookup_elem():
  97  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
  98  *   .arg1_type = ARG_CONST_MAP_PTR,
  99  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 100  *
 101  * ret_type says that this function returns 'pointer to map elem value or null'
 102  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 103  * 2nd argument should be a pointer to stack, which will be used inside
 104  * the helper function as a pointer to map element key.
 105  *
 106  * On the kernel side the helper function looks like:
 107  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 108  * {
 109  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 110  *    void *key = (void *) (unsigned long) r2;
 111  *    void *value;
 112  *
 113  *    here kernel can access 'key' and 'map' pointers safely, knowing that
 114  *    [key, key + map->key_size) bytes are valid and were initialized on
 115  *    the stack of eBPF program.
 116  * }
 117  *
 118  * Corresponding eBPF program may look like:
 119  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 120  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 121  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 122  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 123  * here verifier looks at prototype of map_lookup_elem() and sees:
 124  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 125  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 126  *
 127  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 128  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 129  * and were initialized prior to this call.
 130  * If it's ok, then verifier allows this BPF_CALL insn and looks at
 131  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 132  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 133  * returns ether pointer to map value or NULL.
 134  *
 135  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 136  * insn, the register holding that pointer in the true branch changes state to
 137  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 138  * branch. See check_cond_jmp_op().
 139  *
 140  * After the call R0 is set to return type of the function and registers R1-R5
 141  * are set to NOT_INIT to indicate that they are no longer readable.
 142  */
 143
 144 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
 145 struct bpf_verifier_stack_elem {
 146         /* verifer state is 'st'
 147          * before processing instruction 'insn_idx'
 148          * and after processing instruction 'prev_insn_idx'
 149          */
 150         struct bpf_verifier_state st;
 151         int insn_idx;
 152         int prev_insn_idx;
 153         struct bpf_verifier_stack_elem *next;
 154 };
 155
 156 #define BPF_COMPLEXITY_LIMIT_INSNS      131072
 157 #define BPF_COMPLEXITY_LIMIT_STACK      1024
 158
 159 #define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
 160
 161 struct bpf_call_arg_meta {
 162         struct bpf_map *map_ptr;
 163         bool raw_mode;
 164         bool pkt_access;
 165         int regno;
 166         int access_size;
 167 };
 168
 169 static DEFINE_MUTEX(bpf_verifier_lock);
 170
 171 /* log_level controls verbosity level of eBPF verifier.
 172  * verbose() is used to dump the verification trace to the log, so the user
 173  * can figure out what's wrong with the program
 174  */
 175 static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
 176                                    const char *fmt, ...)
 177 {
 178         struct bpf_verifer_log *log = &env->log;
 179         unsigned int n;
 180         va_list args;
 181
 182         if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
 183                 return;
 184
 185         va_start(args, fmt);
 186         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 187         va_end(args);
 188
 189         WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 190                   "verifier log line truncated - local buffer too short\n");
 191
 192         n = min(log->len_total - log->len_used - 1, n);
 193         log->kbuf[n] = '\0';
 194
 195         if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
 196                 log->len_used += n;
 197         else
 198                 log->ubuf = NULL;
 199 }
 200
 201 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 202 {
 203         return type == PTR_TO_PACKET ||
 204                type == PTR_TO_PACKET_META;
 205 }
 206
 207 /* string representation of 'enum bpf_reg_type' */
 208 static const char * const reg_type_str[] = {
 209         [NOT_INIT]              = "?",
 210         [SCALAR_VALUE]          = "inv",
 211         [PTR_TO_CTX]            = "ctx",
 212         [CONST_PTR_TO_MAP]      = "map_ptr",
 213         [PTR_TO_MAP_VALUE]      = "map_value",
 214         [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
 215         [PTR_TO_STACK]          = "fp",
 216         [PTR_TO_PACKET]         = "pkt",
 217         [PTR_TO_PACKET_META]    = "pkt_meta",
 218         [PTR_TO_PACKET_END]     = "pkt_end",
 219 };
 220
 221 static void print_liveness(struct bpf_verifier_env *env,
 222                            enum bpf_reg_liveness live)
 223 {
 224         if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
 225             verbose(env, "_");
 226         if (live & REG_LIVE_READ)
 227                 verbose(env, "r");
 228         if (live & REG_LIVE_WRITTEN)
 229                 verbose(env, "w");
 230 }
 231
 232 static struct bpf_func_state *func(struct bpf_verifier_env *env,
 233                                    const struct bpf_reg_state *reg)
 234 {
 235         struct bpf_verifier_state *cur = env->cur_state;
 236
 237         return cur->frame[reg->frameno];
 238 }
 239
 240 static void print_verifier_state(struct bpf_verifier_env *env,
 241                                  const struct bpf_func_state *state)
 242 {
 243         const struct bpf_reg_state *reg;
 244         enum bpf_reg_type t;
 245         int i;
 246
 247         if (state->frameno)
 248                 verbose(env, " frame%d:", state->frameno);
 249         for (i = 0; i < MAX_BPF_REG; i++) {
 250                 reg = &state->regs[i];
 251                 t = reg->type;
 252                 if (t == NOT_INIT)
 253                         continue;
 254                 verbose(env, " R%d", i);
 255                 print_liveness(env, reg->live);
 256                 verbose(env, "=%s", reg_type_str[t]);
 257                 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
 258                     tnum_is_const(reg->var_off)) {
 259                         /* reg->off should be 0 for SCALAR_VALUE */
 260                         verbose(env, "%lld", reg->var_off.value + reg->off);
 261                         if (t == PTR_TO_STACK)
 262                                 verbose(env, ",call_%d", func(env, reg)->callsite);
 263                 } else {
 264                         verbose(env, "(id=%d", reg->id);
 265                         if (t != SCALAR_VALUE)
 266                                 verbose(env, ",off=%d", reg->off);
 267                         if (type_is_pkt_pointer(t))
 268                                 verbose(env, ",r=%d", reg->range);
 269                         else if (t == CONST_PTR_TO_MAP ||
 270                                  t == PTR_TO_MAP_VALUE ||
 271                                  t == PTR_TO_MAP_VALUE_OR_NULL)
 272                                 verbose(env, ",ks=%d,vs=%d",
 273                                         reg->map_ptr->key_size,
 274                                         reg->map_ptr->value_size);
 275                         if (tnum_is_const(reg->var_off)) {
 276                                 /* Typically an immediate SCALAR_VALUE, but
 277                                  * could be a pointer whose offset is too big
 278                                  * for reg->off
 279                                  */
 280                                 verbose(env, ",imm=%llx", reg->var_off.value);
 281                         } else {
 282                                 if (reg->smin_value != reg->umin_value &&
 283                                     reg->smin_value != S64_MIN)
 284                                         verbose(env, ",smin_value=%lld",
 285                                                 (long long)reg->smin_value);
 286                                 if (reg->smax_value != reg->umax_value &&
 287                                     reg->smax_value != S64_MAX)
 288                                         verbose(env, ",smax_value=%lld",
 289                                                 (long long)reg->smax_value);
 290                                 if (reg->umin_value != 0)
 291                                         verbose(env, ",umin_value=%llu",
 292                                                 (unsigned long long)reg->umin_value);
 293                                 if (reg->umax_value != U64_MAX)
 294                                         verbose(env, ",umax_value=%llu",
 295                                                 (unsigned long long)reg->umax_value);
 296                                 if (!tnum_is_unknown(reg->var_off)) {
 297                                         char tn_buf[48];
 298
 299                                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 300                                         verbose(env, ",var_off=%s", tn_buf);
 301                                 }
 302                         }
 303                         verbose(env, ")");
 304                 }
 305         }
 306         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
 307                 if (state->stack[i].slot_type[0] == STACK_SPILL) {
 308                         verbose(env, " fp%d",
 309                                 (-i - 1) * BPF_REG_SIZE);
 310                         print_liveness(env, state->stack[i].spilled_ptr.live);
 311                         verbose(env, "=%s",
 312                                 reg_type_str[state->stack[i].spilled_ptr.type]);
 313                 }
 314                 if (state->stack[i].slot_type[0] == STACK_ZERO)
 315                         verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
 316         }
 317         verbose(env, "\n");
 318 }
 319
 320 static int copy_stack_state(struct bpf_func_state *dst,
 321                             const struct bpf_func_state *src)
 322 {
 323         if (!src->stack)
 324                 return 0;
 325         if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
 326                 /* internal bug, make state invalid to reject the program */
 327                 memset(dst, 0, sizeof(*dst));
 328                 return -EFAULT;
 329         }
 330         memcpy(dst->stack, src->stack,
 331                sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
 332         return 0;
 333 }
 334
 335 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 336  * make it consume minimal amount of memory. check_stack_write() access from
 337  * the program calls into realloc_func_state() to grow the stack size.
 338  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 339  * which this function copies over. It points to previous bpf_verifier_state
 340  * which is never reallocated
 341  */
 342 static int realloc_func_state(struct bpf_func_state *state, int size,
 343                               bool copy_old)
 344 {
 345         u32 old_size = state->allocated_stack;
 346         struct bpf_stack_state *new_stack;
 347         int slot = size / BPF_REG_SIZE;
 348
 349         if (size <= old_size || !size) {
 350                 if (copy_old)
 351                         return 0;
 352                 state->allocated_stack = slot * BPF_REG_SIZE;
 353                 if (!size && old_size) {
 354                         kfree(state->stack);
 355                         state->stack = NULL;
 356                 }
 357                 return 0;
 358         }
 359         new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
 360                                   GFP_KERNEL);
 361         if (!new_stack)
 362                 return -ENOMEM;
 363         if (copy_old) {
 364                 if (state->stack)
 365                         memcpy(new_stack, state->stack,
 366                                sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
 367                 memset(new_stack + old_size / BPF_REG_SIZE, 0,
 368                        sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
 369         }
 370         state->allocated_stack = slot * BPF_REG_SIZE;
 371         kfree(state->stack);
 372         state->stack = new_stack;
 373         return 0;
 374 }
 375
 376 static void free_func_state(struct bpf_func_state *state)
 377 {
 378         kfree(state->stack);
 379         kfree(state);
 380 }
 381
 382 static void free_verifier_state(struct bpf_verifier_state *state,
 383                                 bool free_self)
 384 {
 385         int i;
 386
 387         for (i = 0; i <= state->curframe; i++) {
 388                 free_func_state(state->frame[i]);
 389                 state->frame[i] = NULL;
 390         }
 391         if (free_self)
 392                 kfree(state);
 393 }
 394
 395 /* copy verifier state from src to dst growing dst stack space
 396  * when necessary to accommodate larger src stack
 397  */
 398 static int copy_func_state(struct bpf_func_state *dst,
 399                            const struct bpf_func_state *src)
 400 {
 401         int err;
 402
 403         err = realloc_func_state(dst, src->allocated_stack, false);
 404         if (err)
 405                 return err;
 406         memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
 407         return copy_stack_state(dst, src);
 408 }
 409
 410 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 411                                const struct bpf_verifier_state *src)
 412 {
 413         struct bpf_func_state *dst;
 414         int i, err;
 415
 416         /* if dst has more stack frames then src frame, free them */
 417         for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
 418                 free_func_state(dst_state->frame[i]);
 419                 dst_state->frame[i] = NULL;
 420         }
 421         dst_state->curframe = src->curframe;
 422         dst_state->parent = src->parent;
 423         for (i = 0; i <= src->curframe; i++) {
 424                 dst = dst_state->frame[i];
 425                 if (!dst) {
 426                         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 427                         if (!dst)
 428                                 return -ENOMEM;
 429                         dst_state->frame[i] = dst;
 430                 }
 431                 err = copy_func_state(dst, src->frame[i]);
 432                 if (err)
 433                         return err;
 434         }
 435         return 0;
 436 }
 437
 438 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 439                      int *insn_idx)
 440 {
 441         struct bpf_verifier_state *cur = env->cur_state;
 442         struct bpf_verifier_stack_elem *elem, *head = env->head;
 443         int err;
 444
 445         if (env->head == NULL)
 446                 return -ENOENT;
 447
 448         if (cur) {
 449                 err = copy_verifier_state(cur, &head->st);
 450                 if (err)
 451                         return err;
 452         }
 453         if (insn_idx)
 454                 *insn_idx = head->insn_idx;
 455         if (prev_insn_idx)
 456                 *prev_insn_idx = head->prev_insn_idx;
 457         elem = head->next;
 458         free_verifier_state(&head->st, false);
 459         kfree(head);
 460         env->head = elem;
 461         env->stack_size--;
 462         return 0;
 463 }
 464
 465 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 466                                              int insn_idx, int prev_insn_idx)
 467 {
 468         struct bpf_verifier_state *cur = env->cur_state;
 469         struct bpf_verifier_stack_elem *elem;
 470         int err;
 471
 472         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 473         if (!elem)
 474                 goto err;
 475
 476         elem->insn_idx = insn_idx;
 477         elem->prev_insn_idx = prev_insn_idx;
 478         elem->next = env->head;
 479         env->head = elem;
 480         env->stack_size++;
 481         err = copy_verifier_state(&elem->st, cur);
 482         if (err)
 483                 goto err;
 484         if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
 485                 verbose(env, "BPF program is too complex\n");
 486                 goto err;
 487         }
 488         return &elem->st;
 489 err:
 490         /* pop all elements and return */
 491         while (!pop_stack(env, NULL, NULL));
 492         return NULL;
 493 }
 494
 495 #define CALLER_SAVED_REGS 6
 496 static const int caller_saved[CALLER_SAVED_REGS] = {
 497         BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 498 };
 499 #define CALLEE_SAVED_REGS 5
 500 static const int callee_saved[CALLEE_SAVED_REGS] = {
 501         BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
 502 };
 503
 504 static void __mark_reg_not_init(struct bpf_reg_state *reg);
 505
 506 /* Mark the unknown part of a register (variable offset or scalar value) as
 507  * known to have the value @imm.
 508  */
 509 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
 510 {
 511         reg->id = 0;
 512         reg->var_off = tnum_const(imm);
 513         reg->smin_value = (s64)imm;
 514         reg->smax_value = (s64)imm;
 515         reg->umin_value = imm;
 516         reg->umax_value = imm;
 517 }
 518
 519 /* Mark the 'variable offset' part of a register as zero.  This should be
 520  * used only on registers holding a pointer type.
 521  */
 522 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
 523 {
 524         __mark_reg_known(reg, 0);
 525 }
 526
 527 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
 528 {
 529         __mark_reg_known(reg, 0);
 530         reg->off = 0;
 531         reg->type = SCALAR_VALUE;
 532 }
 533
 534 static void mark_reg_known_zero(struct bpf_verifier_env *env,
 535                                 struct bpf_reg_state *regs, u32 regno)
 536 {
 537         if (WARN_ON(regno >= MAX_BPF_REG)) {
 538                 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
 539                 /* Something bad happened, let's kill all regs */
 540                 for (regno = 0; regno < MAX_BPF_REG; regno++)
 541                         __mark_reg_not_init(regs + regno);
 542                 return;
 543         }
 544         __mark_reg_known_zero(regs + regno);
 545 }
 546
 547 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
 548 {
 549         return type_is_pkt_pointer(reg->type);
 550 }
 551
 552 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
 553 {
 554         return reg_is_pkt_pointer(reg) ||
 555                reg->type == PTR_TO_PACKET_END;
 556 }
 557
 558 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
 559 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
 560                                     enum bpf_reg_type which)
 561 {
 562         /* The register can already have a range from prior markings.
 563          * This is fine as long as it hasn't been advanced from its
 564          * origin.
 565          */
 566         return reg->type == which &&
 567                reg->id == 0 &&
 568                reg->off == 0 &&
 569                tnum_equals_const(reg->var_off, 0);
 570 }
 571
 572 /* Attempts to improve min/max values based on var_off information */
 573 static void __update_reg_bounds(struct bpf_reg_state *reg)
 574 {
 575         /* min signed is max(sign bit) | min(other bits) */
 576         reg->smin_value = max_t(s64, reg->smin_value,
 577                                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
 578         /* max signed is min(sign bit) | max(other bits) */
 579         reg->smax_value = min_t(s64, reg->smax_value,
 580                                 reg->var_off.value | (reg->var_off.mask & S64_MAX));
 581         reg->umin_value = max(reg->umin_value, reg->var_off.value);
 582         reg->umax_value = min(reg->umax_value,
 583                               reg->var_off.value | reg->var_off.mask);
 584 }
 585
 586 /* Uses signed min/max values to inform unsigned, and vice-versa */
 587 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
 588 {
 589         /* Learn sign from signed bounds.
 590          * If we cannot cross the sign boundary, then signed and unsigned bounds
 591          * are the same, so combine.  This works even in the negative case, e.g.
 592          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
 593          */
 594         if (reg->smin_value >= 0 || reg->smax_value < 0) {
 595                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 596                                                           reg->umin_value);
 597                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 598                                                           reg->umax_value);
 599                 return;
 600         }
 601         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
 602          * boundary, so we must be careful.
 603          */
 604         if ((s64)reg->umax_value >= 0) {
 605                 /* Positive.  We can't learn anything from the smin, but smax
 606                  * is positive, hence safe.
 607                  */
 608                 reg->smin_value = reg->umin_value;
 609                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 610                                                           reg->umax_value);
 611         } else if ((s64)reg->umin_value < 0) {
 612                 /* Negative.  We can't learn anything from the smax, but smin
 613                  * is negative, hence safe.
 614                  */
 615                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 616                                                           reg->umin_value);
 617                 reg->smax_value = reg->umax_value;
 618         }
 619 }
 620
 621 /* Attempts to improve var_off based on unsigned min/max information */
 622 static void __reg_bound_offset(struct bpf_reg_state *reg)
 623 {
 624         reg->var_off = tnum_intersect(reg->var_off,
 625                                       tnum_range(reg->umin_value,
 626                                                  reg->umax_value));
 627 }
 628
 629 /* Reset the min/max bounds of a register */
 630 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
 631 {
 632         reg->smin_value = S64_MIN;
 633         reg->smax_value = S64_MAX;
 634         reg->umin_value = 0;
 635         reg->umax_value = U64_MAX;
 636 }
 637
 638 /* Mark a register as having a completely unknown (scalar) value. */
 639 static void __mark_reg_unknown(struct bpf_reg_state *reg)
 640 {
 641         reg->type = SCALAR_VALUE;
 642         reg->id = 0;
 643         reg->off = 0;
 644         reg->var_off = tnum_unknown;
 645         reg->frameno = 0;
 646         __mark_reg_unbounded(reg);
 647 }
 648
 649 static void mark_reg_unknown(struct bpf_verifier_env *env,
 650                              struct bpf_reg_state *regs, u32 regno)
 651 {
 652         if (WARN_ON(regno >= MAX_BPF_REG)) {
 653                 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
 654                 /* Something bad happened, let's kill all regs except FP */
 655                 for (regno = 0; regno < BPF_REG_FP; regno++)
 656                         __mark_reg_not_init(regs + regno);
 657                 return;
 658         }
 659         __mark_reg_unknown(regs + regno);
 660 }
 661
 662 static void __mark_reg_not_init(struct bpf_reg_state *reg)
 663 {
 664         __mark_reg_unknown(reg);
 665         reg->type = NOT_INIT;
 666 }
 667
 668 static void mark_reg_not_init(struct bpf_verifier_env *env,
 669                               struct bpf_reg_state *regs, u32 regno)
 670 {
 671         if (WARN_ON(regno >= MAX_BPF_REG)) {
 672                 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
 673                 /* Something bad happened, let's kill all regs except FP */
 674                 for (regno = 0; regno < BPF_REG_FP; regno++)
 675                         __mark_reg_not_init(regs + regno);
 676                 return;
 677         }
 678         __mark_reg_not_init(regs + regno);
 679 }
 680
 681 static void init_reg_state(struct bpf_verifier_env *env,
 682                            struct bpf_func_state *state)
 683 {
 684         struct bpf_reg_state *regs = state->regs;
 685         int i;
 686
 687         for (i = 0; i < MAX_BPF_REG; i++) {
 688                 mark_reg_not_init(env, regs, i);
 689                 regs[i].live = REG_LIVE_NONE;
 690         }
 691
 692         /* frame pointer */
 693         regs[BPF_REG_FP].type = PTR_TO_STACK;
 694         mark_reg_known_zero(env, regs, BPF_REG_FP);
 695         regs[BPF_REG_FP].frameno = state->frameno;
 696
 697         /* 1st arg to a function */
 698         regs[BPF_REG_1].type = PTR_TO_CTX;
 699         mark_reg_known_zero(env, regs, BPF_REG_1);
 700 }
 701
 702 #define BPF_MAIN_FUNC (-1)
 703 static void init_func_state(struct bpf_verifier_env *env,
 704                             struct bpf_func_state *state,
 705                             int callsite, int frameno, int subprogno)
 706 {
 707         state->callsite = callsite;
 708         state->frameno = frameno;
 709         state->subprogno = subprogno;
 710         init_reg_state(env, state);
 711 }
 712
 713 enum reg_arg_type {
 714         SRC_OP,         /* register is used as source operand */
 715         DST_OP,         /* register is used as destination operand */
 716         DST_OP_NO_MARK  /* same as above, check only, don't mark */
 717 };
 718
 719 static int cmp_subprogs(const void *a, const void *b)
 720 {
 721         return *(int *)a - *(int *)b;
 722 }
 723
 724 static int find_subprog(struct bpf_verifier_env *env, int off)
 725 {
 726         u32 *p;
 727
 728         p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
 729                     sizeof(env->subprog_starts[0]), cmp_subprogs);
 730         if (!p)
 731                 return -ENOENT;
 732         return p - env->subprog_starts;
 733
 734 }
 735
 736 static int add_subprog(struct bpf_verifier_env *env, int off)
 737 {
 738         int insn_cnt = env->prog->len;
 739         int ret;
 740
 741         if (off >= insn_cnt || off < 0) {
 742                 verbose(env, "call to invalid destination\n");
 743                 return -EINVAL;
 744         }
 745         ret = find_subprog(env, off);
 746         if (ret >= 0)
 747                 return 0;
 748         if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
 749                 verbose(env, "too many subprograms\n");
 750                 return -E2BIG;
 751         }
 752         env->subprog_starts[env->subprog_cnt++] = off;
 753         sort(env->subprog_starts, env->subprog_cnt,
 754              sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
 755         return 0;
 756 }
 757
 758 static int check_subprogs(struct bpf_verifier_env *env)
 759 {
 760         int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
 761         struct bpf_insn *insn = env->prog->insnsi;
 762         int insn_cnt = env->prog->len;
 763
 764         /* determine subprog starts. The end is one before the next starts */
 765         for (i = 0; i < insn_cnt; i++) {
 766                 if (insn[i].code != (BPF_JMP | BPF_CALL))
 767                         continue;
 768                 if (insn[i].src_reg != BPF_PSEUDO_CALL)
 769                         continue;
 770                 if (!env->allow_ptr_leaks) {
 771                         verbose(env, "function calls to other bpf functions are allowed for root only\n");
 772                         return -EPERM;
 773                 }
 774                 if (bpf_prog_is_dev_bound(env->prog->aux)) {
 775                         verbose(env, "funcation calls in offloaded programs are not supported yet\n");
 776                         return -EINVAL;
 777                 }
 778                 ret = add_subprog(env, i + insn[i].imm + 1);
 779                 if (ret < 0)
 780                         return ret;
 781         }
 782
 783         if (env->log.level > 1)
 784                 for (i = 0; i < env->subprog_cnt; i++)
 785                         verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
 786
 787         /* now check that all jumps are within the same subprog */
 788         subprog_start = 0;
 789         if (env->subprog_cnt == cur_subprog)
 790                 subprog_end = insn_cnt;
 791         else
 792                 subprog_end = env->subprog_starts[cur_subprog++];
 793         for (i = 0; i < insn_cnt; i++) {
 794                 u8 code = insn[i].code;
 795
 796                 if (BPF_CLASS(code) != BPF_JMP)
 797                         goto next;
 798                 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
 799                         goto next;
 800                 off = i + insn[i].off + 1;
 801                 if (off < subprog_start || off >= subprog_end) {
 802                         verbose(env, "jump out of range from insn %d to %d\n", i, off);
 803                         return -EINVAL;
 804                 }
 805 next:
 806                 if (i == subprog_end - 1) {
 807                         /* to avoid fall-through from one subprog into another
 808                          * the last insn of the subprog should be either exit
 809                          * or unconditional jump back
 810                          */
 811                         if (code != (BPF_JMP | BPF_EXIT) &&
 812                             code != (BPF_JMP | BPF_JA)) {
 813                                 verbose(env, "last insn is not an exit or jmp\n");
 814                                 return -EINVAL;
 815                         }
 816                         subprog_start = subprog_end;
 817                         if (env->subprog_cnt == cur_subprog)
 818                                 subprog_end = insn_cnt;
 819                         else
 820                                 subprog_end = env->subprog_starts[cur_subprog++];
 821                 }
 822         }
 823         return 0;
 824 }
 825
 826 struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
 827                                        const struct bpf_verifier_state *state,
 828                                        struct bpf_verifier_state *parent,
 829                                        u32 regno)
 830 {
 831         struct bpf_verifier_state *tmp = NULL;
 832
 833         /* 'parent' could be a state of caller and
 834          * 'state' could be a state of callee. In such case
 835          * parent->curframe < state->curframe
 836          * and it's ok for r1 - r5 registers
 837          *
 838          * 'parent' could be a callee's state after it bpf_exit-ed.
 839          * In such case parent->curframe > state->curframe
 840          * and it's ok for r0 only
 841          */
 842         if (parent->curframe == state->curframe ||
 843             (parent->curframe < state->curframe &&
 844              regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
 845             (parent->curframe > state->curframe &&
 846                regno == BPF_REG_0))
 847                 return parent;
 848
 849         if (parent->curframe > state->curframe &&
 850             regno >= BPF_REG_6) {
 851                 /* for callee saved regs we have to skip the whole chain
 852                  * of states that belong to callee and mark as LIVE_READ
 853                  * the registers before the call
 854                  */
 855                 tmp = parent;
 856                 while (tmp && tmp->curframe != state->curframe) {
 857                         tmp = tmp->parent;
 858                 }
 859                 if (!tmp)
 860                         goto bug;
 861                 parent = tmp;
 862         } else {
 863                 goto bug;
 864         }
 865         return parent;
 866 bug:
 867         verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
 868         verbose(env, "regno %d parent frame %d current frame %d\n",
 869                 regno, parent->curframe, state->curframe);
 870         return 0;
 871 }
 872
 873 static int mark_reg_read(struct bpf_verifier_env *env,
 874                          const struct bpf_verifier_state *state,
 875                          struct bpf_verifier_state *parent,
 876                          u32 regno)
 877 {
 878         bool writes = parent == state->parent; /* Observe write marks */
 879
 880         if (regno == BPF_REG_FP)
 881                 /* We don't need to worry about FP liveness because it's read-only */
 882                 return 0;
 883
 884         while (parent) {
 885                 /* if read wasn't screened by an earlier write ... */
 886                 if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
 887                         break;
 888                 parent = skip_callee(env, state, parent, regno);
 889                 if (!parent)
 890                         return -EFAULT;
 891                 /* ... then we depend on parent's value */
 892                 parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
 893                 state = parent;
 894                 parent = state->parent;
 895                 writes = true;
 896         }
 897         return 0;
 898 }
 899
 900 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
 901                          enum reg_arg_type t)
 902 {
 903         struct bpf_verifier_state *vstate = env->cur_state;
 904         struct bpf_func_state *state = vstate->frame[vstate->curframe];
 905         struct bpf_reg_state *regs = state->regs;
 906
 907         if (regno >= MAX_BPF_REG) {
 908                 verbose(env, "R%d is invalid\n", regno);
 909                 return -EINVAL;
 910         }
 911
 912         if (t == SRC_OP) {
 913                 /* check whether register used as source operand can be read */
 914                 if (regs[regno].type == NOT_INIT) {
 915                         verbose(env, "R%d !read_ok\n", regno);
 916                         return -EACCES;
 917                 }
 918                 return mark_reg_read(env, vstate, vstate->parent, regno);
 919         } else {
 920                 /* check whether register used as dest operand can be written to */
 921                 if (regno == BPF_REG_FP) {
 922                         verbose(env, "frame pointer is read only\n");
 923                         return -EACCES;
 924                 }
 925                 regs[regno].live |= REG_LIVE_WRITTEN;
 926                 if (t == DST_OP)
 927                         mark_reg_unknown(env, regs, regno);
 928         }
 929         return 0;
 930 }
 931
 932 static bool is_spillable_regtype(enum bpf_reg_type type)
 933 {
 934         switch (type) {
 935         case PTR_TO_MAP_VALUE:
 936         case PTR_TO_MAP_VALUE_OR_NULL:
 937         case PTR_TO_STACK:
 938         case PTR_TO_CTX:
 939         case PTR_TO_PACKET:
 940         case PTR_TO_PACKET_META:
 941         case PTR_TO_PACKET_END:
 942         case CONST_PTR_TO_MAP:
 943                 return true;
 944         default:
 945                 return false;
 946         }
 947 }
 948
 949 /* Does this register contain a constant zero? */
 950 static bool register_is_null(struct bpf_reg_state *reg)
 951 {
 952         return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
 953 }
 954
 955 /* check_stack_read/write functions track spill/fill of registers,
 956  * stack boundary and alignment are checked in check_mem_access()
 957  */
 958 static int check_stack_write(struct bpf_verifier_env *env,
 959                              struct bpf_func_state *state, /* func where register points to */
 960                              int off, int size, int value_regno)
 961 {
 962         struct bpf_func_state *cur; /* state of the current function */
 963         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
 964         enum bpf_reg_type type;
 965
 966         err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
 967                                  true);
 968         if (err)
 969                 return err;
 970         /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
 971          * so it's aligned access and [off, off + size) are within stack limits
 972          */
 973         if (!env->allow_ptr_leaks &&
 974             state->stack[spi].slot_type[0] == STACK_SPILL &&
 975             size != BPF_REG_SIZE) {
 976                 verbose(env, "attempt to corrupt spilled pointer on stack\n");
 977                 return -EACCES;
 978         }
 979
 980         cur = env->cur_state->frame[env->cur_state->curframe];
 981         if (value_regno >= 0 &&
 982             is_spillable_regtype((type = cur->regs[value_regno].type))) {
 983
 984                 /* register containing pointer is being spilled into stack */
 985                 if (size != BPF_REG_SIZE) {
 986                         verbose(env, "invalid size of register spill\n");
 987                         return -EACCES;
 988                 }
 989
 990                 if (state != cur && type == PTR_TO_STACK) {
 991                         verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
 992                         return -EINVAL;
 993                 }
 994
 995                 /* save register state */
 996                 state->stack[spi].spilled_ptr = cur->regs[value_regno];
 997                 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 998
 999                 for (i = 0; i < BPF_REG_SIZE; i++)
1000                         state->stack[spi].slot_type[i] = STACK_SPILL;
1001         } else {
1002                 u8 type = STACK_MISC;
1003
1004                 /* regular write of data into stack */
1005                 state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
1006
1007                 /* only mark the slot as written if all 8 bytes were written
1008                  * otherwise read propagation may incorrectly stop too soon
1009                  * when stack slots are partially written.
1010                  * This heuristic means that read propagation will be
1011                  * conservative, since it will add reg_live_read marks
1012                  * to stack slots all the way to first state when programs
1013                  * writes+reads less than 8 bytes
1014                  */
1015                 if (size == BPF_REG_SIZE)
1016                         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1017
1018                 /* when we zero initialize stack slots mark them as such */
1019                 if (value_regno >= 0 &&
1020                     register_is_null(&cur->regs[value_regno]))
1021                         type = STACK_ZERO;
1022
1023                 for (i = 0; i < size; i++)
1024                         state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
1025                                 type;
1026         }
1027         return 0;
1028 }
1029
1030 /* registers of every function are unique and mark_reg_read() propagates
1031  * the liveness in the following cases:
1032  * - from callee into caller for R1 - R5 that were used as arguments
1033  * - from caller into callee for R0 that used as result of the call
1034  * - from caller to the same caller skipping states of the callee for R6 - R9,
1035  *   since R6 - R9 are callee saved by implicit function prologue and
1036  *   caller's R6 != callee's R6, so when we propagate liveness up to
1037  *   parent states we need to skip callee states for R6 - R9.
1038  *
1039  * stack slot marking is different, since stacks of caller and callee are
1040  * accessible in both (since caller can pass a pointer to caller's stack to
1041  * callee which can pass it to another function), hence mark_stack_slot_read()
1042  * has to propagate the stack liveness to all parent states at given frame number.
1043  * Consider code:
1044  * f1() {
1045  *   ptr = fp - 8;
1046  *   *ptr = ctx;
1047  *   call f2 {
1048  *      .. = *ptr;
1049  *   }
1050  *   .. = *ptr;
1051  * }
1052  * First *ptr is reading from f1's stack and mark_stack_slot_read() has
1053  * to mark liveness at the f1's frame and not f2's frame.
1054  * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has
1055  * to propagate liveness to f2 states at f1's frame level and further into
1056  * f1 states at f1's frame level until write into that stack slot
1057  */
1058 static void mark_stack_slot_read(struct bpf_verifier_env *env,
1059                                  const struct bpf_verifier_state *state,
1060                                  struct bpf_verifier_state *parent,
1061                                  int slot, int frameno)
1062 {
1063         bool writes = parent == state->parent; /* Observe write marks */
1064
1065         while (parent) {
1066                 if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
1067                         /* since LIVE_WRITTEN mark is only done for full 8-byte
1068                          * write the read marks are conservative and parent
1069                          * state may not even have the stack allocated. In such case
1070                          * end the propagation, since the loop reached beginning
1071                          * of the function
1072                          */
1073                         break;
1074                 /* if read wasn't screened by an earlier write ... */
1075                 if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
1076                         break;
1077                 /* ... then we depend on parent's value */
1078                 parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
1079                 state = parent;
1080                 parent = state->parent;
1081                 writes = true;
1082         }
1083 }
1084
1085 static int check_stack_read(struct bpf_verifier_env *env,
1086                             struct bpf_func_state *reg_state /* func where register points to */,
1087                             int off, int size, int value_regno)
1088 {
1089         struct bpf_verifier_state *vstate = env->cur_state;
1090         struct bpf_func_state *state = vstate->frame[vstate->curframe];
1091         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1092         u8 *stype;
1093
1094         if (reg_state->allocated_stack <= slot) {
1095                 verbose(env, "invalid read from stack off %d+0 size %d\n",
1096                         off, size);
1097                 return -EACCES;
1098         }
1099         stype = reg_state->stack[spi].slot_type;
1100
1101         if (stype[0] == STACK_SPILL) {
1102                 if (size != BPF_REG_SIZE) {
1103                         verbose(env, "invalid size of register spill\n");
1104                         return -EACCES;
1105                 }
1106                 for (i = 1; i < BPF_REG_SIZE; i++) {
1107                         if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
1108                                 verbose(env, "corrupted spill memory\n");
1109                                 return -EACCES;
1110                         }
1111                 }
1112
1113                 if (value_regno >= 0) {
1114                         /* restore register state from stack */
1115                         state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
1116                         /* mark reg as written since spilled pointer state likely
1117                          * has its liveness marks cleared by is_state_visited()
1118                          * which resets stack/reg liveness for state transitions
1119                          */
1120                         state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1121                 }
1122                 mark_stack_slot_read(env, vstate, vstate->parent, spi,
1123                                      reg_state->frameno);
1124                 return 0;
1125         } else {
1126                 int zeros = 0;
1127
1128                 for (i = 0; i < size; i++) {
1129                         if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
1130                                 continue;
1131                         if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1132                                 zeros++;
1133                                 continue;
1134                         }
1135                         verbose(env, "invalid read from stack off %d+%d size %d\n",
1136                                 off, i, size);
1137                         return -EACCES;
1138                 }
1139                 mark_stack_slot_read(env, vstate, vstate->parent, spi,
1140                                      reg_state->frameno);
1141                 if (value_regno >= 0) {
1142                         if (zeros == size) {
1143                                 /* any size read into register is zero extended,
1144                                  * so the whole register == const_zero
1145                                  */
1146                                 __mark_reg_const_zero(&state->regs[value_regno]);
1147                         } else {
1148                                 /* have read misc data from the stack */
1149                                 mark_reg_unknown(env, state->regs, value_regno);
1150                         }
1151                         state->regs[value_regno].live |= REG_LIVE_WRITTEN;
1152                 }
1153                 return 0;
1154         }
1155 }
1156
1157 /* check read/write into map element returned by bpf_map_lookup_elem() */
1158 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
1159                               int size, bool zero_size_allowed)
1160 {
1161         struct bpf_reg_state *regs = cur_regs(env);
1162         struct bpf_map *map = regs[regno].map_ptr;
1163
1164         if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1165             off + size > map->value_size) {
1166                 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
1167                         map->value_size, off, size);
1168                 return -EACCES;
1169         }
1170         return 0;
1171 }
1172
1173 /* check read/write into a map element with possible variable offset */
1174 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
1175                             int off, int size, bool zero_size_allowed)
1176 {
1177         struct bpf_verifier_state *vstate = env->cur_state;
1178         struct bpf_func_state *state = vstate->frame[vstate->curframe];
1179         struct bpf_reg_state *reg = &state->regs[regno];
1180         int err;
1181
1182         /* We may have adjusted the register to this map value, so we
1183          * need to try adding each of min_value and max_value to off
1184          * to make sure our theoretical access will be safe.
1185          */
1186         if (env->log.level)
1187                 print_verifier_state(env, state);
1188         /* The minimum value is only important with signed
1189          * comparisons where we can't assume the floor of a
1190          * value is 0.  If we are using signed variables for our
1191          * index'es we need to make sure that whatever we use
1192          * will have a set floor within our range.
1193          */
1194         if (reg->smin_value < 0) {
1195                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1196                         regno);
1197                 return -EACCES;
1198         }
1199         err = __check_map_access(env, regno, reg->smin_value + off, size,
1200                                  zero_size_allowed);
1201         if (err) {
1202                 verbose(env, "R%d min value is outside of the array range\n",
1203                         regno);
1204                 return err;
1205         }
1206
1207         /* If we haven't set a max value then we need to bail since we can't be
1208          * sure we won't do bad things.
1209          * If reg->umax_value + off could overflow, treat that as unbounded too.
1210          */
1211         if (reg->umax_value >= BPF_MAX_VAR_OFF) {
1212                 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
1213                         regno);
1214                 return -EACCES;
1215         }
1216         err = __check_map_access(env, regno, reg->umax_value + off, size,
1217                                  zero_size_allowed);
1218         if (err)
1219                 verbose(env, "R%d max value is outside of the array range\n",
1220                         regno);
1221         return err;
1222 }
1223
1224 #define MAX_PACKET_OFF 0xffff
1225
1226 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
1227                                        const struct bpf_call_arg_meta *meta,
1228                                        enum bpf_access_type t)
1229 {
1230         switch (env->prog->type) {
1231         case BPF_PROG_TYPE_LWT_IN:
1232         case BPF_PROG_TYPE_LWT_OUT:
1233                 /* dst_input() and dst_output() can't write for now */
1234                 if (t == BPF_WRITE)
1235                         return false;
1236                 /* fallthrough */
1237         case BPF_PROG_TYPE_SCHED_CLS:
1238         case BPF_PROG_TYPE_SCHED_ACT:
1239         case BPF_PROG_TYPE_XDP:
1240         case BPF_PROG_TYPE_LWT_XMIT:
1241         case BPF_PROG_TYPE_SK_SKB:
1242                 if (meta)
1243                         return meta->pkt_access;
1244
1245                 env->seen_direct_write = true;
1246                 return true;
1247         default:
1248                 return false;
1249         }
1250 }
1251
1252 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
1253                                  int off, int size, bool zero_size_allowed)
1254 {
1255         struct bpf_reg_state *regs = cur_regs(env);
1256         struct bpf_reg_state *reg = &regs[regno];
1257
1258         if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1259             (u64)off + size > reg->range) {
1260                 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
1261                         off, size, regno, reg->id, reg->off, reg->range);
1262                 return -EACCES;
1263         }
1264         return 0;
1265 }
1266
1267 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
1268                                int size, bool zero_size_allowed)
1269 {
1270         struct bpf_reg_state *regs = cur_regs(env);
1271         struct bpf_reg_state *reg = &regs[regno];
1272         int err;
1273
1274         /* We may have added a variable offset to the packet pointer; but any
1275          * reg->range we have comes after that.  We are only checking the fixed
1276          * offset.
1277          */
1278
1279         /* We don't allow negative numbers, because we aren't tracking enough
1280          * detail to prove they're safe.
1281          */
1282         if (reg->smin_value < 0) {
1283                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1284                         regno);
1285                 return -EACCES;
1286         }
1287         err = __check_packet_access(env, regno, off, size, zero_size_allowed);
1288         if (err) {
1289                 verbose(env, "R%d offset is outside of the packet\n", regno);
1290                 return err;
1291         }
1292         return err;
1293 }
1294
1295 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
1296 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
1297                             enum bpf_access_type t, enum bpf_reg_type *reg_type)
1298 {
1299         struct bpf_insn_access_aux info = {
1300                 .reg_type = *reg_type,
1301         };
1302
1303         if (env->ops->is_valid_access &&
1304             env->ops->is_valid_access(off, size, t, &info)) {
1305                 /* A non zero info.ctx_field_size indicates that this field is a
1306                  * candidate for later verifier transformation to load the whole
1307                  * field and then apply a mask when accessed with a narrower
1308                  * access than actual ctx access size. A zero info.ctx_field_size
1309                  * will only allow for whole field access and rejects any other
1310                  * type of narrower access.
1311                  */
1312                 *reg_type = info.reg_type;
1313
1314                 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
1315                 /* remember the offset of last byte accessed in ctx */
1316                 if (env->prog->aux->max_ctx_offset < off + size)
1317                         env->prog->aux->max_ctx_offset = off + size;
1318                 return 0;
1319         }
1320
1321         verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
1322         return -EACCES;
1323 }
1324
1325 static bool __is_pointer_value(bool allow_ptr_leaks,
1326                                const struct bpf_reg_state *reg)
1327 {
1328         if (allow_ptr_leaks)
1329                 return false;
1330
1331         return reg->type != SCALAR_VALUE;
1332 }
1333
1334 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1335 {
1336         return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
1337 }
1338
1339 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1340                                    const struct bpf_reg_state *reg,
1341                                    int off, int size, bool strict)
1342 {
1343         struct tnum reg_off;
1344         int ip_align;
1345
1346         /* Byte size accesses are always allowed. */
1347         if (!strict || size == 1)
1348                 return 0;
1349
1350         /* For platforms that do not have a Kconfig enabling
1351          * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1352          * NET_IP_ALIGN is universally set to '2'.  And on platforms
1353          * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1354          * to this code only in strict mode where we want to emulate
1355          * the NET_IP_ALIGN==2 checking.  Therefore use an
1356          * unconditional IP align value of '2'.
1357          */
1358         ip_align = 2;
1359
1360         reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1361         if (!tnum_is_aligned(reg_off, size)) {
1362                 char tn_buf[48];
1363
1364                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1365                 verbose(env,
1366                         "misaligned packet access off %d+%s+%d+%d size %d\n",
1367                         ip_align, tn_buf, reg->off, off, size);
1368                 return -EACCES;
1369         }
1370
1371         return 0;
1372 }
1373
1374 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1375                                        const struct bpf_reg_state *reg,
1376                                        const char *pointer_desc,
1377                                        int off, int size, bool strict)
1378 {
1379         struct tnum reg_off;
1380
1381         /* Byte size accesses are always allowed. */
1382         if (!strict || size == 1)
1383                 return 0;
1384
1385         reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1386         if (!tnum_is_aligned(reg_off, size)) {
1387                 char tn_buf[48];
1388
1389                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1390                 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
1391                         pointer_desc, tn_buf, reg->off, off, size);
1392                 return -EACCES;
1393         }
1394
1395         return 0;
1396 }
1397
1398 static int check_ptr_alignment(struct bpf_verifier_env *env,
1399                                const struct bpf_reg_state *reg,
1400                                int off, int size)
1401 {
1402         bool strict = env->strict_alignment;
1403         const char *pointer_desc = "";
1404
1405         switch (reg->type) {
1406         case PTR_TO_PACKET:
1407         case PTR_TO_PACKET_META:
1408                 /* Special case, because of NET_IP_ALIGN. Given metadata sits
1409                  * right in front, treat it the very same way.
1410                  */
1411                 return check_pkt_ptr_alignment(env, reg, off, size, strict);
1412         case PTR_TO_MAP_VALUE:
1413                 pointer_desc = "value ";
1414                 break;
1415         case PTR_TO_CTX:
1416                 pointer_desc = "context ";
1417                 break;
1418         case PTR_TO_STACK:
1419                 pointer_desc = "stack ";
1420                 break;
1421         default:
1422                 break;
1423         }
1424         return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1425                                            strict);
1426 }
1427
1428 static int update_stack_depth(struct bpf_verifier_env *env,
1429                               const struct bpf_func_state *func,
1430                               int off)
1431 {
1432         u16 stack = env->subprog_stack_depth[func->subprogno], total = 0;
1433         struct bpf_verifier_state *cur = env->cur_state;
1434         int i;
1435
1436         if (stack >= -off)
1437                 return 0;
1438
1439         /* update known max for given subprogram */
1440         env->subprog_stack_depth[func->subprogno] = -off;
1441
1442         /* compute the total for current call chain */
1443         for (i = 0; i <= cur->curframe; i++) {
1444                 u32 depth = env->subprog_stack_depth[cur->frame[i]->subprogno];
1445
1446                 /* round up to 32-bytes, since this is granularity
1447                  * of interpreter stack sizes
1448                  */
1449                 depth = round_up(depth, 32);
1450                 total += depth;
1451         }
1452
1453         if (total > MAX_BPF_STACK) {
1454                 verbose(env, "combined stack size of %d calls is %d. Too large\n",
1455                         cur->curframe, total);
1456                 return -EACCES;
1457         }
1458         return 0;
1459 }
1460
1461 static int get_callee_stack_depth(struct bpf_verifier_env *env,
1462                                   const struct bpf_insn *insn, int idx)
1463 {
1464         int start = idx + insn->imm + 1, subprog;
1465
1466         subprog = find_subprog(env, start);
1467         if (subprog < 0) {
1468                 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1469                           start);
1470                 return -EFAULT;
1471         }
1472         subprog++;
1473         return env->subprog_stack_depth[subprog];
1474 }
1475
1476 /* check whether memory at (regno + off) is accessible for t = (read | write)
1477  * if t==write, value_regno is a register which value is stored into memory
1478  * if t==read, value_regno is a register which will receive the value from memory
1479  * if t==write && value_regno==-1, some unknown value is stored into memory
1480  * if t==read && value_regno==-1, don't care what we read from memory
1481  */
1482 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off,
1483                             int bpf_size, enum bpf_access_type t,
1484                             int value_regno)
1485 {
1486         struct bpf_reg_state *regs = cur_regs(env);
1487         struct bpf_reg_state *reg = regs + regno;
1488         struct bpf_func_state *state;
1489         int size, err = 0;
1490
1491         size = bpf_size_to_bytes(bpf_size);
1492         if (size < 0)
1493                 return size;
1494
1495         /* alignment checks will add in reg->off themselves */
1496         err = check_ptr_alignment(env, reg, off, size);
1497         if (err)
1498                 return err;
1499
1500         /* for access checks, reg->off is just part of off */
1501         off += reg->off;
1502
1503         if (reg->type == PTR_TO_MAP_VALUE) {
1504                 if (t == BPF_WRITE && value_regno >= 0 &&
1505                     is_pointer_value(env, value_regno)) {
1506                         verbose(env, "R%d leaks addr into map\n", value_regno);
1507                         return -EACCES;
1508                 }
1509
1510                 err = check_map_access(env, regno, off, size, false);
1511                 if (!err && t == BPF_READ && value_regno >= 0)
1512                         mark_reg_unknown(env, regs, value_regno);
1513
1514         } else if (reg->type == PTR_TO_CTX) {
1515                 enum bpf_reg_type reg_type = SCALAR_VALUE;
1516
1517                 if (t == BPF_WRITE && value_regno >= 0 &&
1518                     is_pointer_value(env, value_regno)) {
1519                         verbose(env, "R%d leaks addr into ctx\n", value_regno);
1520                         return -EACCES;
1521                 }
1522                 /* ctx accesses must be at a fixed offset, so that we can
1523                  * determine what type of data were returned.
1524                  */
1525                 if (reg->off) {
1526                         verbose(env,
1527                                 "dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
1528                                 regno, reg->off, off - reg->off);
1529                         return -EACCES;
1530                 }
1531                 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1532                         char tn_buf[48];
1533
1534                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1535                         verbose(env,
1536                                 "variable ctx access var_off=%s off=%d size=%d",
1537                                 tn_buf, off, size);
1538                         return -EACCES;
1539                 }
1540                 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
1541                 if (!err && t == BPF_READ && value_regno >= 0) {
1542                         /* ctx access returns either a scalar, or a
1543                          * PTR_TO_PACKET[_META,_END]. In the latter
1544                          * case, we know the offset is zero.
1545                          */
1546                         if (reg_type == SCALAR_VALUE)
1547                                 mark_reg_unknown(env, regs, value_regno);
1548                         else
1549                                 mark_reg_known_zero(env, regs,
1550                                                     value_regno);
1551                         regs[value_regno].id = 0;
1552                         regs[value_regno].off = 0;
1553                         regs[value_regno].range = 0;
1554                         regs[value_regno].type = reg_type;
1555                 }
1556
1557         } else if (reg->type == PTR_TO_STACK) {
1558                 /* stack accesses must be at a fixed offset, so that we can
1559                  * determine what type of data were returned.
1560                  * See check_stack_read().
1561                  */
1562                 if (!tnum_is_const(reg->var_off)) {
1563                         char tn_buf[48];
1564
1565                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1566                         verbose(env, "variable stack access var_off=%s off=%d size=%d",
1567                                 tn_buf, off, size);
1568                         return -EACCES;
1569                 }
1570                 off += reg->var_off.value;
1571                 if (off >= 0 || off < -MAX_BPF_STACK) {
1572                         verbose(env, "invalid stack off=%d size=%d\n", off,
1573                                 size);
1574                         return -EACCES;
1575                 }
1576
1577                 state = func(env, reg);
1578                 err = update_stack_depth(env, state, off);
1579                 if (err)
1580                         return err;
1581
1582                 if (t == BPF_WRITE)
1583                         err = check_stack_write(env, state, off, size,
1584                                                 value_regno);
1585                 else
1586                         err = check_stack_read(env, state, off, size,
1587                                                value_regno);
1588         } else if (reg_is_pkt_pointer(reg)) {
1589                 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
1590                         verbose(env, "cannot write into packet\n");
1591                         return -EACCES;
1592                 }
1593                 if (t == BPF_WRITE && value_regno >= 0 &&
1594                     is_pointer_value(env, value_regno)) {
1595                         verbose(env, "R%d leaks addr into packet\n",
1596                                 value_regno);
1597                         return -EACCES;
1598                 }
1599                 err = check_packet_access(env, regno, off, size, false);
1600                 if (!err && t == BPF_READ && value_regno >= 0)
1601                         mark_reg_unknown(env, regs, value_regno);
1602         } else {
1603                 verbose(env, "R%d invalid mem access '%s'\n", regno,
1604                         reg_type_str[reg->type]);
1605                 return -EACCES;
1606         }
1607
1608         if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1609             regs[value_regno].type == SCALAR_VALUE) {
1610                 /* b/h/w load zero-extends, mark upper bits as known 0 */
1611                 regs[value_regno].var_off =
1612                         tnum_cast(regs[value_regno].var_off, size);
1613                 __update_reg_bounds(&regs[value_regno]);
1614         }
1615         return err;
1616 }
1617
1618 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
1619 {
1620         int err;
1621
1622         if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
1623             insn->imm != 0) {
1624                 verbose(env, "BPF_XADD uses reserved fields\n");
1625                 return -EINVAL;
1626         }
1627
1628         /* check src1 operand */
1629         err = check_reg_arg(env, insn->src_reg, SRC_OP);
1630         if (err)
1631                 return err;
1632
1633         /* check src2 operand */
1634         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
1635         if (err)
1636                 return err;
1637
1638         if (is_pointer_value(env, insn->src_reg)) {
1639                 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
1640                 return -EACCES;
1641         }
1642
1643         /* check whether atomic_add can read the memory */
1644         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1645                                BPF_SIZE(insn->code), BPF_READ, -1);
1646         if (err)
1647                 return err;
1648
1649         /* check whether atomic_add can write into the same memory */
1650         return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1651                                 BPF_SIZE(insn->code), BPF_WRITE, -1);
1652 }
1653
1654 /* when register 'regno' is passed into function that will read 'access_size'
1655  * bytes from that pointer, make sure that it's within stack boundary
1656  * and all elements of stack are initialized.
1657  * Unlike most pointer bounds-checking functions, this one doesn't take an
1658  * 'off' argument, so it has to add in reg->off itself.
1659  */
1660 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1661                                 int access_size, bool zero_size_allowed,
1662                                 struct bpf_call_arg_meta *meta)
1663 {
1664         struct bpf_reg_state *reg = cur_regs(env) + regno;
1665         struct bpf_func_state *state = func(env, reg);
1666         int off, i, slot, spi;
1667
1668         if (reg->type != PTR_TO_STACK) {
1669                 /* Allow zero-byte read from NULL, regardless of pointer type */
1670                 if (zero_size_allowed && access_size == 0 &&
1671                     register_is_null(reg))
1672                         return 0;
1673
1674                 verbose(env, "R%d type=%s expected=%s\n", regno,
1675                         reg_type_str[reg->type],
1676                         reg_type_str[PTR_TO_STACK]);
1677                 return -EACCES;
1678         }
1679
1680         /* Only allow fixed-offset stack reads */
1681         if (!tnum_is_const(reg->var_off)) {
1682                 char tn_buf[48];
1683
1684                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1685                 verbose(env, "invalid variable stack read R%d var_off=%s\n",
1686                         regno, tn_buf);
1687         }
1688         off = reg->off + reg->var_off.value;
1689         if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
1690             access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
1691                 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
1692                         regno, off, access_size);
1693                 return -EACCES;
1694         }
1695
1696         if (meta && meta->raw_mode) {
1697                 meta->access_size = access_size;
1698                 meta->regno = regno;
1699                 return 0;
1700         }
1701
1702         for (i = 0; i < access_size; i++) {
1703                 u8 *stype;
1704
1705                 slot = -(off + i) - 1;
1706                 spi = slot / BPF_REG_SIZE;
1707                 if (state->allocated_stack <= slot)
1708                         goto err;
1709                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
1710                 if (*stype == STACK_MISC)
1711                         goto mark;
1712                 if (*stype == STACK_ZERO) {
1713                         /* helper can write anything into the stack */
1714                         *stype = STACK_MISC;
1715                         goto mark;
1716                 }
1717 err:
1718                 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
1719                         off, i, access_size);
1720                 return -EACCES;
1721 mark:
1722                 /* reading any byte out of 8-byte 'spill_slot' will cause
1723                  * the whole slot to be marked as 'read'
1724                  */
1725                 mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
1726                                      spi, state->frameno);
1727         }
1728         return update_stack_depth(env, state, off);
1729 }
1730
1731 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
1732                                    int access_size, bool zero_size_allowed,
1733                                    struct bpf_call_arg_meta *meta)
1734 {
1735         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1736
1737         switch (reg->type) {
1738         case PTR_TO_PACKET:
1739         case PTR_TO_PACKET_META:
1740                 return check_packet_access(env, regno, reg->off, access_size,
1741                                            zero_size_allowed);
1742         case PTR_TO_MAP_VALUE:
1743                 return check_map_access(env, regno, reg->off, access_size,
1744                                         zero_size_allowed);
1745         default: /* scalar_value|ptr_to_stack or invalid ptr */
1746                 return check_stack_boundary(env, regno, access_size,
1747                                             zero_size_allowed, meta);
1748         }
1749 }
1750
1751 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
1752                           enum bpf_arg_type arg_type,
1753                           struct bpf_call_arg_meta *meta)
1754 {
1755         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1756         enum bpf_reg_type expected_type, type = reg->type;
1757         int err = 0;
1758
1759         if (arg_type == ARG_DONTCARE)
1760                 return 0;
1761
1762         err = check_reg_arg(env, regno, SRC_OP);
1763         if (err)
1764                 return err;
1765
1766         if (arg_type == ARG_ANYTHING) {
1767                 if (is_pointer_value(env, regno)) {
1768                         verbose(env, "R%d leaks addr into helper function\n",
1769                                 regno);
1770                         return -EACCES;
1771                 }
1772                 return 0;
1773         }
1774
1775         if (type_is_pkt_pointer(type) &&
1776             !may_access_direct_pkt_data(env, meta, BPF_READ)) {
1777                 verbose(env, "helper access to the packet is not allowed\n");
1778                 return -EACCES;
1779         }
1780
1781         if (arg_type == ARG_PTR_TO_MAP_KEY ||
1782             arg_type == ARG_PTR_TO_MAP_VALUE) {
1783                 expected_type = PTR_TO_STACK;
1784                 if (!type_is_pkt_pointer(type) &&
1785                     type != expected_type)
1786                         goto err_type;
1787         } else if (arg_type == ARG_CONST_SIZE ||
1788                    arg_type == ARG_CONST_SIZE_OR_ZERO) {
1789                 expected_type = SCALAR_VALUE;
1790                 if (type != expected_type)
1791                         goto err_type;
1792         } else if (arg_type == ARG_CONST_MAP_PTR) {
1793                 expected_type = CONST_PTR_TO_MAP;
1794                 if (type != expected_type)
1795                         goto err_type;
1796         } else if (arg_type == ARG_PTR_TO_CTX) {
1797                 expected_type = PTR_TO_CTX;
1798                 if (type != expected_type)
1799                         goto err_type;
1800         } else if (arg_type == ARG_PTR_TO_MEM ||
1801                    arg_type == ARG_PTR_TO_MEM_OR_NULL ||
1802                    arg_type == ARG_PTR_TO_UNINIT_MEM) {
1803                 expected_type = PTR_TO_STACK;
1804                 /* One exception here. In case function allows for NULL to be
1805                  * passed in as argument, it's a SCALAR_VALUE type. Final test
1806                  * happens during stack boundary checking.
1807                  */
1808                 if (register_is_null(reg) &&
1809                     arg_type == ARG_PTR_TO_MEM_OR_NULL)
1810                         /* final test in check_stack_boundary() */;
1811                 else if (!type_is_pkt_pointer(type) &&
1812                          type != PTR_TO_MAP_VALUE &&
1813                          type != expected_type)
1814                         goto err_type;
1815                 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
1816         } else {
1817                 verbose(env, "unsupported arg_type %d\n", arg_type);
1818                 return -EFAULT;
1819         }
1820
1821         if (arg_type == ARG_CONST_MAP_PTR) {
1822                 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
1823                 meta->map_ptr = reg->map_ptr;
1824         } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
1825                 /* bpf_map_xxx(..., map_ptr, ..., key) call:
1826                  * check that [key, key + map->key_size) are within
1827                  * stack limits and initialized
1828                  */
1829                 if (!meta->map_ptr) {
1830                         /* in function declaration map_ptr must come before
1831                          * map_key, so that it's verified and known before
1832                          * we have to check map_key here. Otherwise it means
1833                          * that kernel subsystem misconfigured verifier
1834                          */
1835                         verbose(env, "invalid map_ptr to access map->key\n");
1836                         return -EACCES;
1837                 }
1838                 if (type_is_pkt_pointer(type))
1839                         err = check_packet_access(env, regno, reg->off,
1840                                                   meta->map_ptr->key_size,
1841                                                   false);
1842                 else
1843                         err = check_stack_boundary(env, regno,
1844                                                    meta->map_ptr->key_size,
1845                                                    false, NULL);
1846         } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
1847                 /* bpf_map_xxx(..., map_ptr, ..., value) call:
1848                  * check [value, value + map->value_size) validity
1849                  */
1850                 if (!meta->map_ptr) {
1851                         /* kernel subsystem misconfigured verifier */
1852                         verbose(env, "invalid map_ptr to access map->value\n");
1853                         return -EACCES;
1854                 }
1855                 if (type_is_pkt_pointer(type))
1856                         err = check_packet_access(env, regno, reg->off,
1857                                                   meta->map_ptr->value_size,
1858                                                   false);
1859                 else
1860                         err = check_stack_boundary(env, regno,
1861                                                    meta->map_ptr->value_size,
1862                                                    false, NULL);
1863         } else if (arg_type == ARG_CONST_SIZE ||
1864                    arg_type == ARG_CONST_SIZE_OR_ZERO) {
1865                 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
1866
1867                 /* bpf_xxx(..., buf, len) call will access 'len' bytes
1868                  * from stack pointer 'buf'. Check it
1869                  * note: regno == len, regno - 1 == buf
1870                  */
1871                 if (regno == 0) {
1872                         /* kernel subsystem misconfigured verifier */
1873                         verbose(env,
1874                                 "ARG_CONST_SIZE cannot be first argument\n");
1875                         return -EACCES;
1876                 }
1877
1878                 /* The register is SCALAR_VALUE; the access check
1879                  * happens using its boundaries.
1880                  */
1881
1882                 if (!tnum_is_const(reg->var_off))
1883                         /* For unprivileged variable accesses, disable raw
1884                          * mode so that the program is required to
1885                          * initialize all the memory that the helper could
1886                          * just partially fill up.
1887                          */
1888                         meta = NULL;
1889
1890                 if (reg->smin_value < 0) {
1891                         verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
1892                                 regno);
1893                         return -EACCES;
1894                 }
1895
1896                 if (reg->umin_value == 0) {
1897                         err = check_helper_mem_access(env, regno - 1, 0,
1898                                                       zero_size_allowed,
1899                                                       meta);
1900                         if (err)
1901                                 return err;
1902                 }
1903
1904                 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
1905                         verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
1906                                 regno);
1907                         return -EACCES;
1908                 }
1909                 err = check_helper_mem_access(env, regno - 1,
1910                                               reg->umax_value,
1911                                               zero_size_allowed, meta);
1912         }
1913
1914         return err;
1915 err_type:
1916         verbose(env, "R%d type=%s expected=%s\n", regno,
1917                 reg_type_str[type], reg_type_str[expected_type]);
1918         return -EACCES;
1919 }
1920
1921 static int check_map_func_compatibility(struct bpf_verifier_env *env,
1922                                         struct bpf_map *map, int func_id)
1923 {
1924         if (!map)
1925                 return 0;
1926
1927         /* We need a two way check, first is from map perspective ... */
1928         switch (map->map_type) {
1929         case BPF_MAP_TYPE_PROG_ARRAY:
1930                 if (func_id != BPF_FUNC_tail_call)
1931                         goto error;
1932                 break;
1933         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
1934                 if (func_id != BPF_FUNC_perf_event_read &&
1935                     func_id != BPF_FUNC_perf_event_output &&
1936                     func_id != BPF_FUNC_perf_event_read_value)
1937                         goto error;
1938                 break;
1939         case BPF_MAP_TYPE_STACK_TRACE:
1940                 if (func_id != BPF_FUNC_get_stackid)
1941                         goto error;
1942                 break;
1943         case BPF_MAP_TYPE_CGROUP_ARRAY:
1944                 if (func_id != BPF_FUNC_skb_under_cgroup &&
1945                     func_id != BPF_FUNC_current_task_under_cgroup)
1946                         goto error;
1947                 break;
1948         /* devmap returns a pointer to a live net_device ifindex that we cannot
1949          * allow to be modified from bpf side. So do not allow lookup elements
1950          * for now.
1951          */
1952         case BPF_MAP_TYPE_DEVMAP:
1953                 if (func_id != BPF_FUNC_redirect_map)
1954                         goto error;
1955                 break;
1956         /* Restrict bpf side of cpumap, open when use-cases appear */
1957         case BPF_MAP_TYPE_CPUMAP:
1958                 if (func_id != BPF_FUNC_redirect_map)
1959                         goto error;
1960                 break;
1961         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
1962         case BPF_MAP_TYPE_HASH_OF_MAPS:
1963                 if (func_id != BPF_FUNC_map_lookup_elem)
1964                         goto error;
1965                 break;
1966         case BPF_MAP_TYPE_SOCKMAP:
1967                 if (func_id != BPF_FUNC_sk_redirect_map &&
1968                     func_id != BPF_FUNC_sock_map_update &&
1969                     func_id != BPF_FUNC_map_delete_elem)
1970                         goto error;
1971                 break;
1972         default:
1973                 break;
1974         }
1975
1976         /* ... and second from the function itself. */
1977         switch (func_id) {
1978         case BPF_FUNC_tail_call:
1979                 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
1980                         goto error;
1981                 if (env->subprog_cnt) {
1982                         verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
1983                         return -EINVAL;
1984                 }
1985                 break;
1986         case BPF_FUNC_perf_event_read:
1987         case BPF_FUNC_perf_event_output:
1988         case BPF_FUNC_perf_event_read_value:
1989                 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
1990                         goto error;
1991                 break;
1992         case BPF_FUNC_get_stackid:
1993                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
1994                         goto error;
1995                 break;
1996         case BPF_FUNC_current_task_under_cgroup:
1997         case BPF_FUNC_skb_under_cgroup:
1998                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
1999                         goto error;
2000                 break;
2001         case BPF_FUNC_redirect_map:
2002                 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
2003                     map->map_type != BPF_MAP_TYPE_CPUMAP)
2004                         goto error;
2005                 break;
2006         case BPF_FUNC_sk_redirect_map:
2007                 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2008                         goto error;
2009                 break;
2010         case BPF_FUNC_sock_map_update:
2011                 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2012                         goto error;
2013                 break;
2014         default:
2015                 break;
2016         }
2017
2018         return 0;
2019 error:
2020         verbose(env, "cannot pass map_type %d into func %s#%d\n",
2021                 map->map_type, func_id_name(func_id), func_id);
2022         return -EINVAL;
2023 }
2024
2025 static int check_raw_mode(const struct bpf_func_proto *fn)
2026 {
2027         int count = 0;
2028
2029         if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
2030                 count++;
2031         if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
2032                 count++;
2033         if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
2034                 count++;
2035         if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
2036                 count++;
2037         if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
2038                 count++;
2039
2040         return count > 1 ? -EINVAL : 0;
2041 }
2042
2043 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2044  * are now invalid, so turn them into unknown SCALAR_VALUE.
2045  */
2046 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2047                                      struct bpf_func_state *state)
2048 {
2049         struct bpf_reg_state *regs = state->regs, *reg;
2050         int i;
2051
2052         for (i = 0; i < MAX_BPF_REG; i++)
2053                 if (reg_is_pkt_pointer_any(&regs[i]))
2054                         mark_reg_unknown(env, regs, i);
2055
2056         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
2057                 if (state->stack[i].slot_type[0] != STACK_SPILL)
2058                         continue;
2059                 reg = &state->stack[i].spilled_ptr;
2060                 if (reg_is_pkt_pointer_any(reg))
2061                         __mark_reg_unknown(reg);
2062         }
2063 }
2064
2065 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2066 {
2067         struct bpf_verifier_state *vstate = env->cur_state;
2068         int i;
2069
2070         for (i = 0; i <= vstate->curframe; i++)
2071                 __clear_all_pkt_pointers(env, vstate->frame[i]);
2072 }
2073
2074 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2075                            int *insn_idx)
2076 {
2077         struct bpf_verifier_state *state = env->cur_state;
2078         struct bpf_func_state *caller, *callee;
2079         int i, subprog, target_insn;
2080
2081         if (state->curframe >= MAX_CALL_FRAMES) {
2082                 verbose(env, "the call stack of %d frames is too deep\n",
2083                         state->curframe);
2084                 return -E2BIG;
2085         }
2086
2087         target_insn = *insn_idx + insn->imm;
2088         subprog = find_subprog(env, target_insn + 1);
2089         if (subprog < 0) {
2090                 verbose(env, "verifier bug. No program starts at insn %d\n",
2091                         target_insn + 1);
2092                 return -EFAULT;
2093         }
2094
2095         caller = state->frame[state->curframe];
2096         if (state->frame[state->curframe + 1]) {
2097                 verbose(env, "verifier bug. Frame %d already allocated\n",
2098                         state->curframe + 1);
2099                 return -EFAULT;
2100         }
2101
2102         callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2103         if (!callee)
2104                 return -ENOMEM;
2105         state->frame[state->curframe + 1] = callee;
2106
2107         /* callee cannot access r0, r6 - r9 for reading and has to write
2108          * into its own stack before reading from it.
2109          * callee can read/write into caller's stack
2110          */
2111         init_func_state(env, callee,
2112                         /* remember the callsite, it will be used by bpf_exit */
2113                         *insn_idx /* callsite */,
2114                         state->curframe + 1 /* frameno within this callchain */,
2115                         subprog + 1 /* subprog number within this prog */);
2116
2117         /* copy r1 - r5 args that callee can access */
2118         for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2119                 callee->regs[i] = caller->regs[i];
2120
2121         /* after the call regsiters r0 - r5 were scratched */
2122         for (i = 0; i < CALLER_SAVED_REGS; i++) {
2123                 mark_reg_not_init(env, caller->regs, caller_saved[i]);
2124                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2125         }
2126
2127         /* only increment it after check_reg_arg() finished */
2128         state->curframe++;
2129
2130         /* and go analyze first insn of the callee */
2131         *insn_idx = target_insn;
2132
2133         if (env->log.level) {
2134                 verbose(env, "caller:\n");
2135                 print_verifier_state(env, caller);
2136                 verbose(env, "callee:\n");
2137                 print_verifier_state(env, callee);
2138         }
2139         return 0;
2140 }
2141
2142 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2143 {
2144         struct bpf_verifier_state *state = env->cur_state;
2145         struct bpf_func_state *caller, *callee;
2146         struct bpf_reg_state *r0;
2147
2148         callee = state->frame[state->curframe];
2149         r0 = &callee->regs[BPF_REG_0];
2150         if (r0->type == PTR_TO_STACK) {
2151                 /* technically it's ok to return caller's stack pointer
2152                  * (or caller's caller's pointer) back to the caller,
2153                  * since these pointers are valid. Only current stack
2154                  * pointer will be invalid as soon as function exits,
2155                  * but let's be conservative
2156                  */
2157                 verbose(env, "cannot return stack pointer to the caller\n");
2158                 return -EINVAL;
2159         }
2160
2161         state->curframe--;
2162         caller = state->frame[state->curframe];
2163         /* return to the caller whatever r0 had in the callee */
2164         caller->regs[BPF_REG_0] = *r0;
2165
2166         *insn_idx = callee->callsite + 1;
2167         if (env->log.level) {
2168                 verbose(env, "returning from callee:\n");
2169                 print_verifier_state(env, callee);
2170                 verbose(env, "to caller at %d:\n", *insn_idx);
2171                 print_verifier_state(env, caller);
2172         }
2173         /* clear everything in the callee */
2174         free_func_state(callee);
2175         state->frame[state->curframe + 1] = NULL;
2176         return 0;
2177 }
2178
2179 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
2180 {
2181         const struct bpf_func_proto *fn = NULL;
2182         struct bpf_reg_state *regs;
2183         struct bpf_call_arg_meta meta;
2184         bool changes_data;
2185         int i, err;
2186
2187         /* find function prototype */
2188         if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
2189                 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
2190                         func_id);
2191                 return -EINVAL;
2192         }
2193
2194         if (env->ops->get_func_proto)
2195                 fn = env->ops->get_func_proto(func_id);
2196
2197         if (!fn) {
2198                 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
2199                         func_id);
2200                 return -EINVAL;
2201         }
2202
2203         /* eBPF programs must be GPL compatible to use GPL-ed functions */
2204         if (!env->prog->gpl_compatible && fn->gpl_only) {
2205                 verbose(env, "cannot call GPL only function from proprietary program\n");
2206                 return -EINVAL;
2207         }
2208
2209         changes_data = bpf_helper_changes_pkt_data(fn->func);
2210
2211         memset(&meta, 0, sizeof(meta));
2212         meta.pkt_access = fn->pkt_access;
2213
2214         /* We only support one arg being in raw mode at the moment, which
2215          * is sufficient for the helper functions we have right now.
2216          */
2217         err = check_raw_mode(fn);
2218         if (err) {
2219                 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
2220                         func_id_name(func_id), func_id);
2221                 return err;
2222         }
2223
2224         /* check args */
2225         err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
2226         if (err)
2227                 return err;
2228         err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
2229         if (err)
2230                 return err;
2231         err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
2232         if (err)
2233                 return err;
2234         err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
2235         if (err)
2236                 return err;
2237         err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
2238         if (err)
2239                 return err;
2240
2241         /* Mark slots with STACK_MISC in case of raw mode, stack offset
2242          * is inferred from register state.
2243          */
2244         for (i = 0; i < meta.access_size; i++) {
2245                 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1);
2246                 if (err)
2247                         return err;
2248         }
2249
2250         regs = cur_regs(env);
2251         /* reset caller saved regs */
2252         for (i = 0; i < CALLER_SAVED_REGS; i++) {
2253                 mark_reg_not_init(env, regs, caller_saved[i]);
2254                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2255         }
2256
2257         /* update return register (already marked as written above) */
2258         if (fn->ret_type == RET_INTEGER) {
2259                 /* sets type to SCALAR_VALUE */
2260                 mark_reg_unknown(env, regs, BPF_REG_0);
2261         } else if (fn->ret_type == RET_VOID) {
2262                 regs[BPF_REG_0].type = NOT_INIT;
2263         } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
2264                 struct bpf_insn_aux_data *insn_aux;
2265
2266                 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
2267                 /* There is no offset yet applied, variable or fixed */
2268                 mark_reg_known_zero(env, regs, BPF_REG_0);
2269                 regs[BPF_REG_0].off = 0;
2270                 /* remember map_ptr, so that check_map_access()
2271                  * can check 'value_size' boundary of memory access
2272                  * to map element returned from bpf_map_lookup_elem()
2273                  */
2274                 if (meta.map_ptr == NULL) {
2275                         verbose(env,
2276                                 "kernel subsystem misconfigured verifier\n");
2277                         return -EINVAL;
2278                 }
2279                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
2280                 regs[BPF_REG_0].id = ++env->id_gen;
2281                 insn_aux = &env->insn_aux_data[insn_idx];
2282                 if (!insn_aux->map_ptr)
2283                         insn_aux->map_ptr = meta.map_ptr;
2284                 else if (insn_aux->map_ptr != meta.map_ptr)
2285                         insn_aux->map_ptr = BPF_MAP_PTR_POISON;
2286         } else {
2287                 verbose(env, "unknown return type %d of func %s#%d\n",
2288                         fn->ret_type, func_id_name(func_id), func_id);
2289                 return -EINVAL;
2290         }
2291
2292         err = check_map_func_compatibility(env, meta.map_ptr, func_id);
2293         if (err)
2294                 return err;
2295
2296         if (changes_data)
2297                 clear_all_pkt_pointers(env);
2298         return 0;
2299 }
2300
2301 static void coerce_reg_to_32(struct bpf_reg_state *reg)
2302 {
2303         /* clear high 32 bits */
2304         reg->var_off = tnum_cast(reg->var_off, 4);
2305         /* Update bounds */
2306         __update_reg_bounds(reg);
2307 }
2308
2309 static bool signed_add_overflows(s64 a, s64 b)
2310 {
2311         /* Do the add in u64, where overflow is well-defined */
2312         s64 res = (s64)((u64)a + (u64)b);
2313
2314         if (b < 0)
2315                 return res > a;
2316         return res < a;
2317 }
2318
2319 static bool signed_sub_overflows(s64 a, s64 b)
2320 {
2321         /* Do the sub in u64, where overflow is well-defined */
2322         s64 res = (s64)((u64)a - (u64)b);
2323
2324         if (b < 0)
2325                 return res < a;
2326         return res > a;
2327 }
2328
2329 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
2330  * Caller should also handle BPF_MOV case separately.
2331  * If we return -EACCES, caller may want to try again treating pointer as a
2332  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
2333  */
2334 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2335                                    struct bpf_insn *insn,
2336                                    const struct bpf_reg_state *ptr_reg,
2337                                    const struct bpf_reg_state *off_reg)
2338 {
2339         struct bpf_verifier_state *vstate = env->cur_state;
2340         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2341         struct bpf_reg_state *regs = state->regs, *dst_reg;
2342         bool known = tnum_is_const(off_reg->var_off);
2343         s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
2344             smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
2345         u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
2346             umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
2347         u8 opcode = BPF_OP(insn->code);
2348         u32 dst = insn->dst_reg;
2349
2350         dst_reg = &regs[dst];
2351
2352         if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
2353                 print_verifier_state(env, state);
2354                 verbose(env,
2355                         "verifier internal error: known but bad sbounds\n");
2356                 return -EINVAL;
2357         }
2358         if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
2359                 print_verifier_state(env, state);
2360                 verbose(env,
2361                         "verifier internal error: known but bad ubounds\n");
2362                 return -EINVAL;
2363         }
2364
2365         if (BPF_CLASS(insn->code) != BPF_ALU64) {
2366                 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
2367                 if (!env->allow_ptr_leaks)
2368                         verbose(env,
2369                                 "R%d 32-bit pointer arithmetic prohibited\n",
2370                                 dst);
2371                 return -EACCES;
2372         }
2373
2374         if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
2375                 if (!env->allow_ptr_leaks)
2376                         verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
2377                                 dst);
2378                 return -EACCES;
2379         }
2380         if (ptr_reg->type == CONST_PTR_TO_MAP) {
2381                 if (!env->allow_ptr_leaks)
2382                         verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
2383                                 dst);
2384                 return -EACCES;
2385         }
2386         if (ptr_reg->type == PTR_TO_PACKET_END) {
2387                 if (!env->allow_ptr_leaks)
2388                         verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
2389                                 dst);
2390                 return -EACCES;
2391         }
2392
2393         /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
2394          * The id may be overwritten later if we create a new variable offset.
2395          */
2396         dst_reg->type = ptr_reg->type;
2397         dst_reg->id = ptr_reg->id;
2398
2399         switch (opcode) {
2400         case BPF_ADD:
2401                 /* We can take a fixed offset as long as it doesn't overflow
2402                  * the s32 'off' field
2403                  */
2404                 if (known && (ptr_reg->off + smin_val ==
2405                               (s64)(s32)(ptr_reg->off + smin_val))) {
2406                         /* pointer += K.  Accumulate it into fixed offset */
2407                         dst_reg->smin_value = smin_ptr;
2408                         dst_reg->smax_value = smax_ptr;
2409                         dst_reg->umin_value = umin_ptr;
2410                         dst_reg->umax_value = umax_ptr;
2411                         dst_reg->var_off = ptr_reg->var_off;
2412                         dst_reg->off = ptr_reg->off + smin_val;
2413                         dst_reg->range = ptr_reg->range;
2414                         break;
2415                 }
2416                 /* A new variable offset is created.  Note that off_reg->off
2417                  * == 0, since it's a scalar.
2418                  * dst_reg gets the pointer type and since some positive
2419                  * integer value was added to the pointer, give it a new 'id'
2420                  * if it's a PTR_TO_PACKET.
2421                  * this creates a new 'base' pointer, off_reg (variable) gets
2422                  * added into the variable offset, and we copy the fixed offset
2423                  * from ptr_reg.
2424                  */
2425                 if (signed_add_overflows(smin_ptr, smin_val) ||
2426                     signed_add_overflows(smax_ptr, smax_val)) {
2427                         dst_reg->smin_value = S64_MIN;
2428                         dst_reg->smax_value = S64_MAX;
2429                 } else {
2430                         dst_reg->smin_value = smin_ptr + smin_val;
2431                         dst_reg->smax_value = smax_ptr + smax_val;
2432                 }
2433                 if (umin_ptr + umin_val < umin_ptr ||
2434                     umax_ptr + umax_val < umax_ptr) {
2435                         dst_reg->umin_value = 0;
2436                         dst_reg->umax_value = U64_MAX;
2437                 } else {
2438                         dst_reg->umin_value = umin_ptr + umin_val;
2439                         dst_reg->umax_value = umax_ptr + umax_val;
2440                 }
2441                 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
2442                 dst_reg->off = ptr_reg->off;
2443                 if (reg_is_pkt_pointer(ptr_reg)) {
2444                         dst_reg->id = ++env->id_gen;
2445                         /* something was added to pkt_ptr, set range to zero */
2446                         dst_reg->range = 0;
2447                 }
2448                 break;
2449         case BPF_SUB:
2450                 if (dst_reg == off_reg) {
2451                         /* scalar -= pointer.  Creates an unknown scalar */
2452                         if (!env->allow_ptr_leaks)
2453                                 verbose(env, "R%d tried to subtract pointer from scalar\n",
2454                                         dst);
2455                         return -EACCES;
2456                 }
2457                 /* We don't allow subtraction from FP, because (according to
2458                  * test_verifier.c test "invalid fp arithmetic", JITs might not
2459                  * be able to deal with it.
2460                  */
2461                 if (ptr_reg->type == PTR_TO_STACK) {
2462                         if (!env->allow_ptr_leaks)
2463                                 verbose(env, "R%d subtraction from stack pointer prohibited\n",
2464                                         dst);
2465                         return -EACCES;
2466                 }
2467                 if (known && (ptr_reg->off - smin_val ==
2468                               (s64)(s32)(ptr_reg->off - smin_val))) {
2469                         /* pointer -= K.  Subtract it from fixed offset */
2470                         dst_reg->smin_value = smin_ptr;
2471                         dst_reg->smax_value = smax_ptr;
2472                         dst_reg->umin_value = umin_ptr;
2473                         dst_reg->umax_value = umax_ptr;
2474                         dst_reg->var_off = ptr_reg->var_off;
2475                         dst_reg->id = ptr_reg->id;
2476                         dst_reg->off = ptr_reg->off - smin_val;
2477                         dst_reg->range = ptr_reg->range;
2478                         break;
2479                 }
2480                 /* A new variable offset is created.  If the subtrahend is known
2481                  * nonnegative, then any reg->range we had before is still good.
2482                  */
2483                 if (signed_sub_overflows(smin_ptr, smax_val) ||
2484                     signed_sub_overflows(smax_ptr, smin_val)) {
2485                         /* Overflow possible, we know nothing */
2486                         dst_reg->smin_value = S64_MIN;
2487                         dst_reg->smax_value = S64_MAX;
2488                 } else {
2489                         dst_reg->smin_value = smin_ptr - smax_val;
2490                         dst_reg->smax_value = smax_ptr - smin_val;
2491                 }
2492                 if (umin_ptr < umax_val) {
2493                         /* Overflow possible, we know nothing */
2494                         dst_reg->umin_value = 0;
2495                         dst_reg->umax_value = U64_MAX;
2496                 } else {
2497                         /* Cannot overflow (as long as bounds are consistent) */
2498                         dst_reg->umin_value = umin_ptr - umax_val;
2499                         dst_reg->umax_value = umax_ptr - umin_val;
2500                 }
2501                 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
2502                 dst_reg->off = ptr_reg->off;
2503                 if (reg_is_pkt_pointer(ptr_reg)) {
2504                         dst_reg->id = ++env->id_gen;
2505                         /* something was added to pkt_ptr, set range to zero */
2506                         if (smin_val < 0)
2507                                 dst_reg->range = 0;
2508                 }
2509                 break;
2510         case BPF_AND:
2511         case BPF_OR:
2512         case BPF_XOR:
2513                 /* bitwise ops on pointers are troublesome, prohibit for now.
2514                  * (However, in principle we could allow some cases, e.g.
2515                  * ptr &= ~3 which would reduce min_value by 3.)
2516                  */
2517                 if (!env->allow_ptr_leaks)
2518                         verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
2519                                 dst, bpf_alu_string[opcode >> 4]);
2520                 return -EACCES;
2521         default:
2522                 /* other operators (e.g. MUL,LSH) produce non-pointer results */
2523                 if (!env->allow_ptr_leaks)
2524                         verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
2525                                 dst, bpf_alu_string[opcode >> 4]);
2526                 return -EACCES;
2527         }
2528
2529         __update_reg_bounds(dst_reg);
2530         __reg_deduce_bounds(dst_reg);
2531         __reg_bound_offset(dst_reg);
2532         return 0;
2533 }
2534
2535 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2536                                       struct bpf_insn *insn,
2537                                       struct bpf_reg_state *dst_reg,
2538                                       struct bpf_reg_state src_reg)
2539 {
2540         struct bpf_reg_state *regs = cur_regs(env);
2541         u8 opcode = BPF_OP(insn->code);
2542         bool src_known, dst_known;
2543         s64 smin_val, smax_val;
2544         u64 umin_val, umax_val;
2545
2546         if (BPF_CLASS(insn->code) != BPF_ALU64) {
2547                 /* 32-bit ALU ops are (32,32)->64 */
2548                 coerce_reg_to_32(dst_reg);
2549                 coerce_reg_to_32(&src_reg);
2550         }
2551         smin_val = src_reg.smin_value;
2552         smax_val = src_reg.smax_value;
2553         umin_val = src_reg.umin_value;
2554         umax_val = src_reg.umax_value;
2555         src_known = tnum_is_const(src_reg.var_off);
2556         dst_known = tnum_is_const(dst_reg->var_off);
2557
2558         switch (opcode) {
2559         case BPF_ADD:
2560                 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
2561                     signed_add_overflows(dst_reg->smax_value, smax_val)) {
2562                         dst_reg->smin_value = S64_MIN;
2563                         dst_reg->smax_value = S64_MAX;
2564                 } else {
2565                         dst_reg->smin_value += smin_val;
2566                         dst_reg->smax_value += smax_val;
2567                 }
2568                 if (dst_reg->umin_value + umin_val < umin_val ||
2569                     dst_reg->umax_value + umax_val < umax_val) {
2570                         dst_reg->umin_value = 0;
2571                         dst_reg->umax_value = U64_MAX;
2572                 } else {
2573                         dst_reg->umin_value += umin_val;
2574                         dst_reg->umax_value += umax_val;
2575                 }
2576                 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
2577                 break;
2578         case BPF_SUB:
2579                 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
2580                     signed_sub_overflows(dst_reg->smax_value, smin_val)) {
2581                         /* Overflow possible, we know nothing */
2582                         dst_reg->smin_value = S64_MIN;
2583                         dst_reg->smax_value = S64_MAX;
2584                 } else {
2585                         dst_reg->smin_value -= smax_val;
2586                         dst_reg->smax_value -= smin_val;
2587                 }
2588                 if (dst_reg->umin_value < umax_val) {
2589                         /* Overflow possible, we know nothing */
2590                         dst_reg->umin_value = 0;
2591                         dst_reg->umax_value = U64_MAX;
2592                 } else {
2593                         /* Cannot overflow (as long as bounds are consistent) */
2594                         dst_reg->umin_value -= umax_val;
2595                         dst_reg->umax_value -= umin_val;
2596                 }
2597                 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
2598                 break;
2599         case BPF_MUL:
2600                 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
2601                 if (smin_val < 0 || dst_reg->smin_value < 0) {
2602                         /* Ain't nobody got time to multiply that sign */
2603                         __mark_reg_unbounded(dst_reg);
2604                         __update_reg_bounds(dst_reg);
2605                         break;
2606                 }
2607                 /* Both values are positive, so we can work with unsigned and
2608                  * copy the result to signed (unless it exceeds S64_MAX).
2609                  */
2610                 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
2611                         /* Potential overflow, we know nothing */
2612                         __mark_reg_unbounded(dst_reg);
2613                         /* (except what we can learn from the var_off) */
2614                         __update_reg_bounds(dst_reg);
2615                         break;
2616                 }
2617                 dst_reg->umin_value *= umin_val;
2618                 dst_reg->umax_value *= umax_val;
2619                 if (dst_reg->umax_value > S64_MAX) {
2620                         /* Overflow possible, we know nothing */
2621                         dst_reg->smin_value = S64_MIN;
2622                         dst_reg->smax_value = S64_MAX;
2623                 } else {
2624                         dst_reg->smin_value = dst_reg->umin_value;
2625                         dst_reg->smax_value = dst_reg->umax_value;
2626                 }
2627                 break;
2628         case BPF_AND:
2629                 if (src_known && dst_known) {
2630                         __mark_reg_known(dst_reg, dst_reg->var_off.value &
2631                                                   src_reg.var_off.value);
2632                         break;
2633                 }
2634                 /* We get our minimum from the var_off, since that's inherently
2635                  * bitwise.  Our maximum is the minimum of the operands' maxima.
2636                  */
2637                 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
2638                 dst_reg->umin_value = dst_reg->var_off.value;
2639                 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
2640                 if (dst_reg->smin_value < 0 || smin_val < 0) {
2641                         /* Lose signed bounds when ANDing negative numbers,
2642                          * ain't nobody got time for that.
2643                          */
2644                         dst_reg->smin_value = S64_MIN;
2645                         dst_reg->smax_value = S64_MAX;
2646                 } else {
2647                         /* ANDing two positives gives a positive, so safe to
2648                          * cast result into s64.
2649                          */
2650                         dst_reg->smin_value = dst_reg->umin_value;
2651                         dst_reg->smax_value = dst_reg->umax_value;
2652                 }
2653                 /* We may learn something more from the var_off */
2654                 __update_reg_bounds(dst_reg);
2655                 break;
2656         case BPF_OR:
2657                 if (src_known && dst_known) {
2658                         __mark_reg_known(dst_reg, dst_reg->var_off.value |
2659                                                   src_reg.var_off.value);
2660                         break;
2661                 }
2662                 /* We get our maximum from the var_off, and our minimum is the
2663                  * maximum of the operands' minima
2664                  */
2665                 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
2666                 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
2667                 dst_reg->umax_value = dst_reg->var_off.value |
2668                                       dst_reg->var_off.mask;
2669                 if (dst_reg->smin_value < 0 || smin_val < 0) {
2670                         /* Lose signed bounds when ORing negative numbers,
2671                          * ain't nobody got time for that.
2672                          */
2673                         dst_reg->smin_value = S64_MIN;
2674                         dst_reg->smax_value = S64_MAX;
2675                 } else {
2676                         /* ORing two positives gives a positive, so safe to
2677                          * cast result into s64.
2678                          */
2679                         dst_reg->smin_value = dst_reg->umin_value;
2680                         dst_reg->smax_value = dst_reg->umax_value;
2681                 }
2682                 /* We may learn something more from the var_off */
2683                 __update_reg_bounds(dst_reg);
2684                 break;
2685         case BPF_LSH:
2686                 if (umax_val > 63) {
2687                         /* Shifts greater than 63 are undefined.  This includes
2688                          * shifts by a negative number.
2689                          */
2690                         mark_reg_unknown(env, regs, insn->dst_reg);
2691                         break;
2692                 }
2693                 /* We lose all sign bit information (except what we can pick
2694                  * up from var_off)
2695                  */
2696                 dst_reg->smin_value = S64_MIN;
2697                 dst_reg->smax_value = S64_MAX;
2698                 /* If we might shift our top bit out, then we know nothing */
2699                 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
2700                         dst_reg->umin_value = 0;
2701                         dst_reg->umax_value = U64_MAX;
2702                 } else {
2703                         dst_reg->umin_value <<= umin_val;
2704                         dst_reg->umax_value <<= umax_val;
2705                 }
2706                 if (src_known)
2707                         dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
2708                 else
2709                         dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
2710                 /* We may learn something more from the var_off */
2711                 __update_reg_bounds(dst_reg);
2712                 break;
2713         case BPF_RSH:
2714                 if (umax_val > 63) {
2715                         /* Shifts greater than 63 are undefined.  This includes
2716                          * shifts by a negative number.
2717                          */
2718                         mark_reg_unknown(env, regs, insn->dst_reg);
2719                         break;
2720                 }
2721                 /* BPF_RSH is an unsigned shift, so make the appropriate casts */
2722                 if (dst_reg->smin_value < 0) {
2723                         if (umin_val) {
2724                                 /* Sign bit will be cleared */
2725                                 dst_reg->smin_value = 0;
2726                         } else {
2727                                 /* Lost sign bit information */
2728                                 dst_reg->smin_value = S64_MIN;
2729                                 dst_reg->smax_value = S64_MAX;
2730                         }
2731                 } else {
2732                         dst_reg->smin_value =
2733                                 (u64)(dst_reg->smin_value) >> umax_val;
2734                 }
2735                 if (src_known)
2736                         dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2737                                                        umin_val);
2738                 else
2739                         dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
2740                 dst_reg->umin_value >>= umax_val;
2741                 dst_reg->umax_value >>= umin_val;
2742                 /* We may learn something more from the var_off */
2743                 __update_reg_bounds(dst_reg);
2744                 break;
2745         default:
2746                 mark_reg_unknown(env, regs, insn->dst_reg);
2747                 break;
2748         }
2749
2750         __reg_deduce_bounds(dst_reg);
2751         __reg_bound_offset(dst_reg);
2752         return 0;
2753 }
2754
2755 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
2756  * and var_off.
2757  */
2758 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2759                                    struct bpf_insn *insn)
2760 {
2761         struct bpf_verifier_state *vstate = env->cur_state;
2762         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2763         struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
2764         struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2765         u8 opcode = BPF_OP(insn->code);
2766         int rc;
2767
2768         dst_reg = &regs[insn->dst_reg];
2769         src_reg = NULL;
2770         if (dst_reg->type != SCALAR_VALUE)
2771                 ptr_reg = dst_reg;
2772         if (BPF_SRC(insn->code) == BPF_X) {
2773                 src_reg = &regs[insn->src_reg];
2774                 if (src_reg->type != SCALAR_VALUE) {
2775                         if (dst_reg->type != SCALAR_VALUE) {
2776                                 /* Combining two pointers by any ALU op yields
2777                                  * an arbitrary scalar.
2778                                  */
2779                                 if (!env->allow_ptr_leaks) {
2780                                         verbose(env, "R%d pointer %s pointer prohibited\n",
2781                                                 insn->dst_reg,
2782                                                 bpf_alu_string[opcode >> 4]);
2783                                         return -EACCES;
2784                                 }
2785                                 mark_reg_unknown(env, regs, insn->dst_reg);
2786                                 return 0;
2787                         } else {
2788                                 /* scalar += pointer
2789                                  * This is legal, but we have to reverse our
2790                                  * src/dest handling in computing the range
2791                                  */
2792                                 rc = adjust_ptr_min_max_vals(env, insn,
2793                                                              src_reg, dst_reg);
2794                                 if (rc == -EACCES && env->allow_ptr_leaks) {
2795                                         /* scalar += unknown scalar */
2796                                         __mark_reg_unknown(&off_reg);
2797                                         return adjust_scalar_min_max_vals(
2798                                                         env, insn,
2799                                                         dst_reg, off_reg);
2800                                 }
2801                                 return rc;
2802                         }
2803                 } else if (ptr_reg) {
2804                         /* pointer += scalar */
2805                         rc = adjust_ptr_min_max_vals(env, insn,
2806                                                      dst_reg, src_reg);
2807                         if (rc == -EACCES && env->allow_ptr_leaks) {
2808                                 /* unknown scalar += scalar */
2809                                 __mark_reg_unknown(dst_reg);
2810                                 return adjust_scalar_min_max_vals(
2811                                                 env, insn, dst_reg, *src_reg);
2812                         }
2813                         return rc;
2814                 }
2815         } else {
2816                 /* Pretend the src is a reg with a known value, since we only
2817                  * need to be able to read from this state.
2818                  */
2819                 off_reg.type = SCALAR_VALUE;
2820                 __mark_reg_known(&off_reg, insn->imm);
2821                 src_reg = &off_reg;
2822                 if (ptr_reg) { /* pointer += K */
2823                         rc = adjust_ptr_min_max_vals(env, insn,
2824                                                      ptr_reg, src_reg);
2825                         if (rc == -EACCES && env->allow_ptr_leaks) {
2826                                 /* unknown scalar += K */
2827                                 __mark_reg_unknown(dst_reg);
2828                                 return adjust_scalar_min_max_vals(
2829                                                 env, insn, dst_reg, off_reg);
2830                         }
2831                         return rc;
2832                 }
2833         }
2834
2835         /* Got here implies adding two SCALAR_VALUEs */
2836         if (WARN_ON_ONCE(ptr_reg)) {
2837                 print_verifier_state(env, state);
2838                 verbose(env, "verifier internal error: unexpected ptr_reg\n");
2839                 return -EINVAL;
2840         }
2841         if (WARN_ON(!src_reg)) {
2842                 print_verifier_state(env, state);
2843                 verbose(env, "verifier internal error: no src_reg\n");
2844                 return -EINVAL;
2845         }
2846         return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
2847 }
2848
2849 /* check validity of 32-bit and 64-bit arithmetic operations */
2850 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2851 {
2852         struct bpf_reg_state *regs = cur_regs(env);
2853         u8 opcode = BPF_OP(insn->code);
2854         int err;
2855
2856         if (opcode == BPF_END || opcode == BPF_NEG) {
2857                 if (opcode == BPF_NEG) {
2858                         if (BPF_SRC(insn->code) != 0 ||
2859                             insn->src_reg != BPF_REG_0 ||
2860                             insn->off != 0 || insn->imm != 0) {
2861                                 verbose(env, "BPF_NEG uses reserved fields\n");
2862                                 return -EINVAL;
2863                         }
2864                 } else {
2865                         if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
2866                             (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
2867                             BPF_CLASS(insn->code) == BPF_ALU64) {
2868                                 verbose(env, "BPF_END uses reserved fields\n");
2869                                 return -EINVAL;
2870                         }
2871                 }
2872
2873                 /* check src operand */
2874                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
2875                 if (err)
2876                         return err;
2877
2878                 if (is_pointer_value(env, insn->dst_reg)) {
2879                         verbose(env, "R%d pointer arithmetic prohibited\n",
2880                                 insn->dst_reg);
2881                         return -EACCES;
2882                 }
2883
2884                 /* check dest operand */
2885                 err = check_reg_arg(env, insn->dst_reg, DST_OP);
2886                 if (err)
2887                         return err;
2888
2889         } else if (opcode == BPF_MOV) {
2890
2891                 if (BPF_SRC(insn->code) == BPF_X) {
2892                         if (insn->imm != 0 || insn->off != 0) {
2893                                 verbose(env, "BPF_MOV uses reserved fields\n");
2894                                 return -EINVAL;
2895                         }
2896
2897                         /* check src operand */
2898                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
2899                         if (err)
2900                                 return err;
2901                 } else {
2902                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
2903                                 verbose(env, "BPF_MOV uses reserved fields\n");
2904                                 return -EINVAL;
2905                         }
2906                 }
2907
2908                 /* check dest operand */
2909                 err = check_reg_arg(env, insn->dst_reg, DST_OP);
2910                 if (err)
2911                         return err;
2912
2913                 if (BPF_SRC(insn->code) == BPF_X) {
2914                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
2915                                 /* case: R1 = R2
2916                                  * copy register state to dest reg
2917                                  */
2918                                 regs[insn->dst_reg] = regs[insn->src_reg];
2919                                 regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
2920                         } else {
2921                                 /* R1 = (u32) R2 */
2922                                 if (is_pointer_value(env, insn->src_reg)) {
2923                                         verbose(env,
2924                                                 "R%d partial copy of pointer\n",
2925                                                 insn->src_reg);
2926                                         return -EACCES;
2927                                 }
2928                                 mark_reg_unknown(env, regs, insn->dst_reg);
2929                                 /* high 32 bits are known zero. */
2930                                 regs[insn->dst_reg].var_off = tnum_cast(
2931                                                 regs[insn->dst_reg].var_off, 4);
2932                                 __update_reg_bounds(&regs[insn->dst_reg]);
2933                         }
2934                 } else {
2935                         /* case: R = imm
2936                          * remember the value we stored into this reg
2937                          */
2938                         regs[insn->dst_reg].type = SCALAR_VALUE;
2939                         __mark_reg_known(regs + insn->dst_reg, insn->imm);
2940                 }
2941
2942         } else if (opcode > BPF_END) {
2943                 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
2944                 return -EINVAL;
2945
2946         } else {        /* all other ALU ops: and, sub, xor, add, ... */
2947
2948                 if (BPF_SRC(insn->code) == BPF_X) {
2949                         if (insn->imm != 0 || insn->off != 0) {
2950                                 verbose(env, "BPF_ALU uses reserved fields\n");
2951                                 return -EINVAL;
2952                         }
2953                         /* check src1 operand */
2954                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
2955                         if (err)
2956                                 return err;
2957                 } else {
2958                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
2959                                 verbose(env, "BPF_ALU uses reserved fields\n");
2960                                 return -EINVAL;
2961                         }
2962                 }
2963
2964                 /* check src2 operand */
2965                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
2966                 if (err)
2967                         return err;
2968
2969                 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
2970                     BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
2971                         verbose(env, "div by zero\n");
2972                         return -EINVAL;
2973                 }
2974
2975                 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
2976                      opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
2977                         int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
2978
2979                         if (insn->imm < 0 || insn->imm >= size) {
2980                                 verbose(env, "invalid shift %d\n", insn->imm);
2981                                 return -EINVAL;
2982                         }
2983                 }
2984
2985                 /* check dest operand */
2986                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
2987                 if (err)
2988                         return err;
2989
2990                 return adjust_reg_min_max_vals(env, insn);
2991         }
2992
2993         return 0;
2994 }
2995
2996 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
2997                                    struct bpf_reg_state *dst_reg,
2998                                    enum bpf_reg_type type,
2999                                    bool range_right_open)
3000 {
3001         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3002         struct bpf_reg_state *regs = state->regs, *reg;
3003         u16 new_range;
3004         int i, j;
3005
3006         if (dst_reg->off < 0 ||
3007             (dst_reg->off == 0 && range_right_open))
3008                 /* This doesn't give us any range */
3009                 return;
3010
3011         if (dst_reg->umax_value > MAX_PACKET_OFF ||
3012             dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
3013                 /* Risk of overflow.  For instance, ptr + (1<<63) may be less
3014                  * than pkt_end, but that's because it's also less than pkt.
3015                  */
3016                 return;
3017
3018         new_range = dst_reg->off;
3019         if (range_right_open)
3020                 new_range--;
3021
3022         /* Examples for register markings:
3023          *
3024          * pkt_data in dst register:
3025          *
3026          *   r2 = r3;
3027          *   r2 += 8;
3028          *   if (r2 > pkt_end) goto <handle exception>
3029          *   <access okay>
3030          *
3031          *   r2 = r3;
3032          *   r2 += 8;
3033          *   if (r2 < pkt_end) goto <access okay>
3034          *   <handle exception>
3035          *
3036          *   Where:
3037          *     r2 == dst_reg, pkt_end == src_reg
3038          *     r2=pkt(id=n,off=8,r=0)
3039          *     r3=pkt(id=n,off=0,r=0)
3040          *
3041          * pkt_data in src register:
3042          *
3043          *   r2 = r3;
3044          *   r2 += 8;
3045          *   if (pkt_end >= r2) goto <access okay>
3046          *   <handle exception>
3047          *
3048          *   r2 = r3;
3049          *   r2 += 8;
3050          *   if (pkt_end <= r2) goto <handle exception>
3051          *   <access okay>
3052          *
3053          *   Where:
3054          *     pkt_end == dst_reg, r2 == src_reg
3055          *     r2=pkt(id=n,off=8,r=0)
3056          *     r3=pkt(id=n,off=0,r=0)
3057          *
3058          * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
3059          * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
3060          * and [r3, r3 + 8-1) respectively is safe to access depending on
3061          * the check.
3062          */
3063
3064         /* If our ids match, then we must have the same max_value.  And we
3065          * don't care about the other reg's fixed offset, since if it's too big
3066          * the range won't allow anything.
3067          * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
3068          */
3069         for (i = 0; i < MAX_BPF_REG; i++)
3070                 if (regs[i].type == type && regs[i].id == dst_reg->id)
3071                         /* keep the maximum range already checked */
3072                         regs[i].range = max(regs[i].range, new_range);
3073
3074         for (j = 0; j <= vstate->curframe; j++) {
3075                 state = vstate->frame[j];
3076                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
3077                         if (state->stack[i].slot_type[0] != STACK_SPILL)
3078                                 continue;
3079                         reg = &state->stack[i].spilled_ptr;
3080                         if (reg->type == type && reg->id == dst_reg->id)
3081                                 reg->range = max(reg->range, new_range);
3082                 }
3083         }
3084 }
3085
3086 /* Adjusts the register min/max values in the case that the dst_reg is the
3087  * variable register that we are working on, and src_reg is a constant or we're
3088  * simply doing a BPF_K check.
3089  * In JEQ/JNE cases we also adjust the var_off values.
3090  */
3091 static void reg_set_min_max(struct bpf_reg_state *true_reg,
3092                             struct bpf_reg_state *false_reg, u64 val,
3093                             u8 opcode)
3094 {
3095         /* If the dst_reg is a pointer, we can't learn anything about its
3096          * variable offset from the compare (unless src_reg were a pointer into
3097          * the same object, but we don't bother with that.
3098          * Since false_reg and true_reg have the same type by construction, we
3099          * only need to check one of them for pointerness.
3100          */
3101         if (__is_pointer_value(false, false_reg))
3102                 return;
3103
3104         switch (opcode) {
3105         case BPF_JEQ:
3106                 /* If this is false then we know nothing Jon Snow, but if it is
3107                  * true then we know for sure.
3108                  */
3109                 __mark_reg_known(true_reg, val);
3110                 break;
3111         case BPF_JNE:
3112                 /* If this is true we know nothing Jon Snow, but if it is false
3113                  * we know the value for sure;
3114                  */
3115                 __mark_reg_known(false_reg, val);
3116                 break;
3117         case BPF_JGT:
3118                 false_reg->umax_value = min(false_reg->umax_value, val);
3119                 true_reg->umin_value = max(true_reg->umin_value, val + 1);
3120                 break;
3121         case BPF_JSGT:
3122                 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3123                 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3124                 break;
3125         case BPF_JLT:
3126                 false_reg->umin_value = max(false_reg->umin_value, val);
3127                 true_reg->umax_value = min(true_reg->umax_value, val - 1);
3128                 break;
3129         case BPF_JSLT:
3130                 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3131                 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3132                 break;
3133         case BPF_JGE:
3134                 false_reg->umax_value = min(false_reg->umax_value, val - 1);
3135                 true_reg->umin_value = max(true_reg->umin_value, val);
3136                 break;
3137         case BPF_JSGE:
3138                 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3139                 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3140                 break;
3141         case BPF_JLE:
3142                 false_reg->umin_value = max(false_reg->umin_value, val + 1);
3143                 true_reg->umax_value = min(true_reg->umax_value, val);
3144                 break;
3145         case BPF_JSLE:
3146                 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3147                 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3148                 break;
3149         default:
3150                 break;
3151         }
3152
3153         __reg_deduce_bounds(false_reg);
3154         __reg_deduce_bounds(true_reg);
3155         /* We might have learned some bits from the bounds. */
3156         __reg_bound_offset(false_reg);
3157         __reg_bound_offset(true_reg);
3158         /* Intersecting with the old var_off might have improved our bounds
3159          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3160          * then new var_off is (0; 0x7f...fc) which improves our umax.
3161          */
3162         __update_reg_bounds(false_reg);
3163         __update_reg_bounds(true_reg);
3164 }
3165
3166 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
3167  * the variable reg.
3168  */
3169 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3170                                 struct bpf_reg_state *false_reg, u64 val,
3171                                 u8 opcode)
3172 {
3173         if (__is_pointer_value(false, false_reg))
3174                 return;
3175
3176         switch (opcode) {
3177         case BPF_JEQ:
3178                 /* If this is false then we know nothing Jon Snow, but if it is
3179                  * true then we know for sure.
3180                  */
3181                 __mark_reg_known(true_reg, val);
3182                 break;
3183         case BPF_JNE:
3184                 /* If this is true we know nothing Jon Snow, but if it is false
3185                  * we know the value for sure;
3186                  */
3187                 __mark_reg_known(false_reg, val);
3188                 break;
3189         case BPF_JGT:
3190                 true_reg->umax_value = min(true_reg->umax_value, val - 1);
3191                 false_reg->umin_value = max(false_reg->umin_value, val);
3192                 break;
3193         case BPF_JSGT:
3194                 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3195                 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3196                 break;
3197         case BPF_JLT:
3198                 true_reg->umin_value = max(true_reg->umin_value, val + 1);
3199                 false_reg->umax_value = min(false_reg->umax_value, val);
3200                 break;
3201         case BPF_JSLT:
3202                 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3203                 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3204                 break;
3205         case BPF_JGE:
3206                 true_reg->umax_value = min(true_reg->umax_value, val);
3207                 false_reg->umin_value = max(false_reg->umin_value, val + 1);
3208                 break;
3209         case BPF_JSGE:
3210                 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3211                 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3212                 break;
3213         case BPF_JLE:
3214                 true_reg->umin_value = max(true_reg->umin_value, val);
3215                 false_reg->umax_value = min(false_reg->umax_value, val - 1);
3216                 break;
3217         case BPF_JSLE:
3218                 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3219                 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3220                 break;
3221         default:
3222                 break;
3223         }
3224
3225         __reg_deduce_bounds(false_reg);
3226         __reg_deduce_bounds(true_reg);
3227         /* We might have learned some bits from the bounds. */
3228         __reg_bound_offset(false_reg);
3229         __reg_bound_offset(true_reg);
3230         /* Intersecting with the old var_off might have improved our bounds
3231          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3232          * then new var_off is (0; 0x7f...fc) which improves our umax.
3233          */
3234         __update_reg_bounds(false_reg);
3235         __update_reg_bounds(true_reg);
3236 }
3237
3238 /* Regs are known to be equal, so intersect their min/max/var_off */
3239 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
3240                                   struct bpf_reg_state *dst_reg)
3241 {
3242         src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
3243                                                         dst_reg->umin_value);
3244         src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
3245                                                         dst_reg->umax_value);
3246         src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
3247                                                         dst_reg->smin_value);
3248         src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
3249                                                         dst_reg->smax_value);
3250         src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
3251                                                              dst_reg->var_off);
3252         /* We might have learned new bounds from the var_off. */
3253         __update_reg_bounds(src_reg);
3254         __update_reg_bounds(dst_reg);
3255         /* We might have learned something about the sign bit. */
3256         __reg_deduce_bounds(src_reg);
3257         __reg_deduce_bounds(dst_reg);
3258         /* We might have learned some bits from the bounds. */
3259         __reg_bound_offset(src_reg);
3260         __reg_bound_offset(dst_reg);
3261         /* Intersecting with the old var_off might have improved our bounds
3262          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3263          * then new var_off is (0; 0x7f...fc) which improves our umax.
3264          */
3265         __update_reg_bounds(src_reg);
3266         __update_reg_bounds(dst_reg);
3267 }
3268
3269 static void reg_combine_min_max(struct bpf_reg_state *true_src,
3270                                 struct bpf_reg_state *true_dst,
3271                                 struct bpf_reg_state *false_src,
3272                                 struct bpf_reg_state *false_dst,
3273                                 u8 opcode)
3274 {
3275         switch (opcode) {
3276         case BPF_JEQ:
3277                 __reg_combine_min_max(true_src, true_dst);
3278                 break;
3279         case BPF_JNE:
3280                 __reg_combine_min_max(false_src, false_dst);
3281                 break;
3282         }
3283 }
3284
3285 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
3286                          bool is_null)
3287 {
3288         struct bpf_reg_state *reg = &regs[regno];
3289
3290         if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
3291                 /* Old offset (both fixed and variable parts) should
3292                  * have been known-zero, because we don't allow pointer
3293                  * arithmetic on pointers that might be NULL.
3294                  */
3295                 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
3296                                  !tnum_equals_const(reg->var_off, 0) ||
3297                                  reg->off)) {
3298                         __mark_reg_known_zero(reg);
3299                         reg->off = 0;
3300                 }
3301                 if (is_null) {
3302                         reg->type = SCALAR_VALUE;
3303                 } else if (reg->map_ptr->inner_map_meta) {
3304                         reg->type = CONST_PTR_TO_MAP;
3305                         reg->map_ptr = reg->map_ptr->inner_map_meta;
3306                 } else {
3307                         reg->type = PTR_TO_MAP_VALUE;
3308                 }
3309                 /* We don't need id from this point onwards anymore, thus we
3310                  * should better reset it, so that state pruning has chances
3311                  * to take effect.
3312                  */
3313                 reg->id = 0;
3314         }
3315 }
3316
3317 /* The logic is similar to find_good_pkt_pointers(), both could eventually
3318  * be folded together at some point.
3319  */
3320 static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
3321                           bool is_null)
3322 {
3323         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3324         struct bpf_reg_state *regs = state->regs;
3325         u32 id = regs[regno].id;
3326         int i, j;
3327
3328         for (i = 0; i < MAX_BPF_REG; i++)
3329                 mark_map_reg(regs, i, id, is_null);
3330
3331         for (j = 0; j <= vstate->curframe; j++) {
3332                 state = vstate->frame[j];
3333                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
3334                         if (state->stack[i].slot_type[0] != STACK_SPILL)
3335                                 continue;
3336                         mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
3337                 }
3338         }
3339 }
3340
3341 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
3342                                    struct bpf_reg_state *dst_reg,
3343                                    struct bpf_reg_state *src_reg,
3344                                    struct bpf_verifier_state *this_branch,
3345                                    struct bpf_verifier_state *other_branch)
3346 {
3347         if (BPF_SRC(insn->code) != BPF_X)
3348                 return false;
3349
3350         switch (BPF_OP(insn->code)) {
3351         case BPF_JGT:
3352                 if ((dst_reg->type == PTR_TO_PACKET &&
3353                      src_reg->type == PTR_TO_PACKET_END) ||
3354                     (dst_reg->type == PTR_TO_PACKET_META &&
3355                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3356                         /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
3357                         find_good_pkt_pointers(this_branch, dst_reg,
3358                                                dst_reg->type, false);
3359                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
3360                             src_reg->type == PTR_TO_PACKET) ||
3361                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3362                             src_reg->type == PTR_TO_PACKET_META)) {
3363                         /* pkt_end > pkt_data', pkt_data > pkt_meta' */
3364                         find_good_pkt_pointers(other_branch, src_reg,
3365                                                src_reg->type, true);
3366                 } else {
3367                         return false;
3368                 }
3369                 break;
3370         case BPF_JLT:
3371                 if ((dst_reg->type == PTR_TO_PACKET &&
3372                      src_reg->type == PTR_TO_PACKET_END) ||
3373                     (dst_reg->type == PTR_TO_PACKET_META &&
3374                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3375                         /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
3376                         find_good_pkt_pointers(other_branch, dst_reg,
3377                                                dst_reg->type, true);
3378                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
3379                             src_reg->type == PTR_TO_PACKET) ||
3380                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3381                             src_reg->type == PTR_TO_PACKET_META)) {
3382                         /* pkt_end < pkt_data', pkt_data > pkt_meta' */
3383                         find_good_pkt_pointers(this_branch, src_reg,
3384                                                src_reg->type, false);
3385                 } else {
3386                         return false;
3387                 }
3388                 break;
3389         case BPF_JGE:
3390                 if ((dst_reg->type == PTR_TO_PACKET &&
3391                      src_reg->type == PTR_TO_PACKET_END) ||
3392                     (dst_reg->type == PTR_TO_PACKET_META &&
3393                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3394                         /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
3395                         find_good_pkt_pointers(this_branch, dst_reg,
3396                                                dst_reg->type, true);
3397                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
3398                             src_reg->type == PTR_TO_PACKET) ||
3399                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3400                             src_reg->type == PTR_TO_PACKET_META)) {
3401                         /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
3402                         find_good_pkt_pointers(other_branch, src_reg,
3403                                                src_reg->type, false);
3404                 } else {
3405                         return false;
3406                 }
3407                 break;
3408         case BPF_JLE:
3409                 if ((dst_reg->type == PTR_TO_PACKET &&
3410                      src_reg->type == PTR_TO_PACKET_END) ||
3411                     (dst_reg->type == PTR_TO_PACKET_META &&
3412                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3413                         /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
3414                         find_good_pkt_pointers(other_branch, dst_reg,
3415                                                dst_reg->type, false);
3416                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
3417                             src_reg->type == PTR_TO_PACKET) ||
3418                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3419                             src_reg->type == PTR_TO_PACKET_META)) {
3420                         /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
3421                         find_good_pkt_pointers(this_branch, src_reg,
3422                                                src_reg->type, true);
3423                 } else {
3424                         return false;
3425                 }
3426                 break;
3427         default:
3428                 return false;
3429         }
3430
3431         return true;
3432 }
3433
3434 static int check_cond_jmp_op(struct bpf_verifier_env *env,
3435                              struct bpf_insn *insn, int *insn_idx)
3436 {
3437         struct bpf_verifier_state *this_branch = env->cur_state;
3438         struct bpf_verifier_state *other_branch;
3439         struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
3440         struct bpf_reg_state *dst_reg, *other_branch_regs;
3441         u8 opcode = BPF_OP(insn->code);
3442         int err;
3443
3444         if (opcode > BPF_JSLE) {
3445                 verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
3446                 return -EINVAL;
3447         }
3448
3449         if (BPF_SRC(insn->code) == BPF_X) {
3450                 if (insn->imm != 0) {
3451                         verbose(env, "BPF_JMP uses reserved fields\n");
3452                         return -EINVAL;
3453                 }
3454
3455                 /* check src1 operand */
3456                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
3457                 if (err)
3458                         return err;
3459
3460                 if (is_pointer_value(env, insn->src_reg)) {
3461                         verbose(env, "R%d pointer comparison prohibited\n",
3462                                 insn->src_reg);
3463                         return -EACCES;
3464                 }
3465         } else {
3466                 if (insn->src_reg != BPF_REG_0) {
3467                         verbose(env, "BPF_JMP uses reserved fields\n");
3468                         return -EINVAL;
3469                 }
3470         }
3471
3472         /* check src2 operand */
3473         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3474         if (err)
3475                 return err;
3476
3477         dst_reg = &regs[insn->dst_reg];
3478
3479         /* detect if R == 0 where R was initialized to zero earlier */
3480         if (BPF_SRC(insn->code) == BPF_K &&
3481             (opcode == BPF_JEQ || opcode == BPF_JNE) &&
3482             dst_reg->type == SCALAR_VALUE &&
3483             tnum_is_const(dst_reg->var_off)) {
3484                 if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
3485                     (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
3486                         /* if (imm == imm) goto pc+off;
3487                          * only follow the goto, ignore fall-through
3488                          */
3489                         *insn_idx += insn->off;
3490                         return 0;
3491                 } else {
3492                         /* if (imm != imm) goto pc+off;
3493                          * only follow fall-through branch, since
3494                          * that's where the program will go
3495                          */
3496                         return 0;
3497                 }
3498         }
3499
3500         other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
3501         if (!other_branch)
3502                 return -EFAULT;
3503         other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
3504
3505         /* detect if we are comparing against a constant value so we can adjust
3506          * our min/max values for our dst register.
3507          * this is only legit if both are scalars (or pointers to the same
3508          * object, I suppose, but we don't support that right now), because
3509          * otherwise the different base pointers mean the offsets aren't
3510          * comparable.
3511          */
3512         if (BPF_SRC(insn->code) == BPF_X) {
3513                 if (dst_reg->type == SCALAR_VALUE &&
3514                     regs[insn->src_reg].type == SCALAR_VALUE) {
3515                         if (tnum_is_const(regs[insn->src_reg].var_off))
3516                                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
3517                                                 dst_reg, regs[insn->src_reg].var_off.value,
3518                                                 opcode);
3519                         else if (tnum_is_const(dst_reg->var_off))
3520                                 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
3521                                                     &regs[insn->src_reg],
3522                                                     dst_reg->var_off.value, opcode);
3523                         else if (opcode == BPF_JEQ || opcode == BPF_JNE)
3524                                 /* Comparing for equality, we can combine knowledge */
3525                                 reg_combine_min_max(&other_branch_regs[insn->src_reg],
3526                                                     &other_branch_regs[insn->dst_reg],
3527                                                     &regs[insn->src_reg],
3528                                                     &regs[insn->dst_reg], opcode);
3529                 }
3530         } else if (dst_reg->type == SCALAR_VALUE) {
3531                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
3532                                         dst_reg, insn->imm, opcode);
3533         }
3534
3535         /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
3536         if (BPF_SRC(insn->code) == BPF_K &&
3537             insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
3538             dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
3539                 /* Mark all identical map registers in each branch as either
3540                  * safe or unknown depending R == 0 or R != 0 conditional.
3541                  */
3542                 mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
3543                 mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
3544         } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
3545                                            this_branch, other_branch) &&
3546                    is_pointer_value(env, insn->dst_reg)) {
3547                 verbose(env, "R%d pointer comparison prohibited\n",
3548                         insn->dst_reg);
3549                 return -EACCES;
3550         }
3551         if (env->log.level)
3552                 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
3553         return 0;
3554 }
3555
3556 /* return the map pointer stored inside BPF_LD_IMM64 instruction */
3557 static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
3558 {
3559         u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
3560
3561         return (struct bpf_map *) (unsigned long) imm64;
3562 }
3563
3564 /* verify BPF_LD_IMM64 instruction */
3565 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
3566 {
3567         struct bpf_reg_state *regs = cur_regs(env);
3568         int err;
3569
3570         if (BPF_SIZE(insn->code) != BPF_DW) {
3571                 verbose(env, "invalid BPF_LD_IMM insn\n");
3572                 return -EINVAL;
3573         }
3574         if (insn->off != 0) {
3575                 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
3576                 return -EINVAL;
3577         }
3578
3579         err = check_reg_arg(env, insn->dst_reg, DST_OP);
3580         if (err)
3581                 return err;
3582
3583         if (insn->src_reg == 0) {
3584                 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
3585
3586                 regs[insn->dst_reg].type = SCALAR_VALUE;
3587                 __mark_reg_known(&regs[insn->dst_reg], imm);
3588                 return 0;
3589         }
3590
3591         /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
3592         BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
3593
3594         regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
3595         regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
3596         return 0;
3597 }
3598
3599 static bool may_access_skb(enum bpf_prog_type type)
3600 {
3601         switch (type) {
3602         case BPF_PROG_TYPE_SOCKET_FILTER:
3603         case BPF_PROG_TYPE_SCHED_CLS:
3604         case BPF_PROG_TYPE_SCHED_ACT:
3605                 return true;
3606         default:
3607                 return false;
3608         }
3609 }
3610
3611 /* verify safety of LD_ABS|LD_IND instructions:
3612  * - they can only appear in the programs where ctx == skb
3613  * - since they are wrappers of function calls, they scratch R1-R5 registers,
3614  *   preserve R6-R9, and store return value into R0
3615  *
3616  * Implicit input:
3617  *   ctx == skb == R6 == CTX
3618  *
3619  * Explicit input:
3620  *   SRC == any register
3621  *   IMM == 32-bit immediate
3622  *
3623  * Output:
3624  *   R0 - 8/16/32-bit skb data converted to cpu endianness
3625  */
3626 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
3627 {
3628         struct bpf_reg_state *regs = cur_regs(env);
3629         u8 mode = BPF_MODE(insn->code);
3630         int i, err;
3631
3632         if (!may_access_skb(env->prog->type)) {
3633                 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
3634                 return -EINVAL;
3635         }
3636
3637         if (env->subprog_cnt) {
3638                 /* when program has LD_ABS insn JITs and interpreter assume
3639                  * that r1 == ctx == skb which is not the case for callees
3640                  * that can have arbitrary arguments. It's problematic
3641                  * for main prog as well since JITs would need to analyze
3642                  * all functions in order to make proper register save/restore
3643                  * decisions in the main prog. Hence disallow LD_ABS with calls
3644                  */
3645                 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
3646                 return -EINVAL;
3647         }
3648
3649         if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
3650             BPF_SIZE(insn->code) == BPF_DW ||
3651             (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
3652                 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
3653                 return -EINVAL;
3654         }
3655
3656         /* check whether implicit source operand (register R6) is readable */
3657         err = check_reg_arg(env, BPF_REG_6, SRC_OP);
3658         if (err)
3659                 return err;
3660
3661         if (regs[BPF_REG_6].type != PTR_TO_CTX) {
3662                 verbose(env,
3663                         "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
3664                 return -EINVAL;
3665         }
3666
3667         if (mode == BPF_IND) {
3668                 /* check explicit source operand */
3669                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
3670                 if (err)
3671                         return err;
3672         }
3673
3674         /* reset caller saved regs to unreadable */
3675         for (i = 0; i < CALLER_SAVED_REGS; i++) {
3676                 mark_reg_not_init(env, regs, caller_saved[i]);
3677                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3678         }
3679
3680         /* mark destination R0 register as readable, since it contains
3681          * the value fetched from the packet.
3682          * Already marked as written above.
3683          */
3684         mark_reg_unknown(env, regs, BPF_REG_0);
3685         return 0;
3686 }
3687
3688 static int check_return_code(struct bpf_verifier_env *env)
3689 {
3690         struct bpf_reg_state *reg;
3691         struct tnum range = tnum_range(0, 1);
3692
3693         switch (env->prog->type) {
3694         case BPF_PROG_TYPE_CGROUP_SKB:
3695         case BPF_PROG_TYPE_CGROUP_SOCK:
3696         case BPF_PROG_TYPE_SOCK_OPS:
3697         case BPF_PROG_TYPE_CGROUP_DEVICE:
3698                 break;
3699         default:
3700                 return 0;
3701         }
3702
3703         reg = cur_regs(env) + BPF_REG_0;
3704         if (reg->type != SCALAR_VALUE) {
3705                 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
3706                         reg_type_str[reg->type]);
3707                 return -EINVAL;
3708         }
3709
3710         if (!tnum_in(range, reg->var_off)) {
3711                 verbose(env, "At program exit the register R0 ");
3712                 if (!tnum_is_unknown(reg->var_off)) {
3713                         char tn_buf[48];
3714
3715                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3716                         verbose(env, "has value %s", tn_buf);
3717                 } else {
3718                         verbose(env, "has unknown scalar value");
3719                 }
3720                 verbose(env, " should have been 0 or 1\n");
3721                 return -EINVAL;
3722         }
3723         return 0;
3724 }
3725
3726 /* non-recursive DFS pseudo code
3727  * 1  procedure DFS-iterative(G,v):
3728  * 2      label v as discovered
3729  * 3      let S be a stack
3730  * 4      S.push(v)
3731  * 5      while S is not empty
3732  * 6            t <- S.pop()
3733  * 7            if t is what we're looking for:
3734  * 8                return t
3735  * 9            for all edges e in G.adjacentEdges(t) do
3736  * 10               if edge e is already labelled
3737  * 11                   continue with the next edge
3738  * 12               w <- G.adjacentVertex(t,e)
3739  * 13               if vertex w is not discovered and not explored
3740  * 14                   label e as tree-edge
3741  * 15                   label w as discovered
3742  * 16                   S.push(w)
3743  * 17                   continue at 5
3744  * 18               else if vertex w is discovered
3745  * 19                   label e as back-edge
3746  * 20               else
3747  * 21                   // vertex w is explored
3748  * 22                   label e as forward- or cross-edge
3749  * 23           label t as explored
3750  * 24           S.pop()
3751  *
3752  * convention:
3753  * 0x10 - discovered
3754  * 0x11 - discovered and fall-through edge labelled
3755  * 0x12 - discovered and fall-through and branch edges labelled
3756  * 0x20 - explored
3757  */
3758
3759 enum {
3760         DISCOVERED = 0x10,
3761         EXPLORED = 0x20,
3762         FALLTHROUGH = 1,
3763         BRANCH = 2,
3764 };
3765
3766 #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
3767
3768 static int *insn_stack; /* stack of insns to process */
3769 static int cur_stack;   /* current stack index */
3770 static int *insn_state;
3771
3772 /* t, w, e - match pseudo-code above:
3773  * t - index of current instruction
3774  * w - next instruction
3775  * e - edge
3776  */
3777 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
3778 {
3779         if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
3780                 return 0;
3781
3782         if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
3783                 return 0;
3784
3785         if (w < 0 || w >= env->prog->len) {
3786                 verbose(env, "jump out of range from insn %d to %d\n", t, w);
3787                 return -EINVAL;
3788         }
3789
3790         if (e == BRANCH)
3791                 /* mark branch target for state pruning */
3792                 env->explored_states[w] = STATE_LIST_MARK;
3793
3794         if (insn_state[w] == 0) {
3795                 /* tree-edge */
3796                 insn_state[t] = DISCOVERED | e;
3797                 insn_state[w] = DISCOVERED;
3798                 if (cur_stack >= env->prog->len)
3799                         return -E2BIG;
3800                 insn_stack[cur_stack++] = w;
3801                 return 1;
3802         } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
3803                 verbose(env, "back-edge from insn %d to %d\n", t, w);
3804                 return -EINVAL;
3805         } else if (insn_state[w] == EXPLORED) {
3806                 /* forward- or cross-edge */
3807                 insn_state[t] = DISCOVERED | e;
3808         } else {
3809                 verbose(env, "insn state internal bug\n");
3810                 return -EFAULT;
3811         }
3812         return 0;
3813 }
3814
3815 /* non-recursive depth-first-search to detect loops in BPF program
3816  * loop == back-edge in directed graph
3817  */
3818 static int check_cfg(struct bpf_verifier_env *env)
3819 {
3820         struct bpf_insn *insns = env->prog->insnsi;
3821         int insn_cnt = env->prog->len;
3822         int ret = 0;
3823         int i, t;
3824
3825         ret = check_subprogs(env);
3826         if (ret < 0)
3827                 return ret;
3828
3829         insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
3830         if (!insn_state)
3831                 return -ENOMEM;
3832
3833         insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
3834         if (!insn_stack) {
3835                 kfree(insn_state);
3836                 return -ENOMEM;
3837         }
3838
3839         insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
3840         insn_stack[0] = 0; /* 0 is the first instruction */
3841         cur_stack = 1;
3842
3843 peek_stack:
3844         if (cur_stack == 0)
3845                 goto check_state;
3846         t = insn_stack[cur_stack - 1];
3847
3848         if (BPF_CLASS(insns[t].code) == BPF_JMP) {
3849                 u8 opcode = BPF_OP(insns[t].code);
3850
3851                 if (opcode == BPF_EXIT) {
3852                         goto mark_explored;
3853                 } else if (opcode == BPF_CALL) {
3854                         ret = push_insn(t, t + 1, FALLTHROUGH, env);
3855                         if (ret == 1)
3856                                 goto peek_stack;
3857                         else if (ret < 0)
3858                                 goto err_free;
3859                         if (t + 1 < insn_cnt)
3860                                 env->explored_states[t + 1] = STATE_LIST_MARK;
3861                         if (insns[t].src_reg == BPF_PSEUDO_CALL) {
3862                                 env->explored_states[t] = STATE_LIST_MARK;
3863                                 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
3864                                 if (ret == 1)
3865                                         goto peek_stack;
3866                                 else if (ret < 0)
3867                                         goto err_free;
3868                         }
3869                 } else if (opcode == BPF_JA) {
3870                         if (BPF_SRC(insns[t].code) != BPF_K) {
3871                                 ret = -EINVAL;
3872                                 goto err_free;
3873                         }
3874                         /* unconditional jump with single edge */
3875                         ret = push_insn(t, t + insns[t].off + 1,
3876                                         FALLTHROUGH, env);
3877                         if (ret == 1)
3878                                 goto peek_stack;
3879                         else if (ret < 0)
3880                                 goto err_free;
3881                         /* tell verifier to check for equivalent states
3882                          * after every call and jump
3883                          */
3884                         if (t + 1 < insn_cnt)
3885                                 env->explored_states[t + 1] = STATE_LIST_MARK;
3886                 } else {
3887                         /* conditional jump with two edges */
3888                         env->explored_states[t] = STATE_LIST_MARK;
3889                         ret = push_insn(t, t + 1, FALLTHROUGH, env);
3890                         if (ret == 1)
3891                                 goto peek_stack;
3892                         else if (ret < 0)
3893                                 goto err_free;
3894
3895                         ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
3896                         if (ret == 1)
3897                                 goto peek_stack;
3898                         else if (ret < 0)
3899                                 goto err_free;
3900                 }
3901         } else {
3902                 /* all other non-branch instructions with single
3903                  * fall-through edge
3904                  */
3905                 ret = push_insn(t, t + 1, FALLTHROUGH, env);
3906                 if (ret == 1)
3907                         goto peek_stack;
3908                 else if (ret < 0)
3909                         goto err_free;
3910         }
3911
3912 mark_explored:
3913         insn_state[t] = EXPLORED;
3914         if (cur_stack-- <= 0) {
3915                 verbose(env, "pop stack internal bug\n");
3916                 ret = -EFAULT;
3917                 goto err_free;
3918         }
3919         goto peek_stack;
3920
3921 check_state:
3922         for (i = 0; i < insn_cnt; i++) {
3923                 if (insn_state[i] != EXPLORED) {
3924                         verbose(env, "unreachable insn %d\n", i);
3925                         ret = -EINVAL;
3926                         goto err_free;
3927                 }
3928         }
3929         ret = 0; /* cfg looks good */
3930
3931 err_free:
3932         kfree(insn_state);
3933         kfree(insn_stack);
3934         return ret;
3935 }
3936
3937 /* check %cur's range satisfies %old's */
3938 static bool range_within(struct bpf_reg_state *old,
3939                          struct bpf_reg_state *cur)
3940 {
3941         return old->umin_value <= cur->umin_value &&
3942                old->umax_value >= cur->umax_value &&
3943                old->smin_value <= cur->smin_value &&
3944                old->smax_value >= cur->smax_value;
3945 }
3946
3947 /* Maximum number of register states that can exist at once */
3948 #define ID_MAP_SIZE     (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
3949 struct idpair {
3950         u32 old;
3951         u32 cur;
3952 };
3953
3954 /* If in the old state two registers had the same id, then they need to have
3955  * the same id in the new state as well.  But that id could be different from
3956  * the old state, so we need to track the mapping from old to new ids.
3957  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
3958  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
3959  * regs with a different old id could still have new id 9, we don't care about
3960  * that.
3961  * So we look through our idmap to see if this old id has been seen before.  If
3962  * so, we require the new id to match; otherwise, we add the id pair to the map.
3963  */
3964 static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
3965 {
3966         unsigned int i;
3967
3968         for (i = 0; i < ID_MAP_SIZE; i++) {
3969                 if (!idmap[i].old) {
3970                         /* Reached an empty slot; haven't seen this id before */
3971                         idmap[i].old = old_id;
3972                         idmap[i].cur = cur_id;
3973                         return true;
3974                 }
3975                 if (idmap[i].old == old_id)
3976                         return idmap[i].cur == cur_id;
3977         }
3978         /* We ran out of idmap slots, which should be impossible */
3979         WARN_ON_ONCE(1);
3980         return false;
3981 }
3982
3983 /* Returns true if (rold safe implies rcur safe) */
3984 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
3985                     struct idpair *idmap)
3986 {
3987         bool equal;
3988
3989         if (!(rold->live & REG_LIVE_READ))
3990                 /* explored state didn't use this */
3991                 return true;
3992
3993         equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0;
3994
3995         if (rold->type == PTR_TO_STACK)
3996                 /* two stack pointers are equal only if they're pointing to
3997                  * the same stack frame, since fp-8 in foo != fp-8 in bar
3998                  */
3999                 return equal && rold->frameno == rcur->frameno;
4000
4001         if (equal)
4002                 return true;
4003
4004         if (rold->type == NOT_INIT)
4005                 /* explored state can't have used this */
4006                 return true;
4007         if (rcur->type == NOT_INIT)
4008                 return false;
4009         switch (rold->type) {
4010         case SCALAR_VALUE:
4011                 if (rcur->type == SCALAR_VALUE) {
4012                         /* new val must satisfy old val knowledge */
4013                         return range_within(rold, rcur) &&
4014                                tnum_in(rold->var_off, rcur->var_off);
4015                 } else {
4016                         /* if we knew anything about the old value, we're not
4017                          * equal, because we can't know anything about the
4018                          * scalar value of the pointer in the new value.
4019                          */
4020                         return rold->umin_value == 0 &&
4021                                rold->umax_value == U64_MAX &&
4022                                rold->smin_value == S64_MIN &&
4023                                rold->smax_value == S64_MAX &&
4024                                tnum_is_unknown(rold->var_off);
4025                 }
4026         case PTR_TO_MAP_VALUE:
4027                 /* If the new min/max/var_off satisfy the old ones and
4028                  * everything else matches, we are OK.
4029                  * We don't care about the 'id' value, because nothing
4030                  * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
4031                  */
4032                 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
4033                        range_within(rold, rcur) &&
4034                        tnum_in(rold->var_off, rcur->var_off);
4035         case PTR_TO_MAP_VALUE_OR_NULL:
4036                 /* a PTR_TO_MAP_VALUE could be safe to use as a
4037                  * PTR_TO_MAP_VALUE_OR_NULL into the same map.
4038                  * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
4039                  * checked, doing so could have affected others with the same
4040                  * id, and we can't check for that because we lost the id when
4041                  * we converted to a PTR_TO_MAP_VALUE.
4042                  */
4043                 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
4044                         return false;
4045                 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
4046                         return false;
4047                 /* Check our ids match any regs they're supposed to */
4048                 return check_ids(rold->id, rcur->id, idmap);
4049         case PTR_TO_PACKET_META:
4050         case PTR_TO_PACKET:
4051                 if (rcur->type != rold->type)
4052                         return false;
4053                 /* We must have at least as much range as the old ptr
4054                  * did, so that any accesses which were safe before are
4055                  * still safe.  This is true even if old range < old off,
4056                  * since someone could have accessed through (ptr - k), or
4057                  * even done ptr -= k in a register, to get a safe access.
4058                  */
4059                 if (rold->range > rcur->range)
4060                         return false;
4061                 /* If the offsets don't match, we can't trust our alignment;
4062                  * nor can we be sure that we won't fall out of range.
4063                  */
4064                 if (rold->off != rcur->off)
4065                         return false;
4066                 /* id relations must be preserved */
4067                 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
4068                         return false;
4069                 /* new val must satisfy old val knowledge */
4070                 return range_within(rold, rcur) &&
4071                        tnum_in(rold->var_off, rcur->var_off);
4072         case PTR_TO_CTX:
4073         case CONST_PTR_TO_MAP:
4074         case PTR_TO_PACKET_END:
4075                 /* Only valid matches are exact, which memcmp() above
4076                  * would have accepted
4077                  */
4078         default:
4079                 /* Don't know what's going on, just say it's not safe */
4080                 return false;
4081         }
4082
4083         /* Shouldn't get here; if we do, say it's not safe */
4084         WARN_ON_ONCE(1);
4085         return false;
4086 }
4087
4088 static bool stacksafe(struct bpf_func_state *old,
4089                       struct bpf_func_state *cur,
4090                       struct idpair *idmap)
4091 {
4092         int i, spi;
4093
4094         /* if explored stack has more populated slots than current stack
4095          * such stacks are not equivalent
4096          */
4097         if (old->allocated_stack > cur->allocated_stack)
4098                 return false;
4099
4100         /* walk slots of the explored stack and ignore any additional
4101          * slots in the current stack, since explored(safe) state
4102          * didn't use them
4103          */
4104         for (i = 0; i < old->allocated_stack; i++) {
4105                 spi = i / BPF_REG_SIZE;
4106
4107                 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
4108                         /* explored state didn't use this */
4109                         return true;
4110
4111                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
4112                         continue;
4113                 /* if old state was safe with misc data in the stack
4114                  * it will be safe with zero-initialized stack.
4115                  * The opposite is not true
4116                  */
4117                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
4118                     cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
4119                         continue;
4120                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
4121                     cur->stack[spi].slot_type[i % BPF_REG_SIZE])
4122                         /* Ex: old explored (safe) state has STACK_SPILL in
4123                          * this stack slot, but current has has STACK_MISC ->
4124                          * this verifier states are not equivalent,
4125                          * return false to continue verification of this path
4126                          */
4127                         return false;
4128                 if (i % BPF_REG_SIZE)
4129                         continue;
4130                 if (old->stack[spi].slot_type[0] != STACK_SPILL)
4131                         continue;
4132                 if (!regsafe(&old->stack[spi].spilled_ptr,
4133                              &cur->stack[spi].spilled_ptr,
4134                              idmap))
4135                         /* when explored and current stack slot are both storing
4136                          * spilled registers, check that stored pointers types
4137                          * are the same as well.
4138                          * Ex: explored safe path could have stored
4139                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
4140                          * but current path has stored:
4141                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
4142                          * such verifier states are not equivalent.
4143                          * return false to continue verification of this path
4144                          */
4145                         return false;
4146         }
4147         return true;
4148 }
4149
4150 /* compare two verifier states
4151  *
4152  * all states stored in state_list are known to be valid, since
4153  * verifier reached 'bpf_exit' instruction through them
4154  *
4155  * this function is called when verifier exploring different branches of
4156  * execution popped from the state stack. If it sees an old state that has
4157  * more strict register state and more strict stack state then this execution
4158  * branch doesn't need to be explored further, since verifier already
4159  * concluded that more strict state leads to valid finish.
4160  *
4161  * Therefore two states are equivalent if register state is more conservative
4162  * and explored stack state is more conservative than the current one.
4163  * Example:
4164  *       explored                   current
4165  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
4166  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
4167  *
4168  * In other words if current stack state (one being explored) has more
4169  * valid slots than old one that already passed validation, it means
4170  * the verifier can stop exploring and conclude that current state is valid too
4171  *
4172  * Similarly with registers. If explored state has register type as invalid
4173  * whereas register type in current state is meaningful, it means that
4174  * the current state will reach 'bpf_exit' instruction safely
4175  */
4176 static bool func_states_equal(struct bpf_func_state *old,
4177                               struct bpf_func_state *cur)
4178 {
4179         struct idpair *idmap;
4180         bool ret = false;
4181         int i;
4182
4183         idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
4184         /* If we failed to allocate the idmap, just say it's not safe */
4185         if (!idmap)
4186                 return false;
4187
4188         for (i = 0; i < MAX_BPF_REG; i++) {
4189                 if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
4190                         goto out_free;
4191         }
4192
4193         if (!stacksafe(old, cur, idmap))
4194                 goto out_free;
4195         ret = true;
4196 out_free:
4197         kfree(idmap);
4198         return ret;
4199 }
4200
4201 static bool states_equal(struct bpf_verifier_env *env,
4202                          struct bpf_verifier_state *old,
4203                          struct bpf_verifier_state *cur)
4204 {
4205         int i;
4206
4207         if (old->curframe != cur->curframe)
4208                 return false;
4209
4210         /* for states to be equal callsites have to be the same
4211          * and all frame states need to be equivalent
4212          */
4213         for (i = 0; i <= old->curframe; i++) {
4214                 if (old->frame[i]->callsite != cur->frame[i]->callsite)
4215                         return false;
4216                 if (!func_states_equal(old->frame[i], cur->frame[i]))
4217                         return false;
4218         }
4219         return true;
4220 }
4221
4222 /* A write screens off any subsequent reads; but write marks come from the
4223  * straight-line code between a state and its parent.  When we arrive at an
4224  * equivalent state (jump target or such) we didn't arrive by the straight-line
4225  * code, so read marks in the state must propagate to the parent regardless
4226  * of the state's write marks. That's what 'parent == state->parent' comparison
4227  * in mark_reg_read() and mark_stack_slot_read() is for.
4228  */
4229 static int propagate_liveness(struct bpf_verifier_env *env,
4230                               const struct bpf_verifier_state *vstate,
4231                               struct bpf_verifier_state *vparent)
4232 {
4233         int i, frame, err = 0;
4234         struct bpf_func_state *state, *parent;
4235
4236         if (vparent->curframe != vstate->curframe) {
4237                 WARN(1, "propagate_live: parent frame %d current frame %d\n",
4238                      vparent->curframe, vstate->curframe);
4239                 return -EFAULT;
4240         }
4241         /* Propagate read liveness of registers... */
4242         BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
4243         /* We don't need to worry about FP liveness because it's read-only */
4244         for (i = 0; i < BPF_REG_FP; i++) {
4245                 if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
4246                         continue;
4247                 if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
4248                         err = mark_reg_read(env, vstate, vparent, i);
4249                         if (err)
4250                                 return err;
4251                 }
4252         }
4253
4254         /* ... and stack slots */
4255         for (frame = 0; frame <= vstate->curframe; frame++) {
4256                 state = vstate->frame[frame];
4257                 parent = vparent->frame[frame];
4258                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
4259                             i < parent->allocated_stack / BPF_REG_SIZE; i++) {
4260                         if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
4261                                 continue;
4262                         if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
4263                                 mark_stack_slot_read(env, vstate, vparent, i, frame);
4264                 }
4265         }
4266         return err;
4267 }
4268
4269 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
4270 {
4271         struct bpf_verifier_state_list *new_sl;
4272         struct bpf_verifier_state_list *sl;
4273         struct bpf_verifier_state *cur = env->cur_state;
4274         int i, j, err;
4275
4276         sl = env->explored_states[insn_idx];
4277         if (!sl)
4278                 /* this 'insn_idx' instruction wasn't marked, so we will not
4279                  * be doing state search here
4280                  */
4281                 return 0;
4282
4283         while (sl != STATE_LIST_MARK) {
4284                 if (states_equal(env, &sl->state, cur)) {
4285                         /* reached equivalent register/stack state,
4286                          * prune the search.
4287                          * Registers read by the continuation are read by us.
4288                          * If we have any write marks in env->cur_state, they
4289                          * will prevent corresponding reads in the continuation
4290                          * from reaching our parent (an explored_state).  Our
4291                          * own state will get the read marks recorded, but
4292                          * they'll be immediately forgotten as we're pruning
4293                          * this state and will pop a new one.
4294                          */
4295                         err = propagate_liveness(env, &sl->state, cur);
4296                         if (err)
4297                                 return err;
4298                         return 1;
4299                 }
4300                 sl = sl->next;
4301         }
4302
4303         /* there were no equivalent states, remember current one.
4304          * technically the current state is not proven to be safe yet,
4305          * but it will either reach outer most bpf_exit (which means it's safe)
4306          * or it will be rejected. Since there are no loops, we won't be
4307          * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
4308          * again on the way to bpf_exit
4309          */
4310         new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
4311         if (!new_sl)
4312                 return -ENOMEM;
4313
4314         /* add new state to the head of linked list */
4315         err = copy_verifier_state(&new_sl->state, cur);
4316         if (err) {
4317                 free_verifier_state(&new_sl->state, false);
4318                 kfree(new_sl);
4319                 return err;
4320         }
4321         new_sl->next = env->explored_states[insn_idx];
4322         env->explored_states[insn_idx] = new_sl;
4323         /* connect new state to parentage chain */
4324         cur->parent = &new_sl->state;
4325         /* clear write marks in current state: the writes we did are not writes
4326          * our child did, so they don't screen off its reads from us.
4327          * (There are no read marks in current state, because reads always mark
4328          * their parent and current state never has children yet.  Only
4329          * explored_states can get read marks.)
4330          */
4331         for (i = 0; i < BPF_REG_FP; i++)
4332                 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
4333
4334         /* all stack frames are accessible from callee, clear them all */
4335         for (j = 0; j <= cur->curframe; j++) {
4336                 struct bpf_func_state *frame = cur->frame[j];
4337
4338                 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++)
4339                         frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
4340         }
4341         return 0;
4342 }
4343
4344 static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
4345                                   int insn_idx, int prev_insn_idx)
4346 {
4347         if (env->dev_ops && env->dev_ops->insn_hook)
4348                 return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
4349
4350         return 0;
4351 }
4352
4353 static int do_check(struct bpf_verifier_env *env)
4354 {
4355         struct bpf_verifier_state *state;
4356         struct bpf_insn *insns = env->prog->insnsi;
4357         struct bpf_reg_state *regs;
4358         int insn_cnt = env->prog->len, i;
4359         int insn_idx, prev_insn_idx = 0;
4360         int insn_processed = 0;
4361         bool do_print_state = false;
4362
4363         state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
4364         if (!state)
4365                 return -ENOMEM;
4366         state->curframe = 0;
4367         state->parent = NULL;
4368         state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
4369         if (!state->frame[0]) {
4370                 kfree(state);
4371                 return -ENOMEM;
4372         }
4373         env->cur_state = state;
4374         init_func_state(env, state->frame[0],
4375                         BPF_MAIN_FUNC /* callsite */,
4376                         0 /* frameno */,
4377                         0 /* subprogno, zero == main subprog */);
4378         insn_idx = 0;
4379         for (;;) {
4380                 struct bpf_insn *insn;
4381                 u8 class;
4382                 int err;
4383
4384                 if (insn_idx >= insn_cnt) {
4385                         verbose(env, "invalid insn idx %d insn_cnt %d\n",
4386                                 insn_idx, insn_cnt);
4387                         return -EFAULT;
4388                 }
4389
4390                 insn = &insns[insn_idx];
4391                 class = BPF_CLASS(insn->code);
4392
4393                 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
4394                         verbose(env,
4395                                 "BPF program is too large. Processed %d insn\n",
4396                                 insn_processed);
4397                         return -E2BIG;
4398                 }
4399
4400                 err = is_state_visited(env, insn_idx);
4401                 if (err < 0)
4402                         return err;
4403                 if (err == 1) {
4404                         /* found equivalent state, can prune the search */
4405                         if (env->log.level) {
4406                                 if (do_print_state)
4407                                         verbose(env, "\nfrom %d to %d: safe\n",
4408                                                 prev_insn_idx, insn_idx);
4409                                 else
4410                                         verbose(env, "%d: safe\n", insn_idx);
4411                         }
4412                         goto process_bpf_exit;
4413                 }
4414
4415                 if (need_resched())
4416                         cond_resched();
4417
4418                 if (env->log.level > 1 || (env->log.level && do_print_state)) {
4419                         if (env->log.level > 1)
4420                                 verbose(env, "%d:", insn_idx);
4421                         else
4422                                 verbose(env, "\nfrom %d to %d:",
4423                                         prev_insn_idx, insn_idx);
4424                         print_verifier_state(env, state->frame[state->curframe]);
4425                         do_print_state = false;
4426                 }
4427
4428                 if (env->log.level) {
4429                         verbose(env, "%d: ", insn_idx);
4430                         print_bpf_insn(verbose, env, insn,
4431                                        env->allow_ptr_leaks);
4432                 }
4433
4434                 err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
4435                 if (err)
4436                         return err;
4437
4438                 regs = cur_regs(env);
4439                 env->insn_aux_data[insn_idx].seen = true;
4440                 if (class == BPF_ALU || class == BPF_ALU64) {
4441                         err = check_alu_op(env, insn);
4442                         if (err)
4443                                 return err;
4444
4445                 } else if (class == BPF_LDX) {
4446                         enum bpf_reg_type *prev_src_type, src_reg_type;
4447
4448                         /* check for reserved fields is already done */
4449
4450                         /* check src operand */
4451                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
4452                         if (err)
4453                                 return err;
4454
4455                         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
4456                         if (err)
4457                                 return err;
4458
4459                         src_reg_type = regs[insn->src_reg].type;
4460
4461                         /* check that memory (src_reg + off) is readable,
4462                          * the state of dst_reg will be updated by this func
4463                          */
4464                         err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
4465                                                BPF_SIZE(insn->code), BPF_READ,
4466                                                insn->dst_reg);
4467                         if (err)
4468                                 return err;
4469
4470                         prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
4471
4472                         if (*prev_src_type == NOT_INIT) {
4473                                 /* saw a valid insn
4474                                  * dst_reg = *(u32 *)(src_reg + off)
4475                                  * save type to validate intersecting paths
4476                                  */
4477                                 *prev_src_type = src_reg_type;
4478
4479                         } else if (src_reg_type != *prev_src_type &&
4480                                    (src_reg_type == PTR_TO_CTX ||
4481                                     *prev_src_type == PTR_TO_CTX)) {
4482                                 /* ABuser program is trying to use the same insn
4483                                  * dst_reg = *(u32*) (src_reg + off)
4484                                  * with different pointer types:
4485                                  * src_reg == ctx in one branch and
4486                                  * src_reg == stack|map in some other branch.
4487                                  * Reject it.
4488                                  */
4489                                 verbose(env, "same insn cannot be used with different pointers\n");
4490                                 return -EINVAL;
4491                         }
4492
4493                 } else if (class == BPF_STX) {
4494                         enum bpf_reg_type *prev_dst_type, dst_reg_type;
4495
4496                         if (BPF_MODE(insn->code) == BPF_XADD) {
4497                                 err = check_xadd(env, insn_idx, insn);
4498                                 if (err)
4499                                         return err;
4500                                 insn_idx++;
4501                                 continue;
4502                         }
4503
4504                         /* check src1 operand */
4505                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
4506                         if (err)
4507                                 return err;
4508                         /* check src2 operand */
4509                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4510                         if (err)
4511                                 return err;
4512
4513                         dst_reg_type = regs[insn->dst_reg].type;
4514
4515                         /* check that memory (dst_reg + off) is writeable */
4516                         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4517                                                BPF_SIZE(insn->code), BPF_WRITE,
4518                                                insn->src_reg);
4519                         if (err)
4520                                 return err;
4521
4522                         prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
4523
4524                         if (*prev_dst_type == NOT_INIT) {
4525                                 *prev_dst_type = dst_reg_type;
4526                         } else if (dst_reg_type != *prev_dst_type &&
4527                                    (dst_reg_type == PTR_TO_CTX ||
4528                                     *prev_dst_type == PTR_TO_CTX)) {
4529                                 verbose(env, "same insn cannot be used with different pointers\n");
4530                                 return -EINVAL;
4531                         }
4532
4533                 } else if (class == BPF_ST) {
4534                         if (BPF_MODE(insn->code) != BPF_MEM ||
4535                             insn->src_reg != BPF_REG_0) {
4536                                 verbose(env, "BPF_ST uses reserved fields\n");
4537                                 return -EINVAL;
4538                         }
4539                         /* check src operand */
4540                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4541                         if (err)
4542                                 return err;
4543
4544                         /* check that memory (dst_reg + off) is writeable */
4545                         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4546                                                BPF_SIZE(insn->code), BPF_WRITE,
4547                                                -1);
4548                         if (err)
4549                                 return err;
4550
4551                 } else if (class == BPF_JMP) {
4552                         u8 opcode = BPF_OP(insn->code);
4553
4554                         if (opcode == BPF_CALL) {
4555                                 if (BPF_SRC(insn->code) != BPF_K ||
4556                                     insn->off != 0 ||
4557                                     (insn->src_reg != BPF_REG_0 &&
4558                                      insn->src_reg != BPF_PSEUDO_CALL) ||
4559                                     insn->dst_reg != BPF_REG_0) {
4560                                         verbose(env, "BPF_CALL uses reserved fields\n");
4561                                         return -EINVAL;
4562                                 }
4563
4564                                 if (insn->src_reg == BPF_PSEUDO_CALL)
4565                                         err = check_func_call(env, insn, &insn_idx);
4566                                 else
4567                                         err = check_helper_call(env, insn->imm, insn_idx);
4568                                 if (err)
4569                                         return err;
4570
4571                         } else if (opcode == BPF_JA) {
4572                                 if (BPF_SRC(insn->code) != BPF_K ||
4573                                     insn->imm != 0 ||
4574                                     insn->src_reg != BPF_REG_0 ||
4575                                     insn->dst_reg != BPF_REG_0) {
4576                                         verbose(env, "BPF_JA uses reserved fields\n");
4577                                         return -EINVAL;
4578                                 }
4579
4580                                 insn_idx += insn->off + 1;
4581                                 continue;
4582
4583                         } else if (opcode == BPF_EXIT) {
4584                                 if (BPF_SRC(insn->code) != BPF_K ||
4585                                     insn->imm != 0 ||
4586                                     insn->src_reg != BPF_REG_0 ||
4587                                     insn->dst_reg != BPF_REG_0) {
4588                                         verbose(env, "BPF_EXIT uses reserved fields\n");
4589                                         return -EINVAL;
4590                                 }
4591
4592                                 if (state->curframe) {
4593                                         /* exit from nested function */
4594                                         prev_insn_idx = insn_idx;
4595                                         err = prepare_func_exit(env, &insn_idx);
4596                                         if (err)
4597                                                 return err;
4598                                         do_print_state = true;
4599                                         continue;
4600                                 }
4601
4602                                 /* eBPF calling convetion is such that R0 is used
4603                                  * to return the value from eBPF program.
4604                                  * Make sure that it's readable at this time
4605                                  * of bpf_exit, which means that program wrote
4606                                  * something into it earlier
4607                                  */
4608                                 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
4609                                 if (err)
4610                                         return err;
4611
4612                                 if (is_pointer_value(env, BPF_REG_0)) {
4613                                         verbose(env, "R0 leaks addr as return value\n");
4614                                         return -EACCES;
4615                                 }
4616
4617                                 err = check_return_code(env);
4618                                 if (err)
4619                                         return err;
4620 process_bpf_exit:
4621                                 err = pop_stack(env, &prev_insn_idx, &insn_idx);
4622                                 if (err < 0) {
4623                                         if (err != -ENOENT)
4624                                                 return err;
4625                                         break;
4626                                 } else {
4627                                         do_print_state = true;
4628                                         continue;
4629                                 }
4630                         } else {
4631                                 err = check_cond_jmp_op(env, insn, &insn_idx);
4632                                 if (err)
4633                                         return err;
4634                         }
4635                 } else if (class == BPF_LD) {
4636                         u8 mode = BPF_MODE(insn->code);
4637
4638                         if (mode == BPF_ABS || mode == BPF_IND) {
4639                                 err = check_ld_abs(env, insn);
4640                                 if (err)
4641                                         return err;
4642
4643                         } else if (mode == BPF_IMM) {
4644                                 err = check_ld_imm(env, insn);
4645                                 if (err)
4646                                         return err;
4647
4648                                 insn_idx++;
4649                                 env->insn_aux_data[insn_idx].seen = true;
4650                         } else {
4651                                 verbose(env, "invalid BPF_LD mode\n");
4652                                 return -EINVAL;
4653                         }
4654                 } else {
4655                         verbose(env, "unknown insn class %d\n", class);
4656                         return -EINVAL;
4657                 }
4658
4659                 insn_idx++;
4660         }
4661
4662         verbose(env, "processed %d insns, stack depth ", insn_processed);
4663         for (i = 0; i < env->subprog_cnt + 1; i++) {
4664                 u32 depth = env->subprog_stack_depth[i];
4665
4666                 verbose(env, "%d", depth);
4667                 if (i + 1 < env->subprog_cnt + 1)
4668                         verbose(env, "+");
4669         }
4670         verbose(env, "\n");
4671         env->prog->aux->stack_depth = env->subprog_stack_depth[0];
4672         return 0;
4673 }
4674
4675 static int check_map_prealloc(struct bpf_map *map)
4676 {
4677         return (map->map_type != BPF_MAP_TYPE_HASH &&
4678                 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
4679                 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
4680                 !(map->map_flags & BPF_F_NO_PREALLOC);
4681 }
4682
4683 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
4684                                         struct bpf_map *map,
4685                                         struct bpf_prog *prog)
4686
4687 {
4688         /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
4689          * preallocated hash maps, since doing memory allocation
4690          * in overflow_handler can crash depending on where nmi got
4691          * triggered.
4692          */
4693         if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
4694                 if (!check_map_prealloc(map)) {
4695                         verbose(env, "perf_event programs can only use preallocated hash map\n");
4696                         return -EINVAL;
4697                 }
4698                 if (map->inner_map_meta &&
4699                     !check_map_prealloc(map->inner_map_meta)) {
4700                         verbose(env, "perf_event programs can only use preallocated inner hash map\n");
4701                         return -EINVAL;
4702                 }
4703         }
4704         return 0;
4705 }
4706
4707 /* look for pseudo eBPF instructions that access map FDs and
4708  * replace them with actual map pointers
4709  */
4710 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
4711 {
4712         struct bpf_insn *insn = env->prog->insnsi;
4713         int insn_cnt = env->prog->len;
4714         int i, j, err;
4715
4716         err = bpf_prog_calc_tag(env->prog);
4717         if (err)
4718                 return err;
4719
4720         for (i = 0; i < insn_cnt; i++, insn++) {
4721                 if (BPF_CLASS(insn->code) == BPF_LDX &&
4722                     (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
4723                         verbose(env, "BPF_LDX uses reserved fields\n");
4724                         return -EINVAL;
4725                 }
4726
4727                 if (BPF_CLASS(insn->code) == BPF_STX &&
4728                     ((BPF_MODE(insn->code) != BPF_MEM &&
4729                       BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
4730                         verbose(env, "BPF_STX uses reserved fields\n");
4731                         return -EINVAL;
4732                 }
4733
4734                 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
4735                         struct bpf_map *map;
4736                         struct fd f;
4737
4738                         if (i == insn_cnt - 1 || insn[1].code != 0 ||
4739                             insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
4740                             insn[1].off != 0) {
4741                                 verbose(env, "invalid bpf_ld_imm64 insn\n");
4742                                 return -EINVAL;
4743                         }
4744
4745                         if (insn->src_reg == 0)
4746                                 /* valid generic load 64-bit imm */
4747                                 goto next_insn;
4748
4749                         if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
4750                                 verbose(env,
4751                                         "unrecognized bpf_ld_imm64 insn\n");
4752                                 return -EINVAL;
4753                         }
4754
4755                         f = fdget(insn->imm);
4756                         map = __bpf_map_get(f);
4757                         if (IS_ERR(map)) {
4758                                 verbose(env, "fd %d is not pointing to valid bpf_map\n",
4759                                         insn->imm);
4760                                 return PTR_ERR(map);
4761                         }
4762
4763                         err = check_map_prog_compatibility(env, map, env->prog);
4764                         if (err) {
4765                                 fdput(f);
4766                                 return err;
4767                         }
4768
4769                         /* store map pointer inside BPF_LD_IMM64 instruction */
4770                         insn[0].imm = (u32) (unsigned long) map;
4771                         insn[1].imm = ((u64) (unsigned long) map) >> 32;
4772
4773                         /* check whether we recorded this map already */
4774                         for (j = 0; j < env->used_map_cnt; j++)
4775                                 if (env->used_maps[j] == map) {
4776                                         fdput(f);
4777                                         goto next_insn;
4778                                 }
4779
4780                         if (env->used_map_cnt >= MAX_USED_MAPS) {
4781                                 fdput(f);
4782                                 return -E2BIG;
4783                         }
4784
4785                         /* hold the map. If the program is rejected by verifier,
4786                          * the map will be released by release_maps() or it
4787                          * will be used by the valid program until it's unloaded
4788                          * and all maps are released in free_bpf_prog_info()
4789                          */
4790                         map = bpf_map_inc(map, false);
4791                         if (IS_ERR(map)) {
4792                                 fdput(f);
4793                                 return PTR_ERR(map);
4794                         }
4795                         env->used_maps[env->used_map_cnt++] = map;
4796
4797                         fdput(f);
4798 next_insn:
4799                         insn++;
4800                         i++;
4801                 }
4802         }
4803
4804         /* now all pseudo BPF_LD_IMM64 instructions load valid
4805          * 'struct bpf_map *' into a register instead of user map_fd.
4806          * These pointers will be used later by verifier to validate map access.
4807          */
4808         return 0;
4809 }
4810
4811 /* drop refcnt of maps used by the rejected program */
4812 static void release_maps(struct bpf_verifier_env *env)
4813 {
4814         int i;
4815
4816         for (i = 0; i < env->used_map_cnt; i++)
4817                 bpf_map_put(env->used_maps[i]);
4818 }
4819
4820 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
4821 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
4822 {
4823         struct bpf_insn *insn = env->prog->insnsi;
4824         int insn_cnt = env->prog->len;
4825         int i;
4826
4827         for (i = 0; i < insn_cnt; i++, insn++)
4828                 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
4829                         insn->src_reg = 0;
4830 }
4831
4832 /* single env->prog->insni[off] instruction was replaced with the range
4833  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
4834  * [0, off) and [off, end) to new locations, so the patched range stays zero
4835  */
4836 static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
4837                                 u32 off, u32 cnt)
4838 {
4839         struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
4840         int i;
4841
4842         if (cnt == 1)
4843                 return 0;
4844         new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
4845         if (!new_data)
4846                 return -ENOMEM;
4847         memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
4848         memcpy(new_data + off + cnt - 1, old_data + off,
4849                sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
4850         for (i = off; i < off + cnt - 1; i++)
4851                 new_data[i].seen = true;
4852         env->insn_aux_data = new_data;
4853         vfree(old_data);
4854         return 0;
4855 }
4856
4857 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
4858 {
4859         int i;
4860
4861         if (len == 1)
4862                 return;
4863         for (i = 0; i < env->subprog_cnt; i++) {
4864                 if (env->subprog_starts[i] < off)
4865                         continue;
4866                 env->subprog_starts[i] += len - 1;
4867         }
4868 }
4869
4870 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
4871                                             const struct bpf_insn *patch, u32 len)
4872 {
4873         struct bpf_prog *new_prog;
4874
4875         new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
4876         if (!new_prog)
4877                 return NULL;
4878         if (adjust_insn_aux_data(env, new_prog->len, off, len))
4879                 return NULL;
4880         adjust_subprog_starts(env, off, len);
4881         return new_prog;
4882 }
4883
4884 /* The verifier does more data flow analysis than llvm and will not explore
4885  * branches that are dead at run time. Malicious programs can have dead code
4886  * too. Therefore replace all dead at-run-time code with nops.
4887  */
4888 static void sanitize_dead_code(struct bpf_verifier_env *env)
4889 {
4890         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
4891         struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
4892         struct bpf_insn *insn = env->prog->insnsi;
4893         const int insn_cnt = env->prog->len;
4894         int i;
4895
4896         for (i = 0; i < insn_cnt; i++) {
4897                 if (aux_data[i].seen)
4898                         continue;
4899                 memcpy(insn + i, &nop, sizeof(nop));
4900         }
4901 }
4902
4903 /* convert load instructions that access fields of 'struct __sk_buff'
4904  * into sequence of instructions that access fields of 'struct sk_buff'
4905  */
4906 static int convert_ctx_accesses(struct bpf_verifier_env *env)
4907 {
4908         const struct bpf_verifier_ops *ops = env->ops;
4909         int i, cnt, size, ctx_field_size, delta = 0;
4910         const int insn_cnt = env->prog->len;
4911         struct bpf_insn insn_buf[16], *insn;
4912         struct bpf_prog *new_prog;
4913         enum bpf_access_type type;
4914         bool is_narrower_load;
4915         u32 target_size;
4916
4917         if (ops->gen_prologue) {
4918                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
4919                                         env->prog);
4920                 if (cnt >= ARRAY_SIZE(insn_buf)) {
4921                         verbose(env, "bpf verifier is misconfigured\n");
4922                         return -EINVAL;
4923                 } else if (cnt) {
4924                         new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
4925                         if (!new_prog)
4926                                 return -ENOMEM;
4927
4928                         env->prog = new_prog;
4929                         delta += cnt - 1;
4930                 }
4931         }
4932
4933         if (!ops->convert_ctx_access)
4934                 return 0;
4935
4936         insn = env->prog->insnsi + delta;
4937
4938         for (i = 0; i < insn_cnt; i++, insn++) {
4939                 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
4940                     insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
4941                     insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
4942                     insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
4943                         type = BPF_READ;
4944                 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
4945                          insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
4946                          insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
4947                          insn->code == (BPF_STX | BPF_MEM | BPF_DW))
4948                         type = BPF_WRITE;
4949                 else
4950                         continue;
4951
4952                 if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
4953                         continue;
4954
4955                 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
4956                 size = BPF_LDST_BYTES(insn);
4957
4958                 /* If the read access is a narrower load of the field,
4959                  * convert to a 4/8-byte load, to minimum program type specific
4960                  * convert_ctx_access changes. If conversion is successful,
4961                  * we will apply proper mask to the result.
4962                  */
4963                 is_narrower_load = size < ctx_field_size;
4964                 if (is_narrower_load) {
4965                         u32 off = insn->off;
4966                         u8 size_code;
4967
4968                         if (type == BPF_WRITE) {
4969                                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
4970                                 return -EINVAL;
4971                         }
4972
4973                         size_code = BPF_H;
4974                         if (ctx_field_size == 4)
4975                                 size_code = BPF_W;
4976                         else if (ctx_field_size == 8)
4977                                 size_code = BPF_DW;
4978
4979                         insn->off = off & ~(ctx_field_size - 1);
4980                         insn->code = BPF_LDX | BPF_MEM | size_code;
4981                 }
4982
4983                 target_size = 0;
4984                 cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
4985                                               &target_size);
4986                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
4987                     (ctx_field_size && !target_size)) {
4988                         verbose(env, "bpf verifier is misconfigured\n");
4989                         return -EINVAL;
4990                 }
4991
4992                 if (is_narrower_load && size < target_size) {
4993                         if (ctx_field_size <= 4)
4994                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
4995                                                                 (1 << size * 8) - 1);
4996                         else
4997                                 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
4998                                                                 (1 << size * 8) - 1);
4999                 }
5000
5001                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
5002                 if (!new_prog)
5003                         return -ENOMEM;
5004
5005                 delta += cnt - 1;
5006
5007                 /* keep walking new program and skip insns we just inserted */
5008                 env->prog = new_prog;
5009                 insn      = new_prog->insnsi + i + delta;
5010         }
5011
5012         return 0;
5013 }
5014
5015 static int jit_subprogs(struct bpf_verifier_env *env)
5016 {
5017         struct bpf_prog *prog = env->prog, **func, *tmp;
5018         int i, j, subprog_start, subprog_end = 0, len, subprog;
5019         struct bpf_insn *insn = prog->insnsi;
5020         void *old_bpf_func;
5021         int err = -ENOMEM;
5022
5023         if (env->subprog_cnt == 0)
5024                 return 0;
5025
5026         for (i = 0; i < prog->len; i++, insn++) {
5027                 if (insn->code != (BPF_JMP | BPF_CALL) ||
5028                     insn->src_reg != BPF_PSEUDO_CALL)
5029                         continue;
5030                 subprog = find_subprog(env, i + insn->imm + 1);
5031                 if (subprog < 0) {
5032                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5033                                   i + insn->imm + 1);
5034                         return -EFAULT;
5035                 }
5036                 /* temporarily remember subprog id inside insn instead of
5037                  * aux_data, since next loop will split up all insns into funcs
5038                  */
5039                 insn->off = subprog + 1;
5040                 /* remember original imm in case JIT fails and fallback
5041                  * to interpreter will be needed
5042                  */
5043                 env->insn_aux_data[i].call_imm = insn->imm;
5044                 /* point imm to __bpf_call_base+1 from JITs point of view */
5045                 insn->imm = 1;
5046         }
5047
5048         func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
5049         if (!func)
5050                 return -ENOMEM;
5051
5052         for (i = 0; i <= env->subprog_cnt; i++) {
5053                 subprog_start = subprog_end;
5054                 if (env->subprog_cnt == i)
5055                         subprog_end = prog->len;
5056                 else
5057                         subprog_end = env->subprog_starts[i];
5058
5059                 len = subprog_end - subprog_start;
5060                 func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
5061                 if (!func[i])
5062                         goto out_free;
5063                 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
5064                        len * sizeof(struct bpf_insn));
5065                 func[i]->len = len;
5066                 func[i]->is_func = 1;
5067                 /* Use bpf_prog_F_tag to indicate functions in stack traces.
5068                  * Long term would need debug info to populate names
5069                  */
5070                 func[i]->aux->name[0] = 'F';
5071                 func[i]->aux->stack_depth = env->subprog_stack_depth[i];
5072                 func[i]->jit_requested = 1;
5073                 func[i] = bpf_int_jit_compile(func[i]);
5074                 if (!func[i]->jited) {
5075                         err = -ENOTSUPP;
5076                         goto out_free;
5077                 }
5078                 cond_resched();
5079         }
5080         /* at this point all bpf functions were successfully JITed
5081          * now populate all bpf_calls with correct addresses and
5082          * run last pass of JIT
5083          */
5084         for (i = 0; i <= env->subprog_cnt; i++) {
5085                 insn = func[i]->insnsi;
5086                 for (j = 0; j < func[i]->len; j++, insn++) {
5087                         if (insn->code != (BPF_JMP | BPF_CALL) ||
5088                             insn->src_reg != BPF_PSEUDO_CALL)
5089                                 continue;
5090                         subprog = insn->off;
5091                         insn->off = 0;
5092                         insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
5093                                 func[subprog]->bpf_func -
5094                                 __bpf_call_base;
5095                 }
5096         }
5097         for (i = 0; i <= env->subprog_cnt; i++) {
5098                 old_bpf_func = func[i]->bpf_func;
5099                 tmp = bpf_int_jit_compile(func[i]);
5100                 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
5101                         verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
5102                         err = -EFAULT;
5103                         goto out_free;
5104                 }
5105                 cond_resched();
5106         }
5107
5108         /* finally lock prog and jit images for all functions and
5109          * populate kallsysm
5110          */
5111         for (i = 0; i <= env->subprog_cnt; i++) {
5112                 bpf_prog_lock_ro(func[i]);
5113                 bpf_prog_kallsyms_add(func[i]);
5114         }
5115         prog->jited = 1;
5116         prog->bpf_func = func[0]->bpf_func;
5117         prog->aux->func = func;
5118         prog->aux->func_cnt = env->subprog_cnt + 1;
5119         return 0;
5120 out_free:
5121         for (i = 0; i <= env->subprog_cnt; i++)
5122                 if (func[i])
5123                         bpf_jit_free(func[i]);
5124         kfree(func);
5125         /* cleanup main prog to be interpreted */
5126         prog->jit_requested = 0;
5127         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
5128                 if (insn->code != (BPF_JMP | BPF_CALL) ||
5129                     insn->src_reg != BPF_PSEUDO_CALL)
5130                         continue;
5131                 insn->off = 0;
5132                 insn->imm = env->insn_aux_data[i].call_imm;
5133         }
5134         return err;
5135 }
5136
5137 static int fixup_call_args(struct bpf_verifier_env *env)
5138 {
5139         struct bpf_prog *prog = env->prog;
5140         struct bpf_insn *insn = prog->insnsi;
5141         int i, depth;
5142
5143         if (env->prog->jit_requested)
5144                 if (jit_subprogs(env) == 0)
5145                         return 0;
5146
5147         for (i = 0; i < prog->len; i++, insn++) {
5148                 if (insn->code != (BPF_JMP | BPF_CALL) ||
5149                     insn->src_reg != BPF_PSEUDO_CALL)
5150                         continue;
5151                 depth = get_callee_stack_depth(env, insn, i);
5152                 if (depth < 0)
5153                         return depth;
5154                 bpf_patch_call_args(insn, depth);
5155         }
5156         return 0;
5157 }
5158
5159 /* fixup insn->imm field of bpf_call instructions
5160  * and inline eligible helpers as explicit sequence of BPF instructions
5161  *
5162  * this function is called after eBPF program passed verification
5163  */
5164 static int fixup_bpf_calls(struct bpf_verifier_env *env)
5165 {
5166         struct bpf_prog *prog = env->prog;
5167         struct bpf_insn *insn = prog->insnsi;
5168         const struct bpf_func_proto *fn;
5169         const int insn_cnt = prog->len;
5170         struct bpf_insn insn_buf[16];
5171         struct bpf_prog *new_prog;
5172         struct bpf_map *map_ptr;
5173         int i, cnt, delta = 0;
5174
5175         for (i = 0; i < insn_cnt; i++, insn++) {
5176                 if (insn->code != (BPF_JMP | BPF_CALL))
5177                         continue;
5178                 if (insn->src_reg == BPF_PSEUDO_CALL)
5179                         continue;
5180
5181                 if (insn->imm == BPF_FUNC_get_route_realm)
5182                         prog->dst_needed = 1;
5183                 if (insn->imm == BPF_FUNC_get_prandom_u32)
5184                         bpf_user_rnd_init_once();
5185                 if (insn->imm == BPF_FUNC_override_return)
5186                         prog->kprobe_override = 1;
5187                 if (insn->imm == BPF_FUNC_tail_call) {
5188                         /* If we tail call into other programs, we
5189                          * cannot make any assumptions since they can
5190                          * be replaced dynamically during runtime in
5191                          * the program array.
5192                          */
5193                         prog->cb_access = 1;
5194                         env->prog->aux->stack_depth = MAX_BPF_STACK;
5195
5196                         /* mark bpf_tail_call as different opcode to avoid
5197                          * conditional branch in the interpeter for every normal
5198                          * call and to prevent accidental JITing by JIT compiler
5199                          * that doesn't support bpf_tail_call yet
5200                          */
5201                         insn->imm = 0;
5202                         insn->code = BPF_JMP | BPF_TAIL_CALL;
5203                         continue;
5204                 }
5205
5206                 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
5207                  * handlers are currently limited to 64 bit only.
5208                  */
5209                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
5210                     insn->imm == BPF_FUNC_map_lookup_elem) {
5211                         map_ptr = env->insn_aux_data[i + delta].map_ptr;
5212                         if (map_ptr == BPF_MAP_PTR_POISON ||
5213                             !map_ptr->ops->map_gen_lookup)
5214                                 goto patch_call_imm;
5215
5216                         cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
5217                         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
5218                                 verbose(env, "bpf verifier is misconfigured\n");
5219                                 return -EINVAL;
5220                         }
5221
5222                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
5223                                                        cnt);
5224                         if (!new_prog)
5225                                 return -ENOMEM;
5226
5227                         delta += cnt - 1;
5228
5229                         /* keep walking new program and skip insns we just inserted */
5230                         env->prog = prog = new_prog;
5231                         insn      = new_prog->insnsi + i + delta;
5232                         continue;
5233                 }
5234
5235                 if (insn->imm == BPF_FUNC_redirect_map) {
5236                         /* Note, we cannot use prog directly as imm as subsequent
5237                          * rewrites would still change the prog pointer. The only
5238                          * stable address we can use is aux, which also works with
5239                          * prog clones during blinding.
5240                          */
5241                         u64 addr = (unsigned long)prog->aux;
5242                         struct bpf_insn r4_ld[] = {
5243                                 BPF_LD_IMM64(BPF_REG_4, addr),
5244                                 *insn,
5245                         };
5246                         cnt = ARRAY_SIZE(r4_ld);
5247
5248                         new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
5249                         if (!new_prog)
5250                                 return -ENOMEM;
5251
5252                         delta    += cnt - 1;
5253                         env->prog = prog = new_prog;
5254                         insn      = new_prog->insnsi + i + delta;
5255                 }
5256 patch_call_imm:
5257                 fn = env->ops->get_func_proto(insn->imm);
5258                 /* all functions that have prototype and verifier allowed
5259                  * programs to call them, must be real in-kernel functions
5260                  */
5261                 if (!fn->func) {
5262                         verbose(env,
5263                                 "kernel subsystem misconfigured func %s#%d\n",
5264                                 func_id_name(insn->imm), insn->imm);
5265                         return -EFAULT;
5266                 }
5267                 insn->imm = fn->func - __bpf_call_base;
5268         }
5269
5270         return 0;
5271 }
5272
5273 static void free_states(struct bpf_verifier_env *env)
5274 {
5275         struct bpf_verifier_state_list *sl, *sln;
5276         int i;
5277
5278         if (!env->explored_states)
5279                 return;
5280
5281         for (i = 0; i < env->prog->len; i++) {
5282                 sl = env->explored_states[i];
5283
5284                 if (sl)
5285                         while (sl != STATE_LIST_MARK) {
5286                                 sln = sl->next;
5287                                 free_verifier_state(&sl->state, false);
5288                                 kfree(sl);
5289                                 sl = sln;
5290                         }
5291         }
5292
5293         kfree(env->explored_states);
5294 }
5295
5296 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
5297 {
5298         struct bpf_verifier_env *env;
5299         struct bpf_verifer_log *log;
5300         int ret = -EINVAL;
5301
5302         /* no program is valid */
5303         if (ARRAY_SIZE(bpf_verifier_ops) == 0)
5304                 return -EINVAL;
5305
5306         /* 'struct bpf_verifier_env' can be global, but since it's not small,
5307          * allocate/free it every time bpf_check() is called
5308          */
5309         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
5310         if (!env)
5311                 return -ENOMEM;
5312         log = &env->log;
5313
5314         env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
5315                                      (*prog)->len);
5316         ret = -ENOMEM;
5317         if (!env->insn_aux_data)
5318                 goto err_free_env;
5319         env->prog = *prog;
5320         env->ops = bpf_verifier_ops[env->prog->type];
5321
5322         /* grab the mutex to protect few globals used by verifier */
5323         mutex_lock(&bpf_verifier_lock);
5324
5325         if (attr->log_level || attr->log_buf || attr->log_size) {
5326                 /* user requested verbose verifier output
5327                  * and supplied buffer to store the verification trace
5328                  */
5329                 log->level = attr->log_level;
5330                 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
5331                 log->len_total = attr->log_size;
5332
5333                 ret = -EINVAL;
5334                 /* log attributes have to be sane */
5335                 if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
5336                     !log->level || !log->ubuf)
5337                         goto err_unlock;
5338         }
5339
5340         env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
5341         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
5342                 env->strict_alignment = true;
5343
5344         if (env->prog->aux->offload) {
5345                 ret = bpf_prog_offload_verifier_prep(env);
5346                 if (ret)
5347                         goto err_unlock;
5348         }
5349
5350         ret = replace_map_fd_with_map_ptr(env);
5351         if (ret < 0)
5352                 goto skip_full_check;
5353
5354         env->explored_states = kcalloc(env->prog->len,
5355                                        sizeof(struct bpf_verifier_state_list *),
5356                                        GFP_USER);
5357         ret = -ENOMEM;
5358         if (!env->explored_states)
5359                 goto skip_full_check;
5360
5361         env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
5362
5363         ret = check_cfg(env);
5364         if (ret < 0)
5365                 goto skip_full_check;
5366
5367         ret = do_check(env);
5368         if (env->cur_state) {
5369                 free_verifier_state(env->cur_state, true);
5370                 env->cur_state = NULL;
5371         }
5372
5373 skip_full_check:
5374         while (!pop_stack(env, NULL, NULL));
5375         free_states(env);
5376
5377         if (ret == 0)
5378                 sanitize_dead_code(env);
5379
5380         if (ret == 0)
5381                 /* program is valid, convert *(u32*)(ctx + off) accesses */
5382                 ret = convert_ctx_accesses(env);
5383
5384         if (ret == 0)
5385                 ret = fixup_bpf_calls(env);
5386
5387         if (ret == 0)
5388                 ret = fixup_call_args(env);
5389
5390         if (log->level && bpf_verifier_log_full(log))
5391                 ret = -ENOSPC;
5392         if (log->level && !log->ubuf) {
5393                 ret = -EFAULT;
5394                 goto err_release_maps;
5395         }
5396
5397         if (ret == 0 && env->used_map_cnt) {
5398                 /* if program passed verifier, update used_maps in bpf_prog_info */
5399                 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
5400                                                           sizeof(env->used_maps[0]),
5401                                                           GFP_KERNEL);
5402
5403                 if (!env->prog->aux->used_maps) {
5404                         ret = -ENOMEM;
5405                         goto err_release_maps;
5406                 }
5407
5408                 memcpy(env->prog->aux->used_maps, env->used_maps,
5409                        sizeof(env->used_maps[0]) * env->used_map_cnt);
5410                 env->prog->aux->used_map_cnt = env->used_map_cnt;
5411
5412                 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
5413                  * bpf_ld_imm64 instructions
5414                  */
5415                 convert_pseudo_ld_imm64(env);
5416         }
5417
5418 err_release_maps:
5419         if (!env->prog->aux->used_maps)
5420                 /* if we didn't copy map pointers into bpf_prog_info, release
5421                  * them now. Otherwise free_bpf_prog_info() will release them.
5422                  */
5423                 release_maps(env);
5424         *prog = env->prog;
5425 err_unlock:
5426         mutex_unlock(&bpf_verifier_lock);
5427         vfree(env->insn_aux_data);
5428 err_free_env:
5429         kfree(env);
5430         return ret;
5431 }