1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 * Copyright (c) 2016 Facebook
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13 #include <linux/kernel.h>
14 #include <linux/types.h>
15 #include <linux/slab.h>
16 #include <linux/bpf.h>
17 #include <linux/bpf_verifier.h>
18 #include <linux/filter.h>
19 #include <net/netlink.h>
20 #include <linux/file.h>
21 #include <linux/vmalloc.h>
22 #include <linux/stringify.h>
23
24 #include "disasm.h"
25
26 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
27 #define BPF_PROG_TYPE(_id, _name) \
28 [_id] = & _name ## _verifier_ops,
29 #define BPF_MAP_TYPE(_id, _ops)
30 #include <linux/bpf_types.h>
31 #undef BPF_PROG_TYPE
32 #undef BPF_MAP_TYPE
33 };
34
35 /* bpf_check() is a static code analyzer that walks eBPF program
36 * instruction by instruction and updates register/stack state.
37 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
38 *
39 * The first pass is depth-first-search to check that the program is a DAG.
40 * It rejects the following programs:
41 * - larger than BPF_MAXINSNS insns
42 * - if loop is present (detected via back-edge)
43 * - unreachable insns exist (shouldn't be a forest. program = one function)
44 * - out of bounds or malformed jumps
45 * The second pass is all possible path descent from the 1st insn.
46  * Since it's analyzing all paths through the program, the length of the
47  * analysis is limited to 128k insns (BPF_COMPLEXITY_LIMIT_INSNS), which may be
48  * hit even when the total number of insns is less than 4K but there are too many branches that change stack/regs.
49 * Number of 'branches to be analyzed' is limited to 1k
50 *
51 * On entry to each instruction, each register has a type, and the instruction
52 * changes the types of the registers depending on instruction semantics.
53 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
54 * copied to R1.
55 *
56 * All registers are 64-bit.
57 * R0 - return register
58 * R1-R5 argument passing registers
59 * R6-R9 callee saved registers
60 * R10 - frame pointer read-only
61 *
62 * At the start of BPF program the register R1 contains a pointer to bpf_context
63 * and has type PTR_TO_CTX.
64 *
65 * Verifier tracks arithmetic operations on pointers in case:
66 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
67 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
68  * 1st insn copies R10 (which has PTR_TO_STACK type) into R1
69 * and 2nd arithmetic instruction is pattern matched to recognize
70 * that it wants to construct a pointer to some element within stack.
71 * So after 2nd insn, the register R1 has type PTR_TO_STACK
72 * (and -20 constant is saved for further stack bounds checking).
73 * Meaning that this reg is a pointer to stack plus known immediate constant.
74 *
75 * Most of the time the registers have SCALAR_VALUE type, which
76 * means the register has some value, but it's not a valid pointer.
77 * (like pointer plus pointer becomes SCALAR_VALUE type)
78 *
79 * When verifier sees load or store instructions the type of base register
80 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
81 * types recognized by check_mem_access() function.
82 *
83 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
84 * and the range of [ptr, ptr + map's value_size) is accessible.
85 *
86 * registers used to pass values to function calls are checked against
87 * function argument constraints.
88 *
89 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
90 * It means that the register type passed to this function must be
91 * PTR_TO_STACK and it will be used inside the function as
92 * 'pointer to map element key'
93 *
94 * For example the argument constraints for bpf_map_lookup_elem():
95 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
96 * .arg1_type = ARG_CONST_MAP_PTR,
97 * .arg2_type = ARG_PTR_TO_MAP_KEY,
98 *
99  * ret_type says that this function returns 'pointer to map elem value or null'.
100  * The function expects the 1st argument to be a const pointer to 'struct bpf_map',
101  * and the 2nd argument to be a pointer to stack, which will be used inside
102 * the helper function as a pointer to map element key.
103 *
104 * On the kernel side the helper function looks like:
105 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
106 * {
107 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
108 * void *key = (void *) (unsigned long) r2;
109 * void *value;
110 *
111 * here kernel can access 'key' and 'map' pointers safely, knowing that
112 * [key, key + map->key_size) bytes are valid and were initialized on
113 * the stack of eBPF program.
114 * }
115 *
116 * Corresponding eBPF program may look like:
117  * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is PTR_TO_STACK (the frame pointer)
118  * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK with off=-4
119 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
120 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
121 * here verifier looks at prototype of map_lookup_elem() and sees:
122 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
123 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
124 *
125 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
126 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
127 * and were initialized prior to this call.
128 * If it's ok, then verifier allows this BPF_CALL insn and looks at
129 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
130 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
131  * returns either a pointer to map value or NULL.
132 *
133 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
134 * insn, the register holding that pointer in the true branch changes state to
135  * PTR_TO_MAP_VALUE and the same register changes state to a known-zero
136  * SCALAR_VALUE in the false branch. See check_cond_jmp_op().
137 *
138 * After the call R0 is set to return type of the function and registers R1-R5
139 * are set to NOT_INIT to indicate that they are no longer readable.
140 */
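
/* For illustration only (a hypothetical fragment, assuming a map with a
 * 4-byte key and at least 4 bytes of value; 'map_fd' is a placeholder):
 *
 *   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
 *   BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),          // init key on the stack
 *   BPF_LD_MAP_FD(BPF_REG_1, map_fd),            // R1 becomes CONST_PTR_TO_MAP
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),       // NULL check on R0
 *   BPF_MOV64_IMM(BPF_REG_1, 42),
 *   BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), // R0 is PTR_TO_MAP_VALUE here
 *   BPF_MOV64_IMM(BPF_REG_0, 0),
 *   BPF_EXIT_INSN(),
 *
 * In the fall-through (non-NULL) branch R0 has type PTR_TO_MAP_VALUE, so the
 * store is range checked against the map's value_size; in the taken branch
 * R0 is a known-zero scalar and any dereference of it would be rejected.
 */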
141
142 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
143 struct bpf_verifier_stack_elem {
144 /* verifier state is 'st'
145 * before processing instruction 'insn_idx'
146 * and after processing instruction 'prev_insn_idx'
147 */
148 struct bpf_verifier_state st;
149 int insn_idx;
150 int prev_insn_idx;
151 struct bpf_verifier_stack_elem *next;
152 };
153
154 #define BPF_COMPLEXITY_LIMIT_INSNS 131072
155 #define BPF_COMPLEXITY_LIMIT_STACK 1024
156
157 #define BPF_MAP_PTR_UNPRIV 1UL
158 #define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
159 POISON_POINTER_DELTA))
160 #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
161
162 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
163 {
164 return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
165 }
166
167 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
168 {
169 return aux->map_state & BPF_MAP_PTR_UNPRIV;
170 }
171
172 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
173 const struct bpf_map *map, bool unpriv)
174 {
175 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
176 unpriv |= bpf_map_ptr_unpriv(aux);
177 aux->map_state = (unsigned long)map |
178 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
179 }
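
/* The aux->map_state word packs a map pointer and an 'unpriv' flag into one
 * unsigned long: struct bpf_map allocations are at least pointer aligned, so
 * bit 0 of the address is always clear and can carry BPF_MAP_PTR_UNPRIV
 * (the BUILD_BUG_ON above checks the same holds for the poison sentinel).
 * With a hypothetical address for illustration:
 *
 *   map = 0xffff888012345600, unpriv = true
 *     aux->map_state          == 0xffff888012345601
 *     BPF_MAP_PTR(map_state)  == 0xffff888012345600
 *     bpf_map_ptr_unpriv(aux) == true
 *
 * BPF_MAP_PTR_POISON is a sentinel that is never dereferenced; it marks call
 * sites that more than one map can reach.
 */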
180
181 struct bpf_call_arg_meta {
182 struct bpf_map *map_ptr;
183 bool raw_mode;
184 bool pkt_access;
185 int regno;
186 int access_size;
187 };
188
189 static DEFINE_MUTEX(bpf_verifier_lock);
190
191 /* log_level controls verbosity level of eBPF verifier.
192 * verbose() is used to dump the verification trace to the log, so the user
193 * can figure out what's wrong with the program
194 */
195 static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
196 const char *fmt, ...)
197 {
198 struct bpf_verifer_log *log = &env->log;
199 unsigned int n;
200 va_list args;
201
202 if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
203 return;
204
205 va_start(args, fmt);
206 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
207 va_end(args);
208
209 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
210 "verifier log line truncated - local buffer too short\n");
211
212 n = min(log->len_total - log->len_used - 1, n);
213 log->kbuf[n] = '\0';
214
215 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
216 log->len_used += n;
217 else
218 log->ubuf = NULL;
219 }
220
221 static bool type_is_pkt_pointer(enum bpf_reg_type type)
222 {
223 return type == PTR_TO_PACKET ||
224 type == PTR_TO_PACKET_META;
225 }
226
227 /* string representation of 'enum bpf_reg_type' */
228 static const char * const reg_type_str[] = {
229 [NOT_INIT] = "?",
230 [SCALAR_VALUE] = "inv",
231 [PTR_TO_CTX] = "ctx",
232 [CONST_PTR_TO_MAP] = "map_ptr",
233 [PTR_TO_MAP_VALUE] = "map_value",
234 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
235 [PTR_TO_STACK] = "fp",
236 [PTR_TO_PACKET] = "pkt",
237 [PTR_TO_PACKET_META] = "pkt_meta",
238 [PTR_TO_PACKET_END] = "pkt_end",
239 };
240
241 static void print_verifier_state(struct bpf_verifier_env *env,
242 struct bpf_verifier_state *state)
243 {
244 struct bpf_reg_state *reg;
245 enum bpf_reg_type t;
246 int i;
247
248 for (i = 0; i < MAX_BPF_REG; i++) {
249 reg = &state->regs[i];
250 t = reg->type;
251 if (t == NOT_INIT)
252 continue;
253 verbose(env, " R%d=%s", i, reg_type_str[t]);
254 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
255 tnum_is_const(reg->var_off)) {
256 /* reg->off should be 0 for SCALAR_VALUE */
257 verbose(env, "%lld", reg->var_off.value + reg->off);
258 } else {
259 verbose(env, "(id=%d", reg->id);
260 if (t != SCALAR_VALUE)
261 verbose(env, ",off=%d", reg->off);
262 if (type_is_pkt_pointer(t))
263 verbose(env, ",r=%d", reg->range);
264 else if (t == CONST_PTR_TO_MAP ||
265 t == PTR_TO_MAP_VALUE ||
266 t == PTR_TO_MAP_VALUE_OR_NULL)
267 verbose(env, ",ks=%d,vs=%d",
268 reg->map_ptr->key_size,
269 reg->map_ptr->value_size);
270 if (tnum_is_const(reg->var_off)) {
271 /* Typically an immediate SCALAR_VALUE, but
272 * could be a pointer whose offset is too big
273 * for reg->off
274 */
275 verbose(env, ",imm=%llx", reg->var_off.value);
276 } else {
277 if (reg->smin_value != reg->umin_value &&
278 reg->smin_value != S64_MIN)
279 verbose(env, ",smin_value=%lld",
280 (long long)reg->smin_value);
281 if (reg->smax_value != reg->umax_value &&
282 reg->smax_value != S64_MAX)
283 verbose(env, ",smax_value=%lld",
284 (long long)reg->smax_value);
285 if (reg->umin_value != 0)
286 verbose(env, ",umin_value=%llu",
287 (unsigned long long)reg->umin_value);
288 if (reg->umax_value != U64_MAX)
289 verbose(env, ",umax_value=%llu",
290 (unsigned long long)reg->umax_value);
291 if (!tnum_is_unknown(reg->var_off)) {
292 char tn_buf[48];
293
294 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
295 verbose(env, ",var_off=%s", tn_buf);
296 }
297 }
298 verbose(env, ")");
299 }
300 }
301 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
302 if (state->stack[i].slot_type[0] == STACK_SPILL)
303 verbose(env, " fp%d=%s",
304 (-i - 1) * BPF_REG_SIZE,
305 reg_type_str[state->stack[i].spilled_ptr.type]);
306 }
307 verbose(env, "\n");
308 }
309
310 static int copy_stack_state(struct bpf_verifier_state *dst,
311 const struct bpf_verifier_state *src)
312 {
313 if (!src->stack)
314 return 0;
315 if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
316 /* internal bug, make state invalid to reject the program */
317 memset(dst, 0, sizeof(*dst));
318 return -EFAULT;
319 }
320 memcpy(dst->stack, src->stack,
321 sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
322 return 0;
323 }
324
325 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
326  * make it consume a minimal amount of memory. Stack accesses from the program
327  * (see check_stack_write()) call into realloc_verifier_state() to grow the stack.
328  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
329  * which this function copies over. It points to the previous bpf_verifier_state,
330  * which is never reallocated.
331 */
332 static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
333 bool copy_old)
334 {
335 u32 old_size = state->allocated_stack;
336 struct bpf_stack_state *new_stack;
337 int slot = size / BPF_REG_SIZE;
338
339 if (size <= old_size || !size) {
340 if (copy_old)
341 return 0;
342 state->allocated_stack = slot * BPF_REG_SIZE;
343 if (!size && old_size) {
344 kfree(state->stack);
345 state->stack = NULL;
346 }
347 return 0;
348 }
349 new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
350 GFP_KERNEL);
351 if (!new_stack)
352 return -ENOMEM;
353 if (copy_old) {
354 if (state->stack)
355 memcpy(new_stack, state->stack,
356 sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
357 memset(new_stack + old_size / BPF_REG_SIZE, 0,
358 sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
359 }
360 state->allocated_stack = slot * BPF_REG_SIZE;
361 kfree(state->stack);
362 state->stack = new_stack;
363 return 0;
364 }
365
366 static void free_verifier_state(struct bpf_verifier_state *state,
367 bool free_self)
368 {
369 kfree(state->stack);
370 if (free_self)
371 kfree(state);
372 }
373
374 /* copy verifier state from src to dst growing dst stack space
375 * when necessary to accommodate larger src stack
376 */
377 static int copy_verifier_state(struct bpf_verifier_state *dst,
378 const struct bpf_verifier_state *src)
379 {
380 int err;
381
382 err = realloc_verifier_state(dst, src->allocated_stack, false);
383 if (err)
384 return err;
385 memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack));
386 return copy_stack_state(dst, src);
387 }
388
389 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
390 int *insn_idx)
391 {
392 struct bpf_verifier_state *cur = env->cur_state;
393 struct bpf_verifier_stack_elem *elem, *head = env->head;
394 int err;
395
396 if (env->head == NULL)
397 return -ENOENT;
398
399 if (cur) {
400 err = copy_verifier_state(cur, &head->st);
401 if (err)
402 return err;
403 }
404 if (insn_idx)
405 *insn_idx = head->insn_idx;
406 if (prev_insn_idx)
407 *prev_insn_idx = head->prev_insn_idx;
408 elem = head->next;
409 free_verifier_state(&head->st, false);
410 kfree(head);
411 env->head = elem;
412 env->stack_size--;
413 return 0;
414 }
415
416 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
417 int insn_idx, int prev_insn_idx)
418 {
419 struct bpf_verifier_state *cur = env->cur_state;
420 struct bpf_verifier_stack_elem *elem;
421 int err;
422
423 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
424 if (!elem)
425 goto err;
426
427 elem->insn_idx = insn_idx;
428 elem->prev_insn_idx = prev_insn_idx;
429 elem->next = env->head;
430 env->head = elem;
431 env->stack_size++;
432 err = copy_verifier_state(&elem->st, cur);
433 if (err)
434 goto err;
435 if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
436 verbose(env, "BPF program is too complex\n");
437 goto err;
438 }
439 return &elem->st;
440 err:
441 /* pop all elements and return */
442 while (!pop_stack(env, NULL, NULL));
443 return NULL;
444 }
445
446 #define CALLER_SAVED_REGS 6
447 static const int caller_saved[CALLER_SAVED_REGS] = {
448 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
449 };
450
451 static void __mark_reg_not_init(struct bpf_reg_state *reg);
452
453 /* Mark the unknown part of a register (variable offset or scalar value) as
454 * known to have the value @imm.
455 */
456 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
457 {
458 reg->id = 0;
459 reg->var_off = tnum_const(imm);
460 reg->smin_value = (s64)imm;
461 reg->smax_value = (s64)imm;
462 reg->umin_value = imm;
463 reg->umax_value = imm;
464 }
465
466 /* Mark the 'variable offset' part of a register as zero. This should be
467 * used only on registers holding a pointer type.
468 */
469 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
470 {
471 __mark_reg_known(reg, 0);
472 }
473
474 static void mark_reg_known_zero(struct bpf_verifier_env *env,
475 struct bpf_reg_state *regs, u32 regno)
476 {
477 if (WARN_ON(regno >= MAX_BPF_REG)) {
478 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
479 /* Something bad happened, let's kill all regs */
480 for (regno = 0; regno < MAX_BPF_REG; regno++)
481 __mark_reg_not_init(regs + regno);
482 return;
483 }
484 __mark_reg_known_zero(regs + regno);
485 }
486
487 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
488 {
489 return type_is_pkt_pointer(reg->type);
490 }
491
492 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
493 {
494 return reg_is_pkt_pointer(reg) ||
495 reg->type == PTR_TO_PACKET_END;
496 }
497
498 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
499 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
500 enum bpf_reg_type which)
501 {
502 /* The register can already have a range from prior markings.
503 * This is fine as long as it hasn't been advanced from its
504 * origin.
505 */
506 return reg->type == which &&
507 reg->id == 0 &&
508 reg->off == 0 &&
509 tnum_equals_const(reg->var_off, 0);
510 }
511
512 /* Attempts to improve min/max values based on var_off information */
513 static void __update_reg_bounds(struct bpf_reg_state *reg)
514 {
515 /* min signed is max(sign bit) | min(other bits) */
516 reg->smin_value = max_t(s64, reg->smin_value,
517 reg->var_off.value | (reg->var_off.mask & S64_MIN));
518 /* max signed is min(sign bit) | max(other bits) */
519 reg->smax_value = min_t(s64, reg->smax_value,
520 reg->var_off.value | (reg->var_off.mask & S64_MAX));
521 reg->umin_value = max(reg->umin_value, reg->var_off.value);
522 reg->umax_value = min(reg->umax_value,
523 reg->var_off.value | reg->var_off.mask);
524 }
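
/* Worked example (numbers chosen for illustration): suppose var_off has
 * value = 0b100 and mask = 0b011, i.e. bit 2 is known set and bits 0-1 are
 * unknown, so the register can only be 4, 5, 6 or 7. The updates above give
 *
 *   umin_value >= var_off.value                = 4
 *   umax_value <= var_off.value | var_off.mask = 7
 *
 * and, since the sign bit is not in the mask, the signed bounds are tightened
 * to the same [4, 7] range.
 */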
525
526 /* Uses signed min/max values to inform unsigned, and vice-versa */
527 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
528 {
529 /* Learn sign from signed bounds.
530 * If we cannot cross the sign boundary, then signed and unsigned bounds
531 * are the same, so combine. This works even in the negative case, e.g.
532 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
533 */
534 if (reg->smin_value >= 0 || reg->smax_value < 0) {
535 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
536 reg->umin_value);
537 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
538 reg->umax_value);
539 return;
540 }
541 /* Learn sign from unsigned bounds. Signed bounds cross the sign
542 * boundary, so we must be careful.
543 */
544 if ((s64)reg->umax_value >= 0) {
545 /* Positive. We can't learn anything from the smin, but smax
546 * is positive, hence safe.
547 */
548 reg->smin_value = reg->umin_value;
549 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
550 reg->umax_value);
551 } else if ((s64)reg->umin_value < 0) {
552 /* Negative. We can't learn anything from the smax, but smin
553 * is negative, hence safe.
554 */
555 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
556 reg->umin_value);
557 reg->smax_value = reg->umax_value;
558 }
559 }
560
561 /* Attempts to improve var_off based on unsigned min/max information */
562 static void __reg_bound_offset(struct bpf_reg_state *reg)
563 {
564 reg->var_off = tnum_intersect(reg->var_off,
565 tnum_range(reg->umin_value,
566 reg->umax_value));
567 }
568
569 /* Reset the min/max bounds of a register */
570 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
571 {
572 reg->smin_value = S64_MIN;
573 reg->smax_value = S64_MAX;
574 reg->umin_value = 0;
575 reg->umax_value = U64_MAX;
576 }
577
578 /* Mark a register as having a completely unknown (scalar) value. */
579 static void __mark_reg_unknown(struct bpf_reg_state *reg)
580 {
581 reg->type = SCALAR_VALUE;
582 reg->id = 0;
583 reg->off = 0;
584 reg->var_off = tnum_unknown;
585 __mark_reg_unbounded(reg);
586 }
587
588 static void mark_reg_unknown(struct bpf_verifier_env *env,
589 struct bpf_reg_state *regs, u32 regno)
590 {
591 if (WARN_ON(regno >= MAX_BPF_REG)) {
592 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
593 /* Something bad happened, let's kill all regs */
594 for (regno = 0; regno < MAX_BPF_REG; regno++)
595 __mark_reg_not_init(regs + regno);
596 return;
597 }
598 __mark_reg_unknown(regs + regno);
599 }
600
601 static void __mark_reg_not_init(struct bpf_reg_state *reg)
602 {
603 __mark_reg_unknown(reg);
604 reg->type = NOT_INIT;
605 }
606
607 static void mark_reg_not_init(struct bpf_verifier_env *env,
608 struct bpf_reg_state *regs, u32 regno)
609 {
610 if (WARN_ON(regno >= MAX_BPF_REG)) {
611 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
612 /* Something bad happened, let's kill all regs */
613 for (regno = 0; regno < MAX_BPF_REG; regno++)
614 __mark_reg_not_init(regs + regno);
615 return;
616 }
617 __mark_reg_not_init(regs + regno);
618 }
619
620 static void init_reg_state(struct bpf_verifier_env *env,
621 struct bpf_reg_state *regs)
622 {
623 int i;
624
625 for (i = 0; i < MAX_BPF_REG; i++) {
626 mark_reg_not_init(env, regs, i);
627 regs[i].live = REG_LIVE_NONE;
628 }
629
630 /* frame pointer */
631 regs[BPF_REG_FP].type = PTR_TO_STACK;
632 mark_reg_known_zero(env, regs, BPF_REG_FP);
633
634 /* 1st arg to a function */
635 regs[BPF_REG_1].type = PTR_TO_CTX;
636 mark_reg_known_zero(env, regs, BPF_REG_1);
637 }
638
639 enum reg_arg_type {
640 SRC_OP, /* register is used as source operand */
641 DST_OP, /* register is used as destination operand */
642 DST_OP_NO_MARK /* same as above, check only, don't mark */
643 };
644
645 static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno)
646 {
647 struct bpf_verifier_state *parent = state->parent;
648
649 if (regno == BPF_REG_FP)
650 /* We don't need to worry about FP liveness because it's read-only */
651 return;
652
653 while (parent) {
654 /* if read wasn't screened by an earlier write ... */
655 if (state->regs[regno].live & REG_LIVE_WRITTEN)
656 break;
657 /* ... then we depend on parent's value */
658 parent->regs[regno].live |= REG_LIVE_READ;
659 state = parent;
660 parent = state->parent;
661 }
662 }
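
/* Liveness propagation, illustrated on a hypothetical fork: state S2 was
 * pushed from S1 at a conditional branch, so S2->parent == S1.
 *
 *   S1:  r6 = ...        // S1's R6 gets REG_LIVE_WRITTEN
 *        if r0 == 0 ...  // S2 forked here
 *   S2:  r0 = r6         // read: S2 never wrote R6, so S1's R6 is marked
 *                        // REG_LIVE_READ; the walk stops once it reaches a
 *                        // state that wrote the register
 *
 * State pruning later relies on this: registers that were never marked
 * REG_LIVE_READ in an explored state do not have to match for a new state
 * to be considered equivalent to it.
 */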
663
664 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
665 enum reg_arg_type t)
666 {
667 struct bpf_reg_state *regs = env->cur_state->regs;
668
669 if (regno >= MAX_BPF_REG) {
670 verbose(env, "R%d is invalid\n", regno);
671 return -EINVAL;
672 }
673
674 if (t == SRC_OP) {
675 /* check whether register used as source operand can be read */
676 if (regs[regno].type == NOT_INIT) {
677 verbose(env, "R%d !read_ok\n", regno);
678 return -EACCES;
679 }
680 mark_reg_read(env->cur_state, regno);
681 } else {
682 /* check whether register used as dest operand can be written to */
683 if (regno == BPF_REG_FP) {
684 verbose(env, "frame pointer is read only\n");
685 return -EACCES;
686 }
687 regs[regno].live |= REG_LIVE_WRITTEN;
688 if (t == DST_OP)
689 mark_reg_unknown(env, regs, regno);
690 }
691 return 0;
692 }
693
694 static bool is_spillable_regtype(enum bpf_reg_type type)
695 {
696 switch (type) {
697 case PTR_TO_MAP_VALUE:
698 case PTR_TO_MAP_VALUE_OR_NULL:
699 case PTR_TO_STACK:
700 case PTR_TO_CTX:
701 case PTR_TO_PACKET:
702 case PTR_TO_PACKET_META:
703 case PTR_TO_PACKET_END:
704 case CONST_PTR_TO_MAP:
705 return true;
706 default:
707 return false;
708 }
709 }
710
711 /* check_stack_read/write functions track spill/fill of registers;
712 * stack boundary and alignment are checked in check_mem_access()
713 */
714 static int check_stack_write(struct bpf_verifier_env *env,
715 struct bpf_verifier_state *state, int off,
716 int size, int value_regno, int insn_idx)
717 {
718 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
719
720 err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE),
721 true);
722 if (err)
723 return err;
724 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
725 * so it's aligned access and [off, off + size) are within stack limits
726 */
727 if (!env->allow_ptr_leaks &&
728 state->stack[spi].slot_type[0] == STACK_SPILL &&
729 size != BPF_REG_SIZE) {
730 verbose(env, "attempt to corrupt spilled pointer on stack\n");
731 return -EACCES;
732 }
733
734 if (value_regno >= 0 &&
735 is_spillable_regtype(state->regs[value_regno].type)) {
736
737 /* register containing pointer is being spilled into stack */
738 if (size != BPF_REG_SIZE) {
739 verbose(env, "invalid size of register spill\n");
740 return -EACCES;
741 }
742
743 /* save register state */
744 state->stack[spi].spilled_ptr = state->regs[value_regno];
745 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
746
747 for (i = 0; i < BPF_REG_SIZE; i++) {
748 if (state->stack[spi].slot_type[i] == STACK_MISC &&
749 !env->allow_ptr_leaks) {
750 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
751 int soff = (-spi - 1) * BPF_REG_SIZE;
752
753 /* detected reuse of integer stack slot with a pointer
754 * which means either llvm is reusing stack slot or
755 * an attacker is trying to exploit CVE-2018-3639
756 * (speculative store bypass)
757 * Have to sanitize that slot with preemptive
758 * store of zero.
759 */
760 if (*poff && *poff != soff) {
761 /* disallow programs where single insn stores
762 * into two different stack slots, since verifier
763 * cannot sanitize them
764 */
765 verbose(env,
766 "insn %d cannot access two stack slots fp%d and fp%d",
767 insn_idx, *poff, soff);
768 return -EINVAL;
769 }
770 *poff = soff;
771 }
772 state->stack[spi].slot_type[i] = STACK_SPILL;
773 }
774 } else {
775 /* regular write of data into stack */
776 state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
777
778 for (i = 0; i < size; i++)
779 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
780 STACK_MISC;
781 }
782 return 0;
783 }
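
/* Illustration of the slot-reuse case sanitized above (hypothetical
 * unprivileged fragment, pseudo-asm):
 *
 *   *(u64 *)(r10 - 8) = r2   // scalar data, slots become STACK_MISC
 *   ...
 *   *(u64 *)(r10 - 8) = r6   // r6 holds a map value pointer: the same slot
 *                            // is reused as a pointer spill
 *
 * For the second store, sanitize_stack_off is recorded as -8, and a later
 * fixup pass inserts a store of zero to that slot right before the spill, so
 * a speculatively bypassed store cannot leave the old scalar behind to be
 * (mis)used as a pointer.
 */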
784
785 static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slot)
786 {
787 struct bpf_verifier_state *parent = state->parent;
788
789 while (parent) {
790 /* if read wasn't screened by an earlier write ... */
791 if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
792 break;
793 /* ... then we depend on parent's value */
794 parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
795 state = parent;
796 parent = state->parent;
797 }
798 }
799
800 static int check_stack_read(struct bpf_verifier_env *env,
801 struct bpf_verifier_state *state, int off, int size,
802 int value_regno)
803 {
804 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
805 u8 *stype;
806
807 if (state->allocated_stack <= slot) {
808 verbose(env, "invalid read from stack off %d+0 size %d\n",
809 off, size);
810 return -EACCES;
811 }
812 stype = state->stack[spi].slot_type;
813
814 if (stype[0] == STACK_SPILL) {
815 if (size != BPF_REG_SIZE) {
816 verbose(env, "invalid size of register spill\n");
817 return -EACCES;
818 }
819 for (i = 1; i < BPF_REG_SIZE; i++) {
820 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
821 verbose(env, "corrupted spill memory\n");
822 return -EACCES;
823 }
824 }
825
826 if (value_regno >= 0) {
827 /* restore register state from stack */
828 state->regs[value_regno] = state->stack[spi].spilled_ptr;
829 mark_stack_slot_read(state, spi);
830 }
831 return 0;
832 } else {
833 for (i = 0; i < size; i++) {
834 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) {
835 verbose(env, "invalid read from stack off %d+%d size %d\n",
836 off, i, size);
837 return -EACCES;
838 }
839 }
840 if (value_regno >= 0)
841 /* have read misc data from the stack */
842 mark_reg_unknown(env, state->regs, value_regno);
843 return 0;
844 }
845 }
846
847 /* check read/write into map element returned by bpf_map_lookup_elem() */
848 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
849 int size, bool zero_size_allowed)
850 {
851 struct bpf_reg_state *regs = cur_regs(env);
852 struct bpf_map *map = regs[regno].map_ptr;
853
854 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
855 off + size > map->value_size) {
856 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
857 map->value_size, off, size);
858 return -EACCES;
859 }
860 return 0;
861 }
862
863 /* check read/write into a map element with possible variable offset */
864 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
865 int off, int size, bool zero_size_allowed)
866 {
867 struct bpf_verifier_state *state = env->cur_state;
868 struct bpf_reg_state *reg = &state->regs[regno];
869 int err;
870
871 /* We may have adjusted the register to this map value, so we
872 * need to try adding each of min_value and max_value to off
873 * to make sure our theoretical access will be safe.
874 */
875 if (env->log.level)
876 print_verifier_state(env, state);
877 /* The minimum value is only important with signed
878 * comparisons where we can't assume the floor of a
879 * value is 0. If we are using signed variables for our
880  * indexes we need to make sure that whatever we use
881 * will have a set floor within our range.
882 */
883 if (reg->smin_value < 0) {
884 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
885 regno);
886 return -EACCES;
887 }
888 err = __check_map_access(env, regno, reg->smin_value + off, size,
889 zero_size_allowed);
890 if (err) {
891 verbose(env, "R%d min value is outside of the array range\n",
892 regno);
893 return err;
894 }
895
896 /* If we haven't set a max value then we need to bail since we can't be
897 * sure we won't do bad things.
898 * If reg->umax_value + off could overflow, treat that as unbounded too.
899 */
900 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
901 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
902 regno);
903 return -EACCES;
904 }
905 err = __check_map_access(env, regno, reg->umax_value + off, size,
906 zero_size_allowed);
907 if (err)
908 verbose(env, "R%d max value is outside of the array range\n",
909 regno);
910 return err;
911 }
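
/* Worked example (illustrative numbers): map->value_size = 16, fixed off = 8,
 * access size = 4, and the register's bounds are [smin_value, umax_value] =
 * [0, 8]. Both probes above are needed:
 *
 *   __check_map_access(..., smin_value + off =  8, 4)  ->  8 + 4 <= 16, ok
 *   __check_map_access(..., umax_value + off = 16, 4)  -> 16 + 4 >  16, fail
 *
 * so the access is rejected even though its minimum offset was fine.
 */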
912
913 #define MAX_PACKET_OFF 0xffff
914
915 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
916 const struct bpf_call_arg_meta *meta,
917 enum bpf_access_type t)
918 {
919 switch (env->prog->type) {
920 case BPF_PROG_TYPE_LWT_IN:
921 case BPF_PROG_TYPE_LWT_OUT:
922 /* dst_input() and dst_output() can't write for now */
923 if (t == BPF_WRITE)
924 return false;
925 /* fallthrough */
926 case BPF_PROG_TYPE_SCHED_CLS:
927 case BPF_PROG_TYPE_SCHED_ACT:
928 case BPF_PROG_TYPE_XDP:
929 case BPF_PROG_TYPE_LWT_XMIT:
930 case BPF_PROG_TYPE_SK_SKB:
931 if (meta)
932 return meta->pkt_access;
933
934 env->seen_direct_write = true;
935 return true;
936 default:
937 return false;
938 }
939 }
940
941 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
942 int off, int size, bool zero_size_allowed)
943 {
944 struct bpf_reg_state *regs = cur_regs(env);
945 struct bpf_reg_state *reg = &regs[regno];
946
947 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
948 (u64)off + size > reg->range) {
949 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
950 off, size, regno, reg->id, reg->off, reg->range);
951 return -EACCES;
952 }
953 return 0;
954 }
955
956 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
957 int size, bool zero_size_allowed)
958 {
959 struct bpf_reg_state *regs = cur_regs(env);
960 struct bpf_reg_state *reg = &regs[regno];
961 int err;
962
963 /* We may have added a variable offset to the packet pointer; but any
964 * reg->range we have comes after that. We are only checking the fixed
965 * offset.
966 */
967
968 /* We don't allow negative numbers, because we aren't tracking enough
969 * detail to prove they're safe.
970 */
971 if (reg->smin_value < 0) {
972 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
973 regno);
974 return -EACCES;
975 }
976 err = __check_packet_access(env, regno, off, size, zero_size_allowed);
977 if (err) {
978 verbose(env, "R%d offset is outside of the packet\n", regno);
979 return err;
980 }
981 return err;
982 }
983
984 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
985 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
986 enum bpf_access_type t, enum bpf_reg_type *reg_type)
987 {
988 struct bpf_insn_access_aux info = {
989 .reg_type = *reg_type,
990 };
991
992 if (env->ops->is_valid_access &&
993 env->ops->is_valid_access(off, size, t, &info)) {
994 /* A non-zero info.ctx_field_size indicates that this field is a
995 * candidate for later verifier transformation to load the whole
996 * field and then apply a mask when accessed with a narrower
997 * access than actual ctx access size. A zero info.ctx_field_size
998 * will only allow for whole field access and rejects any other
999 * type of narrower access.
1000 */
1001 *reg_type = info.reg_type;
1002
1003 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
1004 /* remember the offset of last byte accessed in ctx */
1005 if (env->prog->aux->max_ctx_offset < off + size)
1006 env->prog->aux->max_ctx_offset = off + size;
1007 return 0;
1008 }
1009
1010 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
1011 return -EACCES;
1012 }
1013
1014 static bool __is_pointer_value(bool allow_ptr_leaks,
1015 const struct bpf_reg_state *reg)
1016 {
1017 if (allow_ptr_leaks)
1018 return false;
1019
1020 return reg->type != SCALAR_VALUE;
1021 }
1022
1023 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1024 {
1025 return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
1026 }
1027
1028 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1029 {
1030 const struct bpf_reg_state *reg = cur_regs(env) + regno;
1031
1032 return reg->type == PTR_TO_CTX;
1033 }
1034
1035 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1036 {
1037 const struct bpf_reg_state *reg = cur_regs(env) + regno;
1038
1039 return type_is_pkt_pointer(reg->type);
1040 }
1041
1042 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1043 const struct bpf_reg_state *reg,
1044 int off, int size, bool strict)
1045 {
1046 struct tnum reg_off;
1047 int ip_align;
1048
1049 /* Byte size accesses are always allowed. */
1050 if (!strict || size == 1)
1051 return 0;
1052
1053 /* For platforms that do not have a Kconfig enabling
1054 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1055 * NET_IP_ALIGN is universally set to '2'. And on platforms
1056 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1057 * to this code only in strict mode where we want to emulate
1058 * the NET_IP_ALIGN==2 checking. Therefore use an
1059 * unconditional IP align value of '2'.
1060 */
1061 ip_align = 2;
1062
1063 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1064 if (!tnum_is_aligned(reg_off, size)) {
1065 char tn_buf[48];
1066
1067 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1068 verbose(env,
1069 "misaligned packet access off %d+%s+%d+%d size %d\n",
1070 ip_align, tn_buf, reg->off, off, size);
1071 return -EACCES;
1072 }
1073
1074 return 0;
1075 }
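
/* Worked example (illustrative): a packet pointer advanced past a 14-byte
 * Ethernet header (reg->off = 14, var_off = const 0) with a 4-byte load at
 * insn offset 0 gives
 *
 *   reg_off = 2 (ip_align) + 0 (var_off) + 14 (reg->off) + 0 (off) = 16
 *
 * which is 4-byte aligned, so the access is allowed; the same load at
 * reg->off = 15 would compute 17 and be rejected in strict mode.
 */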
1076
1077 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1078 const struct bpf_reg_state *reg,
1079 const char *pointer_desc,
1080 int off, int size, bool strict)
1081 {
1082 struct tnum reg_off;
1083
1084 /* Byte size accesses are always allowed. */
1085 if (!strict || size == 1)
1086 return 0;
1087
1088 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1089 if (!tnum_is_aligned(reg_off, size)) {
1090 char tn_buf[48];
1091
1092 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1093 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
1094 pointer_desc, tn_buf, reg->off, off, size);
1095 return -EACCES;
1096 }
1097
1098 return 0;
1099 }
1100
1101 static int check_ptr_alignment(struct bpf_verifier_env *env,
1102 const struct bpf_reg_state *reg, int off,
1103 int size, bool strict_alignment_once)
1104 {
1105 bool strict = env->strict_alignment || strict_alignment_once;
1106 const char *pointer_desc = "";
1107
1108 switch (reg->type) {
1109 case PTR_TO_PACKET:
1110 case PTR_TO_PACKET_META:
1111 /* Special case, because of NET_IP_ALIGN. Given metadata sits
1112 * right in front, treat it the very same way.
1113 */
1114 return check_pkt_ptr_alignment(env, reg, off, size, strict);
1115 case PTR_TO_MAP_VALUE:
1116 pointer_desc = "value ";
1117 break;
1118 case PTR_TO_CTX:
1119 pointer_desc = "context ";
1120 break;
1121 case PTR_TO_STACK:
1122 pointer_desc = "stack ";
1123 /* The stack spill tracking logic in check_stack_write()
1124 * and check_stack_read() relies on stack accesses being
1125 * aligned.
1126 */
1127 strict = true;
1128 break;
1129 default:
1130 break;
1131 }
1132 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1133 strict);
1134 }
1135
1136 static int check_ctx_reg(struct bpf_verifier_env *env,
1137 const struct bpf_reg_state *reg, int regno)
1138 {
1139 /* Access to ctx or passing it to a helper is only allowed in
1140 * its original, unmodified form.
1141 */
1142
1143 if (reg->off) {
1144 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
1145 regno, reg->off);
1146 return -EACCES;
1147 }
1148
1149 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1150 char tn_buf[48];
1151
1152 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1153 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
1154 return -EACCES;
1155 }
1156
1157 return 0;
1158 }
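
/* For illustration, a hypothetical fragment this check rejects:
 *
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),         // R1 = ctx + 8
 *   BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),  // load via modified ctx
 *
 * fails with "dereference of modified ctx ptr R1 off=8 disallowed", while
 * the equivalent BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 8) against the
 * unmodified R1 is handled normally by check_ctx_access().
 */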
1159
1160 /* truncate register to smaller size (in bytes)
1161 * must be called with size < BPF_REG_SIZE
1162 */
1163 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1164 {
1165 u64 mask;
1166
1167 /* clear high bits in bit representation */
1168 reg->var_off = tnum_cast(reg->var_off, size);
1169
1170 /* fix arithmetic bounds */
1171 mask = ((u64)1 << (size * 8)) - 1;
1172 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1173 reg->umin_value &= mask;
1174 reg->umax_value &= mask;
1175 } else {
1176 reg->umin_value = 0;
1177 reg->umax_value = mask;
1178 }
1179 reg->smin_value = reg->umin_value;
1180 reg->smax_value = reg->umax_value;
1181 }
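
/* Worked example (illustrative): after a 1-byte access, size = 1, so
 * mask = 0xff. A scalar previously bounded to [0x120, 0x135] has the same
 * upper bits in umin and umax (0x100), so the bounds shrink to [0x20, 0x35];
 * had the upper bits differed, the bounds would be reset to the full
 * [0, 0xff] range.
 */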
1182
1183 /* check whether memory at (regno + off) is accessible for t = (read | write)
1184  * if t==write, value_regno is a register whose value is stored into memory
1185 * if t==read, value_regno is a register which will receive the value from memory
1186 * if t==write && value_regno==-1, some unknown value is stored into memory
1187 * if t==read && value_regno==-1, don't care what we read from memory
1188 */
1189 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1190 int off, int bpf_size, enum bpf_access_type t,
1191 int value_regno, bool strict_alignment_once)
1192 {
1193 struct bpf_verifier_state *state = env->cur_state;
1194 struct bpf_reg_state *regs = cur_regs(env);
1195 struct bpf_reg_state *reg = regs + regno;
1196 int size, err = 0;
1197
1198 size = bpf_size_to_bytes(bpf_size);
1199 if (size < 0)
1200 return size;
1201
1202 /* alignment checks will add in reg->off themselves */
1203 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
1204 if (err)
1205 return err;
1206
1207 /* for access checks, reg->off is just part of off */
1208 off += reg->off;
1209
1210 if (reg->type == PTR_TO_MAP_VALUE) {
1211 if (t == BPF_WRITE && value_regno >= 0 &&
1212 is_pointer_value(env, value_regno)) {
1213 verbose(env, "R%d leaks addr into map\n", value_regno);
1214 return -EACCES;
1215 }
1216
1217 err = check_map_access(env, regno, off, size, false);
1218 if (!err && t == BPF_READ && value_regno >= 0)
1219 mark_reg_unknown(env, regs, value_regno);
1220
1221 } else if (reg->type == PTR_TO_CTX) {
1222 enum bpf_reg_type reg_type = SCALAR_VALUE;
1223
1224 if (t == BPF_WRITE && value_regno >= 0 &&
1225 is_pointer_value(env, value_regno)) {
1226 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1227 return -EACCES;
1228 }
1229
1230 err = check_ctx_reg(env, reg, regno);
1231 if (err < 0)
1232 return err;
1233
1234 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
1235 if (!err && t == BPF_READ && value_regno >= 0) {
1236 /* ctx access returns either a scalar, or a
1237 * PTR_TO_PACKET[_META,_END]. In the latter
1238 * case, we know the offset is zero.
1239 */
1240 if (reg_type == SCALAR_VALUE)
1241 mark_reg_unknown(env, regs, value_regno);
1242 else
1243 mark_reg_known_zero(env, regs,
1244 value_regno);
1245 regs[value_regno].id = 0;
1246 regs[value_regno].off = 0;
1247 regs[value_regno].range = 0;
1248 regs[value_regno].type = reg_type;
1249 }
1250
1251 } else if (reg->type == PTR_TO_STACK) {
1252 /* stack accesses must be at a fixed offset, so that we can
1253  * determine what type of data was returned.
1254 * See check_stack_read().
1255 */
1256 if (!tnum_is_const(reg->var_off)) {
1257 char tn_buf[48];
1258
1259 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1260 verbose(env, "variable stack access var_off=%s off=%d size=%d",
1261 tn_buf, off, size);
1262 return -EACCES;
1263 }
1264 off += reg->var_off.value;
1265 if (off >= 0 || off < -MAX_BPF_STACK) {
1266 verbose(env, "invalid stack off=%d size=%d\n", off,
1267 size);
1268 return -EACCES;
1269 }
1270
1271 if (env->prog->aux->stack_depth < -off)
1272 env->prog->aux->stack_depth = -off;
1273
1274 if (t == BPF_WRITE)
1275 err = check_stack_write(env, state, off, size,
1276 value_regno, insn_idx);
1277 else
1278 err = check_stack_read(env, state, off, size,
1279 value_regno);
1280 } else if (reg_is_pkt_pointer(reg)) {
1281 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
1282 verbose(env, "cannot write into packet\n");
1283 return -EACCES;
1284 }
1285 if (t == BPF_WRITE && value_regno >= 0 &&
1286 is_pointer_value(env, value_regno)) {
1287 verbose(env, "R%d leaks addr into packet\n",
1288 value_regno);
1289 return -EACCES;
1290 }
1291 err = check_packet_access(env, regno, off, size, false);
1292 if (!err && t == BPF_READ && value_regno >= 0)
1293 mark_reg_unknown(env, regs, value_regno);
1294 } else {
1295 verbose(env, "R%d invalid mem access '%s'\n", regno,
1296 reg_type_str[reg->type]);
1297 return -EACCES;
1298 }
1299
1300 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1301 regs[value_regno].type == SCALAR_VALUE) {
1302 /* b/h/w load zero-extends, mark upper bits as known 0 */
1303 coerce_reg_to_size(&regs[value_regno], size);
1304 }
1305 return err;
1306 }
1307
1308 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
1309 {
1310 int err;
1311
1312 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
1313 insn->imm != 0) {
1314 verbose(env, "BPF_XADD uses reserved fields\n");
1315 return -EINVAL;
1316 }
1317
1318 /* check src1 operand */
1319 err = check_reg_arg(env, insn->src_reg, SRC_OP);
1320 if (err)
1321 return err;
1322
1323 /* check src2 operand */
1324 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
1325 if (err)
1326 return err;
1327
1328 if (is_pointer_value(env, insn->src_reg)) {
1329 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
1330 return -EACCES;
1331 }
1332
1333 if (is_ctx_reg(env, insn->dst_reg) ||
1334 is_pkt_reg(env, insn->dst_reg)) {
1335 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
1336 insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ?
1337 "context" : "packet");
1338 return -EACCES;
1339 }
1340
1341 /* check whether atomic_add can read the memory */
1342 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1343 BPF_SIZE(insn->code), BPF_READ, -1, true);
1344 if (err)
1345 return err;
1346
1347 /* check whether atomic_add can write into the same memory */
1348 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
1349 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
1350 }
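
/* For illustration (hypothetical fragment, assuming value_size >= 8): with
 * R0 of type PTR_TO_MAP_VALUE after a NULL-checked lookup and R1 a scalar,
 *
 *   BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0)  // *(u64 *)(R0+0) += R1
 *
 * passes, because both the read and the write probe above succeed against
 * the map value. The same insn with a ctx or packet pointer in dst_reg, or
 * with a pointer in src_reg when pointer leaks are disallowed, is rejected
 * by the checks above.
 */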
1351
1352 /* Does this register contain a constant zero? */
1353 static bool register_is_null(struct bpf_reg_state reg)
1354 {
1355 return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0);
1356 }
1357
1358 /* when register 'regno' is passed into a function that will read 'access_size'
1359  * bytes from that pointer, make sure that it's within the stack boundary
1360  * and all elements of the stack are initialized.
1361 * Unlike most pointer bounds-checking functions, this one doesn't take an
1362 * 'off' argument, so it has to add in reg->off itself.
1363 */
1364 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1365 int access_size, bool zero_size_allowed,
1366 struct bpf_call_arg_meta *meta)
1367 {
1368 struct bpf_verifier_state *state = env->cur_state;
1369 struct bpf_reg_state *regs = state->regs;
1370 int off, i, slot, spi;
1371
1372 if (regs[regno].type != PTR_TO_STACK) {
1373 /* Allow zero-byte read from NULL, regardless of pointer type */
1374 if (zero_size_allowed && access_size == 0 &&
1375 register_is_null(regs[regno]))
1376 return 0;
1377
1378 verbose(env, "R%d type=%s expected=%s\n", regno,
1379 reg_type_str[regs[regno].type],
1380 reg_type_str[PTR_TO_STACK]);
1381 return -EACCES;
1382 }
1383
1384 /* Only allow fixed-offset stack reads */
1385 if (!tnum_is_const(regs[regno].var_off)) {
1386 char tn_buf[48];
1387
1388 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
1389 verbose(env, "invalid variable stack read R%d var_off=%s\n",
1390 regno, tn_buf);
1391 return -EACCES;
1392 }
1393 off = regs[regno].off + regs[regno].var_off.value;
1394 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
1395 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
1396 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
1397 regno, off, access_size);
1398 return -EACCES;
1399 }
1400
1401 if (env->prog->aux->stack_depth < -off)
1402 env->prog->aux->stack_depth = -off;
1403
1404 if (meta && meta->raw_mode) {
1405 meta->access_size = access_size;
1406 meta->regno = regno;
1407 return 0;
1408 }
1409
1410 for (i = 0; i < access_size; i++) {
1411 slot = -(off + i) - 1;
1412 spi = slot / BPF_REG_SIZE;
1413 if (state->allocated_stack <= slot ||
1414 state->stack[spi].slot_type[slot % BPF_REG_SIZE] !=
1415 STACK_MISC) {
1416 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
1417 off, i, access_size);
1418 return -EACCES;
1419 }
1420 }
1421 return 0;
1422 }
1423
1424 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
1425 int access_size, bool zero_size_allowed,
1426 struct bpf_call_arg_meta *meta)
1427 {
1428 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1429
1430 switch (reg->type) {
1431 case PTR_TO_PACKET:
1432 case PTR_TO_PACKET_META:
1433 return check_packet_access(env, regno, reg->off, access_size,
1434 zero_size_allowed);
1435 case PTR_TO_MAP_VALUE:
1436 return check_map_access(env, regno, reg->off, access_size,
1437 zero_size_allowed);
1438 default: /* scalar_value|ptr_to_stack or invalid ptr */
1439 return check_stack_boundary(env, regno, access_size,
1440 zero_size_allowed, meta);
1441 }
1442 }
1443
1444 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
1445 enum bpf_arg_type arg_type,
1446 struct bpf_call_arg_meta *meta)
1447 {
1448 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1449 enum bpf_reg_type expected_type, type = reg->type;
1450 int err = 0;
1451
1452 if (arg_type == ARG_DONTCARE)
1453 return 0;
1454
1455 err = check_reg_arg(env, regno, SRC_OP);
1456 if (err)
1457 return err;
1458
1459 if (arg_type == ARG_ANYTHING) {
1460 if (is_pointer_value(env, regno)) {
1461 verbose(env, "R%d leaks addr into helper function\n",
1462 regno);
1463 return -EACCES;
1464 }
1465 return 0;
1466 }
1467
1468 if (type_is_pkt_pointer(type) &&
1469 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
1470 verbose(env, "helper access to the packet is not allowed\n");
1471 return -EACCES;
1472 }
1473
1474 if (arg_type == ARG_PTR_TO_MAP_KEY ||
1475 arg_type == ARG_PTR_TO_MAP_VALUE) {
1476 expected_type = PTR_TO_STACK;
1477 if (!type_is_pkt_pointer(type) &&
1478 type != expected_type)
1479 goto err_type;
1480 } else if (arg_type == ARG_CONST_SIZE ||
1481 arg_type == ARG_CONST_SIZE_OR_ZERO) {
1482 expected_type = SCALAR_VALUE;
1483 if (type != expected_type)
1484 goto err_type;
1485 } else if (arg_type == ARG_CONST_MAP_PTR) {
1486 expected_type = CONST_PTR_TO_MAP;
1487 if (type != expected_type)
1488 goto err_type;
1489 } else if (arg_type == ARG_PTR_TO_CTX) {
1490 expected_type = PTR_TO_CTX;
1491 if (type != expected_type)
1492 goto err_type;
1493 err = check_ctx_reg(env, reg, regno);
1494 if (err < 0)
1495 return err;
1496 } else if (arg_type == ARG_PTR_TO_MEM ||
1497 arg_type == ARG_PTR_TO_MEM_OR_NULL ||
1498 arg_type == ARG_PTR_TO_UNINIT_MEM) {
1499 expected_type = PTR_TO_STACK;
1500 /* One exception here. In case function allows for NULL to be
1501 * passed in as argument, it's a SCALAR_VALUE type. Final test
1502 * happens during stack boundary checking.
1503 */
1504 if (register_is_null(*reg) &&
1505 arg_type == ARG_PTR_TO_MEM_OR_NULL)
1506 /* final test in check_stack_boundary() */;
1507 else if (!type_is_pkt_pointer(type) &&
1508 type != PTR_TO_MAP_VALUE &&
1509 type != expected_type)
1510 goto err_type;
1511 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
1512 } else {
1513 verbose(env, "unsupported arg_type %d\n", arg_type);
1514 return -EFAULT;
1515 }
1516
1517 if (arg_type == ARG_CONST_MAP_PTR) {
1518 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
1519 meta->map_ptr = reg->map_ptr;
1520 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
1521 /* bpf_map_xxx(..., map_ptr, ..., key) call:
1522 * check that [key, key + map->key_size) are within
1523 * stack limits and initialized
1524 */
1525 if (!meta->map_ptr) {
1526 /* in function declaration map_ptr must come before
1527 * map_key, so that it's verified and known before
1528 * we have to check map_key here. Otherwise it means
1529 * that kernel subsystem misconfigured verifier
1530 */
1531 verbose(env, "invalid map_ptr to access map->key\n");
1532 return -EACCES;
1533 }
1534 if (type_is_pkt_pointer(type))
1535 err = check_packet_access(env, regno, reg->off,
1536 meta->map_ptr->key_size,
1537 false);
1538 else
1539 err = check_stack_boundary(env, regno,
1540 meta->map_ptr->key_size,
1541 false, NULL);
1542 } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
1543 /* bpf_map_xxx(..., map_ptr, ..., value) call:
1544 * check [value, value + map->value_size) validity
1545 */
1546 if (!meta->map_ptr) {
1547 /* kernel subsystem misconfigured verifier */
1548 verbose(env, "invalid map_ptr to access map->value\n");
1549 return -EACCES;
1550 }
1551 if (type_is_pkt_pointer(type))
1552 err = check_packet_access(env, regno, reg->off,
1553 meta->map_ptr->value_size,
1554 false);
1555 else
1556 err = check_stack_boundary(env, regno,
1557 meta->map_ptr->value_size,
1558 false, NULL);
1559 } else if (arg_type == ARG_CONST_SIZE ||
1560 arg_type == ARG_CONST_SIZE_OR_ZERO) {
1561 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
1562
1563 /* bpf_xxx(..., buf, len) call will access 'len' bytes
1564 * from stack pointer 'buf'. Check it
1565 * note: regno == len, regno - 1 == buf
1566 */
1567 if (regno == 0) {
1568 /* kernel subsystem misconfigured verifier */
1569 verbose(env,
1570 "ARG_CONST_SIZE cannot be first argument\n");
1571 return -EACCES;
1572 }
1573
1574 /* The register is SCALAR_VALUE; the access check
1575 * happens using its boundaries.
1576 */
1577
1578 if (!tnum_is_const(reg->var_off))
1579 /* For unprivileged variable accesses, disable raw
1580 * mode so that the program is required to
1581 * initialize all the memory that the helper could
1582 * just partially fill up.
1583 */
1584 meta = NULL;
1585
1586 if (reg->smin_value < 0) {
1587 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
1588 regno);
1589 return -EACCES;
1590 }
1591
1592 if (reg->umin_value == 0) {
1593 err = check_helper_mem_access(env, regno - 1, 0,
1594 zero_size_allowed,
1595 meta);
1596 if (err)
1597 return err;
1598 }
1599
1600 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
1601 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
1602 regno);
1603 return -EACCES;
1604 }
1605 err = check_helper_mem_access(env, regno - 1,
1606 reg->umax_value,
1607 zero_size_allowed, meta);
1608 }
1609
1610 return err;
1611 err_type:
1612 verbose(env, "R%d type=%s expected=%s\n", regno,
1613 reg_type_str[type], reg_type_str[expected_type]);
1614 return -EACCES;
1615 }
1616
1617 static int check_map_func_compatibility(struct bpf_verifier_env *env,
1618 struct bpf_map *map, int func_id)
1619 {
1620 if (!map)
1621 return 0;
1622
1623 /* We need a two way check, first is from map perspective ... */
1624 switch (map->map_type) {
1625 case BPF_MAP_TYPE_PROG_ARRAY:
1626 if (func_id != BPF_FUNC_tail_call)
1627 goto error;
1628 break;
1629 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
1630 if (func_id != BPF_FUNC_perf_event_read &&
1631 func_id != BPF_FUNC_perf_event_output &&
1632 func_id != BPF_FUNC_perf_event_read_value)
1633 goto error;
1634 break;
1635 case BPF_MAP_TYPE_STACK_TRACE:
1636 if (func_id != BPF_FUNC_get_stackid)
1637 goto error;
1638 break;
1639 case BPF_MAP_TYPE_CGROUP_ARRAY:
1640 if (func_id != BPF_FUNC_skb_under_cgroup &&
1641 func_id != BPF_FUNC_current_task_under_cgroup)
1642 goto error;
1643 break;
1644 /* devmap returns a pointer to a live net_device ifindex that we cannot
1645  * allow to be modified from the bpf side. So do not allow element lookups
1646  * for now.
1647 */
1648 case BPF_MAP_TYPE_DEVMAP:
1649 if (func_id != BPF_FUNC_redirect_map)
1650 goto error;
1651 break;
1652 /* Restrict bpf side of cpumap, open when use-cases appear */
1653 case BPF_MAP_TYPE_CPUMAP:
1654 if (func_id != BPF_FUNC_redirect_map)
1655 goto error;
1656 break;
1657 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
1658 case BPF_MAP_TYPE_HASH_OF_MAPS:
1659 if (func_id != BPF_FUNC_map_lookup_elem)
1660 goto error;
1661 break;
1662 case BPF_MAP_TYPE_SOCKMAP:
1663 if (func_id != BPF_FUNC_sk_redirect_map &&
1664 func_id != BPF_FUNC_sock_map_update &&
1665 func_id != BPF_FUNC_map_delete_elem)
1666 goto error;
1667 break;
1668 default:
1669 break;
1670 }
1671
1672 /* ... and second from the function itself. */
1673 switch (func_id) {
1674 case BPF_FUNC_tail_call:
1675 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
1676 goto error;
1677 break;
1678 case BPF_FUNC_perf_event_read:
1679 case BPF_FUNC_perf_event_output:
1680 case BPF_FUNC_perf_event_read_value:
1681 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
1682 goto error;
1683 break;
1684 case BPF_FUNC_get_stackid:
1685 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
1686 goto error;
1687 break;
1688 case BPF_FUNC_current_task_under_cgroup:
1689 case BPF_FUNC_skb_under_cgroup:
1690 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
1691 goto error;
1692 break;
1693 case BPF_FUNC_redirect_map:
1694 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
1695 map->map_type != BPF_MAP_TYPE_CPUMAP)
1696 goto error;
1697 break;
1698 case BPF_FUNC_sk_redirect_map:
1699 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
1700 goto error;
1701 break;
1702 case BPF_FUNC_sock_map_update:
1703 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
1704 goto error;
1705 break;
1706 default:
1707 break;
1708 }
1709
1710 return 0;
1711 error:
1712 verbose(env, "cannot pass map_type %d into func %s#%d\n",
1713 map->map_type, func_id_name(func_id), func_id);
1714 return -EINVAL;
1715 }
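
/* For illustration: a BPF_MAP_TYPE_PROG_ARRAY reaching this function with
 * func_id == BPF_FUNC_map_lookup_elem is rejected by the first switch, since
 * prog arrays may only be used with bpf_tail_call(); conversely,
 * BPF_FUNC_tail_call with an ordinary hash map is rejected by the second
 * switch. Either direction produces the same
 * "cannot pass map_type %d into func %s#%d" error.
 */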
1716
1717 static int check_raw_mode(const struct bpf_func_proto *fn)
1718 {
1719 int count = 0;
1720
1721 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
1722 count++;
1723 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
1724 count++;
1725 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
1726 count++;
1727 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
1728 count++;
1729 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
1730 count++;
1731
1732 return count > 1 ? -EINVAL : 0;
1733 }
1734
1735 /* Packet data might have moved; any old PTR_TO_PACKET[_META,_END]
1736 * are now invalid, so turn them into unknown SCALAR_VALUE.
1737 */
1738 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
1739 {
1740 struct bpf_verifier_state *state = env->cur_state;
1741 struct bpf_reg_state *regs = state->regs, *reg;
1742 int i;
1743
1744 for (i = 0; i < MAX_BPF_REG; i++)
1745 if (reg_is_pkt_pointer_any(&regs[i]))
1746 mark_reg_unknown(env, regs, i);
1747
1748 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1749 if (state->stack[i].slot_type[0] != STACK_SPILL)
1750 continue;
1751 reg = &state->stack[i].spilled_ptr;
1752 if (reg_is_pkt_pointer_any(reg))
1753 __mark_reg_unknown(reg);
1754 }
1755 }
1756
1757 static int
1758 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
1759 int func_id, int insn_idx)
1760 {
1761 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
1762
1763 if (func_id != BPF_FUNC_tail_call &&
1764 func_id != BPF_FUNC_map_lookup_elem)
1765 return 0;
1766 if (meta->map_ptr == NULL) {
1767 verbose(env, "kernel subsystem misconfigured verifier\n");
1768 return -EINVAL;
1769 }
1770
1771 if (!BPF_MAP_PTR(aux->map_state))
1772 bpf_map_ptr_store(aux, meta->map_ptr,
1773 meta->map_ptr->unpriv_array);
1774 else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
1775 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
1776 meta->map_ptr->unpriv_array);
1777 return 0;
1778 }
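
/* Illustrative example (editor's sketch; map names are made up): if the same
 * call site can be reached with two different maps, e.g.
 *
 *   map = flag ? (void *)&map_a : (void *)&map_b;
 *   val = bpf_map_lookup_elem(map, &key);
 *
 * the second visit sees BPF_MAP_PTR(aux->map_state) != meta->map_ptr and
 * stores BPF_MAP_PTR_POISON, so later fixup passes cannot specialize this
 * call site for one particular map.
 */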
1779
1780 static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1781 {
1782 const struct bpf_func_proto *fn = NULL;
1783 struct bpf_reg_state *regs;
1784 struct bpf_call_arg_meta meta;
1785 bool changes_data;
1786 int i, err;
1787
1788 /* find function prototype */
1789 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
1790 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
1791 func_id);
1792 return -EINVAL;
1793 }
1794
1795 if (env->ops->get_func_proto)
1796 fn = env->ops->get_func_proto(func_id);
1797
1798 if (!fn) {
1799 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
1800 func_id);
1801 return -EINVAL;
1802 }
1803
1804 /* eBPF programs must be GPL compatible to use GPL-ed functions */
1805 if (!env->prog->gpl_compatible && fn->gpl_only) {
1806 verbose(env, "cannot call GPL only function from proprietary program\n");
1807 return -EINVAL;
1808 }
1809
1810 /* With LD_ABS/IND some JITs save/restore skb from r1. */
1811 changes_data = bpf_helper_changes_pkt_data(fn->func);
1812 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
1813 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
1814 func_id_name(func_id), func_id);
1815 return -EINVAL;
1816 }
1817
1818 memset(&meta, 0, sizeof(meta));
1819 meta.pkt_access = fn->pkt_access;
1820
1821 /* We only support one arg being in raw mode at the moment, which
1822 * is sufficient for the helper functions we have right now.
1823 */
1824 err = check_raw_mode(fn);
1825 if (err) {
1826 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
1827 func_id_name(func_id), func_id);
1828 return err;
1829 }
1830
1831 /* check args */
1832 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
1833 if (err)
1834 return err;
1835 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
1836 if (err)
1837 return err;
1838 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
1839 if (err)
1840 return err;
1841 err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
1842 if (err)
1843 return err;
1844 err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
1845 if (err)
1846 return err;
1847
1848 err = record_func_map(env, &meta, func_id, insn_idx);
1849 if (err)
1850 return err;
1851
1852 /* Mark slots with STACK_MISC in case of raw mode; the stack offset
1853 * is inferred from register state.
1854 */
1855 for (i = 0; i < meta.access_size; i++) {
1856 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
1857 BPF_WRITE, -1, false);
1858 if (err)
1859 return err;
1860 }
1861
1862 regs = cur_regs(env);
1863 /* reset caller saved regs */
1864 for (i = 0; i < CALLER_SAVED_REGS; i++) {
1865 mark_reg_not_init(env, regs, caller_saved[i]);
1866 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
1867 }
1868
1869 /* update return register (already marked as written above) */
1870 if (fn->ret_type == RET_INTEGER) {
1871 /* sets type to SCALAR_VALUE */
1872 mark_reg_unknown(env, regs, BPF_REG_0);
1873 } else if (fn->ret_type == RET_VOID) {
1874 regs[BPF_REG_0].type = NOT_INIT;
1875 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
1876 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
1877 /* There is no offset yet applied, variable or fixed */
1878 mark_reg_known_zero(env, regs, BPF_REG_0);
1879 regs[BPF_REG_0].off = 0;
1880 /* remember map_ptr, so that check_map_access()
1881 * can check 'value_size' boundary of memory access
1882 * to map element returned from bpf_map_lookup_elem()
1883 */
1884 if (meta.map_ptr == NULL) {
1885 verbose(env,
1886 "kernel subsystem misconfigured verifier\n");
1887 return -EINVAL;
1888 }
1889 regs[BPF_REG_0].map_ptr = meta.map_ptr;
1890 regs[BPF_REG_0].id = ++env->id_gen;
1891 } else {
1892 verbose(env, "unknown return type %d of func %s#%d\n",
1893 fn->ret_type, func_id_name(func_id), func_id);
1894 return -EINVAL;
1895 }
1896
1897 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
1898 if (err)
1899 return err;
1900
1901 if (changes_data)
1902 clear_all_pkt_pointers(env);
1903 return 0;
1904 }
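
/* Editor's note (illustrative): because the loop above marks R1-R5 as
 * NOT_INIT after every helper call, a program that reads e.g. r2 right after
 * a call without reinitializing it is rejected with "R2 !read_ok"; only R0
 * (the return value) and the callee-saved R6-R9 survive the call.
 */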
1905
1906 static bool signed_add_overflows(s64 a, s64 b)
1907 {
1908 /* Do the add in u64, where overflow is well-defined */
1909 s64 res = (s64)((u64)a + (u64)b);
1910
1911 if (b < 0)
1912 return res > a;
1913 return res < a;
1914 }
1915
1916 static bool signed_sub_overflows(s64 a, s64 b)
1917 {
1918 /* Do the sub in u64, where overflow is well-defined */
1919 s64 res = (s64)((u64)a - (u64)b);
1920
1921 if (b < 0)
1922 return res < a;
1923 return res > a;
1924 }
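
/* Worked examples (editor's annotation, not from the original source):
 *
 *   signed_add_overflows(S64_MAX, 1)  -> true  (b >= 0, result wraps below a)
 *   signed_add_overflows(-1, S64_MIN) -> true  (b < 0, result wraps above a)
 *   signed_add_overflows(3, -5)       -> false (-2 > 3 does not hold)
 *   signed_sub_overflows(S64_MIN, 1)  -> true  (b >= 0, result wraps above a)
 *   signed_sub_overflows(0, S64_MIN)  -> true  (b < 0, result wraps below a)
 *
 * Doing the arithmetic in u64 keeps the wrap-around well-defined in C.
 */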
1925
1926 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
1927 const struct bpf_reg_state *reg,
1928 enum bpf_reg_type type)
1929 {
1930 bool known = tnum_is_const(reg->var_off);
1931 s64 val = reg->var_off.value;
1932 s64 smin = reg->smin_value;
1933
1934 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
1935 verbose(env, "math between %s pointer and %lld is not allowed\n",
1936 reg_type_str[type], val);
1937 return false;
1938 }
1939
1940 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
1941 verbose(env, "%s pointer offset %d is not allowed\n",
1942 reg_type_str[type], reg->off);
1943 return false;
1944 }
1945
1946 if (smin == S64_MIN) {
1947 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
1948 reg_type_str[type]);
1949 return false;
1950 }
1951
1952 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
1953 verbose(env, "value %lld makes %s pointer be out of bounds\n",
1954 smin, reg_type_str[type]);
1955 return false;
1956 }
1957
1958 return true;
1959 }
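
/* Editor's note (illustrative; assumes BPF_MAX_VAR_OFF is 1 << 29 as defined
 * in this tree's bpf_verifier.h): with r1 holding a map value pointer,
 *
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x20000000),
 *
 * trips the first test above and is rejected as "math between map_value
 * pointer and 536870912 is not allowed", before any memory access is even
 * attempted.
 */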
1960
1961 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
1962 * Caller should also handle BPF_MOV case separately.
1963 * If we return -EACCES, caller may want to try again treating pointer as a
1964 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
1965 */
1966 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1967 struct bpf_insn *insn,
1968 const struct bpf_reg_state *ptr_reg,
1969 const struct bpf_reg_state *off_reg)
1970 {
1971 struct bpf_reg_state *regs = cur_regs(env), *dst_reg;
1972 bool known = tnum_is_const(off_reg->var_off);
1973 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
1974 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
1975 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
1976 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
1977 u8 opcode = BPF_OP(insn->code);
1978 u32 dst = insn->dst_reg;
1979
1980 dst_reg = &regs[dst];
1981
1982 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
1983 smin_val > smax_val || umin_val > umax_val) {
1984 /* Taint dst register if offset had invalid bounds derived from
1985 * e.g. dead branches.
1986 */
1987 __mark_reg_unknown(dst_reg);
1988 return 0;
1989 }
1990
1991 if (BPF_CLASS(insn->code) != BPF_ALU64) {
1992 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
1993 verbose(env,
1994 "R%d 32-bit pointer arithmetic prohibited\n",
1995 dst);
1996 return -EACCES;
1997 }
1998
1999 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
2000 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
2001 dst);
2002 return -EACCES;
2003 }
2004 if (ptr_reg->type == CONST_PTR_TO_MAP) {
2005 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
2006 dst);
2007 return -EACCES;
2008 }
2009 if (ptr_reg->type == PTR_TO_PACKET_END) {
2010 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
2011 dst);
2012 return -EACCES;
2013 }
2014
2015 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
2016 * The id may be overwritten later if we create a new variable offset.
2017 */
2018 dst_reg->type = ptr_reg->type;
2019 dst_reg->id = ptr_reg->id;
2020
2021 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
2022 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
2023 return -EINVAL;
2024
2025 switch (opcode) {
2026 case BPF_ADD:
2027 /* We can take a fixed offset as long as it doesn't overflow
2028 * the s32 'off' field
2029 */
2030 if (known && (ptr_reg->off + smin_val ==
2031 (s64)(s32)(ptr_reg->off + smin_val))) {
2032 /* pointer += K. Accumulate it into fixed offset */
2033 dst_reg->smin_value = smin_ptr;
2034 dst_reg->smax_value = smax_ptr;
2035 dst_reg->umin_value = umin_ptr;
2036 dst_reg->umax_value = umax_ptr;
2037 dst_reg->var_off = ptr_reg->var_off;
2038 dst_reg->off = ptr_reg->off + smin_val;
2039 dst_reg->range = ptr_reg->range;
2040 break;
2041 }
2042 /* A new variable offset is created. Note that off_reg->off
2043 * == 0, since it's a scalar.
2044 * dst_reg gets the pointer type, and since some scalar value
2045 * was added to the pointer, give it a new 'id' if it's a
2046 * PTR_TO_PACKET.
2047 * This creates a new 'base' pointer; off_reg (the variable part) gets
2048 * added into the variable offset, and we copy the fixed offset
2049 * from ptr_reg.
2050 */
2051 if (signed_add_overflows(smin_ptr, smin_val) ||
2052 signed_add_overflows(smax_ptr, smax_val)) {
2053 dst_reg->smin_value = S64_MIN;
2054 dst_reg->smax_value = S64_MAX;
2055 } else {
2056 dst_reg->smin_value = smin_ptr + smin_val;
2057 dst_reg->smax_value = smax_ptr + smax_val;
2058 }
2059 if (umin_ptr + umin_val < umin_ptr ||
2060 umax_ptr + umax_val < umax_ptr) {
2061 dst_reg->umin_value = 0;
2062 dst_reg->umax_value = U64_MAX;
2063 } else {
2064 dst_reg->umin_value = umin_ptr + umin_val;
2065 dst_reg->umax_value = umax_ptr + umax_val;
2066 }
2067 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
2068 dst_reg->off = ptr_reg->off;
2069 if (reg_is_pkt_pointer(ptr_reg)) {
2070 dst_reg->id = ++env->id_gen;
2071 /* something was added to pkt_ptr, set range to zero */
2072 dst_reg->range = 0;
2073 }
2074 break;
2075 case BPF_SUB:
2076 if (dst_reg == off_reg) {
2077 /* scalar -= pointer. Creates an unknown scalar */
2078 verbose(env, "R%d tried to subtract pointer from scalar\n",
2079 dst);
2080 return -EACCES;
2081 }
2082 /* We don't allow subtraction from FP, because (according to
2083 * the test_verifier.c test "invalid fp arithmetic") JITs might
2084 * not be able to deal with it.
2085 */
2086 if (ptr_reg->type == PTR_TO_STACK) {
2087 verbose(env, "R%d subtraction from stack pointer prohibited\n",
2088 dst);
2089 return -EACCES;
2090 }
2091 if (known && (ptr_reg->off - smin_val ==
2092 (s64)(s32)(ptr_reg->off - smin_val))) {
2093 /* pointer -= K. Subtract it from fixed offset */
2094 dst_reg->smin_value = smin_ptr;
2095 dst_reg->smax_value = smax_ptr;
2096 dst_reg->umin_value = umin_ptr;
2097 dst_reg->umax_value = umax_ptr;
2098 dst_reg->var_off = ptr_reg->var_off;
2099 dst_reg->id = ptr_reg->id;
2100 dst_reg->off = ptr_reg->off - smin_val;
2101 dst_reg->range = ptr_reg->range;
2102 break;
2103 }
2104 /* A new variable offset is created. If the subtrahend is known
2105 * nonnegative, then any reg->range we had before is still good.
2106 */
2107 if (signed_sub_overflows(smin_ptr, smax_val) ||
2108 signed_sub_overflows(smax_ptr, smin_val)) {
2109 /* Overflow possible, we know nothing */
2110 dst_reg->smin_value = S64_MIN;
2111 dst_reg->smax_value = S64_MAX;
2112 } else {
2113 dst_reg->smin_value = smin_ptr - smax_val;
2114 dst_reg->smax_value = smax_ptr - smin_val;
2115 }
2116 if (umin_ptr < umax_val) {
2117 /* Overflow possible, we know nothing */
2118 dst_reg->umin_value = 0;
2119 dst_reg->umax_value = U64_MAX;
2120 } else {
2121 /* Cannot overflow (as long as bounds are consistent) */
2122 dst_reg->umin_value = umin_ptr - umax_val;
2123 dst_reg->umax_value = umax_ptr - umin_val;
2124 }
2125 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
2126 dst_reg->off = ptr_reg->off;
2127 if (reg_is_pkt_pointer(ptr_reg)) {
2128 dst_reg->id = ++env->id_gen;
2129 /* if the subtrahend may be negative, the pointer may have advanced, so clear the range */
2130 if (smin_val < 0)
2131 dst_reg->range = 0;
2132 }
2133 break;
2134 case BPF_AND:
2135 case BPF_OR:
2136 case BPF_XOR:
2137 /* bitwise ops on pointers are troublesome, prohibit. */
2138 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
2139 dst, bpf_alu_string[opcode >> 4]);
2140 return -EACCES;
2141 default:
2142 /* other operators (e.g. MUL,LSH) produce non-pointer results */
2143 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
2144 dst, bpf_alu_string[opcode >> 4]);
2145 return -EACCES;
2146 }
2147
2148 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
2149 return -EINVAL;
2150
2151 __update_reg_bounds(dst_reg);
2152 __reg_deduce_bounds(dst_reg);
2153 __reg_bound_offset(dst_reg);
2154
2155 /* For unprivileged we require that resulting offset must be in bounds
2156 * in order to be able to sanitize access later on.
2157 */
2158 if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE &&
2159 check_map_access(env, dst, dst_reg->off, 1, false)) {
2160 verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n",
2161 dst);
2162 return -EACCES;
2163 }
2164
2165 return 0;
2166 }
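
/* Illustrative example (editor's sketch in the style of test_verifier.c;
 * the value_size of 48 is made up): for an unprivileged loader and a map
 * with value_size == 48, after a successful, NULL-checked lookup in r0
 *
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 47),   accepted, offset stays in bounds
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),    rejected: "R0 pointer arithmetic
 *                                            of map value goes out of range"
 *
 * the second ADD is refused by the !allow_ptr_leaks check above even though
 * no load or store through r0 has happened yet.
 */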
2167
2168 /* WARNING: This function does calculations on 64-bit values, but the actual
2169 * execution may occur on 32-bit values. Therefore, things like bitshifts
2170 * need extra checks in the 32-bit case.
2171 */
2172 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2173 struct bpf_insn *insn,
2174 struct bpf_reg_state *dst_reg,
2175 struct bpf_reg_state src_reg)
2176 {
2177 struct bpf_reg_state *regs = cur_regs(env);
2178 u8 opcode = BPF_OP(insn->code);
2179 bool src_known, dst_known;
2180 s64 smin_val, smax_val;
2181 u64 umin_val, umax_val;
2182 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
2183
2184 if (insn_bitness == 32) {
2185 /* Relevant for 32-bit RSH: Information can propagate towards
2186 * LSB, so it isn't sufficient to only truncate the output to
2187 * 32 bits.
2188 */
2189 coerce_reg_to_size(dst_reg, 4);
2190 coerce_reg_to_size(&src_reg, 4);
2191 }
2192
2193 smin_val = src_reg.smin_value;
2194 smax_val = src_reg.smax_value;
2195 umin_val = src_reg.umin_value;
2196 umax_val = src_reg.umax_value;
2197 src_known = tnum_is_const(src_reg.var_off);
2198 dst_known = tnum_is_const(dst_reg->var_off);
2199
2200 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
2201 smin_val > smax_val || umin_val > umax_val) {
2202 /* Taint dst register if offset had invalid bounds derived from
2203 * e.g. dead branches.
2204 */
2205 __mark_reg_unknown(dst_reg);
2206 return 0;
2207 }
2208
2209 if (!src_known &&
2210 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
2211 __mark_reg_unknown(dst_reg);
2212 return 0;
2213 }
2214
2215 switch (opcode) {
2216 case BPF_ADD:
2217 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
2218 signed_add_overflows(dst_reg->smax_value, smax_val)) {
2219 dst_reg->smin_value = S64_MIN;
2220 dst_reg->smax_value = S64_MAX;
2221 } else {
2222 dst_reg->smin_value += smin_val;
2223 dst_reg->smax_value += smax_val;
2224 }
2225 if (dst_reg->umin_value + umin_val < umin_val ||
2226 dst_reg->umax_value + umax_val < umax_val) {
2227 dst_reg->umin_value = 0;
2228 dst_reg->umax_value = U64_MAX;
2229 } else {
2230 dst_reg->umin_value += umin_val;
2231 dst_reg->umax_value += umax_val;
2232 }
2233 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
2234 break;
2235 case BPF_SUB:
2236 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
2237 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
2238 /* Overflow possible, we know nothing */
2239 dst_reg->smin_value = S64_MIN;
2240 dst_reg->smax_value = S64_MAX;
2241 } else {
2242 dst_reg->smin_value -= smax_val;
2243 dst_reg->smax_value -= smin_val;
2244 }
2245 if (dst_reg->umin_value < umax_val) {
2246 /* Overflow possible, we know nothing */
2247 dst_reg->umin_value = 0;
2248 dst_reg->umax_value = U64_MAX;
2249 } else {
2250 /* Cannot overflow (as long as bounds are consistent) */
2251 dst_reg->umin_value -= umax_val;
2252 dst_reg->umax_value -= umin_val;
2253 }
2254 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
2255 break;
2256 case BPF_MUL:
2257 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
2258 if (smin_val < 0 || dst_reg->smin_value < 0) {
2259 /* Ain't nobody got time to multiply that sign */
2260 __mark_reg_unbounded(dst_reg);
2261 __update_reg_bounds(dst_reg);
2262 break;
2263 }
2264 /* Both values are positive, so we can work with unsigned and
2265 * copy the result to signed (unless it exceeds S64_MAX).
2266 */
2267 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
2268 /* Potential overflow, we know nothing */
2269 __mark_reg_unbounded(dst_reg);
2270 /* (except what we can learn from the var_off) */
2271 __update_reg_bounds(dst_reg);
2272 break;
2273 }
2274 dst_reg->umin_value *= umin_val;
2275 dst_reg->umax_value *= umax_val;
2276 if (dst_reg->umax_value > S64_MAX) {
2277 /* Overflow possible, we know nothing */
2278 dst_reg->smin_value = S64_MIN;
2279 dst_reg->smax_value = S64_MAX;
2280 } else {
2281 dst_reg->smin_value = dst_reg->umin_value;
2282 dst_reg->smax_value = dst_reg->umax_value;
2283 }
2284 break;
2285 case BPF_AND:
2286 if (src_known && dst_known) {
2287 __mark_reg_known(dst_reg, dst_reg->var_off.value &
2288 src_reg.var_off.value);
2289 break;
2290 }
2291 /* We get our minimum from the var_off, since that's inherently
2292 * bitwise. Our maximum is the minimum of the operands' maxima.
2293 */
2294 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
2295 dst_reg->umin_value = dst_reg->var_off.value;
2296 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
2297 if (dst_reg->smin_value < 0 || smin_val < 0) {
2298 /* Lose signed bounds when ANDing negative numbers,
2299 * ain't nobody got time for that.
2300 */
2301 dst_reg->smin_value = S64_MIN;
2302 dst_reg->smax_value = S64_MAX;
2303 } else {
2304 /* ANDing two positives gives a positive, so safe to
2305 * cast result into s64.
2306 */
2307 dst_reg->smin_value = dst_reg->umin_value;
2308 dst_reg->smax_value = dst_reg->umax_value;
2309 }
2310 /* We may learn something more from the var_off */
2311 __update_reg_bounds(dst_reg);
2312 break;
2313 case BPF_OR:
2314 if (src_known && dst_known) {
2315 __mark_reg_known(dst_reg, dst_reg->var_off.value |
2316 src_reg.var_off.value);
2317 break;
2318 }
2319 /* We get our maximum from the var_off, and our minimum is the
2320 * maximum of the operands' minima
2321 */
2322 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
2323 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
2324 dst_reg->umax_value = dst_reg->var_off.value |
2325 dst_reg->var_off.mask;
2326 if (dst_reg->smin_value < 0 || smin_val < 0) {
2327 /* Lose signed bounds when ORing negative numbers,
2328 * ain't nobody got time for that.
2329 */
2330 dst_reg->smin_value = S64_MIN;
2331 dst_reg->smax_value = S64_MAX;
2332 } else {
2333 /* ORing two positives gives a positive, so safe to
2334 * cast result into s64.
2335 */
2336 dst_reg->smin_value = dst_reg->umin_value;
2337 dst_reg->smax_value = dst_reg->umax_value;
2338 }
2339 /* We may learn something more from the var_off */
2340 __update_reg_bounds(dst_reg);
2341 break;
2342 case BPF_LSH:
2343 if (umax_val >= insn_bitness) {
2344 /* Shifts greater than 31 or 63 are undefined.
2345 * This includes shifts by a negative number.
2346 */
2347 mark_reg_unknown(env, regs, insn->dst_reg);
2348 break;
2349 }
2350 /* We lose all sign bit information (except what we can pick
2351 * up from var_off)
2352 */
2353 dst_reg->smin_value = S64_MIN;
2354 dst_reg->smax_value = S64_MAX;
2355 /* If we might shift our top bit out, then we know nothing */
2356 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
2357 dst_reg->umin_value = 0;
2358 dst_reg->umax_value = U64_MAX;
2359 } else {
2360 dst_reg->umin_value <<= umin_val;
2361 dst_reg->umax_value <<= umax_val;
2362 }
2363 if (src_known)
2364 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
2365 else
2366 dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
2367 /* We may learn something more from the var_off */
2368 __update_reg_bounds(dst_reg);
2369 break;
2370 case BPF_RSH:
2371 if (umax_val >= insn_bitness) {
2372 /* Shifts greater than 31 or 63 are undefined.
2373 * This includes shifts by a negative number.
2374 */
2375 mark_reg_unknown(env, regs, insn->dst_reg);
2376 break;
2377 }
2378 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
2379 * be negative, then either:
2380 * 1) src_reg might be zero, so the sign bit of the result is
2381 * unknown, so we lose our signed bounds
2382 * 2) it's known negative, thus the unsigned bounds capture the
2383 * signed bounds
2384 * 3) the signed bounds cross zero, so they tell us nothing
2385 * about the result
2386 * If the value in dst_reg is known nonnegative, then again the
2387 * unsigned bounds capture the signed bounds.
2388 * Thus, in all cases it suffices to blow away our signed bounds
2389 * and rely on inferring new ones from the unsigned bounds and
2390 * var_off of the result.
2391 */
2392 dst_reg->smin_value = S64_MIN;
2393 dst_reg->smax_value = S64_MAX;
2394 if (src_known)
2395 dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2396 umin_val);
2397 else
2398 dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
2399 dst_reg->umin_value >>= umax_val;
2400 dst_reg->umax_value >>= umin_val;
2401 /* We may learn something more from the var_off */
2402 __update_reg_bounds(dst_reg);
2403 break;
2404 default:
2405 mark_reg_unknown(env, regs, insn->dst_reg);
2406 break;
2407 }
2408
2409 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2410 /* 32-bit ALU ops are (32,32)->32 */
2411 coerce_reg_to_size(dst_reg, 4);
2412 }
2413
2414 __reg_deduce_bounds(dst_reg);
2415 __reg_bound_offset(dst_reg);
2416 return 0;
2417 }
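
/* Worked example (editor's annotation): suppose dst_reg is a scalar with
 * bounds [0, 255] and var_off (0; 0xff), e.g. after a 1-byte load, and the
 * insn is BPF_ALU64_IMM(BPF_AND, dst, 0x3c).  The BPF_AND case above yields
 * var_off = (0; 0x3c), umin_value = 0 and umax_value = min(255, 0x3c) = 0x3c,
 * and since neither operand can be negative the signed bounds become
 * [0, 0x3c] as well; a later "pkt_ptr += dst" can then be proven in bounds
 * from these bounds.
 */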
2418
2419 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
2420 * and var_off.
2421 */
2422 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2423 struct bpf_insn *insn)
2424 {
2425 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
2426 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2427 u8 opcode = BPF_OP(insn->code);
2428
2429 dst_reg = &regs[insn->dst_reg];
2430 src_reg = NULL;
2431 if (dst_reg->type != SCALAR_VALUE)
2432 ptr_reg = dst_reg;
2433 if (BPF_SRC(insn->code) == BPF_X) {
2434 src_reg = &regs[insn->src_reg];
2435 if (src_reg->type != SCALAR_VALUE) {
2436 if (dst_reg->type != SCALAR_VALUE) {
2437 /* Combining two pointers by any ALU op yields
2438 * an arbitrary scalar. Disallow all math except
2439 * pointer subtraction
2440 */
2441 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
2442 mark_reg_unknown(env, regs, insn->dst_reg);
2443 return 0;
2444 }
2445 verbose(env, "R%d pointer %s pointer prohibited\n",
2446 insn->dst_reg,
2447 bpf_alu_string[opcode >> 4]);
2448 return -EACCES;
2449 } else {
2450 /* scalar += pointer
2451 * This is legal, but we have to reverse our
2452 * src/dest handling in computing the range
2453 */
2454 return adjust_ptr_min_max_vals(env, insn,
2455 src_reg, dst_reg);
2456 }
2457 } else if (ptr_reg) {
2458 /* pointer += scalar */
2459 return adjust_ptr_min_max_vals(env, insn,
2460 dst_reg, src_reg);
2461 }
2462 } else {
2463 /* Pretend the src is a reg with a known value, since we only
2464 * need to be able to read from this state.
2465 */
2466 off_reg.type = SCALAR_VALUE;
2467 __mark_reg_known(&off_reg, insn->imm);
2468 src_reg = &off_reg;
2469 if (ptr_reg) /* pointer += K */
2470 return adjust_ptr_min_max_vals(env, insn,
2471 ptr_reg, src_reg);
2472 }
2473
2474 /* Got here implies adding two SCALAR_VALUEs */
2475 if (WARN_ON_ONCE(ptr_reg)) {
2476 print_verifier_state(env, env->cur_state);
2477 verbose(env, "verifier internal error: unexpected ptr_reg\n");
2478 return -EINVAL;
2479 }
2480 if (WARN_ON(!src_reg)) {
2481 print_verifier_state(env, env->cur_state);
2482 verbose(env, "verifier internal error: no src_reg\n");
2483 return -EINVAL;
2484 }
2485 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
2486 }
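
/* Editor's note (illustrative; register choice is arbitrary): with r0 a
 * pointer and r2 a scalar, both
 *
 *   BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_0),   r2 (scalar) += r0 (ptr)
 *   BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),   r0 (ptr) += r2 (scalar)
 *
 * end up in adjust_ptr_min_max_vals() with r0 as ptr_reg and r2 as off_reg
 * (in the first case the result, and hence the pointer type, lands in r2).
 * Pointer-with-pointer math other than the privileged BPF_SUB case is
 * rejected outright.
 */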
2487
2488 /* check validity of 32-bit and 64-bit arithmetic operations */
2489 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2490 {
2491 struct bpf_reg_state *regs = cur_regs(env);
2492 u8 opcode = BPF_OP(insn->code);
2493 int err;
2494
2495 if (opcode == BPF_END || opcode == BPF_NEG) {
2496 if (opcode == BPF_NEG) {
2497 if (BPF_SRC(insn->code) != 0 ||
2498 insn->src_reg != BPF_REG_0 ||
2499 insn->off != 0 || insn->imm != 0) {
2500 verbose(env, "BPF_NEG uses reserved fields\n");
2501 return -EINVAL;
2502 }
2503 } else {
2504 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
2505 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
2506 BPF_CLASS(insn->code) == BPF_ALU64) {
2507 verbose(env, "BPF_END uses reserved fields\n");
2508 return -EINVAL;
2509 }
2510 }
2511
2512 /* check src operand */
2513 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
2514 if (err)
2515 return err;
2516
2517 if (is_pointer_value(env, insn->dst_reg)) {
2518 verbose(env, "R%d pointer arithmetic prohibited\n",
2519 insn->dst_reg);
2520 return -EACCES;
2521 }
2522
2523 /* check dest operand */
2524 err = check_reg_arg(env, insn->dst_reg, DST_OP);
2525 if (err)
2526 return err;
2527
2528 } else if (opcode == BPF_MOV) {
2529
2530 if (BPF_SRC(insn->code) == BPF_X) {
2531 if (insn->imm != 0 || insn->off != 0) {
2532 verbose(env, "BPF_MOV uses reserved fields\n");
2533 return -EINVAL;
2534 }
2535
2536 /* check src operand */
2537 err = check_reg_arg(env, insn->src_reg, SRC_OP);
2538 if (err)
2539 return err;
2540 } else {
2541 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
2542 verbose(env, "BPF_MOV uses reserved fields\n");
2543 return -EINVAL;
2544 }
2545 }
2546
2547 /* check dest operand */
2548 err = check_reg_arg(env, insn->dst_reg, DST_OP);
2549 if (err)
2550 return err;
2551
2552 if (BPF_SRC(insn->code) == BPF_X) {
2553 if (BPF_CLASS(insn->code) == BPF_ALU64) {
2554 /* case: R1 = R2
2555 * copy register state to dest reg
2556 */
2557 regs[insn->dst_reg] = regs[insn->src_reg];
2558 regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
2559 } else {
2560 /* R1 = (u32) R2 */
2561 if (is_pointer_value(env, insn->src_reg)) {
2562 verbose(env,
2563 "R%d partial copy of pointer\n",
2564 insn->src_reg);
2565 return -EACCES;
2566 }
2567 mark_reg_unknown(env, regs, insn->dst_reg);
2568 coerce_reg_to_size(&regs[insn->dst_reg], 4);
2569 }
2570 } else {
2571 /* case: R = imm
2572 * remember the value we stored into this reg
2573 */
2574 regs[insn->dst_reg].type = SCALAR_VALUE;
2575 if (BPF_CLASS(insn->code) == BPF_ALU64) {
2576 __mark_reg_known(regs + insn->dst_reg,
2577 insn->imm);
2578 } else {
2579 __mark_reg_known(regs + insn->dst_reg,
2580 (u32)insn->imm);
2581 }
2582 }
2583
2584 } else if (opcode > BPF_END) {
2585 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
2586 return -EINVAL;
2587
2588 } else { /* all other ALU ops: and, sub, xor, add, ... */
2589
2590 if (BPF_SRC(insn->code) == BPF_X) {
2591 if (insn->imm != 0 || insn->off != 0) {
2592 verbose(env, "BPF_ALU uses reserved fields\n");
2593 return -EINVAL;
2594 }
2595 /* check src1 operand */
2596 err = check_reg_arg(env, insn->src_reg, SRC_OP);
2597 if (err)
2598 return err;
2599 } else {
2600 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
2601 verbose(env, "BPF_ALU uses reserved fields\n");
2602 return -EINVAL;
2603 }
2604 }
2605
2606 /* check src2 operand */
2607 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
2608 if (err)
2609 return err;
2610
2611 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
2612 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
2613 verbose(env, "div by zero\n");
2614 return -EINVAL;
2615 }
2616
2617 if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
2618 verbose(env, "BPF_ARSH not supported for 32 bit ALU\n");
2619 return -EINVAL;
2620 }
2621
2622 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
2623 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
2624 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
2625
2626 if (insn->imm < 0 || insn->imm >= size) {
2627 verbose(env, "invalid shift %d\n", insn->imm);
2628 return -EINVAL;
2629 }
2630 }
2631
2632 /* check dest operand */
2633 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
2634 if (err)
2635 return err;
2636
2637 return adjust_reg_min_max_vals(env, insn);
2638 }
2639
2640 return 0;
2641 }
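
/* Illustrative example (editor's sketch): BPF_MOV64_REG(BPF_REG_1, BPF_REG_10)
 * copies the full frame-pointer state into r1, whereas the 32-bit form
 * BPF_MOV32_REG(BPF_REG_1, BPF_REG_10) is rejected above for unprivileged
 * programs with "R10 partial copy of pointer", since the truncated value
 * would expose pointer bits as a scalar.
 */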
2642
2643 static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2644 struct bpf_reg_state *dst_reg,
2645 enum bpf_reg_type type,
2646 bool range_right_open)
2647 {
2648 struct bpf_reg_state *regs = state->regs, *reg;
2649 u16 new_range;
2650 int i;
2651
2652 if (dst_reg->off < 0 ||
2653 (dst_reg->off == 0 && range_right_open))
2654 /* This doesn't give us any range */
2655 return;
2656
2657 if (dst_reg->umax_value > MAX_PACKET_OFF ||
2658 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
2659 /* Risk of overflow. For instance, ptr + (1<<63) may be less
2660 * than pkt_end, but that's because it's also less than pkt.
2661 */
2662 return;
2663
2664 new_range = dst_reg->off;
2665 if (range_right_open)
2666 new_range--;
2667
2668 /* Examples for register markings:
2669 *
2670 * pkt_data in dst register:
2671 *
2672 * r2 = r3;
2673 * r2 += 8;
2674 * if (r2 > pkt_end) goto <handle exception>
2675 * <access okay>
2676 *
2677 * r2 = r3;
2678 * r2 += 8;
2679 * if (r2 < pkt_end) goto <access okay>
2680 * <handle exception>
2681 *
2682 * Where:
2683 * r2 == dst_reg, pkt_end == src_reg
2684 * r2=pkt(id=n,off=8,r=0)
2685 * r3=pkt(id=n,off=0,r=0)
2686 *
2687 * pkt_data in src register:
2688 *
2689 * r2 = r3;
2690 * r2 += 8;
2691 * if (pkt_end >= r2) goto <access okay>
2692 * <handle exception>
2693 *
2694 * r2 = r3;
2695 * r2 += 8;
2696 * if (pkt_end <= r2) goto <handle exception>
2697 * <access okay>
2698 *
2699 * Where:
2700 * pkt_end == dst_reg, r2 == src_reg
2701 * r2=pkt(id=n,off=8,r=0)
2702 * r3=pkt(id=n,off=0,r=0)
2703 *
2704 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
2705 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
2706 * and [r3, r3 + 8-1) respectively is safe to access depending on
2707 * the check.
2708 */
2709
2710 /* If our ids match, then we must have the same max_value. And we
2711 * don't care about the other reg's fixed offset, since if it's too big
2712 * the range won't allow anything.
2713 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
2714 */
2715 for (i = 0; i < MAX_BPF_REG; i++)
2716 if (regs[i].type == type && regs[i].id == dst_reg->id)
2717 /* keep the maximum range already checked */
2718 regs[i].range = max(regs[i].range, new_range);
2719
2720 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
2721 if (state->stack[i].slot_type[0] != STACK_SPILL)
2722 continue;
2723 reg = &state->stack[i].spilled_ptr;
2724 if (reg->type == type && reg->id == dst_reg->id)
2725 reg->range = max(reg->range, new_range);
2726 }
2727 }
2728
2729 /* Adjusts the register min/max values in the case that the dst_reg is the
2730 * variable register that we are working on, and src_reg is a constant or we're
2731 * simply doing a BPF_K check.
2732 * In JEQ/JNE cases we also adjust the var_off values.
2733 */
2734 static void reg_set_min_max(struct bpf_reg_state *true_reg,
2735 struct bpf_reg_state *false_reg, u64 val,
2736 u8 opcode)
2737 {
2738 /* If the dst_reg is a pointer, we can't learn anything about its
2739 * variable offset from the compare (unless src_reg were a pointer into
2740 * the same object, but we don't bother with that).
2741 * Since false_reg and true_reg have the same type by construction, we
2742 * only need to check one of them for pointerness.
2743 */
2744 if (__is_pointer_value(false, false_reg))
2745 return;
2746
2747 switch (opcode) {
2748 case BPF_JEQ:
2749 /* If this is false then we know nothing Jon Snow, but if it is
2750 * true then we know for sure.
2751 */
2752 __mark_reg_known(true_reg, val);
2753 break;
2754 case BPF_JNE:
2755 /* If this is true we know nothing Jon Snow, but if it is false
2756 * we know the value for sure;
2757 */
2758 __mark_reg_known(false_reg, val);
2759 break;
2760 case BPF_JGT:
2761 false_reg->umax_value = min(false_reg->umax_value, val);
2762 true_reg->umin_value = max(true_reg->umin_value, val + 1);
2763 break;
2764 case BPF_JSGT:
2765 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
2766 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
2767 break;
2768 case BPF_JLT:
2769 false_reg->umin_value = max(false_reg->umin_value, val);
2770 true_reg->umax_value = min(true_reg->umax_value, val - 1);
2771 break;
2772 case BPF_JSLT:
2773 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
2774 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
2775 break;
2776 case BPF_JGE:
2777 false_reg->umax_value = min(false_reg->umax_value, val - 1);
2778 true_reg->umin_value = max(true_reg->umin_value, val);
2779 break;
2780 case BPF_JSGE:
2781 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
2782 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
2783 break;
2784 case BPF_JLE:
2785 false_reg->umin_value = max(false_reg->umin_value, val + 1);
2786 true_reg->umax_value = min(true_reg->umax_value, val);
2787 break;
2788 case BPF_JSLE:
2789 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
2790 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
2791 break;
2792 default:
2793 break;
2794 }
2795
2796 __reg_deduce_bounds(false_reg);
2797 __reg_deduce_bounds(true_reg);
2798 /* We might have learned some bits from the bounds. */
2799 __reg_bound_offset(false_reg);
2800 __reg_bound_offset(true_reg);
2801 /* Intersecting with the old var_off might have improved our bounds
2802 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2803 * then new var_off is (0; 0x7f...fc) which improves our umax.
2804 */
2805 __update_reg_bounds(false_reg);
2806 __update_reg_bounds(true_reg);
2807 }
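
/* Worked example (editor's annotation): for "if r1 > 10 goto ..." with r1 an
 * unknown scalar, reg_set_min_max() runs with val == 10 and opcode ==
 * BPF_JGT: the true branch gets umin_value = max(umin_value, 11) and the
 * fall-through branch gets umax_value = min(umax_value, 10), after which the
 * deduce/bound helpers above propagate what they can into the signed bounds
 * and var_off.
 */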
2808
2809 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
2810 * the variable reg.
2811 */
2812 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
2813 struct bpf_reg_state *false_reg, u64 val,
2814 u8 opcode)
2815 {
2816 if (__is_pointer_value(false, false_reg))
2817 return;
2818
2819 switch (opcode) {
2820 case BPF_JEQ:
2821 /* If this is false then we know nothing Jon Snow, but if it is
2822 * true then we know for sure.
2823 */
2824 __mark_reg_known(true_reg, val);
2825 break;
2826 case BPF_JNE:
2827 /* If this is true we know nothing Jon Snow, but if it is false
2828 * we know the value for sure;
2829 */
2830 __mark_reg_known(false_reg, val);
2831 break;
2832 case BPF_JGT:
2833 true_reg->umax_value = min(true_reg->umax_value, val - 1);
2834 false_reg->umin_value = max(false_reg->umin_value, val);
2835 break;
2836 case BPF_JSGT:
2837 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
2838 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
2839 break;
2840 case BPF_JLT:
2841 true_reg->umin_value = max(true_reg->umin_value, val + 1);
2842 false_reg->umax_value = min(false_reg->umax_value, val);
2843 break;
2844 case BPF_JSLT:
2845 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
2846 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
2847 break;
2848 case BPF_JGE:
2849 true_reg->umax_value = min(true_reg->umax_value, val);
2850 false_reg->umin_value = max(false_reg->umin_value, val + 1);
2851 break;
2852 case BPF_JSGE:
2853 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
2854 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
2855 break;
2856 case BPF_JLE:
2857 true_reg->umin_value = max(true_reg->umin_value, val);
2858 false_reg->umax_value = min(false_reg->umax_value, val - 1);
2859 break;
2860 case BPF_JSLE:
2861 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
2862 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
2863 break;
2864 default:
2865 break;
2866 }
2867
2868 __reg_deduce_bounds(false_reg);
2869 __reg_deduce_bounds(true_reg);
2870 /* We might have learned some bits from the bounds. */
2871 __reg_bound_offset(false_reg);
2872 __reg_bound_offset(true_reg);
2873 /* Intersecting with the old var_off might have improved our bounds
2874 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2875 * then new var_off is (0; 0x7f...fc) which improves our umax.
2876 */
2877 __update_reg_bounds(false_reg);
2878 __update_reg_bounds(true_reg);
2879 }
2880
2881 /* Regs are known to be equal, so intersect their min/max/var_off */
2882 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
2883 struct bpf_reg_state *dst_reg)
2884 {
2885 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
2886 dst_reg->umin_value);
2887 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
2888 dst_reg->umax_value);
2889 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
2890 dst_reg->smin_value);
2891 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
2892 dst_reg->smax_value);
2893 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
2894 dst_reg->var_off);
2895 /* We might have learned new bounds from the var_off. */
2896 __update_reg_bounds(src_reg);
2897 __update_reg_bounds(dst_reg);
2898 /* We might have learned something about the sign bit. */
2899 __reg_deduce_bounds(src_reg);
2900 __reg_deduce_bounds(dst_reg);
2901 /* We might have learned some bits from the bounds. */
2902 __reg_bound_offset(src_reg);
2903 __reg_bound_offset(dst_reg);
2904 /* Intersecting with the old var_off might have improved our bounds
2905 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2906 * then new var_off is (0; 0x7f...fc) which improves our umax.
2907 */
2908 __update_reg_bounds(src_reg);
2909 __update_reg_bounds(dst_reg);
2910 }
2911
2912 static void reg_combine_min_max(struct bpf_reg_state *true_src,
2913 struct bpf_reg_state *true_dst,
2914 struct bpf_reg_state *false_src,
2915 struct bpf_reg_state *false_dst,
2916 u8 opcode)
2917 {
2918 switch (opcode) {
2919 case BPF_JEQ:
2920 __reg_combine_min_max(true_src, true_dst);
2921 break;
2922 case BPF_JNE:
2923 __reg_combine_min_max(false_src, false_dst);
2924 break;
2925 }
2926 }
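
/* Worked example (editor's annotation): for "if r1 == r2 goto ..." with r1
 * bounded to [0, 100] and r2 to [50, 200], the BPF_JEQ case above intersects
 * the two states on the true branch, so both registers end up bounded to
 * [50, 100] (plus whatever tnum_intersect() learns about their known bits).
 */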
2927
2928 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
2929 bool is_null)
2930 {
2931 struct bpf_reg_state *reg = &regs[regno];
2932
2933 if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
2934 /* Old offset (both fixed and variable parts) should
2935 * have been known-zero, because we don't allow pointer
2936 * arithmetic on pointers that might be NULL.
2937 */
2938 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
2939 !tnum_equals_const(reg->var_off, 0) ||
2940 reg->off)) {
2941 __mark_reg_known_zero(reg);
2942 reg->off = 0;
2943 }
2944 if (is_null) {
2945 reg->type = SCALAR_VALUE;
2946 } else if (reg->map_ptr->inner_map_meta) {
2947 reg->type = CONST_PTR_TO_MAP;
2948 reg->map_ptr = reg->map_ptr->inner_map_meta;
2949 } else {
2950 reg->type = PTR_TO_MAP_VALUE;
2951 }
2952 /* We don't need the id from this point onwards anymore, so
2953 * reset it to give state pruning a better chance to take
2954 * effect.
2955 */
2956 reg->id = 0;
2957 }
2958 }
2959
2960 /* The logic is similar to find_good_pkt_pointers(), both could eventually
2961 * be folded together at some point.
2962 */
2963 static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
2964 bool is_null)
2965 {
2966 struct bpf_reg_state *regs = state->regs;
2967 u32 id = regs[regno].id;
2968 int i;
2969
2970 for (i = 0; i < MAX_BPF_REG; i++)
2971 mark_map_reg(regs, i, id, is_null);
2972
2973 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
2974 if (state->stack[i].slot_type[0] != STACK_SPILL)
2975 continue;
2976 mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
2977 }
2978 }
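
/* Illustrative example (editor's sketch in the style of test_verifier.c;
 * jump offsets are arbitrary): after the usual r1/r2 setup,
 *
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *   BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),      r6 inherits r0's id
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),    NULL check on r0
 *
 * makes mark_map_regs() walk every register (and spilled slot) carrying that
 * id, so r6 becomes PTR_TO_MAP_VALUE on the fall-through path and a plain
 * SCALAR_VALUE on the r0 == 0 path, and the id is cleared to help pruning.
 */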
2979
2980 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
2981 struct bpf_reg_state *dst_reg,
2982 struct bpf_reg_state *src_reg,
2983 struct bpf_verifier_state *this_branch,
2984 struct bpf_verifier_state *other_branch)
2985 {
2986 if (BPF_SRC(insn->code) != BPF_X)
2987 return false;
2988
2989 switch (BPF_OP(insn->code)) {
2990 case BPF_JGT:
2991 if ((dst_reg->type == PTR_TO_PACKET &&
2992 src_reg->type == PTR_TO_PACKET_END) ||
2993 (dst_reg->type == PTR_TO_PACKET_META &&
2994 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
2995 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
2996 find_good_pkt_pointers(this_branch, dst_reg,
2997 dst_reg->type, false);
2998 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
2999 src_reg->type == PTR_TO_PACKET) ||
3000 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3001 src_reg->type == PTR_TO_PACKET_META)) {
3002 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
3003 find_good_pkt_pointers(other_branch, src_reg,
3004 src_reg->type, true);
3005 } else {
3006 return false;
3007 }
3008 break;
3009 case BPF_JLT:
3010 if ((dst_reg->type == PTR_TO_PACKET &&
3011 src_reg->type == PTR_TO_PACKET_END) ||
3012 (dst_reg->type == PTR_TO_PACKET_META &&
3013 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3014 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
3015 find_good_pkt_pointers(other_branch, dst_reg,
3016 dst_reg->type, true);
3017 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
3018 src_reg->type == PTR_TO_PACKET) ||
3019 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3020 src_reg->type == PTR_TO_PACKET_META)) {
3021 /* pkt_end < pkt_data', pkt_data < pkt_meta' */
3022 find_good_pkt_pointers(this_branch, src_reg,
3023 src_reg->type, false);
3024 } else {
3025 return false;
3026 }
3027 break;
3028 case BPF_JGE:
3029 if ((dst_reg->type == PTR_TO_PACKET &&
3030 src_reg->type == PTR_TO_PACKET_END) ||
3031 (dst_reg->type == PTR_TO_PACKET_META &&
3032 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3033 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
3034 find_good_pkt_pointers(this_branch, dst_reg,
3035 dst_reg->type, true);
3036 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
3037 src_reg->type == PTR_TO_PACKET) ||
3038 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3039 src_reg->type == PTR_TO_PACKET_META)) {
3040 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
3041 find_good_pkt_pointers(other_branch, src_reg,
3042 src_reg->type, false);
3043 } else {
3044 return false;
3045 }
3046 break;
3047 case BPF_JLE:
3048 if ((dst_reg->type == PTR_TO_PACKET &&
3049 src_reg->type == PTR_TO_PACKET_END) ||
3050 (dst_reg->type == PTR_TO_PACKET_META &&
3051 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
3052 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
3053 find_good_pkt_pointers(other_branch, dst_reg,
3054 dst_reg->type, false);
3055 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
3056 src_reg->type == PTR_TO_PACKET) ||
3057 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
3058 src_reg->type == PTR_TO_PACKET_META)) {
3059 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
3060 find_good_pkt_pointers(this_branch, src_reg,
3061 src_reg->type, true);
3062 } else {
3063 return false;
3064 }
3065 break;
3066 default:
3067 return false;
3068 }
3069
3070 return true;
3071 }
3072
3073 static int check_cond_jmp_op(struct bpf_verifier_env *env,
3074 struct bpf_insn *insn, int *insn_idx)
3075 {
3076 struct bpf_verifier_state *other_branch, *this_branch = env->cur_state;
3077 struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
3078 u8 opcode = BPF_OP(insn->code);
3079 int err;
3080
3081 if (opcode > BPF_JSLE) {
3082 verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
3083 return -EINVAL;
3084 }
3085
3086 if (BPF_SRC(insn->code) == BPF_X) {
3087 if (insn->imm != 0) {
3088 verbose(env, "BPF_JMP uses reserved fields\n");
3089 return -EINVAL;
3090 }
3091
3092 /* check src1 operand */
3093 err = check_reg_arg(env, insn->src_reg, SRC_OP);
3094 if (err)
3095 return err;
3096
3097 if (is_pointer_value(env, insn->src_reg)) {
3098 verbose(env, "R%d pointer comparison prohibited\n",
3099 insn->src_reg);
3100 return -EACCES;
3101 }
3102 } else {
3103 if (insn->src_reg != BPF_REG_0) {
3104 verbose(env, "BPF_JMP uses reserved fields\n");
3105 return -EINVAL;
3106 }
3107 }
3108
3109 /* check src2 operand */
3110 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3111 if (err)
3112 return err;
3113
3114 dst_reg = &regs[insn->dst_reg];
3115
3116 /* detect if R == 0 where R was initialized to zero earlier */
3117 if (BPF_SRC(insn->code) == BPF_K &&
3118 (opcode == BPF_JEQ || opcode == BPF_JNE) &&
3119 dst_reg->type == SCALAR_VALUE &&
3120 tnum_equals_const(dst_reg->var_off, insn->imm)) {
3121 if (opcode == BPF_JEQ) {
3122 /* if (imm == imm) goto pc+off;
3123 * only follow the goto, ignore fall-through
3124 */
3125 *insn_idx += insn->off;
3126 return 0;
3127 } else {
3128 /* if (imm != imm) goto pc+off;
3129 * only follow fall-through branch, since
3130 * that's where the program will go
3131 */
3132 return 0;
3133 }
3134 }
3135
3136 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
3137 if (!other_branch)
3138 return -EFAULT;
3139
3140 /* detect if we are comparing against a constant value so we can adjust
3141 * our min/max values for our dst register.
3142 * This is only legit if both are scalars (or pointers to the same
3143 * object, I suppose, but we don't support that right now), because
3144 * otherwise the different base pointers mean the offsets aren't
3145 * comparable.
3146 */
3147 if (BPF_SRC(insn->code) == BPF_X) {
3148 if (dst_reg->type == SCALAR_VALUE &&
3149 regs[insn->src_reg].type == SCALAR_VALUE) {
3150 if (tnum_is_const(regs[insn->src_reg].var_off))
3151 reg_set_min_max(&other_branch->regs[insn->dst_reg],
3152 dst_reg, regs[insn->src_reg].var_off.value,
3153 opcode);
3154 else if (tnum_is_const(dst_reg->var_off))
3155 reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
3156 &regs[insn->src_reg],
3157 dst_reg->var_off.value, opcode);
3158 else if (opcode == BPF_JEQ || opcode == BPF_JNE)
3159 /* Comparing for equality, we can combine knowledge */
3160 reg_combine_min_max(&other_branch->regs[insn->src_reg],
3161 &other_branch->regs[insn->dst_reg],
3162 &regs[insn->src_reg],
3163 &regs[insn->dst_reg], opcode);
3164 }
3165 } else if (dst_reg->type == SCALAR_VALUE) {
3166 reg_set_min_max(&other_branch->regs[insn->dst_reg],
3167 dst_reg, insn->imm, opcode);
3168 }
3169
3170 /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
3171 if (BPF_SRC(insn->code) == BPF_K &&
3172 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
3173 dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
3174 /* Mark all identical map registers in each branch as either
3175 * safe or unknown, depending on whether R == 0 or R != 0 holds.
3176 */
3177 mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
3178 mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
3179 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
3180 this_branch, other_branch) &&
3181 is_pointer_value(env, insn->dst_reg)) {
3182 verbose(env, "R%d pointer comparison prohibited\n",
3183 insn->dst_reg);
3184 return -EACCES;
3185 }
3186 if (env->log.level)
3187 print_verifier_state(env, this_branch);
3188 return 0;
3189 }
3190
3191 /* return the map pointer stored inside BPF_LD_IMM64 instruction */
3192 static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
3193 {
3194 u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
3195
3196 return (struct bpf_map *) (unsigned long) imm64;
3197 }
3198
3199 /* verify BPF_LD_IMM64 instruction */
3200 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
3201 {
3202 struct bpf_reg_state *regs = cur_regs(env);
3203 int err;
3204
3205 if (BPF_SIZE(insn->code) != BPF_DW) {
3206 verbose(env, "invalid BPF_LD_IMM insn\n");
3207 return -EINVAL;
3208 }
3209 if (insn->off != 0) {
3210 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
3211 return -EINVAL;
3212 }
3213
3214 err = check_reg_arg(env, insn->dst_reg, DST_OP);
3215 if (err)
3216 return err;
3217
3218 if (insn->src_reg == 0) {
3219 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
3220
3221 regs[insn->dst_reg].type = SCALAR_VALUE;
3222 __mark_reg_known(&regs[insn->dst_reg], imm);
3223 return 0;
3224 }
3225
3226 /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
3227 BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
3228
3229 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
3230 regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
3231 return 0;
3232 }
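
/* Illustrative example (editor's sketch): user space typically emits
 *
 *   BPF_LD_MAP_FD(BPF_REG_1, map_fd),
 *
 * which expands to a two-insn BPF_LD_IMM64 with src_reg == BPF_PSEUDO_MAP_FD;
 * by the time check_ld_imm() runs, replace_map_fd_with_map_ptr() has already
 * swapped the fd for the kernel's struct bpf_map pointer, and that is what
 * ld_imm64_to_map_ptr() reassembles from the two 32-bit immediates.
 */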
3233
3234 static bool may_access_skb(enum bpf_prog_type type)
3235 {
3236 switch (type) {
3237 case BPF_PROG_TYPE_SOCKET_FILTER:
3238 case BPF_PROG_TYPE_SCHED_CLS:
3239 case BPF_PROG_TYPE_SCHED_ACT:
3240 return true;
3241 default:
3242 return false;
3243 }
3244 }
3245
3246 /* verify safety of LD_ABS|LD_IND instructions:
3247 * - they can only appear in the programs where ctx == skb
3248 * - since they are wrappers of function calls, they scratch R1-R5 registers,
3249 * preserve R6-R9, and store return value into R0
3250 *
3251 * Implicit input:
3252 * ctx == skb == R6 == CTX
3253 *
3254 * Explicit input:
3255 * SRC == any register
3256 * IMM == 32-bit immediate
3257 *
3258 * Output:
3259 * R0 - 8/16/32-bit skb data converted to cpu endianness
3260 */
3261 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
3262 {
3263 struct bpf_reg_state *regs = cur_regs(env);
3264 u8 mode = BPF_MODE(insn->code);
3265 int i, err;
3266
3267 if (!may_access_skb(env->prog->type)) {
3268 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
3269 return -EINVAL;
3270 }
3271
3272 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
3273 BPF_SIZE(insn->code) == BPF_DW ||
3274 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
3275 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
3276 return -EINVAL;
3277 }
3278
3279 /* check whether implicit source operand (register R6) is readable */
3280 err = check_reg_arg(env, BPF_REG_6, SRC_OP);
3281 if (err)
3282 return err;
3283
3284 if (regs[BPF_REG_6].type != PTR_TO_CTX) {
3285 verbose(env,
3286 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
3287 return -EINVAL;
3288 }
3289
3290 if (mode == BPF_IND) {
3291 /* check explicit source operand */
3292 err = check_reg_arg(env, insn->src_reg, SRC_OP);
3293 if (err)
3294 return err;
3295 }
3296
3297 /* reset caller saved regs to unreadable */
3298 for (i = 0; i < CALLER_SAVED_REGS; i++) {
3299 mark_reg_not_init(env, regs, caller_saved[i]);
3300 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3301 }
3302
3303 /* mark destination R0 register as readable, since it contains
3304 * the value fetched from the packet.
3305 * Already marked as written above.
3306 */
3307 mark_reg_unknown(env, regs, BPF_REG_0);
3308 return 0;
3309 }
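
/* Illustrative example (editor's sketch): a classic socket-filter style load
 *
 *   BPF_LD_ABS(BPF_B, ETH_HLEN),    r0 = byte at skb->data + 14
 *
 * is only accepted when may_access_skb() allows it for the program type and
 * R6 still holds the skb context; afterwards R1-R5 are scratched and R0
 * holds an unknown scalar.
 */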
3310
3311 static int check_return_code(struct bpf_verifier_env *env)
3312 {
3313 struct bpf_reg_state *reg;
3314 struct tnum range = tnum_range(0, 1);
3315
3316 switch (env->prog->type) {
3317 case BPF_PROG_TYPE_CGROUP_SKB:
3318 case BPF_PROG_TYPE_CGROUP_SOCK:
3319 case BPF_PROG_TYPE_SOCK_OPS:
3320 case BPF_PROG_TYPE_CGROUP_DEVICE:
3321 break;
3322 default:
3323 return 0;
3324 }
3325
3326 reg = cur_regs(env) + BPF_REG_0;
3327 if (reg->type != SCALAR_VALUE) {
3328 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
3329 reg_type_str[reg->type]);
3330 return -EINVAL;
3331 }
3332
3333 if (!tnum_in(range, reg->var_off)) {
3334 verbose(env, "At program exit the register R0 ");
3335 if (!tnum_is_unknown(reg->var_off)) {
3336 char tn_buf[48];
3337
3338 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3339 verbose(env, "has value %s", tn_buf);
3340 } else {
3341 verbose(env, "has unknown scalar value");
3342 }
3343 verbose(env, " should have been 0 or 1\n");
3344 return -EINVAL;
3345 }
3346 return 0;
3347 }
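
/* Worked example (editor's annotation): a BPF_PROG_TYPE_CGROUP_SKB program
 * ending in
 *
 *   BPF_MOV64_IMM(BPF_REG_0, 2),
 *   BPF_EXIT_INSN(),
 *
 * fails the tnum_in(tnum_range(0, 1), ...) test above and is rejected with
 * "At program exit the register R0 ... should have been 0 or 1", while
 * returning 0 or 1 is accepted.
 */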
3348
3349 /* non-recursive DFS pseudo code
3350 * 1 procedure DFS-iterative(G,v):
3351 * 2 label v as discovered
3352 * 3 let S be a stack
3353 * 4 S.push(v)
3354 * 5 while S is not empty
3355 * 6 t <- S.pop()
3356 * 7 if t is what we're looking for:
3357 * 8 return t
3358 * 9 for all edges e in G.adjacentEdges(t) do
3359 * 10 if edge e is already labelled
3360 * 11 continue with the next edge
3361 * 12 w <- G.adjacentVertex(t,e)
3362 * 13 if vertex w is not discovered and not explored
3363 * 14 label e as tree-edge
3364 * 15 label w as discovered
3365 * 16 S.push(w)
3366 * 17 continue at 5
3367 * 18 else if vertex w is discovered
3368 * 19 label e as back-edge
3369 * 20 else
3370 * 21 // vertex w is explored
3371 * 22 label e as forward- or cross-edge
3372 * 23 label t as explored
3373 * 24 S.pop()
3374 *
3375 * convention:
3376 * 0x10 - discovered
3377 * 0x11 - discovered and fall-through edge labelled
3378 * 0x12 - discovered and fall-through and branch edges labelled
3379 * 0x20 - explored
3380 */
3381
3382 enum {
3383 DISCOVERED = 0x10,
3384 EXPLORED = 0x20,
3385 FALLTHROUGH = 1,
3386 BRANCH = 2,
3387 };
3388
3389 #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
3390
3391 static int *insn_stack; /* stack of insns to process */
3392 static int cur_stack; /* current stack index */
3393 static int *insn_state;
3394
3395 /* t, w, e - match pseudo-code above:
3396 * t - index of current instruction
3397 * w - next instruction
3398 * e - edge
3399 */
3400 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
3401 {
3402 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
3403 return 0;
3404
3405 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
3406 return 0;
3407
3408 if (w < 0 || w >= env->prog->len) {
3409 verbose(env, "jump out of range from insn %d to %d\n", t, w);
3410 return -EINVAL;
3411 }
3412
3413 if (e == BRANCH)
3414 /* mark branch target for state pruning */
3415 env->explored_states[w] = STATE_LIST_MARK;
3416
3417 if (insn_state[w] == 0) {
3418 /* tree-edge */
3419 insn_state[t] = DISCOVERED | e;
3420 insn_state[w] = DISCOVERED;
3421 if (cur_stack >= env->prog->len)
3422 return -E2BIG;
3423 insn_stack[cur_stack++] = w;
3424 return 1;
3425 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
3426 verbose(env, "back-edge from insn %d to %d\n", t, w);
3427 return -EINVAL;
3428 } else if (insn_state[w] == EXPLORED) {
3429 /* forward- or cross-edge */
3430 insn_state[t] = DISCOVERED | e;
3431 } else {
3432 verbose(env, "insn state internal bug\n");
3433 return -EFAULT;
3434 }
3435 return 0;
3436 }
3437
3438 /* non-recursive depth-first-search to detect loops in BPF program
3439 * loop == back-edge in directed graph
3440 */
3441 static int check_cfg(struct bpf_verifier_env *env)
3442 {
3443 struct bpf_insn *insns = env->prog->insnsi;
3444 int insn_cnt = env->prog->len;
3445 int ret = 0;
3446 int i, t;
3447
3448 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
3449 if (!insn_state)
3450 return -ENOMEM;
3451
3452 insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
3453 if (!insn_stack) {
3454 kfree(insn_state);
3455 return -ENOMEM;
3456 }
3457
3458 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
3459 insn_stack[0] = 0; /* 0 is the first instruction */
3460 cur_stack = 1;
3461
3462 peek_stack:
3463 if (cur_stack == 0)
3464 goto check_state;
3465 t = insn_stack[cur_stack - 1];
3466
3467 if (BPF_CLASS(insns[t].code) == BPF_JMP) {
3468 u8 opcode = BPF_OP(insns[t].code);
3469
3470 if (opcode == BPF_EXIT) {
3471 goto mark_explored;
3472 } else if (opcode == BPF_CALL) {
3473 ret = push_insn(t, t + 1, FALLTHROUGH, env);
3474 if (ret == 1)
3475 goto peek_stack;
3476 else if (ret < 0)
3477 goto err_free;
3478 if (t + 1 < insn_cnt)
3479 env->explored_states[t + 1] = STATE_LIST_MARK;
3480 } else if (opcode == BPF_JA) {
3481 if (BPF_SRC(insns[t].code) != BPF_K) {
3482 ret = -EINVAL;
3483 goto err_free;
3484 }
3485 /* unconditional jump with single edge */
3486 ret = push_insn(t, t + insns[t].off + 1,
3487 FALLTHROUGH, env);
3488 if (ret == 1)
3489 goto peek_stack;
3490 else if (ret < 0)
3491 goto err_free;
3492 /* tell verifier to check for equivalent states
3493 * after every call and jump
3494 */
3495 if (t + 1 < insn_cnt)
3496 env->explored_states[t + 1] = STATE_LIST_MARK;
3497 } else {
3498 /* conditional jump with two edges */
3499 env->explored_states[t] = STATE_LIST_MARK;
3500 ret = push_insn(t, t + 1, FALLTHROUGH, env);
3501 if (ret == 1)
3502 goto peek_stack;
3503 else if (ret < 0)
3504 goto err_free;
3505
3506 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
3507 if (ret == 1)
3508 goto peek_stack;
3509 else if (ret < 0)
3510 goto err_free;
3511 }
3512 } else {
3513 /* all other non-branch instructions with single
3514 * fall-through edge
3515 */
3516 ret = push_insn(t, t + 1, FALLTHROUGH, env);
3517 if (ret == 1)
3518 goto peek_stack;
3519 else if (ret < 0)
3520 goto err_free;
3521 }
3522
3523 mark_explored:
3524 insn_state[t] = EXPLORED;
3525 if (cur_stack-- <= 0) {
3526 verbose(env, "pop stack internal bug\n");
3527 ret = -EFAULT;
3528 goto err_free;
3529 }
3530 goto peek_stack;
3531
3532 check_state:
3533 for (i = 0; i < insn_cnt; i++) {
3534 if (insn_state[i] != EXPLORED) {
3535 verbose(env, "unreachable insn %d\n", i);
3536 ret = -EINVAL;
3537 goto err_free;
3538 }
3539 }
3540 ret = 0; /* cfg looks good */
3541
3542 err_free:
3543 kfree(insn_state);
3544 kfree(insn_stack);
3545 return ret;
3546 }
3547
3548 /* check %cur's range satisfies %old's */
3549 static bool range_within(struct bpf_reg_state *old,
3550 struct bpf_reg_state *cur)
3551 {
3552 return old->umin_value <= cur->umin_value &&
3553 old->umax_value >= cur->umax_value &&
3554 old->smin_value <= cur->smin_value &&
3555 old->smax_value >= cur->smax_value;
3556 }
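/* Worked example: if the explored register was known to be in [0, 100]
 * (both as signed and unsigned), a current register known to be in [10, 50]
 * is within that range and range_within() returns true, while one in
 * [10, 200] is not, because cur->umax_value (200) exceeds old->umax_value
 * (100).
 */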
3557
3558 /* Maximum number of register states that can exist at once */
3559 #define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
3560 struct idpair {
3561 u32 old;
3562 u32 cur;
3563 };
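/* For the usual MAX_BPF_REG == 11, MAX_BPF_STACK == 512 and
 * BPF_REG_SIZE == 8, ID_MAP_SIZE evaluates to 11 + 512 / 8 = 75 idpair
 * slots: one per register plus one per spillable stack slot, which bounds
 * how many distinct ids a single state can carry.
 */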
3564
3565 /* If in the old state two registers had the same id, then they need to have
3566 * the same id in the new state as well. But that id could be different from
3567 * the old state, so we need to track the mapping from old to new ids.
3568 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
3569 * regs with old id 5 must also have new id 9 for the new state to be safe. But
3570 * regs with a different old id could still have new id 9, we don't care about
3571 * that.
3572 * So we look through our idmap to see if this old id has been seen before. If
3573 * so, we require the new id to match; otherwise, we add the id pair to the map.
3574 */
3575 static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
3576 {
3577 unsigned int i;
3578
3579 for (i = 0; i < ID_MAP_SIZE; i++) {
3580 if (!idmap[i].old) {
3581 /* Reached an empty slot; haven't seen this id before */
3582 idmap[i].old = old_id;
3583 idmap[i].cur = cur_id;
3584 return true;
3585 }
3586 if (idmap[i].old == old_id)
3587 return idmap[i].cur == cur_id;
3588 }
3589 /* We ran out of idmap slots, which should be impossible */
3590 WARN_ON_ONCE(1);
3591 return false;
3592 }
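/* Illustrative example: suppose the explored state had two registers
 * sharing old id 5 and the current state has them with ids 9 and 7. The
 * first call, check_ids(5, 9, idmap), records the pair (5, 9) and returns
 * true; the second call, check_ids(5, 7, idmap), finds old id 5 already
 * mapped to 9 != 7 and returns false, so the states are not considered
 * equivalent.
 */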
3593
3594 /* Returns true if (rold safe implies rcur safe) */
3595 static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
3596 struct idpair *idmap)
3597 {
3598 if (!(rold->live & REG_LIVE_READ))
3599 /* explored state didn't use this */
3600 return true;
3601
3602 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, live)) == 0)
3603 return true;
3604
3605 if (rold->type == NOT_INIT)
3606 /* explored state can't have used this */
3607 return true;
3608 if (rcur->type == NOT_INIT)
3609 return false;
3610 switch (rold->type) {
3611 case SCALAR_VALUE:
3612 if (rcur->type == SCALAR_VALUE) {
3613 /* new val must satisfy old val knowledge */
3614 return range_within(rold, rcur) &&
3615 tnum_in(rold->var_off, rcur->var_off);
3616 } else {
3617 /* We're trying to use a pointer in place of a scalar.
3618 * Even if the scalar was unbounded, this could lead to
3619 * pointer leaks because scalars are allowed to leak
3620 * while pointers are not. We could make this safe in
3621 * special cases if root is calling us, but it's
3622 * probably not worth the hassle.
3623 */
3624 return false;
3625 }
3626 case PTR_TO_MAP_VALUE:
3627 /* If the new min/max/var_off satisfy the old ones and
3628 * everything else matches, we are OK.
3629 * We don't care about the 'id' value, because nothing
3630 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
3631 */
3632 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
3633 range_within(rold, rcur) &&
3634 tnum_in(rold->var_off, rcur->var_off);
3635 case PTR_TO_MAP_VALUE_OR_NULL:
3636 /* a PTR_TO_MAP_VALUE could be safe to use as a
3637 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
3638 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
3639 * checked, doing so could have affected others with the same
3640 * id, and we can't check for that because we lost the id when
3641 * we converted to a PTR_TO_MAP_VALUE.
3642 */
3643 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
3644 return false;
3645 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
3646 return false;
3647 /* Check our ids match any regs they're supposed to */
3648 return check_ids(rold->id, rcur->id, idmap);
3649 case PTR_TO_PACKET_META:
3650 case PTR_TO_PACKET:
3651 if (rcur->type != rold->type)
3652 return false;
3653 /* We must have at least as much range as the old ptr
3654 * did, so that any accesses which were safe before are
3655 * still safe. This is true even if old range < old off,
3656 * since someone could have accessed through (ptr - k), or
3657 * even done ptr -= k in a register, to get a safe access.
3658 */
3659 if (rold->range > rcur->range)
3660 return false;
3661 /* If the offsets don't match, we can't trust our alignment;
3662 * nor can we be sure that we won't fall out of range.
3663 */
3664 if (rold->off != rcur->off)
3665 return false;
3666 /* id relations must be preserved */
3667 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
3668 return false;
3669 /* new val must satisfy old val knowledge */
3670 return range_within(rold, rcur) &&
3671 tnum_in(rold->var_off, rcur->var_off);
3672 case PTR_TO_CTX:
3673 case CONST_PTR_TO_MAP:
3674 case PTR_TO_STACK:
3675 case PTR_TO_PACKET_END:
3676 /* Only valid matches are exact, which memcmp() above
3677 * would have accepted
3678 */
3679 default:
3680 /* Don't know what's going on, just say it's not safe */
3681 return false;
3682 }
3683
3684 /* Shouldn't get here; if we do, say it's not safe */
3685 WARN_ON_ONCE(1);
3686 return false;
3687 }
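/* Illustrative example for the packet-pointer case above: if the explored
 * state had proven 'range == 8' for a PTR_TO_PACKET register (roughly,
 * loads of up to 8 bytes through it were shown to stay before data_end),
 * a current state where the same register only has 'range == 4' is
 * rejected, since code downstream may legitimately access byte 7. The
 * opposite direction, old range 4 and current range 8, passes this check.
 */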
3688
3689 static bool stacksafe(struct bpf_verifier_state *old,
3690 struct bpf_verifier_state *cur,
3691 struct idpair *idmap)
3692 {
3693 int i, spi;
3694
3695 /* if the explored stack has more populated slots than the current stack,
3696 * such stacks are not equivalent
3697 */
3698 if (old->allocated_stack > cur->allocated_stack)
3699 return false;
3700
3701 /* walk slots of the explored stack and ignore any additional
3702 * slots in the current stack, since explored(safe) state
3703 * didn't use them
3704 */
3705 for (i = 0; i < old->allocated_stack; i++) {
3706 spi = i / BPF_REG_SIZE;
3707
3708 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
3709 continue;
3710 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
3711 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
3712 /* Ex: old explored (safe) state has STACK_SPILL in
3713 * this stack slot, but current has STACK_MISC ->
3714 * these verifier states are not equivalent,
3715 * return false to continue verification of this path
3716 */
3717 return false;
3718 if (i % BPF_REG_SIZE)
3719 continue;
3720 if (old->stack[spi].slot_type[0] != STACK_SPILL)
3721 continue;
3722 if (!regsafe(&old->stack[spi].spilled_ptr,
3723 &cur->stack[spi].spilled_ptr,
3724 idmap))
3725 /* when explored and current stack slot are both storing
3726 * spilled registers, check that the stored pointer types
3727 * are the same as well.
3728 * Ex: explored safe path could have stored
3729 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
3730 * but current path has stored:
3731 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
3732 * such verifier states are not equivalent.
3733 * return false to continue verification of this path
3734 */
3735 return false;
3736 }
3737 return true;
3738 }
3739
3740 /* compare two verifier states
3741 *
3742 * all states stored in state_list are known to be valid, since
3743 * verifier reached 'bpf_exit' instruction through them
3744 *
3745 * this function is called when the verifier explores different branches of
3746 * execution popped from the state stack. If it sees an old state that has
3747 * a more strict register state and a more strict stack state, then this
3748 * execution branch doesn't need to be explored further, since the verifier
3749 * already concluded that the more strict state leads to a valid finish.
3750 *
3751 * Therefore two states are equivalent if register state is more conservative
3752 * and explored stack state is more conservative than the current one.
3753 * Example:
3754 * explored current
3755 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
3756 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
3757 *
3758 * In other words, if the current stack state (the one being explored) has
3759 * more valid slots than the old one that already passed validation, the
3760 * verifier can stop exploring and conclude that the current state is valid too
3761 *
3762 * Similarly with registers. If explored state has register type as invalid
3763 * whereas register type in current state is meaningful, it means that
3764 * the current state will reach 'bpf_exit' instruction safely
3765 */
3766 static bool states_equal(struct bpf_verifier_env *env,
3767 struct bpf_verifier_state *old,
3768 struct bpf_verifier_state *cur)
3769 {
3770 struct idpair *idmap;
3771 bool ret = false;
3772 int i;
3773
3774 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
3775 /* If we failed to allocate the idmap, just say it's not safe */
3776 if (!idmap)
3777 return false;
3778
3779 for (i = 0; i < MAX_BPF_REG; i++) {
3780 if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
3781 goto out_free;
3782 }
3783
3784 if (!stacksafe(old, cur, idmap))
3785 goto out_free;
3786 ret = true;
3787 out_free:
3788 kfree(idmap);
3789 return ret;
3790 }
3791
3792 /* A write screens off any subsequent reads; but write marks come from the
3793 * straight-line code between a state and its parent. When we arrive at a
3794 * jump target (in the first iteration of the propagate_liveness() loop),
3795 * we didn't arrive by the straight-line code, so read marks in state must
3796 * propagate to parent regardless of state's write marks.
3797 */
3798 static bool do_propagate_liveness(const struct bpf_verifier_state *state,
3799 struct bpf_verifier_state *parent)
3800 {
3801 bool writes = parent == state->parent; /* Observe write marks */
3802 bool touched = false; /* any changes made? */
3803 int i;
3804
3805 if (!parent)
3806 return touched;
3807 /* Propagate read liveness of registers... */
3808 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
3809 /* We don't need to worry about FP liveness because it's read-only */
3810 for (i = 0; i < BPF_REG_FP; i++) {
3811 if (parent->regs[i].live & REG_LIVE_READ)
3812 continue;
3813 if (writes && (state->regs[i].live & REG_LIVE_WRITTEN))
3814 continue;
3815 if (state->regs[i].live & REG_LIVE_READ) {
3816 parent->regs[i].live |= REG_LIVE_READ;
3817 touched = true;
3818 }
3819 }
3820 /* ... and stack slots */
3821 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
3822 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
3823 if (parent->stack[i].slot_type[0] != STACK_SPILL)
3824 continue;
3825 if (state->stack[i].slot_type[0] != STACK_SPILL)
3826 continue;
3827 if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
3828 continue;
3829 if (writes &&
3830 (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN))
3831 continue;
3832 if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) {
3833 parent->stack[i].spilled_ptr.live |= REG_LIVE_READ;
3834 touched = true;
3835 }
3836 }
3837 return touched;
3838 }
3839
3840 /* "parent" is "a state from which we reach the current state", but initially
3841 * it is not the state->parent (i.e. "the state whose straight-line code leads
3842 * to the current state"), instead it is the state that happened to arrive at
3843 * a (prunable) equivalent of the current state. See comment above
3844 * do_propagate_liveness() for consequences of this.
3845 * This function is just a more efficient way of calling mark_reg_read() or
3846 * mark_stack_slot_read() on each reg in "parent" that is read in "state",
3847 * though it requires that parent != state->parent in the call arguments.
3848 */
3849 static void propagate_liveness(const struct bpf_verifier_state *state,
3850 struct bpf_verifier_state *parent)
3851 {
3852 while (do_propagate_liveness(state, parent)) {
3853 /* Something changed, so we need to feed those changes onward */
3854 state = parent;
3855 parent = state->parent;
3856 }
3857 }
3858
3859 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
3860 {
3861 struct bpf_verifier_state_list *new_sl;
3862 struct bpf_verifier_state_list *sl;
3863 struct bpf_verifier_state *cur = env->cur_state;
3864 int i, err;
3865
3866 sl = env->explored_states[insn_idx];
3867 if (!sl)
3868 /* this 'insn_idx' instruction wasn't marked, so we will not
3869 * be doing state search here
3870 */
3871 return 0;
3872
3873 while (sl != STATE_LIST_MARK) {
3874 if (states_equal(env, &sl->state, cur)) {
3875 /* reached equivalent register/stack state,
3876 * prune the search.
3877 * Registers read by the continuation are read by us.
3878 * If we have any write marks in env->cur_state, they
3879 * will prevent corresponding reads in the continuation
3880 * from reaching our parent (an explored_state). Our
3881 * own state will get the read marks recorded, but
3882 * they'll be immediately forgotten as we're pruning
3883 * this state and will pop a new one.
3884 */
3885 propagate_liveness(&sl->state, cur);
3886 return 1;
3887 }
3888 sl = sl->next;
3889 }
3890
3891 /* there were no equivalent states, remember current one.
3892 * technically the current state is not proven to be safe yet,
3893 * but it will either reach bpf_exit (which means it's safe) or
3894 * it will be rejected. Since there are no loops, we won't be
3895 * seeing this 'insn_idx' instruction again on the way to bpf_exit
3896 */
3897 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
3898 if (!new_sl)
3899 return -ENOMEM;
3900
3901 /* add new state to the head of linked list */
3902 err = copy_verifier_state(&new_sl->state, cur);
3903 if (err) {
3904 free_verifier_state(&new_sl->state, false);
3905 kfree(new_sl);
3906 return err;
3907 }
3908 new_sl->next = env->explored_states[insn_idx];
3909 env->explored_states[insn_idx] = new_sl;
3910 /* connect new state to parentage chain */
3911 cur->parent = &new_sl->state;
3912 /* clear write marks in current state: the writes we did are not writes
3913 * our child did, so they don't screen off its reads from us.
3914 * (There are no read marks in current state, because reads always mark
3915 * their parent and current state never has children yet. Only
3916 * explored_states can get read marks.)
3917 */
3918 for (i = 0; i < BPF_REG_FP; i++)
3919 cur->regs[i].live = REG_LIVE_NONE;
3920 for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++)
3921 if (cur->stack[i].slot_type[0] == STACK_SPILL)
3922 cur->stack[i].spilled_ptr.live = REG_LIVE_NONE;
3923 return 0;
3924 }
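/* Sketch of how the pruning above plays out, assuming r2 already holds an
 * unknown scalar at this point of a program (macros from linux/filter.h):
 *
 *   n+0: BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 10, 1),  // if r2 >= 10 goto n+2
 *   n+1: BPF_MOV64_IMM(BPF_REG_6, 1),
 *   n+2: BPF_MOV64_IMM(BPF_REG_0, 0),             // join, branch target
 *   n+3: BPF_EXIT_INSN(),
 *
 * check_cfg() marked insn n+2 (a branch target) with STATE_LIST_MARK. The
 * fall-through path reaches n+2 first and its state is recorded there.
 * When the branch path is later popped and also reaches n+2, r6 and the
 * bounds of r2 differ between the two paths, but neither register carries
 * REG_LIVE_READ in the recorded state (nothing after n+2 reads them), so
 * states_equal() returns true and the branch path is logged as "safe"
 * instead of being re-walked.
 */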
3925
3926 static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
3927 int insn_idx, int prev_insn_idx)
3928 {
3929 if (env->dev_ops && env->dev_ops->insn_hook)
3930 return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
3931
3932 return 0;
3933 }
3934
3935 static int do_check(struct bpf_verifier_env *env)
3936 {
3937 struct bpf_verifier_state *state;
3938 struct bpf_insn *insns = env->prog->insnsi;
3939 struct bpf_reg_state *regs;
3940 int insn_cnt = env->prog->len;
3941 int insn_processed = 0;
3942 bool do_print_state = false;
3943
3944 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
3945 if (!state)
3946 return -ENOMEM;
3947 env->cur_state = state;
3948 init_reg_state(env, state->regs);
3949 state->parent = NULL;
3950
3951 for (;;) {
3952 struct bpf_insn *insn;
3953 u8 class;
3954 int err;
3955
3956 if (env->insn_idx >= insn_cnt) {
3957 verbose(env, "invalid insn idx %d insn_cnt %d\n",
3958 env->insn_idx, insn_cnt);
3959 return -EFAULT;
3960 }
3961
3962 insn = &insns[env->insn_idx];
3963 class = BPF_CLASS(insn->code);
3964
3965 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
3966 verbose(env,
3967 "BPF program is too large. Processed %d insn\n",
3968 insn_processed);
3969 return -E2BIG;
3970 }
3971
3972 err = is_state_visited(env, env->insn_idx);
3973 if (err < 0)
3974 return err;
3975 if (err == 1) {
3976 /* found equivalent state, can prune the search */
3977 if (env->log.level) {
3978 if (do_print_state)
3979 verbose(env, "\nfrom %d to %d: safe\n",
3980 env->prev_insn_idx, env->insn_idx);
3981 else
3982 verbose(env, "%d: safe\n", env->insn_idx);
3983 }
3984 goto process_bpf_exit;
3985 }
3986
3987 if (need_resched())
3988 cond_resched();
3989
3990 if (env->log.level > 1 || (env->log.level && do_print_state)) {
3991 if (env->log.level > 1)
3992 verbose(env, "%d:", env->insn_idx);
3993 else
3994 verbose(env, "\nfrom %d to %d:",
3995 env->prev_insn_idx, env->insn_idx);
3996 print_verifier_state(env, state);
3997 do_print_state = false;
3998 }
3999
4000 if (env->log.level) {
4001 verbose(env, "%d: ", env->insn_idx);
4002 print_bpf_insn(verbose, env, insn,
4003 env->allow_ptr_leaks);
4004 }
4005
4006 err = ext_analyzer_insn_hook(env, env->insn_idx, env->prev_insn_idx);
4007 if (err)
4008 return err;
4009
4010 regs = cur_regs(env);
4011 env->insn_aux_data[env->insn_idx].seen = true;
4012
4013 if (class == BPF_ALU || class == BPF_ALU64) {
4014 err = check_alu_op(env, insn);
4015 if (err)
4016 return err;
4017
4018 } else if (class == BPF_LDX) {
4019 enum bpf_reg_type *prev_src_type, src_reg_type;
4020
4021 /* check for reserved fields is already done */
4022
4023 /* check src operand */
4024 err = check_reg_arg(env, insn->src_reg, SRC_OP);
4025 if (err)
4026 return err;
4027
4028 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
4029 if (err)
4030 return err;
4031
4032 src_reg_type = regs[insn->src_reg].type;
4033
4034 /* check that memory (src_reg + off) is readable,
4035 * the state of dst_reg will be updated by this func
4036 */
4037 err = check_mem_access(env, env->insn_idx, insn->src_reg,
4038 insn->off, BPF_SIZE(insn->code),
4039 BPF_READ, insn->dst_reg, false);
4040 if (err)
4041 return err;
4042
4043 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
4044
4045 if (*prev_src_type == NOT_INIT) {
4046 /* saw a valid insn
4047 * dst_reg = *(u32 *)(src_reg + off)
4048 * save type to validate intersecting paths
4049 */
4050 *prev_src_type = src_reg_type;
4051
4052 } else if (src_reg_type != *prev_src_type &&
4053 (src_reg_type == PTR_TO_CTX ||
4054 *prev_src_type == PTR_TO_CTX)) {
4055 /* A buggy or malicious program is trying to use the same insn
4056 * dst_reg = *(u32*) (src_reg + off)
4057 * with different pointer types:
4058 * src_reg == ctx in one branch and
4059 * src_reg == stack|map in some other branch.
4060 * Reject it.
4061 */
4062 verbose(env, "same insn cannot be used with different pointers\n");
4063 return -EINVAL;
4064 }
4065
4066 } else if (class == BPF_STX) {
4067 enum bpf_reg_type *prev_dst_type, dst_reg_type;
4068
4069 if (BPF_MODE(insn->code) == BPF_XADD) {
4070 err = check_xadd(env, env->insn_idx, insn);
4071 if (err)
4072 return err;
4073 env->insn_idx++;
4074 continue;
4075 }
4076
4077 /* check src1 operand */
4078 err = check_reg_arg(env, insn->src_reg, SRC_OP);
4079 if (err)
4080 return err;
4081 /* check src2 operand */
4082 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4083 if (err)
4084 return err;
4085
4086 dst_reg_type = regs[insn->dst_reg].type;
4087
4088 /* check that memory (dst_reg + off) is writeable */
4089 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
4090 insn->off, BPF_SIZE(insn->code),
4091 BPF_WRITE, insn->src_reg, false);
4092 if (err)
4093 return err;
4094
4095 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
4096
4097 if (*prev_dst_type == NOT_INIT) {
4098 *prev_dst_type = dst_reg_type;
4099 } else if (dst_reg_type != *prev_dst_type &&
4100 (dst_reg_type == PTR_TO_CTX ||
4101 *prev_dst_type == PTR_TO_CTX)) {
4102 verbose(env, "same insn cannot be used with different pointers\n");
4103 return -EINVAL;
4104 }
4105
4106 } else if (class == BPF_ST) {
4107 if (BPF_MODE(insn->code) != BPF_MEM ||
4108 insn->src_reg != BPF_REG_0) {
4109 verbose(env, "BPF_ST uses reserved fields\n");
4110 return -EINVAL;
4111 }
4112 /* check src operand */
4113 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4114 if (err)
4115 return err;
4116
4117 if (is_ctx_reg(env, insn->dst_reg)) {
4118 verbose(env, "BPF_ST stores into R%d context is not allowed\n",
4119 insn->dst_reg);
4120 return -EACCES;
4121 }
4122
4123 /* check that memory (dst_reg + off) is writeable */
4124 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
4125 insn->off, BPF_SIZE(insn->code),
4126 BPF_WRITE, -1, false);
4127 if (err)
4128 return err;
4129
4130 } else if (class == BPF_JMP) {
4131 u8 opcode = BPF_OP(insn->code);
4132
4133 if (opcode == BPF_CALL) {
4134 if (BPF_SRC(insn->code) != BPF_K ||
4135 insn->off != 0 ||
4136 insn->src_reg != BPF_REG_0 ||
4137 insn->dst_reg != BPF_REG_0) {
4138 verbose(env, "BPF_CALL uses reserved fields\n");
4139 return -EINVAL;
4140 }
4141
4142 err = check_call(env, insn->imm, env->insn_idx);
4143 if (err)
4144 return err;
4145
4146 } else if (opcode == BPF_JA) {
4147 if (BPF_SRC(insn->code) != BPF_K ||
4148 insn->imm != 0 ||
4149 insn->src_reg != BPF_REG_0 ||
4150 insn->dst_reg != BPF_REG_0) {
4151 verbose(env, "BPF_JA uses reserved fields\n");
4152 return -EINVAL;
4153 }
4154
4155 env->insn_idx += insn->off + 1;
4156 continue;
4157
4158 } else if (opcode == BPF_EXIT) {
4159 if (BPF_SRC(insn->code) != BPF_K ||
4160 insn->imm != 0 ||
4161 insn->src_reg != BPF_REG_0 ||
4162 insn->dst_reg != BPF_REG_0) {
4163 verbose(env, "BPF_EXIT uses reserved fields\n");
4164 return -EINVAL;
4165 }
4166
4167 /* eBPF calling convention is such that R0 is used
4168 * to return the value from eBPF program.
4169 * Make sure that it's readable at this time
4170 * of bpf_exit, which means that program wrote
4171 * something into it earlier
4172 */
4173 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
4174 if (err)
4175 return err;
4176
4177 if (is_pointer_value(env, BPF_REG_0)) {
4178 verbose(env, "R0 leaks addr as return value\n");
4179 return -EACCES;
4180 }
4181
4182 err = check_return_code(env);
4183 if (err)
4184 return err;
4185 process_bpf_exit:
4186 err = pop_stack(env, &env->prev_insn_idx,
4187 &env->insn_idx);
4188 if (err < 0) {
4189 if (err != -ENOENT)
4190 return err;
4191 break;
4192 } else {
4193 do_print_state = true;
4194 continue;
4195 }
4196 } else {
4197 err = check_cond_jmp_op(env, insn, &env->insn_idx);
4198 if (err)
4199 return err;
4200 }
4201 } else if (class == BPF_LD) {
4202 u8 mode = BPF_MODE(insn->code);
4203
4204 if (mode == BPF_ABS || mode == BPF_IND) {
4205 err = check_ld_abs(env, insn);
4206 if (err)
4207 return err;
4208
4209 } else if (mode == BPF_IMM) {
4210 err = check_ld_imm(env, insn);
4211 if (err)
4212 return err;
4213
4214 env->insn_idx++;
4215 env->insn_aux_data[env->insn_idx].seen = true;
4216 } else {
4217 verbose(env, "invalid BPF_LD mode\n");
4218 return -EINVAL;
4219 }
4220 } else {
4221 verbose(env, "unknown insn class %d\n", class);
4222 return -EINVAL;
4223 }
4224
4225 env->insn_idx++;
4226 }
4227
4228 verbose(env, "processed %d insns, stack depth %d\n", insn_processed,
4229 env->prog->aux->stack_depth);
4230 return 0;
4231 }
4232
4233 static int check_map_prealloc(struct bpf_map *map)
4234 {
4235 return (map->map_type != BPF_MAP_TYPE_HASH &&
4236 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
4237 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
4238 !(map->map_flags & BPF_F_NO_PREALLOC);
4239 }
4240
4241 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
4242 struct bpf_map *map,
4243 struct bpf_prog *prog)
4245 {
4246 /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
4247 * preallocated hash maps, since doing memory allocation
4248 * in overflow_handler can crash depending on where nmi got
4249 * triggered.
4250 */
4251 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
4252 if (!check_map_prealloc(map)) {
4253 verbose(env, "perf_event programs can only use preallocated hash map\n");
4254 return -EINVAL;
4255 }
4256 if (map->inner_map_meta &&
4257 !check_map_prealloc(map->inner_map_meta)) {
4258 verbose(env, "perf_event programs can only use preallocated inner hash map\n");
4259 return -EINVAL;
4260 }
4261 }
4262 return 0;
4263 }
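/* For example, a BPF_MAP_TYPE_HASH created with the BPF_F_NO_PREALLOC flag
 * fails check_map_prealloc(), so a BPF_PROG_TYPE_PERF_EVENT program that
 * references it is rejected here, while the same map used by, say, a
 * socket filter program is accepted, since the check only applies to
 * perf_event programs.
 */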
4264
4265 /* look for pseudo eBPF instructions that access map FDs and
4266 * replace them with actual map pointers
4267 */
4268 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
4269 {
4270 struct bpf_insn *insn = env->prog->insnsi;
4271 int insn_cnt = env->prog->len;
4272 int i, j, err;
4273
4274 err = bpf_prog_calc_tag(env->prog);
4275 if (err)
4276 return err;
4277
4278 for (i = 0; i < insn_cnt; i++, insn++) {
4279 if (BPF_CLASS(insn->code) == BPF_LDX &&
4280 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
4281 verbose(env, "BPF_LDX uses reserved fields\n");
4282 return -EINVAL;
4283 }
4284
4285 if (BPF_CLASS(insn->code) == BPF_STX &&
4286 ((BPF_MODE(insn->code) != BPF_MEM &&
4287 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
4288 verbose(env, "BPF_STX uses reserved fields\n");
4289 return -EINVAL;
4290 }
4291
4292 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
4293 struct bpf_map *map;
4294 struct fd f;
4295
4296 if (i == insn_cnt - 1 || insn[1].code != 0 ||
4297 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
4298 insn[1].off != 0) {
4299 verbose(env, "invalid bpf_ld_imm64 insn\n");
4300 return -EINVAL;
4301 }
4302
4303 if (insn->src_reg == 0)
4304 /* valid generic load 64-bit imm */
4305 goto next_insn;
4306
4307 if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
4308 verbose(env,
4309 "unrecognized bpf_ld_imm64 insn\n");
4310 return -EINVAL;
4311 }
4312
4313 f = fdget(insn->imm);
4314 map = __bpf_map_get(f);
4315 if (IS_ERR(map)) {
4316 verbose(env, "fd %d is not pointing to valid bpf_map\n",
4317 insn->imm);
4318 return PTR_ERR(map);
4319 }
4320
4321 err = check_map_prog_compatibility(env, map, env->prog);
4322 if (err) {
4323 fdput(f);
4324 return err;
4325 }
4326
4327 /* store map pointer inside BPF_LD_IMM64 instruction */
4328 insn[0].imm = (u32) (unsigned long) map;
4329 insn[1].imm = ((u64) (unsigned long) map) >> 32;
4330
4331 /* check whether we recorded this map already */
4332 for (j = 0; j < env->used_map_cnt; j++)
4333 if (env->used_maps[j] == map) {
4334 fdput(f);
4335 goto next_insn;
4336 }
4337
4338 if (env->used_map_cnt >= MAX_USED_MAPS) {
4339 fdput(f);
4340 return -E2BIG;
4341 }
4342
4343 /* hold the map. If the program is rejected by verifier,
4344 * the map will be released by release_maps() or it
4345 * will be used by the valid program until it's unloaded
4346 * and all maps are released in free_used_maps()
4347 */
4348 map = bpf_map_inc(map, false);
4349 if (IS_ERR(map)) {
4350 fdput(f);
4351 return PTR_ERR(map);
4352 }
4353 env->used_maps[env->used_map_cnt++] = map;
4354
4355 fdput(f);
4356 next_insn:
4357 insn++;
4358 i++;
4359 }
4360 }
4361
4362 /* now all pseudo BPF_LD_IMM64 instructions load valid
4363 * 'struct bpf_map *' into a register instead of user map_fd.
4364 * These pointers will be used later by verifier to validate map access.
4365 */
4366 return 0;
4367 }
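/* Illustrative sketch: a map lookup sequence as emitted by a loader
 * typically starts with
 *
 *   BPF_LD_MAP_FD(BPF_REG_1, map_fd),
 *
 * which expands to a two-insn BPF_LD_IMM64 with src_reg == BPF_PSEUDO_MAP_FD
 * and the user-space fd in insn[0].imm. After this pass, insn[0].imm and
 * insn[1].imm of that pair hold the low and high 32 bits of the in-kernel
 * 'struct bpf_map *', the map's refcount is held via bpf_map_inc(), and the
 * pointer is remembered in env->used_maps[] for later map-access checks.
 */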
4368
4369 /* drop refcnt of maps used by the rejected program */
4370 static void release_maps(struct bpf_verifier_env *env)
4371 {
4372 int i;
4373
4374 for (i = 0; i < env->used_map_cnt; i++)
4375 bpf_map_put(env->used_maps[i]);
4376 }
4377
4378 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
4379 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
4380 {
4381 struct bpf_insn *insn = env->prog->insnsi;
4382 int insn_cnt = env->prog->len;
4383 int i;
4384
4385 for (i = 0; i < insn_cnt; i++, insn++)
4386 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
4387 insn->src_reg = 0;
4388 }
4389
4390 /* single env->prog->insnsi[off] instruction was replaced with the range
4391 * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
4392 * [0, off) and [off, end) to new locations, so the patched range stays zeroed
4393 */
4394 static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
4395 u32 off, u32 cnt)
4396 {
4397 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
4398 int i;
4399
4400 if (cnt == 1)
4401 return 0;
4402 new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
4403 if (!new_data)
4404 return -ENOMEM;
4405 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
4406 memcpy(new_data + off + cnt - 1, old_data + off,
4407 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
4408 for (i = off; i < off + cnt - 1; i++)
4409 new_data[i].seen = true;
4410 env->insn_aux_data = new_data;
4411 vfree(old_data);
4412 return 0;
4413 }
4414
4415 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
4416 const struct bpf_insn *patch, u32 len)
4417 {
4418 struct bpf_prog *new_prog;
4419
4420 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
4421 if (!new_prog)
4422 return NULL;
4423 if (adjust_insn_aux_data(env, new_prog->len, off, len))
4424 return NULL;
4425 return new_prog;
4426 }
4427
4428 /* The verifier does more data flow analysis than llvm and will not explore
4429 * branches that are dead at run time. Malicious programs can have dead code
4430 * too. Therefore replace all dead at-run-time code with nops.
4431 */
4432 static void sanitize_dead_code(struct bpf_verifier_env *env)
4433 {
4434 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
4435 struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
4436 struct bpf_insn *insn = env->prog->insnsi;
4437 const int insn_cnt = env->prog->len;
4438 int i;
4439
4440 for (i = 0; i < insn_cnt; i++) {
4441 if (aux_data[i].seen)
4442 continue;
4443 memcpy(insn + i, &nop, sizeof(nop));
4444 }
4445 }
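/* Illustrative example of dead code surviving check_cfg() but never being
 * walked by do_check():
 *
 *   0: BPF_MOV64_IMM(BPF_REG_0, 0),
 *   1: BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),  // r0 is known to be 0, so
 *                                               // only the fall-through
 *                                               // branch is ever walked
 *   2: BPF_EXIT_INSN(),
 *   3: BPF_MOV64_IMM(BPF_REG_0, 1),            // never 'seen'
 *   4: BPF_EXIT_INSN(),                        // never 'seen'
 *
 * insns 3-4 are statically reachable via the branch edge, so check_cfg()
 * accepts the program, but do_check() prunes the never-taken branch; they
 * keep seen == false and are rewritten above into 'r0 = r0' nops.
 */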
4446
4447 /* convert load instructions that access fields of 'struct __sk_buff'
4448 * into sequence of instructions that access fields of 'struct sk_buff'
4449 */
4450 static int convert_ctx_accesses(struct bpf_verifier_env *env)
4451 {
4452 const struct bpf_verifier_ops *ops = env->ops;
4453 int i, cnt, size, ctx_field_size, delta = 0;
4454 const int insn_cnt = env->prog->len;
4455 struct bpf_insn insn_buf[16], *insn;
4456 struct bpf_prog *new_prog;
4457 enum bpf_access_type type;
4458 bool is_narrower_load;
4459 u32 target_size;
4460
4461 if (ops->gen_prologue) {
4462 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
4463 env->prog);
4464 if (cnt >= ARRAY_SIZE(insn_buf)) {
4465 verbose(env, "bpf verifier is misconfigured\n");
4466 return -EINVAL;
4467 } else if (cnt) {
4468 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
4469 if (!new_prog)
4470 return -ENOMEM;
4471
4472 env->prog = new_prog;
4473 delta += cnt - 1;
4474 }
4475 }
4476
4477 if (!ops->convert_ctx_access)
4478 return 0;
4479
4480 insn = env->prog->insnsi + delta;
4481
4482 for (i = 0; i < insn_cnt; i++, insn++) {
4483 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
4484 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
4485 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
4486 insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
4487 type = BPF_READ;
4488 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
4489 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
4490 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
4491 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
4492 type = BPF_WRITE;
4493 else
4494 continue;
4495
4496 if (type == BPF_WRITE &&
4497 env->insn_aux_data[i + delta].sanitize_stack_off) {
4498 struct bpf_insn patch[] = {
4499 /* Sanitize suspicious stack slot with zero.
4500 * There are no memory dependencies for this store,
4501 * since it's only using frame pointer and immediate
4502 * constant of zero
4503 */
4504 BPF_ST_MEM(BPF_DW, BPF_REG_FP,
4505 env->insn_aux_data[i + delta].sanitize_stack_off,
4506 0),
4507 /* the original STX instruction will immediately
4508 * overwrite the same stack slot with appropriate value
4509 */
4510 *insn,
4511 };
4512
4513 cnt = ARRAY_SIZE(patch);
4514 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
4515 if (!new_prog)
4516 return -ENOMEM;
4517
4518 delta += cnt - 1;
4519 env->prog = new_prog;
4520 insn = new_prog->insnsi + i + delta;
4521 continue;
4522 }
4523
4524 if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
4525 continue;
4526
4527 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
4528 size = BPF_LDST_BYTES(insn);
4529
4530 /* If the read access is a narrower load of the field,
4531 * convert to a 4/8-byte load, to minimize program-type-specific
4532 * convert_ctx_access changes. If conversion is successful,
4533 * we will apply proper mask to the result.
4534 */
4535 is_narrower_load = size < ctx_field_size;
4536 if (is_narrower_load) {
4537 u32 off = insn->off;
4538 u8 size_code;
4539
4540 if (type == BPF_WRITE) {
4541 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
4542 return -EINVAL;
4543 }
4544
4545 size_code = BPF_H;
4546 if (ctx_field_size == 4)
4547 size_code = BPF_W;
4548 else if (ctx_field_size == 8)
4549 size_code = BPF_DW;
4550
4551 insn->off = off & ~(ctx_field_size - 1);
4552 insn->code = BPF_LDX | BPF_MEM | size_code;
4553 }
4554
4555 target_size = 0;
4556 cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
4557 &target_size);
4558 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
4559 (ctx_field_size && !target_size)) {
4560 verbose(env, "bpf verifier is misconfigured\n");
4561 return -EINVAL;
4562 }
4563
4564 if (is_narrower_load && size < target_size) {
4565 if (ctx_field_size <= 4)
4566 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
4567 (1 << size * 8) - 1);
4568 else
4569 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
4570 (1 << size * 8) - 1);
4571 }
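/* Worked example, assuming a narrow 1-byte read at the start of a 4-byte
 * context field: a load such as
 *
 *   BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, off),
 *
 * was widened above to BPF_LDX_MEM(BPF_W, ...) at 'off & ~3',
 * convert_ctx_access() rewrote that full-width access, and the
 * BPF_ALU32_IMM(BPF_AND, BPF_REG_0, 0xff) appended just above masks the
 * result so the program still observes only the single byte it asked for.
 */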
4572
4573 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
4574 if (!new_prog)
4575 return -ENOMEM;
4576
4577 delta += cnt - 1;
4578
4579 /* keep walking new program and skip insns we just inserted */
4580 env->prog = new_prog;
4581 insn = new_prog->insnsi + i + delta;
4582 }
4583
4584 return 0;
4585 }
4586
4587 /* fixup insn->imm field of bpf_call instructions
4588 * and inline eligible helpers as explicit sequence of BPF instructions
4589 *
4590 * this function is called after the eBPF program has passed verification
4591 */
4592 static int fixup_bpf_calls(struct bpf_verifier_env *env)
4593 {
4594 struct bpf_prog *prog = env->prog;
4595 struct bpf_insn *insn = prog->insnsi;
4596 const struct bpf_func_proto *fn;
4597 const int insn_cnt = prog->len;
4598 struct bpf_insn_aux_data *aux;
4599 struct bpf_insn insn_buf[16];
4600 struct bpf_prog *new_prog;
4601 struct bpf_map *map_ptr;
4602 int i, cnt, delta = 0;
4603
4604 for (i = 0; i < insn_cnt; i++, insn++) {
4605 if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
4606 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
4607 /* due to JIT bugs clear upper 32-bits of src register
4608 * before div/mod operation
4609 */
4610 insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
4611 insn_buf[1] = *insn;
4612 cnt = 2;
4613 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
4614 if (!new_prog)
4615 return -ENOMEM;
4616
4617 delta += cnt - 1;
4618 env->prog = prog = new_prog;
4619 insn = new_prog->insnsi + i + delta;
4620 continue;
4621 }
4622
4623 if (insn->code != (BPF_JMP | BPF_CALL))
4624 continue;
4625
4626 if (insn->imm == BPF_FUNC_get_route_realm)
4627 prog->dst_needed = 1;
4628 if (insn->imm == BPF_FUNC_get_prandom_u32)
4629 bpf_user_rnd_init_once();
4630 if (insn->imm == BPF_FUNC_tail_call) {
4631 /* If we tail call into other programs, we
4632 * cannot make any assumptions since they can
4633 * be replaced dynamically during runtime in
4634 * the program array.
4635 */
4636 prog->cb_access = 1;
4637 env->prog->aux->stack_depth = MAX_BPF_STACK;
4638
4639 /* mark bpf_tail_call as different opcode to avoid
4640 * conditional branch in the interpreter for every normal
4641 * call and to prevent accidental JITing by JIT compiler
4642 * that doesn't support bpf_tail_call yet
4643 */
4644 insn->imm = 0;
4645 insn->code = BPF_JMP | BPF_TAIL_CALL;
4646
4647 aux = &env->insn_aux_data[i + delta];
4648 if (!bpf_map_ptr_unpriv(aux))
4649 continue;
4650
4651 /* instead of changing every JIT dealing with tail_call
4652 * emit two extra insns:
4653 * if (index >= max_entries) goto out;
4654 * index &= array->index_mask;
4655 * to avoid out-of-bounds cpu speculation
4656 */
4657 if (bpf_map_ptr_poisoned(aux)) {
4658 verbose(env, "tail_call abusing map_ptr\n");
4659 return -EINVAL;
4660 }
4661
4662 map_ptr = BPF_MAP_PTR(aux->map_state);
4663 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
4664 map_ptr->max_entries, 2);
4665 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
4666 container_of(map_ptr,
4667 struct bpf_array,
4668 map)->index_mask);
4669 insn_buf[2] = *insn;
4670 cnt = 3;
4671 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
4672 if (!new_prog)
4673 return -ENOMEM;
4674
4675 delta += cnt - 1;
4676 env->prog = prog = new_prog;
4677 insn = new_prog->insnsi + i + delta;
4678 continue;
4679 }
4680
4681 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
4682 * handlers are currently limited to 64 bit only.
4683 */
4684 if (ebpf_jit_enabled() && BITS_PER_LONG == 64 &&
4685 insn->imm == BPF_FUNC_map_lookup_elem) {
4686 aux = &env->insn_aux_data[i + delta];
4687 if (bpf_map_ptr_poisoned(aux))
4688 goto patch_call_imm;
4689
4690 map_ptr = BPF_MAP_PTR(aux->map_state);
4691 if (!map_ptr->ops->map_gen_lookup)
4692 goto patch_call_imm;
4693
4694 cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
4695 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
4696 verbose(env, "bpf verifier is misconfigured\n");
4697 return -EINVAL;
4698 }
4699
4700 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
4701 cnt);
4702 if (!new_prog)
4703 return -ENOMEM;
4704
4705 delta += cnt - 1;
4706
4707 /* keep walking new program and skip insns we just inserted */
4708 env->prog = prog = new_prog;
4709 insn = new_prog->insnsi + i + delta;
4710 continue;
4711 }
4712
4713 if (insn->imm == BPF_FUNC_redirect_map) {
4714 /* Note, we cannot use prog directly as imm as subsequent
4715 * rewrites would still change the prog pointer. The only
4716 * stable address we can use is aux, which also works with
4717 * prog clones during blinding.
4718 */
4719 u64 addr = (unsigned long)prog->aux;
4720 struct bpf_insn r4_ld[] = {
4721 BPF_LD_IMM64(BPF_REG_4, addr),
4722 *insn,
4723 };
4724 cnt = ARRAY_SIZE(r4_ld);
4725
4726 new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
4727 if (!new_prog)
4728 return -ENOMEM;
4729
4730 delta += cnt - 1;
4731 env->prog = prog = new_prog;
4732 insn = new_prog->insnsi + i + delta;
4733 }
4734 patch_call_imm:
4735 fn = env->ops->get_func_proto(insn->imm);
4736 /* all functions that have a prototype and that the verifier allowed
4737 * programs to call must be real in-kernel functions
4738 */
4739 if (!fn->func) {
4740 verbose(env,
4741 "kernel subsystem misconfigured func %s#%d\n",
4742 func_id_name(insn->imm), insn->imm);
4743 return -EFAULT;
4744 }
4745 insn->imm = fn->func - __bpf_call_base;
4746 }
4747
4748 return 0;
4749 }
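/* For illustration: on a 64-bit kernel with the JIT enabled, the
 * map_gen_lookup() path above replaces a bpf_map_lookup_elem() call on an
 * ARRAY map with an inline sequence roughly equivalent to
 *
 *   if (index >= array->map.max_entries)
 *           return NULL;
 *   return array->value + array->elem_size * index;
 *
 * while the tail_call patch keeps the call itself but prepends the bounds
 * check and the '&= index_mask' masking to close the speculative
 * out-of-bounds window.
 */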
4750
4751 static void free_states(struct bpf_verifier_env *env)
4752 {
4753 struct bpf_verifier_state_list *sl, *sln;
4754 int i;
4755
4756 if (!env->explored_states)
4757 return;
4758
4759 for (i = 0; i < env->prog->len; i++) {
4760 sl = env->explored_states[i];
4761
4762 if (sl)
4763 while (sl != STATE_LIST_MARK) {
4764 sln = sl->next;
4765 free_verifier_state(&sl->state, false);
4766 kfree(sl);
4767 sl = sln;
4768 }
4769 }
4770
4771 kfree(env->explored_states);
4772 }
4773
4774 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
4775 {
4776 struct bpf_verifier_env *env;
4777 struct bpf_verifer_log *log;
4778 int ret = -EINVAL;
4779
4780 /* no program is valid */
4781 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
4782 return -EINVAL;
4783
4784 /* 'struct bpf_verifier_env' can be global, but since it's not small,
4785 * allocate/free it every time bpf_check() is called
4786 */
4787 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
4788 if (!env)
4789 return -ENOMEM;
4790 log = &env->log;
4791
4792 env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
4793 (*prog)->len);
4794 ret = -ENOMEM;
4795 if (!env->insn_aux_data)
4796 goto err_free_env;
4797 env->prog = *prog;
4798 env->ops = bpf_verifier_ops[env->prog->type];
4799
4800 /* grab the mutex to protect a few globals used by the verifier */
4801 mutex_lock(&bpf_verifier_lock);
4802
4803 if (attr->log_level || attr->log_buf || attr->log_size) {
4804 /* user requested verbose verifier output
4805 * and supplied buffer to store the verification trace
4806 */
4807 log->level = attr->log_level;
4808 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
4809 log->len_total = attr->log_size;
4810
4811 ret = -EINVAL;
4812 /* log attributes have to be sane */
4813 if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
4814 !log->level || !log->ubuf)
4815 goto err_unlock;
4816 }
4817
4818 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
4819 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
4820 env->strict_alignment = true;
4821
4822 if (env->prog->aux->offload) {
4823 ret = bpf_prog_offload_verifier_prep(env);
4824 if (ret)
4825 goto err_unlock;
4826 }
4827
4828 ret = replace_map_fd_with_map_ptr(env);
4829 if (ret < 0)
4830 goto skip_full_check;
4831
4832 env->explored_states = kcalloc(env->prog->len,
4833 sizeof(struct bpf_verifier_state_list *),
4834 GFP_USER);
4835 ret = -ENOMEM;
4836 if (!env->explored_states)
4837 goto skip_full_check;
4838
4839 ret = check_cfg(env);
4840 if (ret < 0)
4841 goto skip_full_check;
4842
4843 env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
4844
4845 ret = do_check(env);
4846 if (env->cur_state) {
4847 free_verifier_state(env->cur_state, true);
4848 env->cur_state = NULL;
4849 }
4850
4851 skip_full_check:
4852 while (!pop_stack(env, NULL, NULL));
4853 free_states(env);
4854
4855 if (ret == 0)
4856 sanitize_dead_code(env);
4857
4858 if (ret == 0)
4859 /* program is valid, convert *(u32*)(ctx + off) accesses */
4860 ret = convert_ctx_accesses(env);
4861
4862 if (ret == 0)
4863 ret = fixup_bpf_calls(env);
4864
4865 if (log->level && bpf_verifier_log_full(log))
4866 ret = -ENOSPC;
4867 if (log->level && !log->ubuf) {
4868 ret = -EFAULT;
4869 goto err_release_maps;
4870 }
4871
4872 if (ret == 0 && env->used_map_cnt) {
4873 /* if program passed verifier, update used_maps in bpf_prog_info */
4874 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
4875 sizeof(env->used_maps[0]),
4876 GFP_KERNEL);
4877
4878 if (!env->prog->aux->used_maps) {
4879 ret = -ENOMEM;
4880 goto err_release_maps;
4881 }
4882
4883 memcpy(env->prog->aux->used_maps, env->used_maps,
4884 sizeof(env->used_maps[0]) * env->used_map_cnt);
4885 env->prog->aux->used_map_cnt = env->used_map_cnt;
4886
4887 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
4888 * bpf_ld_imm64 instructions
4889 */
4890 convert_pseudo_ld_imm64(env);
4891 }
4892
4893 err_release_maps:
4894 if (!env->prog->aux->used_maps)
4895 /* if we didn't copy map pointers into bpf_prog_info, release
4896 * them now. Otherwise free_used_maps() will release them.
4897 */
4898 release_maps(env);
4899 *prog = env->prog;
4900 err_unlock:
4901 mutex_unlock(&bpf_verifier_lock);
4902 vfree(env->insn_aux_data);
4903 err_free_env:
4904 kfree(env);
4905 return ret;
4906 }