]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blobdiff - kernel/bpf/verifier.c
bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types
[mirror_ubuntu-hirsute-kernel.git] / kernel / bpf / verifier.c
index 6c5a41f7f33856d79f641c57767c7c093ec2a831..15ab6fa817ce495a261303171da839bfa2da31b4 100644 (file)
@@ -176,7 +176,6 @@ struct bpf_verifier_stack_elem {
        struct bpf_verifier_stack_elem *next;
 };
 
-#define BPF_COMPLEXITY_LIMIT_INSNS     131072
 #define BPF_COMPLEXITY_LIMIT_STACK     1024
 #define BPF_COMPLEXITY_LIMIT_STATES    64
 
@@ -377,7 +376,8 @@ static bool is_release_function(enum bpf_func_id func_id)
 static bool is_acquire_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_sk_lookup_tcp ||
-               func_id == BPF_FUNC_sk_lookup_udp;
+               func_id == BPF_FUNC_sk_lookup_udp ||
+               func_id == BPF_FUNC_skc_lookup_tcp;
 }
 
 static bool is_ptr_cast_function(enum bpf_func_id func_id)
@@ -1091,7 +1091,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
         */
        subprog[env->subprog_cnt].start = insn_cnt;
 
-       if (env->log.level > 1)
+       if (env->log.level & BPF_LOG_LEVEL2)
                for (i = 0; i < env->subprog_cnt; i++)
                        verbose(env, "func#%d @%d\n", i, subprog[i].start);
 
@@ -1138,6 +1138,7 @@ static int mark_reg_read(struct bpf_verifier_env *env,
                         struct bpf_reg_state *parent)
 {
        bool writes = parent == state->parent; /* Observe write marks */
+       int cnt = 0;
 
        while (parent) {
                /* if read wasn't screened by an earlier write ... */
@@ -1149,12 +1150,25 @@ static int mark_reg_read(struct bpf_verifier_env *env,
                                parent->var_off.value, parent->off);
                        return -EFAULT;
                }
+               if (parent->live & REG_LIVE_READ)
+                       /* The parentage chain never changes and
+                        * this parent was already marked as LIVE_READ.
+                        * There is no need to keep walking the chain again and
+                        * keep re-marking all parents as LIVE_READ.
+                        * This case happens when the same register is read
+                        * multiple times without writes into it in-between.
+                        */
+                       break;
                /* ... then we depend on parent's value */
                parent->live |= REG_LIVE_READ;
                state = parent;
                parent = state->parent;
                writes = true;
+               cnt++;
        }
+
+       if (env->longest_mark_read_walk < cnt)
+               env->longest_mark_read_walk = cnt;
        return 0;
 }
 
@@ -1412,7 +1426,7 @@ static int check_stack_access(struct bpf_verifier_env *env,
                char tn_buf[48];
 
                tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-               verbose(env, "variable stack access var_off=%s off=%d size=%d",
+               verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
                        tn_buf, off, size);
                return -EACCES;
        }
@@ -1425,6 +1439,28 @@ static int check_stack_access(struct bpf_verifier_env *env,
        return 0;
 }
 
+static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
+                                int off, int size, enum bpf_access_type type)
+{
+       struct bpf_reg_state *regs = cur_regs(env);
+       struct bpf_map *map = regs[regno].map_ptr;
+       u32 cap = bpf_map_flags_to_cap(map);
+
+       if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
+               verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
+                       map->value_size, off, size);
+               return -EACCES;
+       }
+
+       if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
+               verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
+                       map->value_size, off, size);
+               return -EACCES;
+       }
+
+       return 0;
+}
+
 /* check read/write into map element returned by bpf_map_lookup_elem() */
 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
                              int size, bool zero_size_allowed)
@@ -1454,7 +1490,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
         * need to try adding each of min_value and max_value to off
         * to make sure our theoretical access will be safe.
         */
-       if (env->log.level)
+       if (env->log.level & BPF_LOG_LEVEL)
                print_verifier_state(env, state);
 
        /* The minimum value is only important with signed
@@ -2011,7 +2047,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                        verbose(env, "R%d leaks addr into map\n", value_regno);
                        return -EACCES;
                }
-
+               err = check_map_access_type(env, regno, off, size, t);
+               if (err)
+                       return err;
                err = check_map_access(env, regno, off, size, false);
                if (!err && t == BPF_READ && value_regno >= 0)
                        mark_reg_unknown(env, regs, value_regno);
@@ -2157,6 +2195,29 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
                                BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 }
 
+static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
+                                 int off, int access_size,
+                                 bool zero_size_allowed)
+{
+       struct bpf_reg_state *reg = reg_state(env, regno);
+
+       if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
+           access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
+               if (tnum_is_const(reg->var_off)) {
+                       verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
+                               regno, off, access_size);
+               } else {
+                       char tn_buf[48];
+
+                       tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+                       verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
+                               regno, tn_buf, access_size);
+               }
+               return -EACCES;
+       }
+       return 0;
+}
+
 /* when register 'regno' is passed into function that will read 'access_size'
  * bytes from that pointer, make sure that it's within stack boundary
  * and all elements of stack are initialized.
@@ -2169,7 +2230,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 {
        struct bpf_reg_state *reg = reg_state(env, regno);
        struct bpf_func_state *state = func(env, reg);
-       int off, i, slot, spi;
+       int err, min_off, max_off, i, slot, spi;
 
        if (reg->type != PTR_TO_STACK) {
                /* Allow zero-byte read from NULL, regardless of pointer type */
@@ -2183,21 +2244,57 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
                return -EACCES;
        }
 
-       /* Only allow fixed-offset stack reads */
-       if (!tnum_is_const(reg->var_off)) {
-               char tn_buf[48];
+       if (tnum_is_const(reg->var_off)) {
+               min_off = max_off = reg->var_off.value + reg->off;
+               err = __check_stack_boundary(env, regno, min_off, access_size,
+                                            zero_size_allowed);
+               if (err)
+                       return err;
+       } else {
+               /* Variable offset is prohibited for unprivileged mode for
+                * simplicity since it requires corresponding support in
+                * Spectre masking for stack ALU.
+                * See also retrieve_ptr_limit().
+                */
+               if (!env->allow_ptr_leaks) {
+                       char tn_buf[48];
 
-               tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-               verbose(env, "invalid variable stack read R%d var_off=%s\n",
-                       regno, tn_buf);
-               return -EACCES;
-       }
-       off = reg->off + reg->var_off.value;
-       if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
-           access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
-               verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
-                       regno, off, access_size);
-               return -EACCES;
+                       tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+                       verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
+                               regno, tn_buf);
+                       return -EACCES;
+               }
+               /* Only initialized buffer on stack is allowed to be accessed
+                * with variable offset. With uninitialized buffer it's hard to
+                * guarantee that whole memory is marked as initialized on
+                * helper return since specific bounds are unknown what may
+                * cause uninitialized stack leaking.
+                */
+               if (meta && meta->raw_mode)
+                       meta = NULL;
+
+               if (reg->smax_value >= BPF_MAX_VAR_OFF ||
+                   reg->smax_value <= -BPF_MAX_VAR_OFF) {
+                       verbose(env, "R%d unbounded indirect variable offset stack access\n",
+                               regno);
+                       return -EACCES;
+               }
+               min_off = reg->smin_value + reg->off;
+               max_off = reg->smax_value + reg->off;
+               err = __check_stack_boundary(env, regno, min_off, access_size,
+                                            zero_size_allowed);
+               if (err) {
+                       verbose(env, "R%d min value is outside of stack bound\n",
+                               regno);
+                       return err;
+               }
+               err = __check_stack_boundary(env, regno, max_off, access_size,
+                                            zero_size_allowed);
+               if (err) {
+                       verbose(env, "R%d max value is outside of stack bound\n",
+                               regno);
+                       return err;
+               }
        }
 
        if (meta && meta->raw_mode) {
@@ -2206,10 +2303,10 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
                return 0;
        }
 
-       for (i = 0; i < access_size; i++) {
+       for (i = min_off; i < max_off + access_size; i++) {
                u8 *stype;
 
-               slot = -(off + i) - 1;
+               slot = -i - 1;
                spi = slot / BPF_REG_SIZE;
                if (state->allocated_stack <= slot)
                        goto err;
@@ -2222,8 +2319,16 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
                        goto mark;
                }
 err:
-               verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
-                       off, i, access_size);
+               if (tnum_is_const(reg->var_off)) {
+                       verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
+                               min_off, i - min_off, access_size);
+               } else {
+                       char tn_buf[48];
+
+                       tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+                       verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
+                               tn_buf, i - min_off, access_size);
+               }
                return -EACCES;
 mark:
                /* reading any byte out of 8-byte 'spill_slot' will cause
@@ -2232,7 +2337,7 @@ mark:
                mark_reg_read(env, &state->stack[spi].spilled_ptr,
                              state->stack[spi].spilled_ptr.parent);
        }
-       return update_stack_depth(env, state, off);
+       return update_stack_depth(env, state, min_off);
 }
 
 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
@@ -2247,6 +2352,10 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
                return check_packet_access(env, regno, reg->off, access_size,
                                           zero_size_allowed);
        case PTR_TO_MAP_VALUE:
+               if (check_map_access_type(env, regno, reg->off, access_size,
+                                         meta && meta->raw_mode ? BPF_WRITE :
+                                         BPF_READ))
+                       return -EACCES;
                return check_map_access(env, regno, reg->off, access_size,
                                        zero_size_allowed);
        default: /* scalar_value|ptr_to_stack or invalid ptr */
@@ -2353,6 +2462,22 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type)
               type == ARG_CONST_SIZE_OR_ZERO;
 }
 
+static bool arg_type_is_int_ptr(enum bpf_arg_type type)
+{
+       return type == ARG_PTR_TO_INT ||
+              type == ARG_PTR_TO_LONG;
+}
+
+static int int_ptr_type_to_size(enum bpf_arg_type type)
+{
+       if (type == ARG_PTR_TO_INT)
+               return sizeof(u32);
+       else if (type == ARG_PTR_TO_LONG)
+               return sizeof(u64);
+
+       return -EINVAL;
+}
+
 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                          enum bpf_arg_type arg_type,
                          struct bpf_call_arg_meta *meta)
@@ -2445,6 +2570,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                         type != expected_type)
                        goto err_type;
                meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
+       } else if (arg_type_is_int_ptr(arg_type)) {
+               expected_type = PTR_TO_STACK;
+               if (!type_is_pkt_pointer(type) &&
+                   type != PTR_TO_MAP_VALUE &&
+                   type != expected_type)
+                       goto err_type;
        } else {
                verbose(env, "unsupported arg_type %d\n", arg_type);
                return -EFAULT;
@@ -2526,6 +2657,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                err = check_helper_mem_access(env, regno - 1,
                                              reg->umax_value,
                                              zero_size_allowed, meta);
+       } else if (arg_type_is_int_ptr(arg_type)) {
+               int size = int_ptr_type_to_size(arg_type);
+
+               err = check_helper_mem_access(env, regno, size, false, meta);
+               if (err)
+                       return err;
+               err = check_ptr_alignment(env, reg, 0, size, true);
        }
 
        return err;
@@ -2905,7 +3043,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
        /* and go analyze first insn of the callee */
        *insn_idx = target_insn;
 
-       if (env->log.level) {
+       if (env->log.level & BPF_LOG_LEVEL) {
                verbose(env, "caller:\n");
                print_verifier_state(env, caller);
                verbose(env, "callee:\n");
@@ -2945,7 +3083,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
                return err;
 
        *insn_idx = callee->callsite + 1;
-       if (env->log.level) {
+       if (env->log.level & BPF_LOG_LEVEL) {
                verbose(env, "returning from callee:\n");
                print_verifier_state(env, callee);
                verbose(env, "to caller at %d:\n", *insn_idx);
@@ -2979,6 +3117,7 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
                int func_id, int insn_idx)
 {
        struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+       struct bpf_map *map = meta->map_ptr;
 
        if (func_id != BPF_FUNC_tail_call &&
            func_id != BPF_FUNC_map_lookup_elem &&
@@ -2989,11 +3128,24 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
            func_id != BPF_FUNC_map_peek_elem)
                return 0;
 
-       if (meta->map_ptr == NULL) {
+       if (map == NULL) {
                verbose(env, "kernel subsystem misconfigured verifier\n");
                return -EINVAL;
        }
 
+       /* In case of read-only, some additional restrictions
+        * need to be applied in order to prevent altering the
+        * state of the map from program side.
+        */
+       if ((map->map_flags & BPF_F_RDONLY_PROG) &&
+           (func_id == BPF_FUNC_map_delete_elem ||
+            func_id == BPF_FUNC_map_update_elem ||
+            func_id == BPF_FUNC_map_push_elem ||
+            func_id == BPF_FUNC_map_pop_elem)) {
+               verbose(env, "write into map forbidden\n");
+               return -EACCES;
+       }
+
        if (!BPF_MAP_PTR(aux->map_state))
                bpf_map_ptr_store(aux, meta->map_ptr,
                                  meta->map_ptr->unpriv_array);
@@ -3157,19 +3309,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
        } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
                mark_reg_known_zero(env, regs, BPF_REG_0);
                regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
-               if (is_acquire_function(func_id)) {
-                       int id = acquire_reference_state(env, insn_idx);
-
-                       if (id < 0)
-                               return id;
-                       /* For mark_ptr_or_null_reg() */
-                       regs[BPF_REG_0].id = id;
-                       /* For release_reference() */
-                       regs[BPF_REG_0].ref_obj_id = id;
-               } else {
-                       /* For mark_ptr_or_null_reg() */
-                       regs[BPF_REG_0].id = ++env->id_gen;
-               }
+               regs[BPF_REG_0].id = ++env->id_gen;
+       } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
+               mark_reg_known_zero(env, regs, BPF_REG_0);
+               regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
+               regs[BPF_REG_0].id = ++env->id_gen;
        } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
                mark_reg_known_zero(env, regs, BPF_REG_0);
                regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
@@ -3180,9 +3324,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                return -EINVAL;
        }
 
-       if (is_ptr_cast_function(func_id))
+       if (is_ptr_cast_function(func_id)) {
                /* For release_reference() */
                regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+       } else if (is_acquire_function(func_id)) {
+               int id = acquire_reference_state(env, insn_idx);
+
+               if (id < 0)
+                       return id;
+               /* For mark_ptr_or_null_reg() */
+               regs[BPF_REG_0].id = id;
+               /* For release_reference() */
+               regs[BPF_REG_0].ref_obj_id = id;
+       }
 
        do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
 
@@ -3282,6 +3436,9 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 
        switch (ptr_reg->type) {
        case PTR_TO_STACK:
+               /* Indirect variable offset stack access is prohibited in
+                * unprivileged mode so it's not handled here.
+                */
                off = ptr_reg->off + ptr_reg->var_off.value;
                if (mask_to_left)
                        *ptr_limit = MAX_BPF_STACK + off;
@@ -4966,23 +5123,17 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
                        insn->dst_reg);
                return -EACCES;
        }
-       if (env->log.level)
+       if (env->log.level & BPF_LOG_LEVEL)
                print_verifier_state(env, this_branch->frame[this_branch->curframe]);
        return 0;
 }
 
-/* return the map pointer stored inside BPF_LD_IMM64 instruction */
-static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
-{
-       u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
-
-       return (struct bpf_map *) (unsigned long) imm64;
-}
-
 /* verify BPF_LD_IMM64 instruction */
 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
+       struct bpf_insn_aux_data *aux = cur_aux(env);
        struct bpf_reg_state *regs = cur_regs(env);
+       struct bpf_map *map;
        int err;
 
        if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -5006,11 +5157,22 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
                return 0;
        }
 
-       /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
-       BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
+       map = env->used_maps[aux->map_index];
+       mark_reg_known_zero(env, regs, insn->dst_reg);
+       regs[insn->dst_reg].map_ptr = map;
+
+       if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
+               regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
+               regs[insn->dst_reg].off = aux->map_off;
+               if (map_value_has_spin_lock(map))
+                       regs[insn->dst_reg].id = ++env->id_gen;
+       } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+               regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+       } else {
+               verbose(env, "bpf verifier is misconfigured\n");
+               return -EINVAL;
+       }
 
-       regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
-       regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
        return 0;
 }
 
@@ -5134,6 +5296,7 @@ static int check_return_code(struct bpf_verifier_env *env)
        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
        case BPF_PROG_TYPE_SOCK_OPS:
        case BPF_PROG_TYPE_CGROUP_DEVICE:
+       case BPF_PROG_TYPE_CGROUP_SYSCTL:
                break;
        default:
                return 0;
@@ -5264,13 +5427,13 @@ static int check_cfg(struct bpf_verifier_env *env)
        int ret = 0;
        int i, t;
 
-       insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+       insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
        if (!insn_state)
                return -ENOMEM;
 
-       insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+       insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
        if (!insn_stack) {
-               kfree(insn_state);
+               kvfree(insn_state);
                return -ENOMEM;
        }
 
@@ -5368,8 +5531,8 @@ check_state:
        ret = 0; /* cfg looks good */
 
 err_free:
-       kfree(insn_state);
-       kfree(insn_stack);
+       kvfree(insn_state);
+       kvfree(insn_stack);
        return ret;
 }
 
@@ -6112,11 +6275,13 @@ static int propagate_liveness(struct bpf_verifier_env *env,
 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 {
        struct bpf_verifier_state_list *new_sl;
-       struct bpf_verifier_state_list *sl;
+       struct bpf_verifier_state_list *sl, **pprev;
        struct bpf_verifier_state *cur = env->cur_state, *new;
        int i, j, err, states_cnt = 0;
 
-       sl = env->explored_states[insn_idx];
+       pprev = &env->explored_states[insn_idx];
+       sl = *pprev;
+
        if (!sl)
                /* this 'insn_idx' instruction wasn't marked, so we will not
                 * be doing state search here
@@ -6127,6 +6292,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 
        while (sl != STATE_LIST_MARK) {
                if (states_equal(env, &sl->state, cur)) {
+                       sl->hit_cnt++;
                        /* reached equivalent register/stack state,
                         * prune the search.
                         * Registers read by the continuation are read by us.
@@ -6142,10 +6308,40 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
                                return err;
                        return 1;
                }
-               sl = sl->next;
                states_cnt++;
+               sl->miss_cnt++;
+               /* heuristic to determine whether this state is beneficial
+                * to keep checking from state equivalence point of view.
+                * Higher numbers increase max_states_per_insn and verification time,
+                * but do not meaningfully decrease insn_processed.
+                */
+               if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
+                       /* the state is unlikely to be useful. Remove it to
+                        * speed up verification
+                        */
+                       *pprev = sl->next;
+                       if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
+                               free_verifier_state(&sl->state, false);
+                               kfree(sl);
+                               env->peak_states--;
+                       } else {
+                               /* cannot free this state, since parentage chain may
+                                * walk it later. Add it for free_list instead to
+                                * be freed at the end of verification
+                                */
+                               sl->next = env->free_list;
+                               env->free_list = sl;
+                       }
+                       sl = *pprev;
+                       continue;
+               }
+               pprev = &sl->next;
+               sl = *pprev;
        }
 
+       if (env->max_states_per_insn < states_cnt)
+               env->max_states_per_insn = states_cnt;
+
        if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
                return 0;
 
@@ -6159,6 +6355,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
        new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
        if (!new_sl)
                return -ENOMEM;
+       env->total_states++;
+       env->peak_states++;
 
        /* add new state to the head of linked list */
        new = &new_sl->state;
@@ -6243,8 +6441,7 @@ static int do_check(struct bpf_verifier_env *env)
        struct bpf_verifier_state *state;
        struct bpf_insn *insns = env->prog->insnsi;
        struct bpf_reg_state *regs;
-       int insn_cnt = env->prog->len, i;
-       int insn_processed = 0;
+       int insn_cnt = env->prog->len;
        bool do_print_state = false;
 
        env->prev_linfo = NULL;
@@ -6279,10 +6476,10 @@ static int do_check(struct bpf_verifier_env *env)
                insn = &insns[env->insn_idx];
                class = BPF_CLASS(insn->code);
 
-               if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
+               if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
                        verbose(env,
                                "BPF program is too large. Processed %d insn\n",
-                               insn_processed);
+                               env->insn_processed);
                        return -E2BIG;
                }
 
@@ -6291,7 +6488,7 @@ static int do_check(struct bpf_verifier_env *env)
                        return err;
                if (err == 1) {
                        /* found equivalent state, can prune the search */
-                       if (env->log.level) {
+                       if (env->log.level & BPF_LOG_LEVEL) {
                                if (do_print_state)
                                        verbose(env, "\nfrom %d to %d%s: safe\n",
                                                env->prev_insn_idx, env->insn_idx,
@@ -6309,8 +6506,9 @@ static int do_check(struct bpf_verifier_env *env)
                if (need_resched())
                        cond_resched();
 
-               if (env->log.level > 1 || (env->log.level && do_print_state)) {
-                       if (env->log.level > 1)
+               if (env->log.level & BPF_LOG_LEVEL2 ||
+                   (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
+                       if (env->log.level & BPF_LOG_LEVEL2)
                                verbose(env, "%d:", env->insn_idx);
                        else
                                verbose(env, "\nfrom %d to %d%s:",
@@ -6321,7 +6519,7 @@ static int do_check(struct bpf_verifier_env *env)
                        do_print_state = false;
                }
 
-               if (env->log.level) {
+               if (env->log.level & BPF_LOG_LEVEL) {
                        const struct bpf_insn_cbs cbs = {
                                .cb_print       = verbose,
                                .private_data   = env,
@@ -6586,16 +6784,6 @@ process_bpf_exit:
                env->insn_idx++;
        }
 
-       verbose(env, "processed %d insns (limit %d), stack depth ",
-               insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
-       for (i = 0; i < env->subprog_cnt; i++) {
-               u32 depth = env->subprog_info[i].stack_depth;
-
-               verbose(env, "%d", depth);
-               if (i + 1 < env->subprog_cnt)
-                       verbose(env, "+");
-       }
-       verbose(env, "\n");
        env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
        return 0;
 }
@@ -6693,8 +6881,10 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
                }
 
                if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+                       struct bpf_insn_aux_data *aux;
                        struct bpf_map *map;
                        struct fd f;
+                       u64 addr;
 
                        if (i == insn_cnt - 1 || insn[1].code != 0 ||
                            insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
@@ -6703,13 +6893,19 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
                                return -EINVAL;
                        }
 
-                       if (insn->src_reg == 0)
+                       if (insn[0].src_reg == 0)
                                /* valid generic load 64-bit imm */
                                goto next_insn;
 
-                       if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
-                           insn[1].imm != 0) {
-                               verbose(env, "unrecognized bpf_ld_imm64 insn\n");
+                       /* In final convert_pseudo_ld_imm64() step, this is
+                        * converted into regular 64-bit imm load insn.
+                        */
+                       if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
+                            insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
+                           (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
+                            insn[1].imm != 0)) {
+                               verbose(env,
+                                       "unrecognized bpf_ld_imm64 insn\n");
                                return -EINVAL;
                        }
 
@@ -6727,16 +6923,47 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
                                return err;
                        }
 
-                       /* store map pointer inside BPF_LD_IMM64 instruction */
-                       insn[0].imm = (u32) (unsigned long) map;
-                       insn[1].imm = ((u64) (unsigned long) map) >> 32;
+                       aux = &env->insn_aux_data[i];
+                       if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
+                               addr = (unsigned long)map;
+                       } else {
+                               u32 off = insn[1].imm;
+
+                               if (off >= BPF_MAX_VAR_OFF) {
+                                       verbose(env, "direct value offset of %u is not allowed\n", off);
+                                       fdput(f);
+                                       return -EINVAL;
+                               }
+
+                               if (!map->ops->map_direct_value_addr) {
+                                       verbose(env, "no direct value access support for this map type\n");
+                                       fdput(f);
+                                       return -EINVAL;
+                               }
+
+                               err = map->ops->map_direct_value_addr(map, &addr, off);
+                               if (err) {
+                                       verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
+                                               map->value_size, off);
+                                       fdput(f);
+                                       return err;
+                               }
+
+                               aux->map_off = off;
+                               addr += off;
+                       }
+
+                       insn[0].imm = (u32)addr;
+                       insn[1].imm = addr >> 32;
 
                        /* check whether we recorded this map already */
-                       for (j = 0; j < env->used_map_cnt; j++)
+                       for (j = 0; j < env->used_map_cnt; j++) {
                                if (env->used_maps[j] == map) {
+                                       aux->map_index = j;
                                        fdput(f);
                                        goto next_insn;
                                }
+                       }
 
                        if (env->used_map_cnt >= MAX_USED_MAPS) {
                                fdput(f);
@@ -6753,6 +6980,8 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
                                fdput(f);
                                return PTR_ERR(map);
                        }
+
+                       aux->map_index = env->used_map_cnt;
                        env->used_maps[env->used_map_cnt++] = map;
 
                        if (bpf_map_is_cgroup_storage(map) &&
@@ -6858,8 +7087,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
        struct bpf_prog *new_prog;
 
        new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
-       if (!new_prog)
+       if (IS_ERR(new_prog)) {
+               if (PTR_ERR(new_prog) == -ERANGE)
+                       verbose(env,
+                               "insn %d cannot be patched due to 16-bit range\n",
+                               env->insn_aux_data[off].orig_idx);
                return NULL;
+       }
        if (adjust_insn_aux_data(env, new_prog->len, off, len))
                return NULL;
        adjust_subprog_starts(env, off, len);
@@ -7801,6 +8035,14 @@ static void free_states(struct bpf_verifier_env *env)
        struct bpf_verifier_state_list *sl, *sln;
        int i;
 
+       sl = env->free_list;
+       while (sl) {
+               sln = sl->next;
+               free_verifier_state(&sl->state, false);
+               kfree(sl);
+               sl = sln;
+       }
+
        if (!env->explored_states)
                return;
 
@@ -7816,12 +8058,37 @@ static void free_states(struct bpf_verifier_env *env)
                        }
        }
 
-       kfree(env->explored_states);
+       kvfree(env->explored_states);
+}
+
+static void print_verification_stats(struct bpf_verifier_env *env)
+{
+       int i;
+
+       if (env->log.level & BPF_LOG_STATS) {
+               verbose(env, "verification time %lld usec\n",
+                       div_u64(env->verification_time, 1000));
+               verbose(env, "stack depth ");
+               for (i = 0; i < env->subprog_cnt; i++) {
+                       u32 depth = env->subprog_info[i].stack_depth;
+
+                       verbose(env, "%d", depth);
+                       if (i + 1 < env->subprog_cnt)
+                               verbose(env, "+");
+               }
+               verbose(env, "\n");
+       }
+       verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
+               "total_states %d peak_states %d mark_read %d\n",
+               env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
+               env->max_states_per_insn, env->total_states,
+               env->peak_states, env->longest_mark_read_walk);
 }
 
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
              union bpf_attr __user *uattr)
 {
+       u64 start_time = ktime_get_ns();
        struct bpf_verifier_env *env;
        struct bpf_verifier_log *log;
        int i, len, ret = -EINVAL;
@@ -7863,8 +8130,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 
                ret = -EINVAL;
                /* log attributes have to be sane */
-               if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
-                   !log->level || !log->ubuf)
+               if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
+                   !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
                        goto err_unlock;
        }
 
@@ -7887,7 +8154,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
                        goto skip_full_check;
        }
 
-       env->explored_states = kcalloc(env->prog->len,
+       env->explored_states = kvcalloc(env->prog->len,
                                       sizeof(struct bpf_verifier_state_list *),
                                       GFP_USER);
        ret = -ENOMEM;
@@ -7945,6 +8212,9 @@ skip_full_check:
        if (ret == 0)
                ret = fixup_call_args(env);
 
+       env->verification_time = ktime_get_ns() - start_time;
+       print_verification_stats(env);
+
        if (log->level && bpf_verifier_log_full(log))
                ret = -ENOSPC;
        if (log->level && !log->ubuf) {