f5bffecd
AS
1/*
2 * Linux Socket Filter - Kernel level socket filtering
3 *
4 * Based on the design of the Berkeley Packet Filter. The new
5 * internal format has been designed by PLUMgrid:
6 *
7 * Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
8 *
9 * Authors:
10 *
11 * Jay Schulist <jschlst@samba.org>
12 * Alexei Starovoitov <ast@plumgrid.com>
13 * Daniel Borkmann <dborkman@redhat.com>
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version.
19 *
20 * Andi Kleen - Fix a few bad bugs and races.
4df95ff4 21 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
f5bffecd 22 */
738cbe72 23
838e9690 24#include <uapi/linux/btf.h>
f5bffecd
AS
25#include <linux/filter.h>
26#include <linux/skbuff.h>
60a3b225 27#include <linux/vmalloc.h>
738cbe72
DB
28#include <linux/random.h>
29#include <linux/moduleloader.h>
09756af4 30#include <linux/bpf.h>
838e9690 31#include <linux/btf.h>
39853cc0 32#include <linux/frame.h>
74451e66
DB
33#include <linux/rbtree_latch.h>
34#include <linux/kallsyms.h>
35#include <linux/rcupdate.h>
c195651e 36#include <linux/perf_event.h>
f5bffecd 37
3324b584
DB
38#include <asm/unaligned.h>
39
f5bffecd
AS
40/* Registers */
41#define BPF_R0 regs[BPF_REG_0]
42#define BPF_R1 regs[BPF_REG_1]
43#define BPF_R2 regs[BPF_REG_2]
44#define BPF_R3 regs[BPF_REG_3]
45#define BPF_R4 regs[BPF_REG_4]
46#define BPF_R5 regs[BPF_REG_5]
47#define BPF_R6 regs[BPF_REG_6]
48#define BPF_R7 regs[BPF_REG_7]
49#define BPF_R8 regs[BPF_REG_8]
50#define BPF_R9 regs[BPF_REG_9]
51#define BPF_R10 regs[BPF_REG_10]
52
53/* Named registers */
54#define DST regs[insn->dst_reg]
55#define SRC regs[insn->src_reg]
56#define FP regs[BPF_REG_FP]
144cd91c 57#define AX regs[BPF_REG_AX]
f5bffecd
AS
58#define ARG1 regs[BPF_REG_ARG1]
59#define CTX regs[BPF_REG_CTX]
60#define IMM insn->imm
61
62/* No hurry in this branch
63 *
64 * Exported for the bpf jit load helper.
65 */
66void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
67{
68 u8 *ptr = NULL;
69
70 if (k >= SKF_NET_OFF)
71 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
72 else if (k >= SKF_LL_OFF)
73 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
3324b584 74
f5bffecd
AS
75 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
76 return ptr;
77
78 return NULL;
79}
80
492ecee8 81struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
60a3b225 82{
19809c2d 83 gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
09756af4 84 struct bpf_prog_aux *aux;
60a3b225
DB
85 struct bpf_prog *fp;
86
87 size = round_up(size, PAGE_SIZE);
88 fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
89 if (fp == NULL)
90 return NULL;
91
09756af4
AS
92 aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
93 if (aux == NULL) {
60a3b225
DB
94 vfree(fp);
95 return NULL;
96 }
97
98 fp->pages = size / PAGE_SIZE;
09756af4 99 fp->aux = aux;
e9d8afa9 100 fp->aux->prog = fp;
60b58afc 101 fp->jit_requested = ebpf_jit_enabled();
60a3b225 102
74451e66
DB
103 INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
104
60a3b225
DB
105 return fp;
106}
492ecee8
AS
107
108struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
109{
110 gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
111 struct bpf_prog *prog;
4b911304 112 int cpu;
492ecee8
AS
113
114 prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
115 if (!prog)
116 return NULL;
117
118 prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
119 if (!prog->aux->stats) {
120 kfree(prog->aux);
121 vfree(prog);
122 return NULL;
123 }
124
4b911304
ED
125 for_each_possible_cpu(cpu) {
126 struct bpf_prog_stats *pstats;
127
128 pstats = per_cpu_ptr(prog->aux->stats, cpu);
129 u64_stats_init(&pstats->syncp);
130 }
492ecee8
AS
131 return prog;
132}
60a3b225
DB
133EXPORT_SYMBOL_GPL(bpf_prog_alloc);
134
c454a46b
MKL
135int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
136{
137 if (!prog->aux->nr_linfo || !prog->jit_requested)
138 return 0;
139
140 prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
141 sizeof(*prog->aux->jited_linfo),
142 GFP_KERNEL | __GFP_NOWARN);
143 if (!prog->aux->jited_linfo)
144 return -ENOMEM;
145
146 return 0;
147}
148
149void bpf_prog_free_jited_linfo(struct bpf_prog *prog)
150{
151 kfree(prog->aux->jited_linfo);
152 prog->aux->jited_linfo = NULL;
153}
154
155void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog)
156{
157 if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0])
158 bpf_prog_free_jited_linfo(prog);
159}
160
 161/* The JIT engine is responsible for providing an array
 162 * for the insn_off to jited_off mapping (insn_to_jit_off).
163 *
164 * The idx to this array is the insn_off. Hence, the insn_off
165 * here is relative to the prog itself instead of the main prog.
166 * This array has one entry for each xlated bpf insn.
167 *
168 * jited_off is the byte off to the last byte of the jited insn.
169 *
170 * Hence, with
171 * insn_start:
172 * The first bpf insn off of the prog. The insn off
173 * here is relative to the main prog.
174 * e.g. if prog is a subprog, insn_start > 0
175 * linfo_idx:
176 * The prog's idx to prog->aux->linfo and jited_linfo
177 *
178 * jited_linfo[linfo_idx] = prog->bpf_func
179 *
180 * For i > linfo_idx,
181 *
182 * jited_linfo[i] = prog->bpf_func +
183 * insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
184 */
185void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
186 const u32 *insn_to_jit_off)
187{
188 u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
189 const struct bpf_line_info *linfo;
190 void **jited_linfo;
191
192 if (!prog->aux->jited_linfo)
193 /* Userspace did not provide linfo */
194 return;
195
196 linfo_idx = prog->aux->linfo_idx;
197 linfo = &prog->aux->linfo[linfo_idx];
198 insn_start = linfo[0].insn_off;
199 insn_end = insn_start + prog->len;
200
201 jited_linfo = &prog->aux->jited_linfo[linfo_idx];
202 jited_linfo[0] = prog->bpf_func;
203
204 nr_linfo = prog->aux->nr_linfo - linfo_idx;
205
206 for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
207 /* The verifier ensures that linfo[i].insn_off is
208 * strictly increasing
209 */
210 jited_linfo[i] = prog->bpf_func +
211 insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
212}
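/* Illustrative example (editorial addition, hypothetical numbers): for a
 * subprog with insn_start == 10, linfo_idx == 2 and a line info entry at
 * linfo[3].insn_off == 14, the address recorded is
 *
 *	jited_linfo[3] = prog->bpf_func + insn_to_jit_off[14 - 10 - 1];
 *
 * i.e. the end of the JITed code for the preceding xlated insn, which is
 * roughly where the JITed code for insn_off 14 begins.
 */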
213
214void bpf_prog_free_linfo(struct bpf_prog *prog)
215{
216 bpf_prog_free_jited_linfo(prog);
217 kvfree(prog->aux->linfo);
218}
219
60a3b225
DB
220struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
221 gfp_t gfp_extra_flags)
222{
19809c2d 223 gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
60a3b225 224 struct bpf_prog *fp;
5ccb071e
DB
225 u32 pages, delta;
226 int ret;
60a3b225
DB
227
228 BUG_ON(fp_old == NULL);
229
230 size = round_up(size, PAGE_SIZE);
5ccb071e
DB
231 pages = size / PAGE_SIZE;
232 if (pages <= fp_old->pages)
60a3b225
DB
233 return fp_old;
234
5ccb071e
DB
235 delta = pages - fp_old->pages;
236 ret = __bpf_prog_charge(fp_old->aux->user, delta);
237 if (ret)
238 return NULL;
239
60a3b225 240 fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
5ccb071e
DB
241 if (fp == NULL) {
242 __bpf_prog_uncharge(fp_old->aux->user, delta);
243 } else {
60a3b225 244 memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
5ccb071e 245 fp->pages = pages;
e9d8afa9 246 fp->aux->prog = fp;
60a3b225 247
09756af4 248 /* We keep fp->aux from fp_old around in the new
60a3b225
DB
249 * reallocated structure.
250 */
09756af4 251 fp_old->aux = NULL;
60a3b225
DB
252 __bpf_prog_free(fp_old);
253 }
254
255 return fp;
256}
60a3b225
DB
257
258void __bpf_prog_free(struct bpf_prog *fp)
259{
492ecee8
AS
260 if (fp->aux) {
261 free_percpu(fp->aux->stats);
262 kfree(fp->aux);
263 }
60a3b225
DB
264 vfree(fp);
265}
60a3b225 266
f1f7714e 267int bpf_prog_calc_tag(struct bpf_prog *fp)
7bd509e3
DB
268{
269 const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
f1f7714e
DB
270 u32 raw_size = bpf_prog_tag_scratch_size(fp);
271 u32 digest[SHA_DIGEST_WORDS];
aafe6ae9 272 u32 ws[SHA_WORKSPACE_WORDS];
7bd509e3 273 u32 i, bsize, psize, blocks;
aafe6ae9 274 struct bpf_insn *dst;
7bd509e3 275 bool was_ld_map;
aafe6ae9 276 u8 *raw, *todo;
7bd509e3
DB
277 __be32 *result;
278 __be64 *bits;
279
aafe6ae9
DB
280 raw = vmalloc(raw_size);
281 if (!raw)
282 return -ENOMEM;
283
f1f7714e 284 sha_init(digest);
7bd509e3
DB
285 memset(ws, 0, sizeof(ws));
286
 287	/* We need to take out the map fds for the digest calculation
 288	 * since they are unstable from the user space side.
289 */
aafe6ae9 290 dst = (void *)raw;
7bd509e3
DB
291 for (i = 0, was_ld_map = false; i < fp->len; i++) {
292 dst[i] = fp->insnsi[i];
293 if (!was_ld_map &&
294 dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
d8eca5bb
DB
295 (dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
296 dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
7bd509e3
DB
297 was_ld_map = true;
298 dst[i].imm = 0;
299 } else if (was_ld_map &&
300 dst[i].code == 0 &&
301 dst[i].dst_reg == 0 &&
302 dst[i].src_reg == 0 &&
303 dst[i].off == 0) {
304 was_ld_map = false;
305 dst[i].imm = 0;
306 } else {
307 was_ld_map = false;
308 }
309 }
310
aafe6ae9
DB
311 psize = bpf_prog_insn_size(fp);
312 memset(&raw[psize], 0, raw_size - psize);
7bd509e3
DB
313 raw[psize++] = 0x80;
314
315 bsize = round_up(psize, SHA_MESSAGE_BYTES);
316 blocks = bsize / SHA_MESSAGE_BYTES;
aafe6ae9 317 todo = raw;
7bd509e3
DB
318 if (bsize - psize >= sizeof(__be64)) {
319 bits = (__be64 *)(todo + bsize - sizeof(__be64));
320 } else {
321 bits = (__be64 *)(todo + bsize + bits_offset);
322 blocks++;
323 }
324 *bits = cpu_to_be64((psize - 1) << 3);
325
326 while (blocks--) {
f1f7714e 327 sha_transform(digest, todo, ws);
7bd509e3
DB
328 todo += SHA_MESSAGE_BYTES;
329 }
330
f1f7714e 331 result = (__force __be32 *)digest;
7bd509e3 332 for (i = 0; i < SHA_DIGEST_WORDS; i++)
f1f7714e
DB
333 result[i] = cpu_to_be32(digest[i]);
334 memcpy(fp->tag, result, sizeof(fp->tag));
aafe6ae9
DB
335
336 vfree(raw);
337 return 0;
7bd509e3
DB
338}
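/* Note (editorial addition): the resulting 8-byte truncated SHA-1 digest is
 * the program "tag" visible to user space, e.g. in bpftool output and in the
 * bpf_prog_<tag>[_<name>] kallsyms symbol built by bpf_get_prog_name()
 * below. Map fd immediates are zeroed first so the tag stays stable across
 * loads that reference the same logical maps under different fd numbers.
 */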
339
2cbd95a5 340static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
af959b18 341 s32 end_new, s32 curr, const bool probe_pass)
c237ee5e 342{
050fad7c 343 const s64 imm_min = S32_MIN, imm_max = S32_MAX;
2cbd95a5 344 s32 delta = end_new - end_old;
050fad7c
DB
345 s64 imm = insn->imm;
346
2cbd95a5 347 if (curr < pos && curr + imm + 1 >= end_old)
050fad7c 348 imm += delta;
2cbd95a5 349 else if (curr >= end_new && curr + imm + 1 < end_new)
050fad7c
DB
350 imm -= delta;
351 if (imm < imm_min || imm > imm_max)
352 return -ERANGE;
353 if (!probe_pass)
354 insn->imm = imm;
355 return 0;
356}
357
2cbd95a5 358static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
af959b18 359 s32 end_new, s32 curr, const bool probe_pass)
050fad7c
DB
360{
361 const s32 off_min = S16_MIN, off_max = S16_MAX;
2cbd95a5 362 s32 delta = end_new - end_old;
050fad7c
DB
363 s32 off = insn->off;
364
2cbd95a5 365 if (curr < pos && curr + off + 1 >= end_old)
050fad7c 366 off += delta;
2cbd95a5 367 else if (curr >= end_new && curr + off + 1 < end_new)
050fad7c
DB
368 off -= delta;
369 if (off < off_min || off > off_max)
370 return -ERANGE;
371 if (!probe_pass)
372 insn->off = off;
373 return 0;
374}
375
2cbd95a5
JK
376static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
377 s32 end_new, const bool probe_pass)
050fad7c 378{
2cbd95a5 379 u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0);
c237ee5e 380 struct bpf_insn *insn = prog->insnsi;
050fad7c 381 int ret = 0;
c237ee5e
DB
382
383 for (i = 0; i < insn_cnt; i++, insn++) {
050fad7c
DB
384 u8 code;
385
386 /* In the probing pass we still operate on the original,
387 * unpatched image in order to check overflows before we
388 * do any other adjustments. Therefore skip the patchlet.
389 */
390 if (probe_pass && i == pos) {
2cbd95a5
JK
391 i = end_new;
392 insn = prog->insnsi + end_old;
050fad7c 393 }
1ea47e01 394 code = insn->code;
092ed096
JW
395 if ((BPF_CLASS(code) != BPF_JMP &&
396 BPF_CLASS(code) != BPF_JMP32) ||
050fad7c 397 BPF_OP(code) == BPF_EXIT)
1ea47e01 398 continue;
050fad7c 399 /* Adjust offset of jmps if we cross patch boundaries. */
1ea47e01 400 if (BPF_OP(code) == BPF_CALL) {
050fad7c 401 if (insn->src_reg != BPF_PSEUDO_CALL)
1ea47e01 402 continue;
2cbd95a5
JK
403 ret = bpf_adj_delta_to_imm(insn, pos, end_old,
404 end_new, i, probe_pass);
1ea47e01 405 } else {
2cbd95a5
JK
406 ret = bpf_adj_delta_to_off(insn, pos, end_old,
407 end_new, i, probe_pass);
1ea47e01 408 }
050fad7c
DB
409 if (ret)
410 break;
c237ee5e 411 }
050fad7c
DB
412
413 return ret;
c237ee5e
DB
414}
415
c454a46b
MKL
416static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
417{
418 struct bpf_line_info *linfo;
419 u32 i, nr_linfo;
420
421 nr_linfo = prog->aux->nr_linfo;
422 if (!nr_linfo || !delta)
423 return;
424
425 linfo = prog->aux->linfo;
426
427 for (i = 0; i < nr_linfo; i++)
428 if (off < linfo[i].insn_off)
429 break;
430
431 /* Push all off < linfo[i].insn_off by delta */
432 for (; i < nr_linfo; i++)
433 linfo[i].insn_off += delta;
434}
435
c237ee5e
DB
436struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
437 const struct bpf_insn *patch, u32 len)
438{
439 u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
050fad7c 440 const u32 cnt_max = S16_MAX;
c237ee5e 441 struct bpf_prog *prog_adj;
4f73379e 442 int err;
c237ee5e
DB
443
444 /* Since our patchlet doesn't expand the image, we're done. */
445 if (insn_delta == 0) {
446 memcpy(prog->insnsi + off, patch, sizeof(*patch));
447 return prog;
448 }
449
450 insn_adj_cnt = prog->len + insn_delta;
451
050fad7c
DB
452 /* Reject anything that would potentially let the insn->off
453 * target overflow when we have excessive program expansions.
454 * We need to probe here before we do any reallocation where
455 * we afterwards may not fail anymore.
456 */
457 if (insn_adj_cnt > cnt_max &&
4f73379e
AS
458 (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
459 return ERR_PTR(err);
050fad7c 460
c237ee5e
DB
461 /* Several new instructions need to be inserted. Make room
462 * for them. Likely, there's no need for a new allocation as
463 * last page could have large enough tailroom.
464 */
465 prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
466 GFP_USER);
467 if (!prog_adj)
4f73379e 468 return ERR_PTR(-ENOMEM);
c237ee5e
DB
469
470 prog_adj->len = insn_adj_cnt;
471
472 /* Patching happens in 3 steps:
473 *
474 * 1) Move over tail of insnsi from next instruction onwards,
475 * so we can patch the single target insn with one or more
476 * new ones (patching is always from 1 to n insns, n > 0).
477 * 2) Inject new instructions at the target location.
478 * 3) Adjust branch offsets if necessary.
479 */
480 insn_rest = insn_adj_cnt - off - len;
481
482 memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1,
483 sizeof(*patch) * insn_rest);
484 memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
485
050fad7c
DB
 486	/* We are guaranteed not to fail at this point; otherwise
 487	 * the ship has sailed and we cannot revert to the original
 488	 * state. An overflow cannot happen at this point.
489 */
2cbd95a5 490 BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false));
c237ee5e 491
c454a46b
MKL
492 bpf_adj_linfo(prog_adj, off, insn_delta);
493
c237ee5e
DB
494 return prog_adj;
495}
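/* Illustrative sketch (editorial addition, hypothetical numbers): patching a
 * single instruction at offset 'off' with a patchlet of len == 3 grows the
 * image by insn_delta == 2. In bpf_adj_branches(), a jump sitting before
 * 'off' whose target lands after the patched instruction gets its ->off
 * (or ->imm for a BPF_PSEUDO_CALL) increased by 2, while jumps whose target
 * is at or before the patch site are left untouched.
 */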
496
52875a04
JK
497int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
498{
499 /* Branch offsets can't overflow when program is shrinking, no need
500 * to call bpf_adj_branches(..., true) here
501 */
502 memmove(prog->insnsi + off, prog->insnsi + off + cnt,
503 sizeof(struct bpf_insn) * (prog->len - off - cnt));
504 prog->len -= cnt;
505
506 return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
507}
508
7d1982b4
DB
509void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
510{
511 int i;
512
513 for (i = 0; i < fp->aux->func_cnt; i++)
514 bpf_prog_kallsyms_del(fp->aux->func[i]);
515}
516
517void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
518{
519 bpf_prog_kallsyms_del_subprogs(fp);
520 bpf_prog_kallsyms_del(fp);
521}
522
b954d834 523#ifdef CONFIG_BPF_JIT
fa9dd599
DB
524/* All BPF JIT sysctl knobs here. */
525int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
526int bpf_jit_harden __read_mostly;
527int bpf_jit_kallsyms __read_mostly;
fdadd049 528long bpf_jit_limit __read_mostly;
fa9dd599 529
74451e66
DB
530static __always_inline void
531bpf_get_prog_addr_region(const struct bpf_prog *prog,
532 unsigned long *symbol_start,
533 unsigned long *symbol_end)
534{
535 const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
536 unsigned long addr = (unsigned long)hdr;
537
538 WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));
539
540 *symbol_start = addr;
541 *symbol_end = addr + hdr->pages * PAGE_SIZE;
542}
543
6ee52e2a 544void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
74451e66 545{
368211fb 546 const char *end = sym + KSYM_NAME_LEN;
838e9690
YS
547 const struct btf_type *type;
548 const char *func_name;
368211fb 549
74451e66 550 BUILD_BUG_ON(sizeof("bpf_prog_") +
368211fb
MKL
551 sizeof(prog->tag) * 2 +
552 /* name has been null terminated.
 553		     * We would need +1 for the '_' preceding
554 * the name. However, the null character
555 * is double counted between the name and the
556 * sizeof("bpf_prog_") above, so we omit
557 * the +1 here.
558 */
559 sizeof(prog->aux->name) > KSYM_NAME_LEN);
74451e66
DB
560
561 sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
562 sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
838e9690
YS
563
564 /* prog->aux->name will be ignored if full btf name is available */
7337224f 565 if (prog->aux->func_info_cnt) {
ba64e7d8
YS
566 type = btf_type_by_id(prog->aux->btf,
567 prog->aux->func_info[prog->aux->func_idx].type_id);
838e9690
YS
568 func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
569 snprintf(sym, (size_t)(end - sym), "_%s", func_name);
570 return;
571 }
572
368211fb
MKL
573 if (prog->aux->name[0])
574 snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
575 else
576 *sym = 0;
74451e66
DB
577}
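/* Illustrative example (editorial addition, hypothetical values): a program
 * named "xdp_pass" with tag 0123456789abcdef would appear in /proc/kallsyms
 * as
 *
 *	bpf_prog_0123456789abcdef_xdp_pass
 *
 * an unnamed program gets the bare "bpf_prog_0123456789abcdef", and when BTF
 * func info is present the BTF function name is used instead of aux->name.
 */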
578
579static __always_inline unsigned long
580bpf_get_prog_addr_start(struct latch_tree_node *n)
581{
582 unsigned long symbol_start, symbol_end;
583 const struct bpf_prog_aux *aux;
584
585 aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
586 bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
587
588 return symbol_start;
589}
590
591static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
592 struct latch_tree_node *b)
593{
594 return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
595}
596
597static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
598{
599 unsigned long val = (unsigned long)key;
600 unsigned long symbol_start, symbol_end;
601 const struct bpf_prog_aux *aux;
602
603 aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
604 bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
605
606 if (val < symbol_start)
607 return -1;
608 if (val >= symbol_end)
609 return 1;
610
611 return 0;
612}
613
614static const struct latch_tree_ops bpf_tree_ops = {
615 .less = bpf_tree_less,
616 .comp = bpf_tree_comp,
617};
618
619static DEFINE_SPINLOCK(bpf_lock);
620static LIST_HEAD(bpf_kallsyms);
621static struct latch_tree_root bpf_tree __cacheline_aligned;
622
74451e66
DB
623static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
624{
625 WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
626 list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
627 latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
628}
629
630static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
631{
632 if (list_empty(&aux->ksym_lnode))
633 return;
634
635 latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
636 list_del_rcu(&aux->ksym_lnode);
637}
638
639static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
640{
641 return fp->jited && !bpf_prog_was_classic(fp);
642}
643
644static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
645{
646 return list_empty(&fp->aux->ksym_lnode) ||
647 fp->aux->ksym_lnode.prev == LIST_POISON2;
648}
649
650void bpf_prog_kallsyms_add(struct bpf_prog *fp)
651{
74451e66
DB
652 if (!bpf_prog_kallsyms_candidate(fp) ||
653 !capable(CAP_SYS_ADMIN))
654 return;
655
d24f7c7f 656 spin_lock_bh(&bpf_lock);
74451e66 657 bpf_prog_ksym_node_add(fp->aux);
d24f7c7f 658 spin_unlock_bh(&bpf_lock);
74451e66
DB
659}
660
661void bpf_prog_kallsyms_del(struct bpf_prog *fp)
662{
74451e66
DB
663 if (!bpf_prog_kallsyms_candidate(fp))
664 return;
665
d24f7c7f 666 spin_lock_bh(&bpf_lock);
74451e66 667 bpf_prog_ksym_node_del(fp->aux);
d24f7c7f 668 spin_unlock_bh(&bpf_lock);
74451e66
DB
669}
670
671static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
672{
673 struct latch_tree_node *n;
674
675 if (!bpf_jit_kallsyms_enabled())
676 return NULL;
677
678 n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
679 return n ?
680 container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
681 NULL;
682}
683
684const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
685 unsigned long *off, char *sym)
686{
687 unsigned long symbol_start, symbol_end;
688 struct bpf_prog *prog;
689 char *ret = NULL;
690
691 rcu_read_lock();
692 prog = bpf_prog_kallsyms_find(addr);
693 if (prog) {
694 bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
695 bpf_get_prog_name(prog, sym);
696
697 ret = sym;
698 if (size)
699 *size = symbol_end - symbol_start;
700 if (off)
701 *off = addr - symbol_start;
702 }
703 rcu_read_unlock();
704
705 return ret;
706}
707
708bool is_bpf_text_address(unsigned long addr)
709{
710 bool ret;
711
712 rcu_read_lock();
713 ret = bpf_prog_kallsyms_find(addr) != NULL;
714 rcu_read_unlock();
715
716 return ret;
717}
718
719int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
720 char *sym)
721{
74451e66
DB
722 struct bpf_prog_aux *aux;
723 unsigned int it = 0;
724 int ret = -ERANGE;
725
726 if (!bpf_jit_kallsyms_enabled())
727 return ret;
728
729 rcu_read_lock();
730 list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
731 if (it++ != symnum)
732 continue;
733
74451e66
DB
734 bpf_get_prog_name(aux->prog, sym);
735
df073470 736 *value = (unsigned long)aux->prog->bpf_func;
74451e66
DB
737 *type = BPF_SYM_ELF_TYPE;
738
739 ret = 0;
740 break;
741 }
742 rcu_read_unlock();
743
744 return ret;
745}
746
ede95a63
DB
747static atomic_long_t bpf_jit_current;
748
fdadd049
DB
749/* Can be overridden by an arch's JIT compiler if it has a custom,
750 * dedicated BPF backend memory area, or if neither of the two
751 * below apply.
752 */
753u64 __weak bpf_jit_alloc_exec_limit(void)
754{
ede95a63 755#if defined(MODULES_VADDR)
fdadd049
DB
756 return MODULES_END - MODULES_VADDR;
757#else
758 return VMALLOC_END - VMALLOC_START;
759#endif
760}
761
ede95a63
DB
762static int __init bpf_jit_charge_init(void)
763{
764 /* Only used as heuristic here to derive limit. */
fdadd049
DB
765 bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
766 PAGE_SIZE), LONG_MAX);
ede95a63
DB
767 return 0;
768}
769pure_initcall(bpf_jit_charge_init);
ede95a63
DB
770
771static int bpf_jit_charge_modmem(u32 pages)
772{
773 if (atomic_long_add_return(pages, &bpf_jit_current) >
774 (bpf_jit_limit >> PAGE_SHIFT)) {
775 if (!capable(CAP_SYS_ADMIN)) {
776 atomic_long_sub(pages, &bpf_jit_current);
777 return -EPERM;
778 }
779 }
780
781 return 0;
782}
783
784static void bpf_jit_uncharge_modmem(u32 pages)
785{
786 atomic_long_sub(pages, &bpf_jit_current);
787}
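/* Illustrative sketch (editorial addition): with the default limit of one
 * quarter of the module/vmalloc space derived in bpf_jit_charge_init(), an
 * unprivileged JIT allocation is rejected with -EPERM once the pages summed
 * in bpf_jit_current would exceed bpf_jit_limit >> PAGE_SHIFT; callers with
 * CAP_SYS_ADMIN are allowed to exceed the limit.
 */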
788
dc002bb6
AB
789void *__weak bpf_jit_alloc_exec(unsigned long size)
790{
791 return module_alloc(size);
792}
793
794void __weak bpf_jit_free_exec(void *addr)
795{
796 module_memfree(addr);
797}
798
738cbe72
DB
799struct bpf_binary_header *
800bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
801 unsigned int alignment,
802 bpf_jit_fill_hole_t bpf_fill_ill_insns)
803{
804 struct bpf_binary_header *hdr;
ede95a63 805 u32 size, hole, start, pages;
738cbe72
DB
806
 807	/* Most BPF filters are really small, but if some of them
808 * fill a page, allow at least 128 extra bytes to insert a
809 * random section of illegal instructions.
810 */
811 size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
ede95a63
DB
812 pages = size / PAGE_SIZE;
813
814 if (bpf_jit_charge_modmem(pages))
815 return NULL;
dc002bb6 816 hdr = bpf_jit_alloc_exec(size);
ede95a63
DB
817 if (!hdr) {
818 bpf_jit_uncharge_modmem(pages);
738cbe72 819 return NULL;
ede95a63 820 }
738cbe72
DB
821
822 /* Fill space with illegal/arch-dep instructions. */
823 bpf_fill_ill_insns(hdr, size);
824
ede95a63 825 hdr->pages = pages;
738cbe72
DB
826 hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
827 PAGE_SIZE - sizeof(*hdr));
b7552e1b 828 start = (get_random_int() % hole) & ~(alignment - 1);
738cbe72
DB
829
830 /* Leave a random number of instructions before BPF code. */
831 *image_ptr = &hdr->image[start];
832
833 return hdr;
834}
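/* Illustrative example (editorial addition, hypothetical numbers): for
 * proglen == 4000 on a 4K-page system, size rounds up to 8192 bytes
 * (2 pages). The whole area is first filled with illegal instructions by
 * bpf_fill_ill_insns(), and *image_ptr then points at hdr->image[start],
 * where 'start' is a random, alignment-masked offset into a hole of at most
 * PAGE_SIZE - sizeof(*hdr) bytes of front padding.
 */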
835
836void bpf_jit_binary_free(struct bpf_binary_header *hdr)
837{
ede95a63
DB
838 u32 pages = hdr->pages;
839
dc002bb6 840 bpf_jit_free_exec(hdr);
ede95a63 841 bpf_jit_uncharge_modmem(pages);
738cbe72 842}
4f3446bb 843
74451e66
DB
844/* This symbol is only overridden by archs that have different
 845 * requirements than the usual eBPF JITs, e.g. when they only
846 * implement cBPF JIT, do not set images read-only, etc.
847 */
848void __weak bpf_jit_free(struct bpf_prog *fp)
849{
850 if (fp->jited) {
851 struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
852
74451e66
DB
853 bpf_jit_binary_free(hdr);
854
855 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
856 }
857
858 bpf_prog_unlock_free(fp);
859}
860
e2c95a61
DB
861int bpf_jit_get_func_addr(const struct bpf_prog *prog,
862 const struct bpf_insn *insn, bool extra_pass,
863 u64 *func_addr, bool *func_addr_fixed)
864{
865 s16 off = insn->off;
866 s32 imm = insn->imm;
867 u8 *addr;
868
869 *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
870 if (!*func_addr_fixed) {
871 /* Place-holder address till the last pass has collected
872 * all addresses for JITed subprograms in which case we
873 * can pick them up from prog->aux.
874 */
875 if (!extra_pass)
876 addr = NULL;
877 else if (prog->aux->func &&
878 off >= 0 && off < prog->aux->func_cnt)
879 addr = (u8 *)prog->aux->func[off]->bpf_func;
880 else
881 return -EINVAL;
882 } else {
883 /* Address of a BPF helper call. Since part of the core
884 * kernel, it's always at a fixed location. __bpf_call_base
885 * and the helper with imm relative to it are both in core
886 * kernel.
887 */
888 addr = (u8 *)__bpf_call_base + imm;
889 }
890
891 *func_addr = (unsigned long)addr;
892 return 0;
893}
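/* Illustrative sketch (editorial addition): for a helper call such as
 * bpf_map_lookup_elem(), insn->imm holds the helper address relative to
 * __bpf_call_base, so *func_addr is simply __bpf_call_base + imm and
 * *func_addr_fixed is true. For a bpf-to-bpf call (src_reg ==
 * BPF_PSEUDO_CALL), insn->off indexes prog->aux->func[] and the real
 * address only becomes known in the extra JIT pass.
 */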
894
4f3446bb
DB
895static int bpf_jit_blind_insn(const struct bpf_insn *from,
896 const struct bpf_insn *aux,
897 struct bpf_insn *to_buff)
898{
899 struct bpf_insn *to = to_buff;
b7552e1b 900 u32 imm_rnd = get_random_int();
4f3446bb
DB
901 s16 off;
902
903 BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
904 BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
905
9b73bfdd
DB
906 /* Constraints on AX register:
907 *
908 * AX register is inaccessible from user space. It is mapped in
909 * all JITs, and used here for constant blinding rewrites. It is
910 * typically "stateless" meaning its contents are only valid within
911 * the executed instruction, but not across several instructions.
912 * There are a few exceptions however which are further detailed
913 * below.
914 *
915 * Constant blinding is only used by JITs, not in the interpreter.
916 * The interpreter uses AX in some occasions as a local temporary
917 * register e.g. in DIV or MOD instructions.
918 *
919 * In restricted circumstances, the verifier can also use the AX
920 * register for rewrites as long as they do not interfere with
921 * the above cases!
922 */
923 if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
924 goto out;
925
4f3446bb
DB
926 if (from->imm == 0 &&
927 (from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
928 from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
929 *to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
930 goto out;
931 }
932
933 switch (from->code) {
934 case BPF_ALU | BPF_ADD | BPF_K:
935 case BPF_ALU | BPF_SUB | BPF_K:
936 case BPF_ALU | BPF_AND | BPF_K:
937 case BPF_ALU | BPF_OR | BPF_K:
938 case BPF_ALU | BPF_XOR | BPF_K:
939 case BPF_ALU | BPF_MUL | BPF_K:
940 case BPF_ALU | BPF_MOV | BPF_K:
941 case BPF_ALU | BPF_DIV | BPF_K:
942 case BPF_ALU | BPF_MOD | BPF_K:
943 *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
944 *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
945 *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
946 break;
947
948 case BPF_ALU64 | BPF_ADD | BPF_K:
949 case BPF_ALU64 | BPF_SUB | BPF_K:
950 case BPF_ALU64 | BPF_AND | BPF_K:
951 case BPF_ALU64 | BPF_OR | BPF_K:
952 case BPF_ALU64 | BPF_XOR | BPF_K:
953 case BPF_ALU64 | BPF_MUL | BPF_K:
954 case BPF_ALU64 | BPF_MOV | BPF_K:
955 case BPF_ALU64 | BPF_DIV | BPF_K:
956 case BPF_ALU64 | BPF_MOD | BPF_K:
957 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
958 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
959 *to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX);
960 break;
961
962 case BPF_JMP | BPF_JEQ | BPF_K:
963 case BPF_JMP | BPF_JNE | BPF_K:
964 case BPF_JMP | BPF_JGT | BPF_K:
92b31a9a 965 case BPF_JMP | BPF_JLT | BPF_K:
4f3446bb 966 case BPF_JMP | BPF_JGE | BPF_K:
92b31a9a 967 case BPF_JMP | BPF_JLE | BPF_K:
4f3446bb 968 case BPF_JMP | BPF_JSGT | BPF_K:
92b31a9a 969 case BPF_JMP | BPF_JSLT | BPF_K:
4f3446bb 970 case BPF_JMP | BPF_JSGE | BPF_K:
92b31a9a 971 case BPF_JMP | BPF_JSLE | BPF_K:
4f3446bb
DB
972 case BPF_JMP | BPF_JSET | BPF_K:
973 /* Accommodate for extra offset in case of a backjump. */
974 off = from->off;
975 if (off < 0)
976 off -= 2;
977 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
978 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
979 *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
980 break;
981
a7b76c88
JW
982 case BPF_JMP32 | BPF_JEQ | BPF_K:
983 case BPF_JMP32 | BPF_JNE | BPF_K:
984 case BPF_JMP32 | BPF_JGT | BPF_K:
985 case BPF_JMP32 | BPF_JLT | BPF_K:
986 case BPF_JMP32 | BPF_JGE | BPF_K:
987 case BPF_JMP32 | BPF_JLE | BPF_K:
988 case BPF_JMP32 | BPF_JSGT | BPF_K:
989 case BPF_JMP32 | BPF_JSLT | BPF_K:
990 case BPF_JMP32 | BPF_JSGE | BPF_K:
991 case BPF_JMP32 | BPF_JSLE | BPF_K:
992 case BPF_JMP32 | BPF_JSET | BPF_K:
993 /* Accommodate for extra offset in case of a backjump. */
994 off = from->off;
995 if (off < 0)
996 off -= 2;
997 *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
998 *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
999 *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
1000 off);
1001 break;
1002
4f3446bb
DB
1003 case BPF_LD | BPF_IMM | BPF_DW:
1004 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
1005 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1006 *to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
1007 *to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
1008 break;
1009 case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
1010 *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
1011 *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1012 *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX);
1013 break;
1014
1015 case BPF_ST | BPF_MEM | BPF_DW:
1016 case BPF_ST | BPF_MEM | BPF_W:
1017 case BPF_ST | BPF_MEM | BPF_H:
1018 case BPF_ST | BPF_MEM | BPF_B:
1019 *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
1020 *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
1021 *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
1022 break;
1023 }
1024out:
1025 return to - to_buff;
1026}
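/* Illustrative sketch (editorial addition, hypothetical random value): with
 * imm_rnd == 0xdeadbeef, the constant in
 *
 *	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1234)
 *
 * is blinded into the equivalent three-instruction sequence
 *
 *	BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0xdeadbeef ^ 0x1234);
 *	BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, 0xdeadbeef);
 *	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_AX);
 *
 * so the attacker-controlled immediate never appears verbatim in the JITed
 * image.
 */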
1027
1028static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
1029 gfp_t gfp_extra_flags)
1030{
19809c2d 1031 gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
4f3446bb
DB
1032 struct bpf_prog *fp;
1033
1034 fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
1035 if (fp != NULL) {
4f3446bb
DB
1036 /* aux->prog still points to the fp_other one, so
1037 * when promoting the clone to the real program,
1038 * this still needs to be adapted.
1039 */
1040 memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE);
1041 }
1042
1043 return fp;
1044}
1045
1046static void bpf_prog_clone_free(struct bpf_prog *fp)
1047{
1048 /* aux was stolen by the other clone, so we cannot free
1049 * it from this path! It will be freed eventually by the
1050 * other program on release.
1051 *
1052 * At this point, we don't need a deferred release since
1053 * clone is guaranteed to not be locked.
1054 */
1055 fp->aux = NULL;
1056 __bpf_prog_free(fp);
1057}
1058
1059void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
1060{
1061 /* We have to repoint aux->prog to self, as we don't
1062 * know whether fp here is the clone or the original.
1063 */
1064 fp->aux->prog = fp;
1065 bpf_prog_clone_free(fp_other);
1066}
1067
1068struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
1069{
1070 struct bpf_insn insn_buff[16], aux[2];
1071 struct bpf_prog *clone, *tmp;
1072 int insn_delta, insn_cnt;
1073 struct bpf_insn *insn;
1074 int i, rewritten;
1075
1c2a088a 1076 if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
4f3446bb
DB
1077 return prog;
1078
1079 clone = bpf_prog_clone_create(prog, GFP_USER);
1080 if (!clone)
1081 return ERR_PTR(-ENOMEM);
1082
1083 insn_cnt = clone->len;
1084 insn = clone->insnsi;
1085
1086 for (i = 0; i < insn_cnt; i++, insn++) {
1087 /* We temporarily need to hold the original ld64 insn
1088 * so that we can still access the first part in the
1089 * second blinding run.
1090 */
1091 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
1092 insn[1].code == 0)
1093 memcpy(aux, insn, sizeof(aux));
1094
1095 rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
1096 if (!rewritten)
1097 continue;
1098
1099 tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
4f73379e 1100 if (IS_ERR(tmp)) {
4f3446bb
DB
1101 /* Patching may have repointed aux->prog during
1102 * realloc from the original one, so we need to
1103 * fix it up here on error.
1104 */
1105 bpf_jit_prog_release_other(prog, clone);
4f73379e 1106 return tmp;
4f3446bb
DB
1107 }
1108
1109 clone = tmp;
1110 insn_delta = rewritten - 1;
1111
1112 /* Walk new program and skip insns we just inserted. */
1113 insn = clone->insnsi + i + insn_delta;
1114 insn_cnt += insn_delta;
1115 i += insn_delta;
1116 }
1117
1c2a088a 1118 clone->blinded = 1;
4f3446bb
DB
1119 return clone;
1120}
b954d834 1121#endif /* CONFIG_BPF_JIT */
738cbe72 1122
f5bffecd
AS
1123/* Base function for offset calculation. Needs to go into .text section,
1124 * therefore keeping it non-static as well; will also be used by JITs
7105e828
DB
1125 * anyway later on, so do not let the compiler omit it. This also needs
1126 * to go into kallsyms for correlation from e.g. bpftool, so naming
1127 * must not change.
f5bffecd
AS
1128 */
1129noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1130{
1131 return 0;
1132}
4d9c5c53 1133EXPORT_SYMBOL_GPL(__bpf_call_base);
f5bffecd 1134
5e581dad
DB
1135/* All UAPI available opcodes. */
1136#define BPF_INSN_MAP(INSN_2, INSN_3) \
1137 /* 32 bit ALU operations. */ \
1138 /* Register based. */ \
2dc6b100
JW
1139 INSN_3(ALU, ADD, X), \
1140 INSN_3(ALU, SUB, X), \
1141 INSN_3(ALU, AND, X), \
1142 INSN_3(ALU, OR, X), \
1143 INSN_3(ALU, LSH, X), \
1144 INSN_3(ALU, RSH, X), \
1145 INSN_3(ALU, XOR, X), \
1146 INSN_3(ALU, MUL, X), \
1147 INSN_3(ALU, MOV, X), \
1148 INSN_3(ALU, ARSH, X), \
1149 INSN_3(ALU, DIV, X), \
1150 INSN_3(ALU, MOD, X), \
5e581dad
DB
1151 INSN_2(ALU, NEG), \
1152 INSN_3(ALU, END, TO_BE), \
1153 INSN_3(ALU, END, TO_LE), \
1154 /* Immediate based. */ \
2dc6b100
JW
1155 INSN_3(ALU, ADD, K), \
1156 INSN_3(ALU, SUB, K), \
1157 INSN_3(ALU, AND, K), \
1158 INSN_3(ALU, OR, K), \
1159 INSN_3(ALU, LSH, K), \
1160 INSN_3(ALU, RSH, K), \
1161 INSN_3(ALU, XOR, K), \
1162 INSN_3(ALU, MUL, K), \
1163 INSN_3(ALU, MOV, K), \
1164 INSN_3(ALU, ARSH, K), \
1165 INSN_3(ALU, DIV, K), \
1166 INSN_3(ALU, MOD, K), \
5e581dad
DB
1167 /* 64 bit ALU operations. */ \
1168 /* Register based. */ \
1169 INSN_3(ALU64, ADD, X), \
1170 INSN_3(ALU64, SUB, X), \
1171 INSN_3(ALU64, AND, X), \
1172 INSN_3(ALU64, OR, X), \
1173 INSN_3(ALU64, LSH, X), \
1174 INSN_3(ALU64, RSH, X), \
1175 INSN_3(ALU64, XOR, X), \
1176 INSN_3(ALU64, MUL, X), \
1177 INSN_3(ALU64, MOV, X), \
1178 INSN_3(ALU64, ARSH, X), \
1179 INSN_3(ALU64, DIV, X), \
1180 INSN_3(ALU64, MOD, X), \
1181 INSN_2(ALU64, NEG), \
1182 /* Immediate based. */ \
1183 INSN_3(ALU64, ADD, K), \
1184 INSN_3(ALU64, SUB, K), \
1185 INSN_3(ALU64, AND, K), \
1186 INSN_3(ALU64, OR, K), \
1187 INSN_3(ALU64, LSH, K), \
1188 INSN_3(ALU64, RSH, K), \
1189 INSN_3(ALU64, XOR, K), \
1190 INSN_3(ALU64, MUL, K), \
1191 INSN_3(ALU64, MOV, K), \
1192 INSN_3(ALU64, ARSH, K), \
1193 INSN_3(ALU64, DIV, K), \
1194 INSN_3(ALU64, MOD, K), \
1195 /* Call instruction. */ \
1196 INSN_2(JMP, CALL), \
1197 /* Exit instruction. */ \
1198 INSN_2(JMP, EXIT), \
503a8865
JW
1199 /* 32-bit Jump instructions. */ \
1200 /* Register based. */ \
1201 INSN_3(JMP32, JEQ, X), \
1202 INSN_3(JMP32, JNE, X), \
1203 INSN_3(JMP32, JGT, X), \
1204 INSN_3(JMP32, JLT, X), \
1205 INSN_3(JMP32, JGE, X), \
1206 INSN_3(JMP32, JLE, X), \
1207 INSN_3(JMP32, JSGT, X), \
1208 INSN_3(JMP32, JSLT, X), \
1209 INSN_3(JMP32, JSGE, X), \
1210 INSN_3(JMP32, JSLE, X), \
1211 INSN_3(JMP32, JSET, X), \
1212 /* Immediate based. */ \
1213 INSN_3(JMP32, JEQ, K), \
1214 INSN_3(JMP32, JNE, K), \
1215 INSN_3(JMP32, JGT, K), \
1216 INSN_3(JMP32, JLT, K), \
1217 INSN_3(JMP32, JGE, K), \
1218 INSN_3(JMP32, JLE, K), \
1219 INSN_3(JMP32, JSGT, K), \
1220 INSN_3(JMP32, JSLT, K), \
1221 INSN_3(JMP32, JSGE, K), \
1222 INSN_3(JMP32, JSLE, K), \
1223 INSN_3(JMP32, JSET, K), \
5e581dad
DB
1224 /* Jump instructions. */ \
1225 /* Register based. */ \
1226 INSN_3(JMP, JEQ, X), \
1227 INSN_3(JMP, JNE, X), \
1228 INSN_3(JMP, JGT, X), \
1229 INSN_3(JMP, JLT, X), \
1230 INSN_3(JMP, JGE, X), \
1231 INSN_3(JMP, JLE, X), \
1232 INSN_3(JMP, JSGT, X), \
1233 INSN_3(JMP, JSLT, X), \
1234 INSN_3(JMP, JSGE, X), \
1235 INSN_3(JMP, JSLE, X), \
1236 INSN_3(JMP, JSET, X), \
1237 /* Immediate based. */ \
1238 INSN_3(JMP, JEQ, K), \
1239 INSN_3(JMP, JNE, K), \
1240 INSN_3(JMP, JGT, K), \
1241 INSN_3(JMP, JLT, K), \
1242 INSN_3(JMP, JGE, K), \
1243 INSN_3(JMP, JLE, K), \
1244 INSN_3(JMP, JSGT, K), \
1245 INSN_3(JMP, JSLT, K), \
1246 INSN_3(JMP, JSGE, K), \
1247 INSN_3(JMP, JSLE, K), \
1248 INSN_3(JMP, JSET, K), \
1249 INSN_2(JMP, JA), \
1250 /* Store instructions. */ \
1251 /* Register based. */ \
1252 INSN_3(STX, MEM, B), \
1253 INSN_3(STX, MEM, H), \
1254 INSN_3(STX, MEM, W), \
1255 INSN_3(STX, MEM, DW), \
1256 INSN_3(STX, XADD, W), \
1257 INSN_3(STX, XADD, DW), \
1258 /* Immediate based. */ \
1259 INSN_3(ST, MEM, B), \
1260 INSN_3(ST, MEM, H), \
1261 INSN_3(ST, MEM, W), \
1262 INSN_3(ST, MEM, DW), \
1263 /* Load instructions. */ \
1264 /* Register based. */ \
1265 INSN_3(LDX, MEM, B), \
1266 INSN_3(LDX, MEM, H), \
1267 INSN_3(LDX, MEM, W), \
1268 INSN_3(LDX, MEM, DW), \
1269 /* Immediate based. */ \
e0cea7ce 1270 INSN_3(LD, IMM, DW)
5e581dad
DB
1271
1272bool bpf_opcode_in_insntable(u8 code)
1273{
1274#define BPF_INSN_2_TBL(x, y) [BPF_##x | BPF_##y] = true
1275#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
1276 static const bool public_insntable[256] = {
1277 [0 ... 255] = false,
1278 /* Now overwrite non-defaults ... */
1279 BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
e0cea7ce
DB
1280 /* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
1281 [BPF_LD | BPF_ABS | BPF_B] = true,
1282 [BPF_LD | BPF_ABS | BPF_H] = true,
1283 [BPF_LD | BPF_ABS | BPF_W] = true,
1284 [BPF_LD | BPF_IND | BPF_B] = true,
1285 [BPF_LD | BPF_IND | BPF_H] = true,
1286 [BPF_LD | BPF_IND | BPF_W] = true,
5e581dad
DB
1287 };
1288#undef BPF_INSN_3_TBL
1289#undef BPF_INSN_2_TBL
1290 return public_insntable[code];
1291}
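/* Illustrative example (editorial addition): the opcode byte is the OR of
 * class, operation and source/size bits, e.g.
 *
 *	BPF_ALU64 | BPF_ADD | BPF_X == 0x0f
 *
 * which is listed above via INSN_3(ALU64, ADD, X) and is therefore reported
 * as valid by bpf_opcode_in_insntable().
 */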
1292
290af866 1293#ifndef CONFIG_BPF_JIT_ALWAYS_ON
f5bffecd 1294/**
7ae457c1 1295 * __bpf_prog_run - run eBPF program on a given context
de1da68d 1296 * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
7ae457c1 1297 * @insn: is the array of eBPF instructions
de1da68d 1298 * @stack: is the eBPF storage stack
f5bffecd 1299 *
7ae457c1 1300 * Decode and execute eBPF instructions.
f5bffecd 1301 */
1ea47e01 1302static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
f5bffecd 1303{
5e581dad
DB
1304#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
1305#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
f5bffecd
AS
1306 static const void *jumptable[256] = {
1307 [0 ... 255] = &&default_label,
1308 /* Now overwrite non-defaults ... */
5e581dad
DB
1309 BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
1310 /* Non-UAPI available opcodes. */
1ea47e01 1311 [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
71189fa9 1312 [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
f5bffecd 1313 };
5e581dad
DB
1314#undef BPF_INSN_3_LBL
1315#undef BPF_INSN_2_LBL
04fd61ab 1316 u32 tail_call_cnt = 0;
f5bffecd
AS
1317
1318#define CONT ({ insn++; goto select_insn; })
1319#define CONT_JMP ({ insn++; goto select_insn; })
1320
f5bffecd
AS
1321select_insn:
1322 goto *jumptable[insn->code];
1323
1324 /* ALU */
1325#define ALU(OPCODE, OP) \
1326 ALU64_##OPCODE##_X: \
1327 DST = DST OP SRC; \
1328 CONT; \
1329 ALU_##OPCODE##_X: \
1330 DST = (u32) DST OP (u32) SRC; \
1331 CONT; \
1332 ALU64_##OPCODE##_K: \
1333 DST = DST OP IMM; \
1334 CONT; \
1335 ALU_##OPCODE##_K: \
1336 DST = (u32) DST OP (u32) IMM; \
1337 CONT;
1338
1339 ALU(ADD, +)
1340 ALU(SUB, -)
1341 ALU(AND, &)
1342 ALU(OR, |)
1343 ALU(LSH, <<)
1344 ALU(RSH, >>)
1345 ALU(XOR, ^)
1346 ALU(MUL, *)
1347#undef ALU
1348 ALU_NEG:
1349 DST = (u32) -DST;
1350 CONT;
1351 ALU64_NEG:
1352 DST = -DST;
1353 CONT;
1354 ALU_MOV_X:
1355 DST = (u32) SRC;
1356 CONT;
1357 ALU_MOV_K:
1358 DST = (u32) IMM;
1359 CONT;
1360 ALU64_MOV_X:
1361 DST = SRC;
1362 CONT;
1363 ALU64_MOV_K:
1364 DST = IMM;
1365 CONT;
02ab695b
AS
1366 LD_IMM_DW:
1367 DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
1368 insn++;
1369 CONT;
2dc6b100
JW
1370 ALU_ARSH_X:
1371 DST = (u64) (u32) ((*(s32 *) &DST) >> SRC);
1372 CONT;
1373 ALU_ARSH_K:
1374 DST = (u64) (u32) ((*(s32 *) &DST) >> IMM);
1375 CONT;
f5bffecd
AS
1376 ALU64_ARSH_X:
1377 (*(s64 *) &DST) >>= SRC;
1378 CONT;
1379 ALU64_ARSH_K:
1380 (*(s64 *) &DST) >>= IMM;
1381 CONT;
1382 ALU64_MOD_X:
144cd91c
DB
1383 div64_u64_rem(DST, SRC, &AX);
1384 DST = AX;
f5bffecd
AS
1385 CONT;
1386 ALU_MOD_X:
144cd91c
DB
1387 AX = (u32) DST;
1388 DST = do_div(AX, (u32) SRC);
f5bffecd
AS
1389 CONT;
1390 ALU64_MOD_K:
144cd91c
DB
1391 div64_u64_rem(DST, IMM, &AX);
1392 DST = AX;
f5bffecd
AS
1393 CONT;
1394 ALU_MOD_K:
144cd91c
DB
1395 AX = (u32) DST;
1396 DST = do_div(AX, (u32) IMM);
f5bffecd
AS
1397 CONT;
1398 ALU64_DIV_X:
876a7ae6 1399 DST = div64_u64(DST, SRC);
f5bffecd
AS
1400 CONT;
1401 ALU_DIV_X:
144cd91c
DB
1402 AX = (u32) DST;
1403 do_div(AX, (u32) SRC);
1404 DST = (u32) AX;
f5bffecd
AS
1405 CONT;
1406 ALU64_DIV_K:
876a7ae6 1407 DST = div64_u64(DST, IMM);
f5bffecd
AS
1408 CONT;
1409 ALU_DIV_K:
144cd91c
DB
1410 AX = (u32) DST;
1411 do_div(AX, (u32) IMM);
1412 DST = (u32) AX;
f5bffecd
AS
1413 CONT;
1414 ALU_END_TO_BE:
1415 switch (IMM) {
1416 case 16:
1417 DST = (__force u16) cpu_to_be16(DST);
1418 break;
1419 case 32:
1420 DST = (__force u32) cpu_to_be32(DST);
1421 break;
1422 case 64:
1423 DST = (__force u64) cpu_to_be64(DST);
1424 break;
1425 }
1426 CONT;
1427 ALU_END_TO_LE:
1428 switch (IMM) {
1429 case 16:
1430 DST = (__force u16) cpu_to_le16(DST);
1431 break;
1432 case 32:
1433 DST = (__force u32) cpu_to_le32(DST);
1434 break;
1435 case 64:
1436 DST = (__force u64) cpu_to_le64(DST);
1437 break;
1438 }
1439 CONT;
1440
1441 /* CALL */
1442 JMP_CALL:
1443 /* Function call scratches BPF_R1-BPF_R5 registers,
1444 * preserves BPF_R6-BPF_R9, and stores return value
1445 * into BPF_R0.
1446 */
1447 BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
1448 BPF_R4, BPF_R5);
1449 CONT;
1450
1ea47e01
AS
1451 JMP_CALL_ARGS:
1452 BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
1453 BPF_R3, BPF_R4,
1454 BPF_R5,
1455 insn + insn->off + 1);
1456 CONT;
1457
04fd61ab
AS
1458 JMP_TAIL_CALL: {
1459 struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
1460 struct bpf_array *array = container_of(map, struct bpf_array, map);
1461 struct bpf_prog *prog;
90caccdd 1462 u32 index = BPF_R3;
04fd61ab
AS
1463
1464 if (unlikely(index >= array->map.max_entries))
1465 goto out;
04fd61ab
AS
1466 if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
1467 goto out;
1468
1469 tail_call_cnt++;
1470
2a36f0b9 1471 prog = READ_ONCE(array->ptrs[index]);
1ca1cc98 1472 if (!prog)
04fd61ab
AS
1473 goto out;
1474
c4675f93
DB
1475 /* ARG1 at this point is guaranteed to point to CTX from
1476 * the verifier side due to the fact that the tail call is
 1477	 * handled like a helper, that is, bpf_tail_call_proto,
1478 * where arg1_type is ARG_PTR_TO_CTX.
1479 */
04fd61ab
AS
1480 insn = prog->insnsi;
1481 goto select_insn;
1482out:
1483 CONT;
1484 }
f5bffecd
AS
1485 JMP_JA:
1486 insn += insn->off;
1487 CONT;
f5bffecd
AS
1488 JMP_EXIT:
1489 return BPF_R0;
503a8865
JW
1490 /* JMP */
1491#define COND_JMP(SIGN, OPCODE, CMP_OP) \
1492 JMP_##OPCODE##_X: \
1493 if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) { \
1494 insn += insn->off; \
1495 CONT_JMP; \
1496 } \
1497 CONT; \
1498 JMP32_##OPCODE##_X: \
1499 if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) { \
1500 insn += insn->off; \
1501 CONT_JMP; \
1502 } \
1503 CONT; \
1504 JMP_##OPCODE##_K: \
1505 if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) { \
1506 insn += insn->off; \
1507 CONT_JMP; \
1508 } \
1509 CONT; \
1510 JMP32_##OPCODE##_K: \
1511 if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) { \
1512 insn += insn->off; \
1513 CONT_JMP; \
1514 } \
1515 CONT;
1516 COND_JMP(u, JEQ, ==)
1517 COND_JMP(u, JNE, !=)
1518 COND_JMP(u, JGT, >)
1519 COND_JMP(u, JLT, <)
1520 COND_JMP(u, JGE, >=)
1521 COND_JMP(u, JLE, <=)
1522 COND_JMP(u, JSET, &)
1523 COND_JMP(s, JSGT, >)
1524 COND_JMP(s, JSLT, <)
1525 COND_JMP(s, JSGE, >=)
1526 COND_JMP(s, JSLE, <=)
1527#undef COND_JMP
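	/* Note (editorial addition): each COND_JMP(u, JEQ, ==) expansion above
	 * provides the four labels JMP_JEQ_X, JMP32_JEQ_X, JMP_JEQ_K and
	 * JMP32_JEQ_K, comparing either the full 64-bit registers or their
	 * low 32 bits against SRC or IMM and taking the branch via
	 * insn += insn->off when the comparison holds.
	 */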
f5bffecd
AS
 1528	/* STX and ST and LDX */
1529#define LDST(SIZEOP, SIZE) \
1530 STX_MEM_##SIZEOP: \
1531 *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
1532 CONT; \
1533 ST_MEM_##SIZEOP: \
1534 *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
1535 CONT; \
1536 LDX_MEM_##SIZEOP: \
1537 DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
1538 CONT;
1539
1540 LDST(B, u8)
1541 LDST(H, u16)
1542 LDST(W, u32)
1543 LDST(DW, u64)
1544#undef LDST
1545 STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
1546 atomic_add((u32) SRC, (atomic_t *)(unsigned long)
1547 (DST + insn->off));
1548 CONT;
1549 STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
1550 atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
1551 (DST + insn->off));
1552 CONT;
f5bffecd
AS
1553
1554 default_label:
5e581dad
DB
1555 /* If we ever reach this, we have a bug somewhere. Die hard here
1556 * instead of just returning 0; we could be somewhere in a subprog,
1557 * so execution could continue otherwise which we do /not/ want.
1558 *
1559 * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable().
1560 */
1561 pr_warn("BPF interpreter: unknown opcode %02x\n", insn->code);
1562 BUG_ON(1);
f5bffecd
AS
1563 return 0;
1564}
f696b8f4
AS
1565STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
1566
b870aa90
AS
1567#define PROG_NAME(stack_size) __bpf_prog_run##stack_size
1568#define DEFINE_BPF_PROG_RUN(stack_size) \
1569static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
1570{ \
1571 u64 stack[stack_size / sizeof(u64)]; \
144cd91c 1572 u64 regs[MAX_BPF_EXT_REG]; \
b870aa90
AS
1573\
1574 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
1575 ARG1 = (u64) (unsigned long) ctx; \
1576 return ___bpf_prog_run(regs, insn, stack); \
f696b8f4 1577}
f5bffecd 1578
1ea47e01
AS
1579#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
1580#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \
1581static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
1582 const struct bpf_insn *insn) \
1583{ \
1584 u64 stack[stack_size / sizeof(u64)]; \
144cd91c 1585 u64 regs[MAX_BPF_EXT_REG]; \
1ea47e01
AS
1586\
1587 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
1588 BPF_R1 = r1; \
1589 BPF_R2 = r2; \
1590 BPF_R3 = r3; \
1591 BPF_R4 = r4; \
1592 BPF_R5 = r5; \
1593 return ___bpf_prog_run(regs, insn, stack); \
1594}
1595
b870aa90
AS
1596#define EVAL1(FN, X) FN(X)
1597#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
1598#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
1599#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
1600#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
1601#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
1602
1603EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
1604EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
1605EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);
1606
1ea47e01
AS
1607EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192);
1608EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384);
1609EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512);
1610
b870aa90
AS
1611#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
1612
1613static unsigned int (*interpreters[])(const void *ctx,
1614 const struct bpf_insn *insn) = {
1615EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
1616EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
1617EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
1618};
1ea47e01
AS
1619#undef PROG_NAME_LIST
1620#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
1621static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
1622 const struct bpf_insn *insn) = {
1623EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
1624EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
1625EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
1626};
1627#undef PROG_NAME_LIST
1628
1629void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
1630{
1631 stack_depth = max_t(u32, stack_depth, 1);
1632 insn->off = (s16) insn->imm;
1633 insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] -
1634 __bpf_call_base_args;
1635 insn->code = BPF_JMP | BPF_CALL_ARGS;
1636}
b870aa90 1637
290af866 1638#else
fa9dd599
DB
1639static unsigned int __bpf_prog_ret0_warn(const void *ctx,
1640 const struct bpf_insn *insn)
290af866 1641{
fa9dd599
DB
1642 /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
1643 * is not working properly, so warn about it!
1644 */
1645 WARN_ON_ONCE(1);
290af866
AS
1646 return 0;
1647}
1648#endif
1649
3324b584
DB
1650bool bpf_prog_array_compatible(struct bpf_array *array,
1651 const struct bpf_prog *fp)
04fd61ab 1652{
9802d865
JB
1653 if (fp->kprobe_override)
1654 return false;
1655
3324b584
DB
1656 if (!array->owner_prog_type) {
1657 /* There's no owner yet where we could check for
1658 * compatibility.
1659 */
04fd61ab
AS
1660 array->owner_prog_type = fp->type;
1661 array->owner_jited = fp->jited;
3324b584
DB
1662
1663 return true;
04fd61ab 1664 }
3324b584
DB
1665
1666 return array->owner_prog_type == fp->type &&
1667 array->owner_jited == fp->jited;
04fd61ab
AS
1668}
1669
3324b584 1670static int bpf_check_tail_call(const struct bpf_prog *fp)
04fd61ab
AS
1671{
1672 struct bpf_prog_aux *aux = fp->aux;
1673 int i;
1674
1675 for (i = 0; i < aux->used_map_cnt; i++) {
3324b584 1676 struct bpf_map *map = aux->used_maps[i];
04fd61ab 1677 struct bpf_array *array;
04fd61ab 1678
04fd61ab
AS
1679 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
1680 continue;
3324b584 1681
04fd61ab
AS
1682 array = container_of(map, struct bpf_array, map);
1683 if (!bpf_prog_array_compatible(array, fp))
1684 return -EINVAL;
1685 }
1686
1687 return 0;
1688}
1689
9facc336
DB
1690static void bpf_prog_select_func(struct bpf_prog *fp)
1691{
1692#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1693 u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
1694
1695 fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
1696#else
1697 fp->bpf_func = __bpf_prog_ret0_warn;
1698#endif
1699}
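/* Illustrative example (editorial addition, hypothetical depth): with
 * CONFIG_BPF_JIT_ALWAYS_ON unset, a program whose aux->stack_depth is 100
 * rounds up to 128, so fp->bpf_func points at __bpf_prog_run128(), the
 * ___bpf_prog_run() wrapper that reserves a 128-byte BPF stack.
 */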
1700
f5bffecd 1701/**
3324b584 1702 * bpf_prog_select_runtime - select exec runtime for BPF program
7ae457c1 1703 * @fp: bpf_prog populated with internal BPF program
d1c55ab5 1704 * @err: pointer to error variable
f5bffecd 1705 *
3324b584
DB
1706 * Try to JIT eBPF program, if JIT is not available, use interpreter.
1707 * The BPF program will be executed via BPF_PROG_RUN() macro.
f5bffecd 1708 */
d1c55ab5 1709struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
f5bffecd 1710{
9facc336
DB
1711 /* In case of BPF to BPF calls, verifier did all the prep
1712 * work with regards to JITing, etc.
1713 */
1714 if (fp->bpf_func)
1715 goto finalize;
8007e40a 1716
9facc336 1717 bpf_prog_select_func(fp);
f5bffecd 1718
d1c55ab5
DB
1719 /* eBPF JITs can rewrite the program in case constant
1720 * blinding is active. However, in case of error during
1721 * blinding, bpf_int_jit_compile() must always return a
1722 * valid program, which in this case would simply not
1723 * be JITed, but falls back to the interpreter.
1724 */
ab3f0063 1725 if (!bpf_prog_is_dev_bound(fp->aux)) {
c454a46b
MKL
1726 *err = bpf_prog_alloc_jited_linfo(fp);
1727 if (*err)
1728 return fp;
1729
ab3f0063 1730 fp = bpf_int_jit_compile(fp);
290af866 1731 if (!fp->jited) {
c454a46b
MKL
1732 bpf_prog_free_jited_linfo(fp);
1733#ifdef CONFIG_BPF_JIT_ALWAYS_ON
290af866
AS
1734 *err = -ENOTSUPP;
1735 return fp;
290af866 1736#endif
c454a46b
MKL
1737 } else {
1738 bpf_prog_free_unused_jited_linfo(fp);
1739 }
ab3f0063
JK
1740 } else {
1741 *err = bpf_prog_offload_compile(fp);
1742 if (*err)
1743 return fp;
1744 }
9facc336
DB
1745
1746finalize:
60a3b225 1747 bpf_prog_lock_ro(fp);
04fd61ab 1748
3324b584
DB
1749 /* The tail call compatibility check can only be done at
1750 * this late stage as we need to determine, if we deal
1751 * with JITed or non JITed program concatenations and not
1752 * all eBPF JITs might immediately support all features.
1753 */
d1c55ab5 1754 *err = bpf_check_tail_call(fp);
85782e03 1755
d1c55ab5 1756 return fp;
f5bffecd 1757}
7ae457c1 1758EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
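/* Usage sketch (editorial addition): the program load path calls this once
 * the verifier has finished, roughly
 *
 *	fp = bpf_prog_select_runtime(fp, &err);
 *	if (err < 0)
 *		goto free_used_maps;
 *
 * after which fp->bpf_func is either the JITed image or one of the
 * interpreter variants and the program image has been locked read-only.
 */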
f5bffecd 1759
e87c6bc3
YS
1760static unsigned int __bpf_prog_ret1(const void *ctx,
1761 const struct bpf_insn *insn)
1762{
1763 return 1;
1764}
1765
1766static struct bpf_prog_dummy {
1767 struct bpf_prog prog;
1768} dummy_bpf_prog = {
1769 .prog = {
1770 .bpf_func = __bpf_prog_ret1,
1771 },
1772};
1773
324bda9e
AS
 1774/* To avoid allocating an empty bpf_prog_array for cgroups that
 1775 * don't have a bpf program attached, use one global 'empty_prog_array'.
 1776 * It will not be modified by the caller of bpf_prog_array_alloc()
 1777 * (since the caller requested prog_cnt == 0);
 1778 * that pointer should be 'freed' by bpf_prog_array_free().
1779 */
1780static struct {
1781 struct bpf_prog_array hdr;
1782 struct bpf_prog *null_prog;
1783} empty_prog_array = {
1784 .null_prog = NULL,
1785};
1786
d29ab6e1 1787struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
324bda9e
AS
1788{
1789 if (prog_cnt)
1790 return kzalloc(sizeof(struct bpf_prog_array) +
394e40a2
RG
1791 sizeof(struct bpf_prog_array_item) *
1792 (prog_cnt + 1),
324bda9e
AS
1793 flags);
1794
1795 return &empty_prog_array.hdr;
1796}
1797
1798void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
1799{
1800 if (!progs ||
1801 progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
1802 return;
1803 kfree_rcu(progs, rcu);
1804}
1805
394e40a2 1806int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
468e2f64 1807{
394e40a2 1808 struct bpf_prog_array_item *item;
468e2f64
AS
1809 u32 cnt = 0;
1810
1811 rcu_read_lock();
394e40a2
RG
1812 item = rcu_dereference(array)->items;
1813 for (; item->prog; item++)
1814 if (item->prog != &dummy_bpf_prog.prog)
c8c088ba 1815 cnt++;
468e2f64
AS
1816 rcu_read_unlock();
1817 return cnt;
1818}
1819
394e40a2
RG
1820
1821static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
3a38bb98
YS
1822 u32 *prog_ids,
1823 u32 request_cnt)
1824{
394e40a2 1825 struct bpf_prog_array_item *item;
3a38bb98
YS
1826 int i = 0;
1827
965931e3 1828 item = rcu_dereference_check(array, 1)->items;
394e40a2
RG
1829 for (; item->prog; item++) {
1830 if (item->prog == &dummy_bpf_prog.prog)
3a38bb98 1831 continue;
394e40a2 1832 prog_ids[i] = item->prog->aux->id;
3a38bb98 1833 if (++i == request_cnt) {
394e40a2 1834 item++;
3a38bb98
YS
1835 break;
1836 }
1837 }
1838
394e40a2 1839 return !!(item->prog);
3a38bb98
YS
1840}
1841
394e40a2 1842int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
468e2f64
AS
1843 __u32 __user *prog_ids, u32 cnt)
1844{
0911287c 1845 unsigned long err = 0;
0911287c 1846 bool nospc;
3a38bb98 1847 u32 *ids;
0911287c
AS
1848
1849 /* users of this function are doing:
1850 * cnt = bpf_prog_array_length();
1851 * if (cnt > 0)
1852 * bpf_prog_array_copy_to_user(..., cnt);
1853 * so the kcalloc below doesn't need an extra cnt > 0 check, but
1854 * bpf_prog_array_length() releases the rcu lock and the prog
1855 * array could have been swapped with an empty or larger array,
1856 * so always copy 'cnt' prog_ids to the user.
1857 * In a rare race the user will see zero prog_ids.
1858 */
9c481b90 1859 ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
0911287c
AS
1860 if (!ids)
1861 return -ENOMEM;
468e2f64 1862 rcu_read_lock();
394e40a2 1863 nospc = bpf_prog_array_copy_core(array, ids, cnt);
468e2f64 1864 rcu_read_unlock();
0911287c
AS
1865 err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
1866 kfree(ids);
1867 if (err)
1868 return -EFAULT;
1869 if (nospc)
468e2f64
AS
1870 return -ENOSPC;
1871 return 0;
1872}
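/* Sketch of the caller pattern the comment above relies on (a hypothetical
 * helper, mirroring query-style users of this API):
 */
static int example_query_prog_ids(struct bpf_prog_array __rcu *array,
				  __u32 __user *uprog_ids)
{
	int cnt = bpf_prog_array_length(array);

	if (cnt <= 0)
		return 0;
	/* may return -ENOSPC if the array grew after the length check */
	return bpf_prog_array_copy_to_user(array, uprog_ids, cnt);
}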
1873
394e40a2 1874void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
e87c6bc3
YS
1875 struct bpf_prog *old_prog)
1876{
394e40a2 1877 struct bpf_prog_array_item *item = array->items;
e87c6bc3 1878
394e40a2
RG
1879 for (; item->prog; item++)
1880 if (item->prog == old_prog) {
1881 WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
e87c6bc3
YS
1882 break;
1883 }
1884}
1885
1886int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
1887 struct bpf_prog *exclude_prog,
1888 struct bpf_prog *include_prog,
1889 struct bpf_prog_array **new_array)
1890{
1891 int new_prog_cnt, carry_prog_cnt = 0;
394e40a2 1892 struct bpf_prog_array_item *existing;
e87c6bc3 1893 struct bpf_prog_array *array;
170a7e3e 1894 bool found_exclude = false;
e87c6bc3
YS
1895 int new_prog_idx = 0;
1896
1897 /* Figure out how many existing progs we need to carry over to
1898 * the new array.
1899 */
1900 if (old_array) {
394e40a2
RG
1901 existing = old_array->items;
1902 for (; existing->prog; existing++) {
1903 if (existing->prog == exclude_prog) {
170a7e3e
SY
1904 found_exclude = true;
1905 continue;
1906 }
394e40a2 1907 if (existing->prog != &dummy_bpf_prog.prog)
e87c6bc3 1908 carry_prog_cnt++;
394e40a2 1909 if (existing->prog == include_prog)
e87c6bc3
YS
1910 return -EEXIST;
1911 }
1912 }
1913
170a7e3e
SY
1914 if (exclude_prog && !found_exclude)
1915 return -ENOENT;
1916
e87c6bc3
YS
1917 /* How many progs (not NULL) will be in the new array? */
1918 new_prog_cnt = carry_prog_cnt;
1919 if (include_prog)
1920 new_prog_cnt += 1;
1921
1922 /* Do we have any prog (not NULL) in the new array? */
1923 if (!new_prog_cnt) {
1924 *new_array = NULL;
1925 return 0;
1926 }
1927
1928 /* +1 as the end of prog_array is marked with NULL */
1929 array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
1930 if (!array)
1931 return -ENOMEM;
1932
1933 /* Fill in the new prog array */
1934 if (carry_prog_cnt) {
394e40a2
RG
1935 existing = old_array->items;
1936 for (; existing->prog; existing++)
1937 if (existing->prog != exclude_prog &&
1938 existing->prog != &dummy_bpf_prog.prog) {
1939 array->items[new_prog_idx++].prog =
1940 existing->prog;
1941 }
e87c6bc3
YS
1942 }
1943 if (include_prog)
394e40a2
RG
1944 array->items[new_prog_idx++].prog = include_prog;
1945 array->items[new_prog_idx].prog = NULL;
e87c6bc3
YS
1946 *new_array = array;
1947 return 0;
1948}
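/* Sketch (a hypothetical, simplified updater; real users are expected to
 * serialize updates, e.g. under a mutex): build a new array that includes
 * new_prog, publish it under RCU and free the old one. Passing the program
 * as exclude_prog instead would implement detach.
 */
static int example_attach_prog(struct bpf_prog_array __rcu **progs,
			       struct bpf_prog *new_prog)
{
	struct bpf_prog_array __rcu *old_array = *progs;
	struct bpf_prog_array *new_array;
	int ret;

	ret = bpf_prog_array_copy(old_array, NULL, new_prog, &new_array);
	if (ret < 0)
		return ret;

	rcu_assign_pointer(*progs, new_array);
	bpf_prog_array_free(old_array);
	return 0;
}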
1949
f371b304 1950int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
3a38bb98
YS
1951 u32 *prog_ids, u32 request_cnt,
1952 u32 *prog_cnt)
f371b304
YS
1953{
1954 u32 cnt = 0;
1955
1956 if (array)
1957 cnt = bpf_prog_array_length(array);
1958
3a38bb98 1959 *prog_cnt = cnt;
f371b304
YS
1960
1961 /* return early if user requested only program count or nothing to copy */
1962 if (!request_cnt || !cnt)
1963 return 0;
1964
3a38bb98 1965 /* this function is called under trace/bpf_trace.c: bpf_event_mutex */
394e40a2 1966 return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
3a38bb98 1967 : 0;
f371b304
YS
1968}
1969
60a3b225
DB
1970static void bpf_prog_free_deferred(struct work_struct *work)
1971{
09756af4 1972 struct bpf_prog_aux *aux;
1c2a088a 1973 int i;
60a3b225 1974
09756af4 1975 aux = container_of(work, struct bpf_prog_aux, work);
ab3f0063
JK
1976 if (bpf_prog_is_dev_bound(aux))
1977 bpf_prog_offload_destroy(aux->prog);
c195651e
YS
1978#ifdef CONFIG_PERF_EVENTS
1979 if (aux->prog->has_callchain_buf)
1980 put_callchain_buffers();
1981#endif
1c2a088a
AS
1982 for (i = 0; i < aux->func_cnt; i++)
1983 bpf_jit_free(aux->func[i]);
1984 if (aux->func_cnt) {
1985 kfree(aux->func);
1986 bpf_prog_unlock_free(aux->prog);
1987 } else {
1988 bpf_jit_free(aux->prog);
1989 }
60a3b225
DB
1990}
1991
1992/* Free internal BPF program */
7ae457c1 1993void bpf_prog_free(struct bpf_prog *fp)
f5bffecd 1994{
09756af4 1995 struct bpf_prog_aux *aux = fp->aux;
60a3b225 1996
09756af4 1997 INIT_WORK(&aux->work, bpf_prog_free_deferred);
09756af4 1998 schedule_work(&aux->work);
f5bffecd 1999}
7ae457c1 2000EXPORT_SYMBOL_GPL(bpf_prog_free);
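/* Note on the deferral above: the actual teardown runs from a workqueue
 * because it may sleep (offload destroy, put_callchain_buffers() and freeing
 * of JIT images), whereas bpf_prog_free() itself can be reached from
 * contexts that cannot, e.g. RCU callbacks.
 */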
f89b7755 2001
3ad00405
DB
2002 /* RNG for unprivileged user space with separated state from prandom_u32(). */
2003static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);
2004
2005void bpf_user_rnd_init_once(void)
2006{
2007 prandom_init_once(&bpf_user_rnd_state);
2008}
2009
f3694e00 2010BPF_CALL_0(bpf_user_rnd_u32)
3ad00405
DB
2011{
2012 /* Should someone ever have the rather unwise idea to use some
2013 * of the registers passed into this function, then note that
2014 * this function is called from native eBPF and classic-to-eBPF
2015 * transformations. Register assignments from both sides are
2016 * different, e.g. classic always sets fn(ctx, A, X) here.
2017 */
2018 struct rnd_state *state;
2019 u32 res;
2020
2021 state = &get_cpu_var(bpf_user_rnd_state);
2022 res = prandom_u32_state(state);
b761fe22 2023 put_cpu_var(bpf_user_rnd_state);
3ad00405
DB
2024
2025 return res;
2026}
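/* Sketch (an assumption: mirrors how the generic helper protos wire this up
 * elsewhere, shown only for illustration): a helper proto points .func at
 * bpf_user_rnd_u32, and map/program setup calls bpf_user_rnd_init_once()
 * so the per-CPU state is seeded before first use.
 */
static const struct bpf_func_proto example_get_prandom_u32_proto = {
	.func		= bpf_user_rnd_u32,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};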
2027
3ba67dab
DB
2028/* Weak definitions of helper functions in case we don't have bpf syscall. */
2029const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
2030const struct bpf_func_proto bpf_map_update_elem_proto __weak;
2031const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
f1a2e44a
MV
2032const struct bpf_func_proto bpf_map_push_elem_proto __weak;
2033const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
2034const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
d83525ca
AS
2035const struct bpf_func_proto bpf_spin_lock_proto __weak;
2036const struct bpf_func_proto bpf_spin_unlock_proto __weak;
3ba67dab 2037
03e69b50 2038const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
c04167ce 2039const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
2d0e30c3 2040const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
17ca8cbf 2041const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
bd570ff9 2042
ffeedafb
AS
2043const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
2044const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
2045const struct bpf_func_proto bpf_get_current_comm_proto __weak;
bf6fa2c8 2046const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
cd339431 2047const struct bpf_func_proto bpf_get_local_storage_proto __weak;
bd570ff9 2048
0756ea3e
AS
2049const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
2050{
2051 return NULL;
2052}
03e69b50 2053
555c8a86
DB
2054u64 __weak
2055bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
2056 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
bd570ff9 2057{
555c8a86 2058 return -ENOTSUPP;
bd570ff9 2059}
6cb5fb38 2060EXPORT_SYMBOL_GPL(bpf_event_output);
bd570ff9 2061
3324b584
DB
2062/* Always built-in helper functions. */
2063const struct bpf_func_proto bpf_tail_call_proto = {
2064 .func = NULL,
2065 .gpl_only = false,
2066 .ret_type = RET_VOID,
2067 .arg1_type = ARG_PTR_TO_CTX,
2068 .arg2_type = ARG_CONST_MAP_PTR,
2069 .arg3_type = ARG_ANYTHING,
2070};
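/* .func is NULL above because tail calls are not dispatched like ordinary
 * helpers: the interpreter and the JITs emit the jump inline. From a BPF
 * program's point of view, the convention described by the arg types is
 * (illustration, assuming a BPF_MAP_TYPE_PROG_ARRAY map called jmp_table):
 *
 *	bpf_tail_call(ctx, &jmp_table, index);
 *	// control only continues here if the tail call failed
 *	// (index out of range or empty slot)
 *
 * and on success the callee replaces the caller, hence RET_VOID.
 */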
2071
9383191d
DB
2072/* Stub for JITs that only support cBPF. eBPF programs are interpreted.
2073 * It is encouraged to implement bpf_int_jit_compile() instead, so that
2074 * eBPF and implicitly also cBPF can get JITed!
2075 */
d1c55ab5 2076struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
3324b584 2077{
d1c55ab5 2078 return prog;
3324b584
DB
2079}
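/* Sketch of the shape of an arch override (illustration only, heavily
 * simplified and kept out of the build; arch_emit_code() is hypothetical,
 * real JITs live under arch/<arch>/net/): on success the JIT points
 * bpf_func at the generated image and marks the prog jited, on failure it
 * returns the prog untouched so the interpreter is used.
 */
#if 0
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	void *image = arch_emit_code(prog);	/* hypothetical emitter */

	if (!image)
		return prog;	/* fall back to the interpreter */

	prog->bpf_func = (void *)image;
	prog->jited = 1;
	return prog;
}
#endif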
2080
9383191d
DB
2081/* Stub for JITs that support eBPF. All cBPF code gets transformed into
2082 * eBPF by the kernel and is later compiled by bpf_int_jit_compile().
2083 */
2084void __weak bpf_jit_compile(struct bpf_prog *prog)
2085{
2086}
2087
17bedab2 2088bool __weak bpf_helper_changes_pkt_data(void *func)
969bf05e
AS
2089{
2090 return false;
2091}
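/* The networking build overrides this to report which helpers may move
 * packet data, so the verifier can invalidate cached packet pointers after
 * such a call; without an override, no helper changes packet data.
 */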
2092
f89b7755
AS
2093/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
2094 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
2095 */
2096int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
2097 int len)
2098{
2099 return -EFAULT;
2100}
a67edbf4 2101
492ecee8
AS
2102DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
2103EXPORT_SYMBOL(bpf_stats_enabled_key);
2104int sysctl_bpf_stats_enabled __read_mostly;
2105
a67edbf4
DB
2106/* All definitions of tracepoints related to BPF. */
2107#define CREATE_TRACE_POINTS
2108#include <linux/bpf_trace.h>
2109
2110EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);