// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly =
	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	int res;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (actual_size <= expected_size)
		return 0;

	if (uaddr.is_kernel)
		res = memchr_inv(uaddr.kernel + expected_size, 0,
				 actual_size - expected_size) == NULL;
	else
		res = check_zeroed_user(uaddr.user + expected_size,
					actual_size - expected_size);
	if (res < 0)
		return res;
	return res ? 0 : -E2BIG;
}

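/* Illustrative note (not part of the original file): the check above is what
 * lets 'union bpf_attr' grow without breaking old kernels. For example, if
 * userspace was built against a newer UAPI where sizeof(attr) is 128 bytes
 * but this kernel only knows the first 112, the call succeeds as long as
 * bytes 112..127 are all zero and fails with -E2BIG otherwise. The sizes
 * used here are made up for the example.
 */
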
const struct bpf_map_ops bpf_map_offload_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
	.map_check_btf = map_check_no_btf,
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	const struct bpf_map_ops *ops;
	u32 type = attr->map_type;
	struct bpf_map *map;
	int err;

	if (type >= ARRAY_SIZE(bpf_map_types))
		return ERR_PTR(-EINVAL);
	type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
	ops = bpf_map_types[type];
	if (!ops)
		return ERR_PTR(-EINVAL);

	if (ops->map_alloc_check) {
		err = ops->map_alloc_check(attr);
		if (err)
			return ERR_PTR(err);
	}
	if (attr->map_ifindex)
		ops = &bpf_map_offload_ops;
	map = ops->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = ops;
	map->map_type = type;
	return map;
}

static void bpf_map_write_active_inc(struct bpf_map *map)
{
	atomic64_inc(&map->writecnt);
}

static void bpf_map_write_active_dec(struct bpf_map *map)
{
	atomic64_dec(&map->writecnt);
}

bool bpf_map_write_active(const struct bpf_map *map)
{
	return atomic64_read(&map->writecnt) != 0;
}

static u32 bpf_map_value_size(const struct bpf_map *map)
{
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		return round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		return sizeof(u32);
	else
		return map->value_size;
}

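/* Illustrative note (not part of the original file): for per-CPU maps the
 * syscall-visible value is one 8-byte-aligned slot per possible CPU. E.g. a
 * PERCPU_ARRAY with value_size == 12 on a machine with 4 possible CPUs is
 * copied to/from userspace as round_up(12, 8) * 4 == 64 bytes. The CPU
 * count here is just an example.
 */
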
static void maybe_wait_bpf_programs(struct bpf_map *map)
{
	/* Wait for any running BPF programs to complete so that
	 * userspace, when we return to it, knows that all programs
	 * that could be running use the new map value.
	 */
	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
		synchronize_rcu();
}

static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
				void *value, __u64 flags)
{
	int err;

	/* Need to create a kthread, thus must support schedule */
	if (bpf_map_is_dev_bound(map)) {
		return bpf_map_offload_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		return map->ops->map_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
		return sock_map_update_elem_sys(map, key, value, flags);
	} else if (IS_FD_PROG_ARRAY(map)) {
		return bpf_fd_array_map_update_elem(map, f.file, key, value,
						    flags);
	}

	bpf_disable_instrumentation();
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_update(map, key, value,
						       flags);
	} else if (IS_FD_ARRAY(map)) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		/* rcu_read_lock() is not needed */
		err = bpf_fd_reuseport_array_update_elem(map, key, value,
							 flags);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK) {
		err = map->ops->map_push_elem(map, value, flags);
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, flags);
		rcu_read_unlock();
	}
	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);

	return err;
}

static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
			      __u64 flags)
{
	void *ptr;
	int err;

	if (bpf_map_is_dev_bound(map))
		return bpf_map_offload_lookup_elem(map, key, value);

	bpf_disable_instrumentation();
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK) {
		err = map->ops->map_peek_elem(map, value);
	} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		/* struct_ops map requires directly updating "value" */
		err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		if (map->ops->map_lookup_elem_sys_only)
			ptr = map->ops->map_lookup_elem_sys_only(map, key);
		else
			ptr = map->ops->map_lookup_elem(map, key);
		if (IS_ERR(ptr)) {
			err = PTR_ERR(ptr);
		} else if (!ptr) {
			err = -ENOENT;
		} else {
			err = 0;
			if (flags & BPF_F_LOCK)
				/* lock 'ptr' and copy everything but lock */
				copy_map_value_locked(map, value, ptr, true);
			else
				copy_map_value(map, value, ptr);
			/* mask lock and timer, since value wasn't zero inited */
			check_and_init_map_value(map, value);
		}
		rcu_read_unlock();
	}

	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);

	return err;
}

/* Please do not use this function outside of the map creation path
 * (e.g. in the map update path) without taking care of setting the active
 * memory cgroup (see bpf_map_kmalloc_node() for an example).
 */
static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
	/* We really just want to fail instead of triggering OOM killer
	 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
	 * which is used for lower order allocation requests.
	 *
	 * It has been observed that higher order allocation requests done by
	 * vmalloc with __GFP_NORETRY being set might fail due to not trying
	 * to reclaim memory from the page cache, thus we set
	 * __GFP_RETRY_MAYFAIL to avoid such situations.
	 */

	const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
	unsigned int flags = 0;
	unsigned long align = 1;
	void *area;

	if (size >= SIZE_MAX)
		return NULL;

	/* kmalloc()'ed memory can't be mmap()'ed */
	if (mmapable) {
		BUG_ON(!PAGE_ALIGNED(size));
		align = SHMLBA;
		flags = VM_USERMAP;
	} else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
				    numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
			gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
			flags, numa_node, __builtin_return_address(0));
}

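/* Illustrative note (not part of the original file): with the common
 * PAGE_ALLOC_COSTLY_ORDER of 3 and 4 KiB pages, requests up to 32 KiB first
 * try kmalloc_node() with __GFP_NORETRY and quietly fall back to vmalloc on
 * failure; anything larger (or anything that must be mmap()'able) goes
 * straight to __vmalloc_node_range(). The 32 KiB figure depends on the
 * architecture's page size and is only an example.
 */
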
void *bpf_map_area_alloc(u64 size, int numa_node)
{
	return __bpf_map_area_alloc(size, numa_node, false);
}

void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
{
	return __bpf_map_area_alloc(size, numa_node, true);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

static u32 bpf_map_flags_retain_permanent(u32 flags)
{
	/* Some map creation flags are not tied to the map object but
	 * rather to the map fd instead, so they have no meaning upon
	 * map object inspection since multiple file descriptors with
	 * different (access) properties can exist here. Thus, given
	 * this has zero meaning for the map itself, let's clear these
	 * from here.
	 */
	return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
}

void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
	map->map_type = attr->map_type;
	map->key_size = attr->key_size;
	map->value_size = attr->value_size;
	map->max_entries = attr->max_entries;
	map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
	map->numa_node = bpf_map_attr_numa_node(attr);
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);
	idr_preload_end();

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

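/* Illustrative note (not part of the original file): the idr_preload() +
 * GFP_ATOMIC pairing above is the standard IDR pattern for allocating an ID
 * while holding a spinlock - memory is preallocated in a sleepable context
 * so the allocation under map_idr_lock cannot need to block.
 * idr_alloc_cyclic() hands out IDs in [1, INT_MAX), moving forward on each
 * call so recently freed IDs are not immediately reused.
 */
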
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	/* Offloaded maps are removed from the IDR store when their device
	 * disappears - even if someone holds an fd to them they are unusable,
	 * the memory is gone, all ops will fail; they are simply waiting for
	 * refcnt to drop to be freed.
	 */
	if (!map->id)
		return;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);
	map->id = 0;

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

#ifdef CONFIG_MEMCG_KMEM
static void bpf_map_save_memcg(struct bpf_map *map)
{
	map->memcg = get_mem_cgroup_from_mm(current->mm);
}

static void bpf_map_release_memcg(struct bpf_map *map)
{
	mem_cgroup_put(map->memcg);
}

void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
			   int node)
{
	struct mem_cgroup *old_memcg;
	void *ptr;

	old_memcg = set_active_memcg(map->memcg);
	ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
	set_active_memcg(old_memcg);

	return ptr;
}

void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
{
	struct mem_cgroup *old_memcg;
	void *ptr;

	old_memcg = set_active_memcg(map->memcg);
	ptr = kzalloc(size, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);

	return ptr;
}

void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
				    size_t align, gfp_t flags)
{
	struct mem_cgroup *old_memcg;
	void __percpu *ptr;

	old_memcg = set_active_memcg(map->memcg);
	ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);

	return ptr;
}

#else
static void bpf_map_save_memcg(struct bpf_map *map)
{
}

static void bpf_map_release_memcg(struct bpf_map *map)
{
}
#endif

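/* Illustrative note (not part of the original file): the wrappers above all
 * follow one pattern - temporarily install the memcg captured at map
 * creation time, allocate with __GFP_ACCOUNT, then restore the old memcg:
 *
 *	old_memcg = set_active_memcg(map->memcg);
 *	ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
 *	set_active_memcg(old_memcg);
 *
 * That way memory allocated later (e.g. from a BPF program updating a hash
 * map) is charged to the cgroup that created the map rather than to
 * whichever task happens to be running.
 */
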
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	security_bpf_map_free(map);
	bpf_map_release_memcg(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic64_dec_and_test(&map->usercnt)) {
		if (map->ops->map_release_uref)
			map->ops->map_release_uref(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic64_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		btf_put(map->btf);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}
EXPORT_SYMBOL_GPL(bpf_map_put);

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

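/* Illustrative note (not part of the original file): a map carries two
 * counters. 'refcnt' keeps the object itself alive and is held by fds,
 * programs and kernel-internal users; when it hits zero the map is freed
 * via the workqueue because ops->map_free() may sleep. 'usercnt' tracks
 * user-visible references (fds, pinned paths); when it hits zero,
 * ops->map_release_uref() lets e.g. prog-array maps drop their program
 * references even though kernel users may still hold 'refcnt'.
 */
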
static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
{
	fmode_t mode = f.file->f_mode;

	/* Our file permissions may have been overridden by global
	 * map permissions facing syscall side.
	 */
	if (READ_ONCE(map->frozen))
		mode &= ~FMODE_CAN_WRITE;
	return mode;
}

#ifdef CONFIG_PROC_FS
/* Provides an approximation of the map's memory footprint.
 * Used only to provide backward compatibility and display
 * a reasonable "memlock" info.
 */
static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
{
	unsigned long size;

	size = round_up(map->key_size + bpf_map_value_size(map), 8);

	return round_up(map->max_entries * size, PAGE_SIZE);
}

static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 type = 0, jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		spin_lock(&array->aux->owner.lock);
		type = array->aux->owner.type;
		jited = array->aux->owner.jited;
		spin_unlock(&array->aux->owner.lock);
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%lu\n"
		   "map_id:\t%u\n"
		   "frozen:\t%u\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   bpf_map_memory_footprint(map),
		   map->id,
		   READ_ONCE(map->frozen));
	if (type) {
		seq_printf(m, "owner_prog_type:\t%u\n", type);
		seq_printf(m, "owner_jited:\t%u\n", jited);
	}
}
#endif

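/* Illustrative note (not part of the original file): the fdinfo above is
 * what "cat /proc/<pid>/fdinfo/<map-fd>" shows. For an example hash map
 * (values are made up, not real output):
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	1024
 *	map_flags:	0x0
 *	memlock:	16384
 *	map_id:		42
 *	frozen:		0
 */
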
static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

/* called for any extra memory-mapped regions (except initial) */
static void bpf_map_mmap_open(struct vm_area_struct *vma)
{
	struct bpf_map *map = vma->vm_file->private_data;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_inc(map);
}

/* called for all unmapped memory regions (including initial) */
static void bpf_map_mmap_close(struct vm_area_struct *vma)
{
	struct bpf_map *map = vma->vm_file->private_data;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_dec(map);
}

static const struct vm_operations_struct bpf_map_default_vmops = {
	.open		= bpf_map_mmap_open,
	.close		= bpf_map_mmap_close,
};

static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct bpf_map *map = filp->private_data;
	int err;

	if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
	    map_value_has_timer(map))
		return -ENOTSUPP;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	mutex_lock(&map->freeze_mutex);

	if (vma->vm_flags & VM_WRITE) {
		if (map->frozen) {
			err = -EPERM;
			goto out;
		}
		/* map is meant to be read-only, so do not allow mapping as
		 * writable, because it's possible to leak a writable page
		 * reference and allow user-space to still modify it after
		 * freezing, while verifier will assume contents do not change
		 */
		if (map->map_flags & BPF_F_RDONLY_PROG) {
			err = -EACCES;
			goto out;
		}
	}

	/* set default open/close callbacks */
	vma->vm_ops = &bpf_map_default_vmops;
	vma->vm_private_data = map;
	vma->vm_flags &= ~VM_MAYEXEC;
	if (!(vma->vm_flags & VM_WRITE))
		/* disallow re-mapping with PROT_WRITE */
		vma->vm_flags &= ~VM_MAYWRITE;

	err = map->ops->map_mmap(map, vma);
	if (err)
		goto out;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_inc(map);
out:
	mutex_unlock(&map->freeze_mutex);
	return err;
}

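/* Illustrative userspace sketch (not part of the original file): an array
 * map created with BPF_F_MMAPABLE can be mapped and updated without further
 * syscalls. All names are standard UAPI; error handling is elided:
 *
 *	#include <sys/mman.h>
 *	#include <linux/bpf.h>
 *
 *	// map_fd refers to a BPF_MAP_TYPE_ARRAY created with
 *	// map_flags = BPF_F_MMAPABLE, value_size = 8, max_entries = 512
 *	__u64 *vals = mmap(NULL, 512 * sizeof(__u64),
 *			   PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
 *	vals[7] = 42;	// visible to BPF programs immediately
 */
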
static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_poll)
		return map->ops->map_poll(map, filp, pts);

	return EPOLLERR;
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
	.mmap		= bpf_map_mmap,
	.poll		= bpf_map_poll,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields in 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

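/* Illustrative note (not part of the original file): each command defines a
 * CMD_LAST_FIELD marker, and CHECK_ATTR(CMD) scans everything in the union
 * past that field for non-zero bytes. E.g. with
 * BPF_MAP_LOOKUP_ELEM_LAST_FIELD defined as 'flags', the expansion is
 * roughly:
 *
 *	memchr_inv((void *)&attr->flags + sizeof(attr->flags), 0,
 *		   sizeof(*attr) - offsetof(union bpf_attr, flags) -
 *		   sizeof(attr->flags)) != NULL
 *
 * i.e. "true" (reject) if any byte after the last field used by this
 * command is set.
 */
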
/* dst and src must have at least "size" number of bytes.
 * Return strlen on success and < 0 on error.
 */
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
	const char *end = src + size;
	const char *orig_src = src;

	memset(dst, 0, size);
	/* Copy all isalnum(), '_' and '.' chars. */
	while (src < end && *src) {
		if (!isalnum(*src) &&
		    *src != '_' && *src != '.')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in "size" number of bytes */
	if (src == end)
		return -EINVAL;

	return src - orig_src;
}

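/* Illustrative note (not part of the original file): with size == 16,
 * "my_map.v2" is copied and the call returns 9, while "bad name" fails
 * with -EINVAL (space is not an allowed character) and a buffer of 16
 * non-NUL bytes fails with -EINVAL because no terminator fits.
 */
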
int map_check_no_btf(const struct bpf_map *map,
		     const struct btf *btf,
		     const struct btf_type *key_type,
		     const struct btf_type *value_type)
{
	return -ENOTSUPP;
}

static int map_check_btf(struct bpf_map *map, const struct btf *btf,
			 u32 btf_key_id, u32 btf_value_id)
{
	const struct btf_type *key_type, *value_type;
	u32 key_size, value_size;
	int ret = 0;

	/* Some maps allow key to be unspecified. */
	if (btf_key_id) {
		key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
		if (!key_type || key_size != map->key_size)
			return -EINVAL;
	} else {
		key_type = btf_type_by_id(btf, 0);
		if (!map->ops->map_check_btf)
			return -EINVAL;
	}

	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
	if (!value_type || value_size != map->value_size)
		return -EINVAL;

	map->spin_lock_off = btf_find_spin_lock(btf, value_type);

	if (map_value_has_spin_lock(map)) {
		if (map->map_flags & BPF_F_RDONLY_PROG)
			return -EACCES;
		if (map->map_type != BPF_MAP_TYPE_HASH &&
		    map->map_type != BPF_MAP_TYPE_ARRAY &&
		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
			return -ENOTSUPP;
		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
		    map->value_size) {
			WARN_ONCE(1,
				  "verifier bug spin_lock_off %d value_size %d\n",
				  map->spin_lock_off, map->value_size);
			return -EFAULT;
		}
	}

	map->timer_off = btf_find_timer(btf, value_type);
	if (map_value_has_timer(map)) {
		if (map->map_flags & BPF_F_RDONLY_PROG)
			return -EACCES;
		if (map->map_type != BPF_MAP_TYPE_HASH &&
		    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
		    map->map_type != BPF_MAP_TYPE_ARRAY)
			return -EOPNOTSUPP;
	}

	if (map->ops->map_check_btf)
		ret = map->ops->map_check_btf(map, btf, key_type, value_type);

	return ret;
}

#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	if (attr->btf_vmlinux_value_type_id) {
		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
		    attr->btf_key_type_id || attr->btf_value_type_id)
			return -EINVAL;
	} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
		return -EINVAL;
	}

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name,
			       sizeof(attr->map_name));
	if (err < 0)
		goto free_map;

	atomic64_set(&map->refcnt, 1);
	atomic64_set(&map->usercnt, 1);
	mutex_init(&map->freeze_mutex);

	map->spin_lock_off = -EINVAL;
	map->timer_off = -EINVAL;
	if (attr->btf_key_type_id || attr->btf_value_type_id ||
	    /* Even if the map's value is a kernel struct,
	     * the bpf_prog.o must have BTF to begin with
	     * to figure out the corresponding kernel
	     * counterpart. Thus, attr->btf_fd has
	     * to be valid also.
	     */
	    attr->btf_vmlinux_value_type_id) {
		struct btf *btf;

		btf = btf_get_by_fd(attr->btf_fd);
		if (IS_ERR(btf)) {
			err = PTR_ERR(btf);
			goto free_map;
		}
		if (btf_is_kernel(btf)) {
			btf_put(btf);
			err = -EACCES;
			goto free_map;
		}
		map->btf = btf;

		if (attr->btf_value_type_id) {
			err = map_check_btf(map, btf, attr->btf_key_type_id,
					    attr->btf_value_type_id);
			if (err)
				goto free_map;
		}

		map->btf_key_type_id = attr->btf_key_type_id;
		map->btf_value_type_id = attr->btf_value_type_id;
		map->btf_vmlinux_value_type_id =
			attr->btf_vmlinux_value_type_id;
	}

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map_sec;

	bpf_map_save_memcg(map);

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put_with_uref() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put_with_uref(map);
		return err;
	}

	return err;

free_map_sec:
	security_bpf_map_free(map);
free_map:
	btf_put(map->btf);
	map->ops->map_free(map);
	return err;
}

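/* Illustrative userspace sketch (not part of the original file): the
 * minimal path into map_create() via the raw syscall. libbpf's
 * bpf_map_create() wraps exactly this; names below are standard UAPI:
 *
 *	#include <unistd.h>
 *	#include <string.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	union bpf_attr attr;
 *	memset(&attr, 0, sizeof(attr));	// unused tail must be zero (CHECK_ATTR)
 *	attr.map_type = BPF_MAP_TYPE_HASH;
 *	attr.key_size = 4;
 *	attr.value_size = 8;
 *	attr.max_entries = 1024;
 *	strncpy(attr.map_name, "example_map", sizeof(attr.map_name) - 1);
 *	int map_fd = syscall(SYS_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */
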
/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

void bpf_map_inc(struct bpf_map *map)
{
	atomic64_inc(&map->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc);

void bpf_map_inc_with_uref(struct bpf_map *map)
{
	atomic64_inc(&map->refcnt);
	atomic64_inc(&map->usercnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);

struct bpf_map *bpf_map_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	bpf_map_inc(map);
	fdput(f);

	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	bpf_map_inc_with_uref(map);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
{
	int refold;

	refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
	if (!refold)
		return ERR_PTR(-ENOENT);
	if (uref)
		atomic64_inc(&map->usercnt);

	return map;
}

struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
{
	spin_lock_bh(&map_idr_lock);
	map = __bpf_map_inc_not_zero(map, false);
	spin_unlock_bh(&map_idr_lock);

	return map;
}
EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

static void *__bpf_copy_key(void __user *ukey, u64 key_size)
{
	if (key_size)
		return vmemdup_user(ukey, key_size);

	if (ukey)
		return ERR_PTR(-EINVAL);

	return NULL;
}

static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
{
	if (key_size)
		return kvmemdup_bpfptr(ukey, key_size);

	if (!bpfptr_is_null(ukey))
		return ERR_PTR(-EINVAL);

	return NULL;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	if (attr->flags & ~BPF_F_LOCK)
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		err = -EINVAL;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = bpf_map_copy_value(map, key, value, attr->flags);
	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kvfree(value);
free_key:
	kvfree(key);
err_put:
	fdput(f);
	return err;
}

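/* Illustrative userspace sketch (not part of the original file): update and
 * lookup against the map created in the earlier example. Names are standard
 * UAPI; error handling is elided:
 *
 *	__u32 key = 1;
 *	__u64 val = 100, out;
 *	union bpf_attr attr;
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.map_fd = map_fd;
 *	attr.key = (__u64)(unsigned long)&key;
 *	attr.value = (__u64)(unsigned long)&val;
 *	attr.flags = BPF_ANY;
 *	syscall(SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 *
 *	attr.value = (__u64)(unsigned long)&out;
 *	attr.flags = 0;
 *	syscall(SYS_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 *	// out == 100
 */
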
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
{
	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
	bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		err = -EINVAL;
		goto err_put;
	}

	key = ___bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_bpfptr(value, uvalue, value_size) != 0)
		goto free_value;

	err = bpf_map_update_value(map, f, key, value, attr->flags);

free_value:
	kvfree(value);
free_key:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_delete_elem(map, key);
		goto out;
	} else if (IS_FD_PROG_ARRAY(map) ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		/* These maps require sleepable context */
		err = map->ops->map_delete_elem(map, key);
		goto out;
	}

	bpf_disable_instrumentation();
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);
out:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if (ukey) {
		key = __bpf_copy_key(ukey, map->key_size);
		if (IS_ERR(key)) {
			err = PTR_ERR(key);
			goto err_put;
		}
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kvmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_get_next_key(map, key, next_key);
		goto out;
	}

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
out:
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kvfree(next_key);
free_key:
	kvfree(key);
err_put:
	fdput(f);
	return err;
}

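/* Illustrative userspace sketch (not part of the original file): the
 * conventional way to walk all keys with BPF_MAP_GET_NEXT_KEY - pass a NULL
 * key (attr.key == 0) to get the first key, then feed each result back in
 * until the call fails with ENOENT:
 *
 *	__u32 key, next;
 *	union bpf_attr attr;
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.map_fd = map_fd;
 *	attr.next_key = (__u64)(unsigned long)&next;
 *	while (syscall(SYS_bpf, BPF_MAP_GET_NEXT_KEY, &attr,
 *		       sizeof(attr)) == 0) {
 *		// process 'next' ...
 *		key = next;
 *		attr.key = (__u64)(unsigned long)&key;
 *	}
 */
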
int generic_map_delete_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	u32 cp, max_count;
	int err = 0;
	void *key;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		return -EINVAL;
	}

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!key)
		return -ENOMEM;

	for (cp = 0; cp < max_count; cp++) {
		err = -EFAULT;
		if (copy_from_user(key, keys + cp * map->key_size,
				   map->key_size))
			break;

		if (bpf_map_is_dev_bound(map)) {
			err = bpf_map_offload_delete_elem(map, key);
			break;
		}

		bpf_disable_instrumentation();
		rcu_read_lock();
		err = map->ops->map_delete_elem(map, key);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		maybe_wait_bpf_programs(map);
		if (err)
			break;
		cond_resched();
	}
	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	kvfree(key);
	return err;
}

int generic_map_update_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *values = u64_to_user_ptr(attr->batch.values);
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	u32 value_size, cp, max_count;
	int ufd = attr->batch.map_fd;
	void *key, *value;
	struct fd f;
	int err = 0;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		return -EINVAL;
	}

	value_size = bpf_map_value_size(map);

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!key)
		return -ENOMEM;

	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value) {
		kvfree(key);
		return -ENOMEM;
	}

	f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
	for (cp = 0; cp < max_count; cp++) {
		err = -EFAULT;
		if (copy_from_user(key, keys + cp * map->key_size,
				   map->key_size) ||
		    copy_from_user(value, values + cp * value_size, value_size))
			break;

		err = bpf_map_update_value(map, f, key, value,
					   attr->batch.elem_flags);

		if (err)
			break;
		cond_resched();
	}

	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	kvfree(value);
	kvfree(key);
	fdput(f);
	return err;
}

#define MAP_LOOKUP_RETRIES 3

int generic_map_lookup_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
	void __user *values = u64_to_user_ptr(attr->batch.values);
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	void *buf, *buf_prevkey, *prev_key, *key, *value;
	int err, retry = MAP_LOOKUP_RETRIES;
	u32 value_size, cp, max_count;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map))
		return -EINVAL;

	value_size = bpf_map_value_size(map);

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!buf_prevkey)
		return -ENOMEM;

	buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
	if (!buf) {
		kvfree(buf_prevkey);
		return -ENOMEM;
	}

	err = -EFAULT;
	prev_key = NULL;
	if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
		goto free_buf;
	key = buf;
	value = key + map->key_size;
	if (ubatch)
		prev_key = buf_prevkey;

	for (cp = 0; cp < max_count;) {
		rcu_read_lock();
		err = map->ops->map_get_next_key(map, prev_key, key);
		rcu_read_unlock();
		if (err)
			break;
		err = bpf_map_copy_value(map, key, value,
					 attr->batch.elem_flags);

		if (err == -ENOENT) {
			if (retry) {
				retry--;
				continue;
			}
			err = -EINTR;
			break;
		}

		if (err)
			goto free_buf;

		if (copy_to_user(keys + cp * map->key_size, key,
				 map->key_size)) {
			err = -EFAULT;
			goto free_buf;
		}
		if (copy_to_user(values + cp * value_size, value, value_size)) {
			err = -EFAULT;
			goto free_buf;
		}

		if (!prev_key)
			prev_key = buf_prevkey;

		swap(prev_key, key);
		retry = MAP_LOOKUP_RETRIES;
		cp++;
		cond_resched();
	}

	if (err == -EFAULT)
		goto free_buf;

	if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
	    (cp && copy_to_user(uobatch, prev_key, map->key_size))))
		err = -EFAULT;

free_buf:
	kvfree(buf_prevkey);
	kvfree(buf);
	return err;
}

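/* Illustrative note (not part of the original file): the batch loops above
 * process up to attr->batch.count elements per syscall and always report
 * back how many completed via uattr->batch.count, so userspace can resume
 * after a partial failure. The cond_resched() in each loop is the schedule
 * point added by this file's commit ("bpf: Add schedule points in batch
 * ops"), preventing huge batches from hogging the CPU. In the lookup
 * variant, an -ENOENT from bpf_map_copy_value() usually means the element
 * was deleted between get_next_key and the copy; the key is retried up to
 * MAP_LOOKUP_RETRIES times before giving up with -EINTR.
 */
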
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags

static int map_lookup_and_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
		return -EINVAL;

	if (attr->flags & ~BPF_F_LOCK)
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if (attr->flags &&
	    (map->map_type == BPF_MAP_TYPE_QUEUE ||
	     map->map_type == BPF_MAP_TYPE_STACK)) {
		err = -EINVAL;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		err = -EINVAL;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -ENOTSUPP;
	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
	    map->map_type == BPF_MAP_TYPE_STACK) {
		err = map->ops->map_pop_elem(map, value);
	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		if (!bpf_map_is_dev_bound(map)) {
			bpf_disable_instrumentation();
			rcu_read_lock();
			err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
			rcu_read_unlock();
			bpf_enable_instrumentation();
		}
	}

	if (err)
		goto free_value;

	if (copy_to_user(uvalue, value, value_size) != 0) {
		err = -EFAULT;
		goto free_value;
	}

	err = 0;

free_value:
	kvfree(value);
free_key:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

#define BPF_MAP_FREEZE_LAST_FIELD map_fd

static int map_freeze(const union bpf_attr *attr)
{
	int err = 0, ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;

	if (CHECK_ATTR(BPF_MAP_FREEZE))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
	    map_value_has_timer(map)) {
		fdput(f);
		return -ENOTSUPP;
	}

	mutex_lock(&map->freeze_mutex);
	if (bpf_map_write_active(map)) {
		err = -EBUSY;
		goto err_put;
	}
	if (READ_ONCE(map->frozen)) {
		err = -EBUSY;
		goto err_put;
	}
	if (!bpf_capable()) {
		err = -EPERM;
		goto err_put;
	}

	WRITE_ONCE(map->frozen, true);
err_put:
	mutex_unlock(&map->freeze_mutex);
	fdput(f);
	return err;
}

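/* Illustrative note (not part of the original file): BPF_MAP_FREEZE makes a
 * map permanently read-only from the syscall side while BPF programs can
 * still read it (and write it unless BPF_F_RDONLY_PROG was set). Freezing
 * fails with -EBUSY if a write is in flight or a writable mmap() of the map
 * exists - that is what the bpf_map_write_active() check enforces. A
 * typical use is sealing a configuration map after populating it:
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.map_fd = map_fd;
 *	syscall(SYS_bpf, BPF_MAP_FREEZE, &attr, sizeof(attr));
 *	// subsequent BPF_MAP_UPDATE_ELEM calls fail with EPERM
 */
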
static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	const struct bpf_prog_ops *ops;

	if (type >= ARRAY_SIZE(bpf_prog_types))
		return -EINVAL;
	type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
	ops = bpf_prog_types[type];
	if (!ops)
		return -EINVAL;

	if (!bpf_prog_is_dev_bound(prog->aux))
		prog->aux->ops = ops;
	else
		prog->aux->ops = &bpf_offload_prog_ops;
	prog->type = type;
	return 0;
}

enum bpf_audit {
	BPF_AUDIT_LOAD,
	BPF_AUDIT_UNLOAD,
	BPF_AUDIT_MAX,
};

static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
	[BPF_AUDIT_LOAD]   = "LOAD",
	[BPF_AUDIT_UNLOAD] = "UNLOAD",
};

static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
{
	struct audit_context *ctx = NULL;
	struct audit_buffer *ab;

	if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
		return;
	if (audit_enabled == AUDIT_OFF)
		return;
	if (op == BPF_AUDIT_LOAD)
		ctx = audit_context();
	ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
	if (unlikely(!ab))
		return;
	audit_log_format(ab, "prog-id=%u op=%s",
			 prog->aux->id, bpf_audit_str[op]);
	audit_log_end(ab);
}

static int bpf_prog_alloc_id(struct bpf_prog *prog)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&prog_idr_lock);
	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		prog->aux->id = id;
	spin_unlock_bh(&prog_idr_lock);
	idr_preload_end();

	/* id is in [1, INT_MAX) */
	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	unsigned long flags;

	/* cBPF to eBPF migrations are currently not in the idr store.
	 * Offloaded programs are removed from the store when their device
	 * disappears - even if someone grabs an fd to them they are unusable,
	 * simply waiting for refcnt to drop to be freed.
	 */
	if (!prog->aux->id)
		return;

	if (do_idr_lock)
		spin_lock_irqsave(&prog_idr_lock, flags);
	else
		__acquire(&prog_idr_lock);

	idr_remove(&prog_idr, prog->aux->id);
	prog->aux->id = 0;

	if (do_idr_lock)
		spin_unlock_irqrestore(&prog_idr_lock, flags);
	else
		__release(&prog_idr_lock);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	kvfree(aux->func_info);
	kfree(aux->func_info_aux);
	free_uid(aux->user);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
{
	bpf_prog_kallsyms_del_all(prog);
	btf_put(prog->aux->btf);
	kvfree(prog->aux->jited_linfo);
	kvfree(prog->aux->linfo);
	kfree(prog->aux->kfunc_tab);
	if (prog->aux->attach_btf)
		btf_put(prog->aux->attach_btf);

	if (deferred) {
		if (prog->aux->sleepable)
			call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
		else
			call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	} else {
		__bpf_prog_put_rcu(&prog->aux->rcu);
	}
}

static void bpf_prog_put_deferred(struct work_struct *work)
{
	struct bpf_prog_aux *aux;
	struct bpf_prog *prog;

	aux = container_of(work, struct bpf_prog_aux, work);
	prog = aux->prog;
	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
	bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
	__bpf_prog_put_noref(prog, true);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	struct bpf_prog_aux *aux = prog->aux;

	if (atomic64_dec_and_test(&aux->refcnt)) {
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);

		if (in_irq() || irqs_disabled()) {
			INIT_WORK(&aux->work, bpf_prog_put_deferred);
			schedule_work(&aux->work);
		} else {
			bpf_prog_put_deferred(&aux->work);
		}
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

static void bpf_prog_get_stats(const struct bpf_prog *prog,
			       struct bpf_prog_stats *stats)
{
	u64 nsecs = 0, cnt = 0, misses = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct bpf_prog_stats *st;
		unsigned int start;
		u64 tnsecs, tcnt, tmisses;

		st = per_cpu_ptr(prog->stats, cpu);
		do {
			start = u64_stats_fetch_begin_irq(&st->syncp);
			tnsecs = st->nsecs;
			tcnt = st->cnt;
			tmisses = st->misses;
		} while (u64_stats_fetch_retry_irq(&st->syncp, start));
		nsecs += tnsecs;
		cnt += tcnt;
		misses += tmisses;
	}
	stats->nsecs = nsecs;
	stats->cnt = cnt;
	stats->misses = misses;
}

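/* Illustrative note (not part of the original file): the
 * u64_stats_fetch_begin_irq()/retry_irq() loop above is the standard
 * seqcount pattern for reading 64-bit counters that are updated locklessly
 * on another CPU - the per-CPU snapshot (tnsecs/tcnt/tmisses) is re-read
 * whenever an updater raced with the reader, so each CPU contributes an
 * internally consistent triple before it is summed into the totals.
 */
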
7bd509e3
DB
1854#ifdef CONFIG_PROC_FS
1855static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
1856{
1857 const struct bpf_prog *prog = filp->private_data;
f1f7714e 1858 char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
492ecee8 1859 struct bpf_prog_stats stats;
7bd509e3 1860
492ecee8 1861 bpf_prog_get_stats(prog, &stats);
f1f7714e 1862 bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
7bd509e3
DB
1863 seq_printf(m,
1864 "prog_type:\t%u\n"
1865 "prog_jited:\t%u\n"
f1f7714e 1866 "prog_tag:\t%s\n"
4316b409 1867 "memlock:\t%llu\n"
492ecee8
AS
1868 "prog_id:\t%u\n"
1869 "run_time_ns:\t%llu\n"
9ed9e9ba
AS
1870 "run_cnt:\t%llu\n"
1871 "recursion_misses:\t%llu\n",
7bd509e3
DB
1872 prog->type,
1873 prog->jited,
f1f7714e 1874 prog_tag,
4316b409 1875 prog->pages * 1ULL << PAGE_SHIFT,
492ecee8
AS
1876 prog->aux->id,
1877 stats.nsecs,
9ed9e9ba
AS
1878 stats.cnt,
1879 stats.misses);
7bd509e3
DB
1880}
1881#endif
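
/* Example (illustrative; the values below are made up): reading
 * /proc/<pid>/fdinfo/<fd> for a BPF prog fd yields something like:
 *
 *	prog_type:	1
 *	prog_jited:	1
 *	prog_tag:	f5c576d12b9c1f5e
 *	memlock:	4096
 *	prog_id:	42
 *	run_time_ns:	12345
 *	run_cnt:	17
 *	recursion_misses:	0
 */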

const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_prog_show_fdinfo,
#endif
	.release	= bpf_prog_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	int ret;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

void bpf_prog_add(struct bpf_prog *prog, int i)
{
	atomic64_add(i, &prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

void bpf_prog_inc(struct bpf_prog *prog)
{
	atomic64_inc(&prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

/* prog_idr_lock should have been held */
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
	int refold;

	refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);

	if (!refold)
		return ERR_PTR(-ENOENT);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
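
/* Note (illustrative): this is the classic "get unless zero" pattern. A
 * lookup under prog_idr_lock may find a program whose last reference is
 * concurrently being dropped; atomic64_fetch_add_unless() refuses to
 * resurrect such an object, and callers like bpf_prog_by_id() below
 * translate that into -ENOENT.
 */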

bool bpf_prog_get_ok(struct bpf_prog *prog,
		     enum bpf_prog_type *attach_type, bool attach_drv)
{
	/* not an attachment, just a refcount inc, always allow */
	if (!attach_type)
		return true;

	if (prog->type != *attach_type)
		return false;
	if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
		return false;

	return true;
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
				       bool attach_drv)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL, false);
}

struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
				       bool attach_drv)
{
	return __bpf_prog_get(ufd, &type, attach_drv);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);

/* Initially all BPF programs could be loaded w/o specifying
 * expected_attach_type. Later, for some of them, specifying
 * expected_attach_type at load time became required, so that the program
 * can be validated properly. Programs of types that are allowed to be
 * loaded both w/ and w/o (for backward compatibility) expected_attach_type
 * should have the default attach type assigned to expected_attach_type for
 * the latter case, so that it can be validated later at attach time.
 *
 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
 * the prog type requires it but has some attach types that have to be
 * backward compatible.
 */
static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
{
	switch (attr->prog_type) {
	case BPF_PROG_TYPE_CGROUP_SOCK:
		/* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
		 * exist so checking for non-zero is the way to go here.
		 */
		if (!attr->expected_attach_type)
			attr->expected_attach_type =
				BPF_CGROUP_INET_SOCK_CREATE;
		break;
	case BPF_PROG_TYPE_SK_REUSEPORT:
		if (!attr->expected_attach_type)
			attr->expected_attach_type =
				BPF_SK_REUSEPORT_SELECT;
		break;
	}
}
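
/* Example (illustrative): loading a BPF_PROG_TYPE_CGROUP_SOCK program with
 * attr.expected_attach_type == 0 behaves as if the loader had passed
 * BPF_CGROUP_INET_SOCK_CREATE, so old binaries keep working while
 * attach-time validation still sees a concrete attach type.
 */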

static int
bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
			   enum bpf_attach_type expected_attach_type,
			   struct btf *attach_btf, u32 btf_id,
			   struct bpf_prog *dst_prog)
{
	if (btf_id) {
		if (btf_id > BTF_MAX_TYPE)
			return -EINVAL;

		if (!attach_btf && !dst_prog)
			return -EINVAL;

		switch (prog_type) {
		case BPF_PROG_TYPE_TRACING:
		case BPF_PROG_TYPE_LSM:
		case BPF_PROG_TYPE_STRUCT_OPS:
		case BPF_PROG_TYPE_EXT:
			break;
		default:
			return -EINVAL;
		}
	}

	if (attach_btf && (!btf_id || dst_prog))
		return -EINVAL;

	if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING &&
	    prog_type != BPF_PROG_TYPE_EXT)
		return -EINVAL;

	switch (prog_type) {
	case BPF_PROG_TYPE_CGROUP_SOCK:
		switch (expected_attach_type) {
		case BPF_CGROUP_INET_SOCK_CREATE:
		case BPF_CGROUP_INET_SOCK_RELEASE:
		case BPF_CGROUP_INET4_POST_BIND:
		case BPF_CGROUP_INET6_POST_BIND:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
		switch (expected_attach_type) {
		case BPF_CGROUP_INET4_BIND:
		case BPF_CGROUP_INET6_BIND:
		case BPF_CGROUP_INET4_CONNECT:
		case BPF_CGROUP_INET6_CONNECT:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
		case BPF_CGROUP_UDP4_SENDMSG:
		case BPF_CGROUP_UDP6_SENDMSG:
		case BPF_CGROUP_UDP4_RECVMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_CGROUP_SKB:
		switch (expected_attach_type) {
		case BPF_CGROUP_INET_INGRESS:
		case BPF_CGROUP_INET_EGRESS:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		switch (expected_attach_type) {
		case BPF_CGROUP_SETSOCKOPT:
		case BPF_CGROUP_GETSOCKOPT:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_SK_LOOKUP:
		if (expected_attach_type == BPF_SK_LOOKUP)
			return 0;
		return -EINVAL;
	case BPF_PROG_TYPE_SK_REUSEPORT:
		switch (expected_attach_type) {
		case BPF_SK_REUSEPORT_SELECT:
		case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE:
			return 0;
		default:
			return -EINVAL;
		}
	case BPF_PROG_TYPE_SYSCALL:
	case BPF_PROG_TYPE_EXT:
		if (expected_attach_type)
			return -EINVAL;
		fallthrough;
	default:
		return 0;
	}
}

static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
{
	switch (prog_type) {
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
	case BPF_PROG_TYPE_LIRC_MODE2:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_EXT: /* extends any prog */
		return true;
	case BPF_PROG_TYPE_CGROUP_SKB:
		/* always unpriv */
	case BPF_PROG_TYPE_SK_REUSEPORT:
		/* equivalent to SOCKET_FILTER. need CAP_BPF only */
	default:
		return false;
	}
}

static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
{
	switch (prog_type) {
	case BPF_PROG_TYPE_KPROBE:
	case BPF_PROG_TYPE_TRACEPOINT:
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
	case BPF_PROG_TYPE_TRACING:
	case BPF_PROG_TYPE_LSM:
	case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
	case BPF_PROG_TYPE_EXT: /* extends any prog */
		return true;
	default:
		return false;
	}
}
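
/* Summary (illustrative, not in the original source): together with the
 * bpf_capable() checks in bpf_prog_load() below, these two helpers
 * implement the split capability model: CAP_BPF (or CAP_SYS_ADMIN) gates
 * loading in general, CAP_NET_ADMIN is additionally required for the
 * networking program types above, and CAP_PERFMON for the tracing ones.
 */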

/* last field in 'union bpf_attr' used by this command */
#define	BPF_PROG_LOAD_LAST_FIELD fd_array

static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog, *dst_prog = NULL;
	struct btf *attach_btf = NULL;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
				 BPF_F_ANY_ALIGNMENT |
				 BPF_F_TEST_STATE_FREQ |
				 BPF_F_SLEEPABLE |
				 BPF_F_TEST_RND_HI32))
		return -EINVAL;

	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
	    (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
	    !bpf_capable())
		return -EPERM;

	/* copy eBPF program license from user space */
	if (strncpy_from_bpfptr(license,
				make_bpfptr(attr->license, uattr.is_kernel),
				sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 ||
	    attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
		return -E2BIG;
	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB &&
	    !bpf_capable())
		return -EPERM;

	if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (is_perfmon_prog_type(type) && !perfmon_capable())
		return -EPERM;

	/* attach_prog_fd/attach_btf_obj_fd can specify the fd of either a
	 * bpf_prog or a btf; we need to check which one it is.
	 */
	if (attr->attach_prog_fd) {
		dst_prog = bpf_prog_get(attr->attach_prog_fd);
		if (IS_ERR(dst_prog)) {
			dst_prog = NULL;
			attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
			if (IS_ERR(attach_btf))
				return -EINVAL;
			if (!btf_is_kernel(attach_btf)) {
				/* attaching through specifying bpf_prog's BTF
				 * objects directly might be supported eventually
				 */
				btf_put(attach_btf);
				return -ENOTSUPP;
			}
		}
	} else if (attr->attach_btf_id) {
		/* fall back to vmlinux BTF, if BTF type ID is specified */
		attach_btf = bpf_get_btf_vmlinux();
		if (IS_ERR(attach_btf))
			return PTR_ERR(attach_btf);
		if (!attach_btf)
			return -EINVAL;
		btf_get(attach_btf);
	}

	bpf_prog_load_fixup_attach_type(attr);
	if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
				       attach_btf, attr->attach_btf_id,
				       dst_prog)) {
		if (dst_prog)
			bpf_prog_put(dst_prog);
		if (attach_btf)
			btf_put(attach_btf);
		return -EINVAL;
	}

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog) {
		if (dst_prog)
			bpf_prog_put(dst_prog);
		if (attach_btf)
			btf_put(attach_btf);
		return -ENOMEM;
	}

	prog->expected_attach_type = attr->expected_attach_type;
	prog->aux->attach_btf = attach_btf;
	prog->aux->attach_btf_id = attr->attach_btf_id;
	prog->aux->dst_prog = dst_prog;
	prog->aux->offload_requested = !!attr->prog_ifindex;
	prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;

	err = security_bpf_prog_alloc(prog->aux);
	if (err)
		goto free_prog;

	prog->aux->user = get_current_user();
	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_bpfptr(prog->insns,
			     make_bpfptr(attr->insns, uattr.is_kernel),
			     bpf_prog_insn_size(prog)) != 0)
		goto free_prog_sec;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic64_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	if (bpf_prog_is_dev_bound(prog->aux)) {
		err = bpf_prog_offload_init(prog, attr);
		if (err)
			goto free_prog_sec;
	}

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog_sec;

	prog->aux->load_time = ktime_get_boottime_ns();
	err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
			       sizeof(attr->prog_name));
	if (err < 0)
		goto free_prog_sec;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr, uattr);
	if (err < 0)
		goto free_used_maps;

	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_alloc_id(prog);
	if (err)
		goto free_used_maps;

	/* Upon success of bpf_prog_alloc_id(), the BPF prog is
	 * effectively publicly exposed. However, retrieving via
	 * bpf_prog_get_fd_by_id() will take another reference,
	 * therefore it cannot be gone underneath us.
	 *
	 * Only for the time /after/ successful bpf_prog_new_fd()
	 * and before returning to userspace, we might just hold
	 * one reference and any parallel close on that fd could
	 * rip everything out. Hence, below notifications must
	 * happen before bpf_prog_new_fd().
	 *
	 * Also, any failure handling from this point onwards must
	 * be using bpf_prog_put() given the program is exposed.
	 */
	bpf_prog_kallsyms_add(prog);
	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
	bpf_audit_prog(prog, BPF_AUDIT_LOAD);

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		bpf_prog_put(prog);
	return err;

free_used_maps:
	/* In case we have subprogs, we need to wait for a grace
	 * period before we can tear down JIT memory since symbols
	 * are already exposed under kallsyms.
	 */
	__bpf_prog_put_noref(prog, prog->aux->func_cnt);
	return err;
free_prog_sec:
	free_uid(prog->aux->user);
	security_bpf_prog_free(prog->aux);
free_prog:
	if (prog->aux->attach_btf)
		btf_put(prog->aux->attach_btf);
	bpf_prog_free(prog);
	return err;
}
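
/* Example (illustrative sketch, userspace side; ptr_to_u64() is an assumed
 * helper that casts a pointer to __u64, and insns[]/insn_cnt are prepared
 * by the caller): the whole function above services one bpf(2) command:
 *
 *	union bpf_attr attr;
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *	attr.insns     = ptr_to_u64(insns);
 *	attr.insn_cnt  = insn_cnt;
 *	attr.license   = ptr_to_u64("GPL");
 *
 *	int fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *
 * On success the returned fd pins the program; closing it drops the
 * reference taken at load time.
 */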

#define BPF_OBJ_LAST_FIELD file_flags

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
	    attr->file_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
				attr->file_flags);
}

void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
		   const struct bpf_link_ops *ops, struct bpf_prog *prog)
{
	atomic64_set(&link->refcnt, 1);
	link->type = type;
	link->id = 0;
	link->ops = ops;
	link->prog = prog;
}

static void bpf_link_free_id(int id)
{
	if (!id)
		return;

	spin_lock_bh(&link_idr_lock);
	idr_remove(&link_idr, id);
	spin_unlock_bh(&link_idr_lock);
}

/* Clean up bpf_link and corresponding anon_inode file and FD. After
 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
 * anon_inode's release() call. This helper marks bpf_link as
 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
 * is not decremented, it's the responsibility of the calling code that failed
 * to complete bpf_link initialization.
 */
void bpf_link_cleanup(struct bpf_link_primer *primer)
{
	primer->link->prog = NULL;
	bpf_link_free_id(primer->id);
	fput(primer->file);
	put_unused_fd(primer->fd);
}

void bpf_link_inc(struct bpf_link *link)
{
	atomic64_inc(&link->refcnt);
}

/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
	bpf_link_free_id(link->id);
	if (link->prog) {
		/* detach BPF program, clean up used resources */
		link->ops->release(link);
		bpf_prog_put(link->prog);
	}
	/* free bpf_link and its containing memory */
	link->ops->dealloc(link);
}

static void bpf_link_put_deferred(struct work_struct *work)
{
	struct bpf_link *link = container_of(work, struct bpf_link, work);

	bpf_link_free(link);
}

/* bpf_link_put can be called from atomic context, but ensures that resources
 * are freed from process context
 */
void bpf_link_put(struct bpf_link *link)
{
	if (!atomic64_dec_and_test(&link->refcnt))
		return;

	if (in_atomic()) {
		INIT_WORK(&link->work, bpf_link_put_deferred);
		schedule_work(&link->work);
	} else {
		bpf_link_free(link);
	}
}

static int bpf_link_release(struct inode *inode, struct file *filp)
{
	struct bpf_link *link = filp->private_data;

	bpf_link_put(link);
	return 0;
}

#ifdef CONFIG_PROC_FS
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
static const char *bpf_link_type_strs[] = {
	[BPF_LINK_TYPE_UNSPEC] = "<invalid>",
#include <linux/bpf_types.h>
};
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE

static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_link *link = filp->private_data;
	const struct bpf_prog *prog = link->prog;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "link_type:\t%s\n"
		   "link_id:\t%u\n"
		   "prog_tag:\t%s\n"
		   "prog_id:\t%u\n",
		   bpf_link_type_strs[link->type],
		   link->id,
		   prog_tag,
		   prog->aux->id);
	if (link->ops->show_fdinfo)
		link->ops->show_fdinfo(link, m);
}
#endif

static const struct file_operations bpf_link_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_link_show_fdinfo,
#endif
	.release	= bpf_link_release,
	.read		= bpf_dummy_read,
	.write		= bpf_dummy_write,
};

static int bpf_link_alloc_id(struct bpf_link *link)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&link_idr_lock);
	id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
	spin_unlock_bh(&link_idr_lock);
	idr_preload_end();

	return id;
}

/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
 * reserving unused FD and allocating ID from link_idr. This is to be paired
 * with bpf_link_settle() to install FD and ID and expose bpf_link to
 * user-space, if bpf_link is successfully attached. If not, bpf_link and
 * pre-allocated resources are to be freed with a bpf_link_cleanup() call.
 * All the transient state is passed around in struct bpf_link_primer.
 * This is the preferred way to create and initialize bpf_link, especially
 * when there are complicated and expensive operations in between creating
 * bpf_link itself and attaching it to the BPF hook. By using bpf_link_prime()
 * and bpf_link_settle(), kernel code using bpf_link doesn't have to perform
 * expensive (and potentially failing) roll-back operations in the rare case
 * that file, FD, or ID can't be allocated.
 */
int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
{
	struct file *file;
	int fd, id;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		return fd;

	id = bpf_link_alloc_id(link);
	if (id < 0) {
		put_unused_fd(fd);
		return id;
	}

	file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
	if (IS_ERR(file)) {
		bpf_link_free_id(id);
		put_unused_fd(fd);
		return PTR_ERR(file);
	}

	primer->link = link;
	primer->file = file;
	primer->fd = fd;
	primer->id = id;
	return 0;
}

int bpf_link_settle(struct bpf_link_primer *primer)
{
	/* make bpf_link fetchable by ID */
	spin_lock_bh(&link_idr_lock);
	primer->link->id = primer->id;
	spin_unlock_bh(&link_idr_lock);
	/* make bpf_link fetchable by FD */
	fd_install(primer->fd, primer->file);
	/* pass through installed FD */
	return primer->fd;
}
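
/* Sketch (illustrative, assuming a hypothetical struct bpf_foo_link that
 * embeds struct bpf_link and a made-up do_expensive_attach() step): the
 * intended calling sequence for the primer API used by the attach paths
 * below:
 *
 *	struct bpf_link_primer primer;
 *	int err;
 *
 *	bpf_link_init(&foo->link, BPF_LINK_TYPE_UNSPEC, &foo_link_ops, prog);
 *	err = bpf_link_prime(&foo->link, &primer);
 *	if (err)
 *		return err;	// nothing exposed yet, kfree(foo) is safe
 *	err = do_expensive_attach(foo);
 *	if (err) {
 *		// undoes fd/id/file; the deferred file release will
 *		// eventually call ->dealloc(), so do NOT kfree(foo) here
 *		bpf_link_cleanup(&primer);
 *		return err;
 *	}
 *	return bpf_link_settle(&primer);	// publish fd and id
 */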

int bpf_link_new_fd(struct bpf_link *link)
{
	return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
}

struct bpf_link *bpf_link_get_from_fd(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_link *link;

	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_link_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	link = f.file->private_data;
	bpf_link_inc(link);
	fdput(f);

	return link;
}

struct bpf_tracing_link {
	struct bpf_link link;
	enum bpf_attach_type attach_type;
	struct bpf_trampoline *trampoline;
	struct bpf_prog *tgt_prog;
};

static void bpf_tracing_link_release(struct bpf_link *link)
{
	struct bpf_tracing_link *tr_link =
		container_of(link, struct bpf_tracing_link, link);

	WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
						tr_link->trampoline));

	bpf_trampoline_put(tr_link->trampoline);

	/* tgt_prog is NULL if target is a kernel function */
	if (tr_link->tgt_prog)
		bpf_prog_put(tr_link->tgt_prog);
}

static void bpf_tracing_link_dealloc(struct bpf_link *link)
{
	struct bpf_tracing_link *tr_link =
		container_of(link, struct bpf_tracing_link, link);

	kfree(tr_link);
}

static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
					 struct seq_file *seq)
{
	struct bpf_tracing_link *tr_link =
		container_of(link, struct bpf_tracing_link, link);

	seq_printf(seq,
		   "attach_type:\t%d\n",
		   tr_link->attach_type);
}

static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
					   struct bpf_link_info *info)
{
	struct bpf_tracing_link *tr_link =
		container_of(link, struct bpf_tracing_link, link);

	info->tracing.attach_type = tr_link->attach_type;
	bpf_trampoline_unpack_key(tr_link->trampoline->key,
				  &info->tracing.target_obj_id,
				  &info->tracing.target_btf_id);

	return 0;
}

static const struct bpf_link_ops bpf_tracing_link_lops = {
	.release = bpf_tracing_link_release,
	.dealloc = bpf_tracing_link_dealloc,
	.show_fdinfo = bpf_tracing_link_show_fdinfo,
	.fill_link_info = bpf_tracing_link_fill_link_info,
};

static int bpf_tracing_prog_attach(struct bpf_prog *prog,
				   int tgt_prog_fd,
				   u32 btf_id)
{
	struct bpf_link_primer link_primer;
	struct bpf_prog *tgt_prog = NULL;
	struct bpf_trampoline *tr = NULL;
	struct bpf_tracing_link *link;
	u64 key = 0;
	int err;

	switch (prog->type) {
	case BPF_PROG_TYPE_TRACING:
		if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
		    prog->expected_attach_type != BPF_TRACE_FEXIT &&
		    prog->expected_attach_type != BPF_MODIFY_RETURN) {
			err = -EINVAL;
			goto out_put_prog;
		}
		break;
	case BPF_PROG_TYPE_EXT:
		if (prog->expected_attach_type != 0) {
			err = -EINVAL;
			goto out_put_prog;
		}
		break;
	case BPF_PROG_TYPE_LSM:
		if (prog->expected_attach_type != BPF_LSM_MAC) {
			err = -EINVAL;
			goto out_put_prog;
		}
		break;
	default:
		err = -EINVAL;
		goto out_put_prog;
	}

	if (!!tgt_prog_fd != !!btf_id) {
		err = -EINVAL;
		goto out_put_prog;
	}

	if (tgt_prog_fd) {
		/* For now we only allow new targets for BPF_PROG_TYPE_EXT */
		if (prog->type != BPF_PROG_TYPE_EXT) {
			err = -EINVAL;
			goto out_put_prog;
		}

		tgt_prog = bpf_prog_get(tgt_prog_fd);
		if (IS_ERR(tgt_prog)) {
			err = PTR_ERR(tgt_prog);
			tgt_prog = NULL;
			goto out_put_prog;
		}

		key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
	}

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_prog;
	}
	bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING,
		      &bpf_tracing_link_lops, prog);
	link->attach_type = prog->expected_attach_type;

	mutex_lock(&prog->aux->dst_mutex);

	/* There are a few possible cases here:
	 *
	 * - if prog->aux->dst_trampoline is set, the program was just loaded
	 *   and not yet attached to anything, so we can use the values stored
	 *   in prog->aux
	 *
	 * - if prog->aux->dst_trampoline is NULL, the program has already been
	 *   attached to a target and its initial target was cleared (below)
	 *
	 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
	 *   target_btf_id using the link_create API.
	 *
	 * - if tgt_prog == NULL, this function was called using the old
	 *   raw_tracepoint_open API, and we need a target from prog->aux
	 *
	 * - if prog->aux->dst_trampoline and tgt_prog are NULL, the program
	 *   was detached and is going for re-attachment.
	 */
	if (!prog->aux->dst_trampoline && !tgt_prog) {
		/*
		 * Allow re-attach for TRACING and LSM programs. If it's
		 * currently linked, bpf_trampoline_link_prog will fail.
		 * EXT programs need to specify tgt_prog_fd, so they
		 * re-attach in a separate code path.
		 */
		if (prog->type != BPF_PROG_TYPE_TRACING &&
		    prog->type != BPF_PROG_TYPE_LSM) {
			err = -EINVAL;
			goto out_unlock;
		}
		btf_id = prog->aux->attach_btf_id;
		key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id);
	}

	if (!prog->aux->dst_trampoline ||
	    (key && key != prog->aux->dst_trampoline->key)) {
		/* If there is no saved target, or the specified target is
		 * different from the destination specified at load time, we
		 * need a new trampoline and a check for compatibility
		 */
		struct bpf_attach_target_info tgt_info = {};

		err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
					      &tgt_info);
		if (err)
			goto out_unlock;

		tr = bpf_trampoline_get(key, &tgt_info);
		if (!tr) {
			err = -ENOMEM;
			goto out_unlock;
		}
	} else {
		/* The caller didn't specify a target, or the target was the
		 * same as the destination supplied during program load. This
		 * means we can reuse the trampoline and reference from program
		 * load time, and there is no need to allocate a new one. This
		 * can only happen once for any program, as the saved values in
		 * prog->aux are cleared below.
		 */
		tr = prog->aux->dst_trampoline;
		tgt_prog = prog->aux->dst_prog;
	}

	err = bpf_link_prime(&link->link, &link_primer);
	if (err)
		goto out_unlock;

	err = bpf_trampoline_link_prog(prog, tr);
	if (err) {
		bpf_link_cleanup(&link_primer);
		link = NULL;
		goto out_unlock;
	}

	link->tgt_prog = tgt_prog;
	link->trampoline = tr;

	/* Always clear the trampoline and target prog from prog->aux to make
	 * sure the original attach destination is not kept alive after a
	 * program is (re-)attached to another target.
	 */
	if (prog->aux->dst_prog &&
	    (tgt_prog_fd || tr != prog->aux->dst_trampoline))
		/* got extra prog ref from syscall, or attaching to different prog */
		bpf_prog_put(prog->aux->dst_prog);
	if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
		/* we allocated a new trampoline, so free the old one */
		bpf_trampoline_put(prog->aux->dst_trampoline);

	prog->aux->dst_prog = NULL;
	prog->aux->dst_trampoline = NULL;
	mutex_unlock(&prog->aux->dst_mutex);

	return bpf_link_settle(&link_primer);
out_unlock:
	if (tr && tr != prog->aux->dst_trampoline)
		bpf_trampoline_put(tr);
	mutex_unlock(&prog->aux->dst_mutex);
	kfree(link);
out_put_prog:
	if (tgt_prog_fd && tgt_prog)
		bpf_prog_put(tgt_prog);
	return err;
}

struct bpf_raw_tp_link {
	struct bpf_link link;
	struct bpf_raw_event_map *btp;
};

static void bpf_raw_tp_link_release(struct bpf_link *link)
{
	struct bpf_raw_tp_link *raw_tp =
		container_of(link, struct bpf_raw_tp_link, link);

	bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
	bpf_put_raw_tracepoint(raw_tp->btp);
}

static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
{
	struct bpf_raw_tp_link *raw_tp =
		container_of(link, struct bpf_raw_tp_link, link);

	kfree(raw_tp);
}

static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
					struct seq_file *seq)
{
	struct bpf_raw_tp_link *raw_tp_link =
		container_of(link, struct bpf_raw_tp_link, link);

	seq_printf(seq,
		   "tp_name:\t%s\n",
		   raw_tp_link->btp->tp->name);
}

static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
					  struct bpf_link_info *info)
{
	struct bpf_raw_tp_link *raw_tp_link =
		container_of(link, struct bpf_raw_tp_link, link);
	char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
	const char *tp_name = raw_tp_link->btp->tp->name;
	u32 ulen = info->raw_tracepoint.tp_name_len;
	size_t tp_len = strlen(tp_name);

	if (!ulen ^ !ubuf)
		return -EINVAL;

	info->raw_tracepoint.tp_name_len = tp_len + 1;

	if (!ubuf)
		return 0;

	if (ulen >= tp_len + 1) {
		if (copy_to_user(ubuf, tp_name, tp_len + 1))
			return -EFAULT;
	} else {
		char zero = '\0';

		if (copy_to_user(ubuf, tp_name, ulen - 1))
			return -EFAULT;
		if (put_user(zero, ubuf + ulen - 1))
			return -EFAULT;
		return -ENOSPC;
	}

	return 0;
}

static const struct bpf_link_ops bpf_raw_tp_link_lops = {
	.release = bpf_raw_tp_link_release,
	.dealloc = bpf_raw_tp_link_dealloc,
	.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
	.fill_link_info = bpf_raw_tp_link_fill_link_info,
};

#ifdef CONFIG_PERF_EVENTS
struct bpf_perf_link {
	struct bpf_link link;
	struct file *perf_file;
};

static void bpf_perf_link_release(struct bpf_link *link)
{
	struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
	struct perf_event *event = perf_link->perf_file->private_data;

	perf_event_free_bpf_prog(event);
	fput(perf_link->perf_file);
}

static void bpf_perf_link_dealloc(struct bpf_link *link)
{
	struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);

	kfree(perf_link);
}

static const struct bpf_link_ops bpf_perf_link_lops = {
	.release = bpf_perf_link_release,
	.dealloc = bpf_perf_link_dealloc,
};

static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct bpf_perf_link *link;
	struct perf_event *event;
	struct file *perf_file;
	int err;

	if (attr->link_create.flags)
		return -EINVAL;

	perf_file = perf_event_get(attr->link_create.target_fd);
	if (IS_ERR(perf_file))
		return PTR_ERR(perf_file);

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_file;
	}
	bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog);
	link->perf_file = perf_file;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		goto out_put_file;
	}

	event = perf_file->private_data;
	err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie);
	if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_file;
	}
	/* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
	bpf_prog_inc(prog);

	return bpf_link_settle(&link_primer);

out_put_file:
	fput(perf_file);
	return err;
}
#endif /* CONFIG_PERF_EVENTS */
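
/* Note (illustrative): bpf_perf_link_attach() is the prime/attach/settle
 * pattern sketched earlier in action. perf_event_set_bpf_prog() is the
 * only step that can fail after the link is primed, and bpf_link_cleanup()
 * rolls back fd/id/file without touching the prog reference, which is only
 * taken once the attach has succeeded.
 */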

#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd

static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{
	struct bpf_link_primer link_primer;
	struct bpf_raw_tp_link *link;
	struct bpf_raw_event_map *btp;
	struct bpf_prog *prog;
	const char *tp_name;
	char buf[128];
	int err;

	if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
		return -EINVAL;

	prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	switch (prog->type) {
	case BPF_PROG_TYPE_TRACING:
	case BPF_PROG_TYPE_EXT:
	case BPF_PROG_TYPE_LSM:
		if (attr->raw_tracepoint.name) {
			/* The attach point for this category of programs
			 * should be specified via btf_id during program load.
			 */
			err = -EINVAL;
			goto out_put_prog;
		}
		if (prog->type == BPF_PROG_TYPE_TRACING &&
		    prog->expected_attach_type == BPF_TRACE_RAW_TP) {
			tp_name = prog->aux->attach_func_name;
			break;
		}
		err = bpf_tracing_prog_attach(prog, 0, 0);
		if (err >= 0)
			return err;
		goto out_put_prog;
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
		if (strncpy_from_user(buf,
				      u64_to_user_ptr(attr->raw_tracepoint.name),
				      sizeof(buf) - 1) < 0) {
			err = -EFAULT;
			goto out_put_prog;
		}
		buf[sizeof(buf) - 1] = 0;
		tp_name = buf;
		break;
	default:
		err = -EINVAL;
		goto out_put_prog;
	}

	btp = bpf_get_raw_tracepoint(tp_name);
	if (!btp) {
		err = -ENOENT;
		goto out_put_prog;
	}

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_btp;
	}
	bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
		      &bpf_raw_tp_link_lops, prog);
	link->btp = btp;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		goto out_put_btp;
	}

	err = bpf_probe_register(link->btp, prog);
	if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_btp;
	}

	return bpf_link_settle(&link_primer);

out_put_btp:
	bpf_put_raw_tracepoint(btp);
out_put_prog:
	bpf_prog_put(prog);
	return err;
}

static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
					     enum bpf_attach_type attach_type)
{
	switch (prog->type) {
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
	case BPF_PROG_TYPE_SK_LOOKUP:
		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (!capable(CAP_NET_ADMIN))
			/* cg-skb progs can be loaded by unpriv user.
			 * check permissions at attach time.
			 */
			return -EPERM;
		return prog->enforce_expected_attach_type &&
			prog->expected_attach_type != attach_type ?
			-EINVAL : 0;
	default:
		return 0;
	}
}

static enum bpf_prog_type
attach_type_to_prog_type(enum bpf_attach_type attach_type)
{
	switch (attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		return BPF_PROG_TYPE_CGROUP_SKB;
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_INET_SOCK_RELEASE:
	case BPF_CGROUP_INET4_POST_BIND:
	case BPF_CGROUP_INET6_POST_BIND:
		return BPF_PROG_TYPE_CGROUP_SOCK;
	case BPF_CGROUP_INET4_BIND:
	case BPF_CGROUP_INET6_BIND:
	case BPF_CGROUP_INET4_CONNECT:
	case BPF_CGROUP_INET6_CONNECT:
	case BPF_CGROUP_INET4_GETPEERNAME:
	case BPF_CGROUP_INET6_GETPEERNAME:
	case BPF_CGROUP_INET4_GETSOCKNAME:
	case BPF_CGROUP_INET6_GETSOCKNAME:
	case BPF_CGROUP_UDP4_SENDMSG:
	case BPF_CGROUP_UDP6_SENDMSG:
	case BPF_CGROUP_UDP4_RECVMSG:
	case BPF_CGROUP_UDP6_RECVMSG:
		return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
	case BPF_CGROUP_SOCK_OPS:
		return BPF_PROG_TYPE_SOCK_OPS;
	case BPF_CGROUP_DEVICE:
		return BPF_PROG_TYPE_CGROUP_DEVICE;
	case BPF_SK_MSG_VERDICT:
		return BPF_PROG_TYPE_SK_MSG;
	case BPF_SK_SKB_STREAM_PARSER:
	case BPF_SK_SKB_STREAM_VERDICT:
	case BPF_SK_SKB_VERDICT:
		return BPF_PROG_TYPE_SK_SKB;
	case BPF_LIRC_MODE2:
		return BPF_PROG_TYPE_LIRC_MODE2;
	case BPF_FLOW_DISSECTOR:
		return BPF_PROG_TYPE_FLOW_DISSECTOR;
	case BPF_CGROUP_SYSCTL:
		return BPF_PROG_TYPE_CGROUP_SYSCTL;
	case BPF_CGROUP_GETSOCKOPT:
	case BPF_CGROUP_SETSOCKOPT:
		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
	case BPF_TRACE_ITER:
		return BPF_PROG_TYPE_TRACING;
	case BPF_SK_LOOKUP:
		return BPF_PROG_TYPE_SK_LOOKUP;
	case BPF_XDP:
		return BPF_PROG_TYPE_XDP;
	default:
		return BPF_PROG_TYPE_UNSPEC;
	}
}

#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd

#define BPF_F_ATTACH_MASK \
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)

static int bpf_prog_attach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	int ret;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
		return -EINVAL;

	ptype = attach_type_to_prog_type(attr->attach_type);
	if (ptype == BPF_PROG_TYPE_UNSPEC)
		return -EINVAL;

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
		bpf_prog_put(prog);
		return -EINVAL;
	}

	switch (ptype) {
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
		ret = sock_map_get_from_fd(attr, prog);
		break;
	case BPF_PROG_TYPE_LIRC_MODE2:
		ret = lirc_prog_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
		ret = netns_bpf_prog_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_SOCK_OPS:
		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		bpf_prog_put(prog);
	return ret;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	enum bpf_prog_type ptype;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	ptype = attach_type_to_prog_type(attr->attach_type);

	switch (ptype) {
	case BPF_PROG_TYPE_SK_MSG:
	case BPF_PROG_TYPE_SK_SKB:
		return sock_map_prog_detach(attr, ptype);
	case BPF_PROG_TYPE_LIRC_MODE2:
		return lirc_prog_detach(attr);
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
		return netns_bpf_prog_detach(attr, ptype);
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_SOCK_OPS:
		return cgroup_bpf_prog_detach(attr, ptype);
	default:
		return -EINVAL;
	}
}

#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt

static int bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (CHECK_ATTR(BPF_PROG_QUERY))
		return -EINVAL;
	if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
		return -EINVAL;

	switch (attr->query.attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
	case BPF_CGROUP_INET_SOCK_RELEASE:
	case BPF_CGROUP_INET4_BIND:
	case BPF_CGROUP_INET6_BIND:
	case BPF_CGROUP_INET4_POST_BIND:
	case BPF_CGROUP_INET6_POST_BIND:
	case BPF_CGROUP_INET4_CONNECT:
	case BPF_CGROUP_INET6_CONNECT:
	case BPF_CGROUP_INET4_GETPEERNAME:
	case BPF_CGROUP_INET6_GETPEERNAME:
	case BPF_CGROUP_INET4_GETSOCKNAME:
	case BPF_CGROUP_INET6_GETSOCKNAME:
	case BPF_CGROUP_UDP4_SENDMSG:
	case BPF_CGROUP_UDP6_SENDMSG:
	case BPF_CGROUP_UDP4_RECVMSG:
	case BPF_CGROUP_UDP6_RECVMSG:
	case BPF_CGROUP_SOCK_OPS:
	case BPF_CGROUP_DEVICE:
	case BPF_CGROUP_SYSCTL:
	case BPF_CGROUP_GETSOCKOPT:
	case BPF_CGROUP_SETSOCKOPT:
		return cgroup_bpf_prog_query(attr, uattr);
	case BPF_LIRC_MODE2:
		return lirc_prog_query(attr, uattr);
	case BPF_FLOW_DISSECTOR:
	case BPF_SK_LOOKUP:
		return netns_bpf_prog_query(attr, uattr);
	default:
		return -EINVAL;
	}
}

#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu

static int bpf_prog_test_run(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	struct bpf_prog *prog;
	int ret = -ENOTSUPP;

	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
		return -EINVAL;

	if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
	    (!attr->test.ctx_size_in && attr->test.ctx_in))
		return -EINVAL;

	if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
	    (!attr->test.ctx_size_out && attr->test.ctx_out))
		return -EINVAL;

	prog = bpf_prog_get(attr->test.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->aux->ops->test_run)
		ret = prog->aux->ops->test_run(prog, attr, uattr);

	bpf_prog_put(prog);
	return ret;
}

#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id

static int bpf_obj_get_next_id(const union bpf_attr *attr,
			       union bpf_attr __user *uattr,
			       struct idr *idr,
			       spinlock_t *lock)
{
	u32 next_id = attr->start_id;
	int err = 0;

	if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	next_id++;
	spin_lock_bh(lock);
	if (!idr_get_next(idr, &next_id))
		err = -ENOENT;
	spin_unlock_bh(lock);

	if (!err)
		err = put_user(next_id, &uattr->next_id);

	return err;
}
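
/* Example (illustrative sketch, userspace side): walking all loaded
 * program IDs with the BPF_PROG_GET_NEXT_ID command serviced by the helper
 * above:
 *
 *	union bpf_attr attr;
 *
 *	memset(&attr, 0, sizeof(attr));
 *	while (!syscall(__NR_bpf, BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr))) {
 *		// attr.next_id is a live program ID; it can be turned
 *		// into an fd via BPF_PROG_GET_FD_BY_ID
 *		attr.start_id = attr.next_id;
 *	}
 *
 * The loop terminates with -ENOENT once the last ID has been visited.
 */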

struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
{
	struct bpf_map *map;

	spin_lock_bh(&map_idr_lock);
again:
	map = idr_get_next(&map_idr, id);
	if (map) {
		map = __bpf_map_inc_not_zero(map, false);
		if (IS_ERR(map)) {
			(*id)++;
			goto again;
		}
	}
	spin_unlock_bh(&map_idr_lock);

	return map;
}

struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
{
	struct bpf_prog *prog;

	spin_lock_bh(&prog_idr_lock);
again:
	prog = idr_get_next(&prog_idr, id);
	if (prog) {
		prog = bpf_prog_inc_not_zero(prog);
		if (IS_ERR(prog)) {
			(*id)++;
			goto again;
		}
	}
	spin_unlock_bh(&prog_idr_lock);

	return prog;
}

#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id

struct bpf_prog *bpf_prog_by_id(u32 id)
{
	struct bpf_prog *prog;

	if (!id)
		return ERR_PTR(-ENOENT);

	spin_lock_bh(&prog_idr_lock);
	prog = idr_find(&prog_idr, id);
	if (prog)
		prog = bpf_prog_inc_not_zero(prog);
	else
		prog = ERR_PTR(-ENOENT);
	spin_unlock_bh(&prog_idr_lock);
	return prog;
}

static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	u32 id = attr->prog_id;
	int fd;

	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	prog = bpf_prog_by_id(id);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fd = bpf_prog_new_fd(prog);
	if (fd < 0)
		bpf_prog_put(prog);

	return fd;
}

#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags

static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_map *map;
	u32 id = attr->map_id;
	int f_flags;
	int fd;

	if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
	    attr->open_flags & ~BPF_OBJ_FLAG_MASK)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	f_flags = bpf_get_file_flag(attr->open_flags);
	if (f_flags < 0)
		return f_flags;

	spin_lock_bh(&map_idr_lock);
	map = idr_find(&map_idr, id);
	if (map)
		map = __bpf_map_inc_not_zero(map, true);
	else
		map = ERR_PTR(-ENOENT);
	spin_unlock_bh(&map_idr_lock);

	if (IS_ERR(map))
		return PTR_ERR(map);

	fd = bpf_map_new_fd(map, f_flags);
	if (fd < 0)
		bpf_map_put_with_uref(map);

	return fd;
}

static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
					      unsigned long addr, u32 *off,
					      u32 *type)
{
	const struct bpf_map *map;
	int i;

	mutex_lock(&prog->aux->used_maps_mutex);
	for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
		map = prog->aux->used_maps[i];
		if (map == (void *)addr) {
			*type = BPF_PSEUDO_MAP_FD;
			goto out;
		}
		if (!map->ops->map_direct_value_meta)
			continue;
		if (!map->ops->map_direct_value_meta(map, addr, off)) {
			*type = BPF_PSEUDO_MAP_VALUE;
			goto out;
		}
	}
	map = NULL;

out:
	mutex_unlock(&prog->aux->used_maps_mutex);
	return map;
}

static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
					      const struct cred *f_cred)
{
	const struct bpf_map *map;
	struct bpf_insn *insns;
	u32 off, type;
	u64 imm;
	u8 code;
	int i;

	insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
			GFP_USER);
	if (!insns)
		return insns;

	for (i = 0; i < prog->len; i++) {
		code = insns[i].code;

		if (code == (BPF_JMP | BPF_TAIL_CALL)) {
			insns[i].code = BPF_JMP | BPF_CALL;
			insns[i].imm = BPF_FUNC_tail_call;
			/* fall-through */
		}
		if (code == (BPF_JMP | BPF_CALL) ||
		    code == (BPF_JMP | BPF_CALL_ARGS)) {
			if (code == (BPF_JMP | BPF_CALL_ARGS))
				insns[i].code = BPF_JMP | BPF_CALL;
			if (!bpf_dump_raw_ok(f_cred))
				insns[i].imm = 0;
			continue;
		}
		if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) {
			insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM;
			continue;
		}

		if (code != (BPF_LD | BPF_IMM | BPF_DW))
			continue;

		imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
		map = bpf_map_from_imm(prog, imm, &off, &type);
		if (map) {
			insns[i].src_reg = type;
			insns[i].imm = map->id;
			insns[i + 1].imm = off;
			continue;
		}
	}

	return insns;
}
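
/* Illustration (not from the original source; the address below is made
 * up): when a program is dumped, a verifier-rewritten 16-byte
 * BPF_LD_IMM64 holding a kernel map pointer is translated back to its
 * user-visible form, e.g.:
 *
 *	in kernel:  BPF_LD | BPF_IMM | BPF_DW, imm64 = 0xffff888812345000
 *	in dump:    src_reg = BPF_PSEUDO_MAP_FD, imm = <map id>, next imm = 0
 *
 * Map addresses are thus always rewritten to IDs, while helper call
 * targets are only kept raw when bpf_dump_raw_ok() permits it.
 */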
3558
c454a46b
MKL
3559static int set_info_rec_size(struct bpf_prog_info *info)
3560{
3561 /*
3562 * Ensure info.*_rec_size is the same as kernel expected size
3563 *
3564 * or
3565 *
3566 * Only allow zero *_rec_size if both _rec_size and _cnt are
3567 * zero. In this case, the kernel will set the expected
3568 * _rec_size back to the info.
3569 */
3570
11d8b82d 3571 if ((info->nr_func_info || info->func_info_rec_size) &&
c454a46b
MKL
3572 info->func_info_rec_size != sizeof(struct bpf_func_info))
3573 return -EINVAL;
3574
11d8b82d 3575 if ((info->nr_line_info || info->line_info_rec_size) &&
c454a46b
MKL
3576 info->line_info_rec_size != sizeof(struct bpf_line_info))
3577 return -EINVAL;
3578
11d8b82d 3579 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
c454a46b
MKL
3580 info->jited_line_info_rec_size != sizeof(__u64))
3581 return -EINVAL;
3582
3583 info->func_info_rec_size = sizeof(struct bpf_func_info);
3584 info->line_info_rec_size = sizeof(struct bpf_line_info);
3585 info->jited_line_info_rec_size = sizeof(__u64);
3586
3587 return 0;
3588}
3589
63960260
KC
3590static int bpf_prog_get_info_by_fd(struct file *file,
3591 struct bpf_prog *prog,
1e270976
MKL
3592 const union bpf_attr *attr,
3593 union bpf_attr __user *uattr)
3594{
3595 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
5c6f2588 3596 struct bpf_prog_info info;
1e270976 3597 u32 info_len = attr->info.info_len;
5f8f8b93 3598 struct bpf_prog_stats stats;
1e270976
MKL
3599 char __user *uinsns;
3600 u32 ulen;
3601 int err;
3602
af2ac3e1 3603 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
1e270976
MKL
3604 if (err)
3605 return err;
3606 info_len = min_t(u32, sizeof(info), info_len);
3607
5c6f2588 3608 memset(&info, 0, sizeof(info));
1e270976 3609 if (copy_from_user(&info, uinfo, info_len))
89b09689 3610 return -EFAULT;
1e270976
MKL
3611
3612 info.type = prog->type;
3613 info.id = prog->aux->id;
cb4d2b3f
MKL
3614 info.load_time = prog->aux->load_time;
3615 info.created_by_uid = from_kuid_munged(current_user_ns(),
3616 prog->aux->user->uid);
b85fab0e 3617 info.gpl_compatible = prog->gpl_compatible;
1e270976
MKL
3618
3619 memcpy(info.tag, prog->tag, sizeof(prog->tag));
cb4d2b3f
MKL
3620 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
3621
984fe94f 3622 mutex_lock(&prog->aux->used_maps_mutex);
cb4d2b3f
MKL
3623 ulen = info.nr_map_ids;
3624 info.nr_map_ids = prog->aux->used_map_cnt;
3625 ulen = min_t(u32, info.nr_map_ids, ulen);
3626 if (ulen) {
721e08da 3627 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
cb4d2b3f
MKL
3628 u32 i;
3629
3630 for (i = 0; i < ulen; i++)
3631 if (put_user(prog->aux->used_maps[i]->id,
984fe94f
YZ
3632 &user_map_ids[i])) {
3633 mutex_unlock(&prog->aux->used_maps_mutex);
cb4d2b3f 3634 return -EFAULT;
984fe94f 3635 }
cb4d2b3f 3636 }
984fe94f 3637 mutex_unlock(&prog->aux->used_maps_mutex);
1e270976 3638
c454a46b
MKL
3639 err = set_info_rec_size(&info);
3640 if (err)
3641 return err;
7337224f 3642
5f8f8b93
AS
3643 bpf_prog_get_stats(prog, &stats);
3644 info.run_time_ns = stats.nsecs;
3645 info.run_cnt = stats.cnt;
9ed9e9ba 3646 info.recursion_misses = stats.misses;
5f8f8b93 3647
2c78ee89 3648 if (!bpf_capable()) {
1e270976
MKL
3649 info.jited_prog_len = 0;
3650 info.xlated_prog_len = 0;
dbecd738 3651 info.nr_jited_ksyms = 0;
28c2fae7 3652 info.nr_jited_func_lens = 0;
11d8b82d
YS
3653 info.nr_func_info = 0;
3654 info.nr_line_info = 0;
3655 info.nr_jited_line_info = 0;
1e270976
MKL
3656 goto done;
3657 }
3658
1e270976 3659 ulen = info.xlated_prog_len;
9975a54b 3660 info.xlated_prog_len = bpf_prog_insn_size(prog);
1e270976 3661 if (info.xlated_prog_len && ulen) {
7105e828
DB
3662 struct bpf_insn *insns_sanitized;
3663 bool fault;
3664
63960260 3665 if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
7105e828
DB
3666 info.xlated_prog_insns = 0;
3667 goto done;
3668 }
63960260 3669 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
7105e828
DB
3670 if (!insns_sanitized)
3671 return -ENOMEM;
1e270976
MKL
3672 uinsns = u64_to_user_ptr(info.xlated_prog_insns);
3673 ulen = min_t(u32, info.xlated_prog_len, ulen);
7105e828
DB
3674 fault = copy_to_user(uinsns, insns_sanitized, ulen);
3675 kfree(insns_sanitized);
3676 if (fault)
1e270976
MKL
3677 return -EFAULT;
3678 }
3679
675fc275
JK
3680 if (bpf_prog_is_dev_bound(prog->aux)) {
3681 err = bpf_prog_offload_info_fill(&info, prog);
3682 if (err)
3683 return err;
fcfb126d
JW
3684 goto done;
3685 }
3686
 3687	/* NOTE: the following code is skipped for offloaded programs;
 3688	 * bpf_prog_offload_info_fill() is where the corresponding fields
 3689	 * are filled for offload.
 3690	 */
3691 ulen = info.jited_prog_len;
4d56a76e
SD
3692 if (prog->aux->func_cnt) {
3693 u32 i;
3694
3695 info.jited_prog_len = 0;
3696 for (i = 0; i < prog->aux->func_cnt; i++)
3697 info.jited_prog_len += prog->aux->func[i]->jited_len;
3698 } else {
3699 info.jited_prog_len = prog->jited_len;
3700 }
3701
fcfb126d 3702 if (info.jited_prog_len && ulen) {
63960260 3703 if (bpf_dump_raw_ok(file->f_cred)) {
fcfb126d
JW
3704 uinsns = u64_to_user_ptr(info.jited_prog_insns);
3705 ulen = min_t(u32, info.jited_prog_len, ulen);
4d56a76e
SD
3706
3707 /* for multi-function programs, copy the JITed
3708 * instructions for all the functions
3709 */
3710 if (prog->aux->func_cnt) {
3711 u32 len, free, i;
3712 u8 *img;
3713
3714 free = ulen;
3715 for (i = 0; i < prog->aux->func_cnt; i++) {
3716 len = prog->aux->func[i]->jited_len;
3717 len = min_t(u32, len, free);
3718 img = (u8 *) prog->aux->func[i]->bpf_func;
3719 if (copy_to_user(uinsns, img, len))
3720 return -EFAULT;
3721 uinsns += len;
3722 free -= len;
3723 if (!free)
3724 break;
3725 }
3726 } else {
3727 if (copy_to_user(uinsns, prog->bpf_func, ulen))
3728 return -EFAULT;
3729 }
fcfb126d
JW
3730 } else {
3731 info.jited_prog_insns = 0;
3732 }
675fc275
JK
3733 }
3734
dbecd738 3735 ulen = info.nr_jited_ksyms;
ff1889fc 3736 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
7a5725dd 3737 if (ulen) {
63960260 3738 if (bpf_dump_raw_ok(file->f_cred)) {
ff1889fc 3739 unsigned long ksym_addr;
dbecd738 3740 u64 __user *user_ksyms;
dbecd738
SD
3741 u32 i;
3742
3743 /* copy the address of the kernel symbol
3744 * corresponding to each function
3745 */
3746 ulen = min_t(u32, info.nr_jited_ksyms, ulen);
3747 user_ksyms = u64_to_user_ptr(info.jited_ksyms);
ff1889fc
SL
3748 if (prog->aux->func_cnt) {
3749 for (i = 0; i < ulen; i++) {
3750 ksym_addr = (unsigned long)
3751 prog->aux->func[i]->bpf_func;
3752 if (put_user((u64) ksym_addr,
3753 &user_ksyms[i]))
3754 return -EFAULT;
3755 }
3756 } else {
3757 ksym_addr = (unsigned long) prog->bpf_func;
3758 if (put_user((u64) ksym_addr, &user_ksyms[0]))
dbecd738
SD
3759 return -EFAULT;
3760 }
3761 } else {
3762 info.jited_ksyms = 0;
3763 }
3764 }
3765
815581c1 3766 ulen = info.nr_jited_func_lens;
ff1889fc 3767 info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
7a5725dd 3768 if (ulen) {
63960260 3769 if (bpf_dump_raw_ok(file->f_cred)) {
815581c1
SD
3770 u32 __user *user_lens;
3771 u32 func_len, i;
3772
3773 /* copy the JITed image lengths for each function */
3774 ulen = min_t(u32, info.nr_jited_func_lens, ulen);
3775 user_lens = u64_to_user_ptr(info.jited_func_lens);
ff1889fc
SL
3776 if (prog->aux->func_cnt) {
3777 for (i = 0; i < ulen; i++) {
3778 func_len =
3779 prog->aux->func[i]->jited_len;
3780 if (put_user(func_len, &user_lens[i]))
3781 return -EFAULT;
3782 }
3783 } else {
3784 func_len = prog->jited_len;
3785 if (put_user(func_len, &user_lens[0]))
815581c1
SD
3786 return -EFAULT;
3787 }
3788 } else {
3789 info.jited_func_lens = 0;
3790 }
3791 }
3792
7337224f 3793 if (prog->aux->btf)
22dc4a0f 3794 info.btf_id = btf_obj_id(prog->aux->btf);
838e9690 3795
11d8b82d
YS
3796 ulen = info.nr_func_info;
3797 info.nr_func_info = prog->aux->func_info_cnt;
3798 if (info.nr_func_info && ulen) {
9e794163 3799 char __user *user_finfo;
7337224f 3800
9e794163
MKL
3801 user_finfo = u64_to_user_ptr(info.func_info);
3802 ulen = min_t(u32, info.nr_func_info, ulen);
3803 if (copy_to_user(user_finfo, prog->aux->func_info,
3804 info.func_info_rec_size * ulen))
3805 return -EFAULT;
838e9690
YS
3806 }
3807
11d8b82d
YS
3808 ulen = info.nr_line_info;
3809 info.nr_line_info = prog->aux->nr_linfo;
3810 if (info.nr_line_info && ulen) {
9e794163 3811 __u8 __user *user_linfo;
c454a46b 3812
9e794163
MKL
3813 user_linfo = u64_to_user_ptr(info.line_info);
3814 ulen = min_t(u32, info.nr_line_info, ulen);
3815 if (copy_to_user(user_linfo, prog->aux->linfo,
3816 info.line_info_rec_size * ulen))
3817 return -EFAULT;
c454a46b
MKL
3818 }
3819
11d8b82d 3820 ulen = info.nr_jited_line_info;
c454a46b 3821 if (prog->aux->jited_linfo)
11d8b82d 3822 info.nr_jited_line_info = prog->aux->nr_linfo;
c454a46b 3823 else
11d8b82d
YS
3824 info.nr_jited_line_info = 0;
3825 if (info.nr_jited_line_info && ulen) {
63960260 3826 if (bpf_dump_raw_ok(file->f_cred)) {
c454a46b
MKL
3827 __u64 __user *user_linfo;
3828 u32 i;
3829
3830 user_linfo = u64_to_user_ptr(info.jited_line_info);
11d8b82d 3831 ulen = min_t(u32, info.nr_jited_line_info, ulen);
c454a46b
MKL
3832 for (i = 0; i < ulen; i++) {
3833 if (put_user((__u64)(long)prog->aux->jited_linfo[i],
3834 &user_linfo[i]))
3835 return -EFAULT;
3836 }
3837 } else {
3838 info.jited_line_info = 0;
3839 }
3840 }
3841
c872bdb3
SL
3842 ulen = info.nr_prog_tags;
3843 info.nr_prog_tags = prog->aux->func_cnt ? : 1;
3844 if (ulen) {
3845 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
3846 u32 i;
3847
3848 user_prog_tags = u64_to_user_ptr(info.prog_tags);
3849 ulen = min_t(u32, info.nr_prog_tags, ulen);
3850 if (prog->aux->func_cnt) {
3851 for (i = 0; i < ulen; i++) {
3852 if (copy_to_user(user_prog_tags[i],
3853 prog->aux->func[i]->tag,
3854 BPF_TAG_SIZE))
3855 return -EFAULT;
3856 }
3857 } else {
3858 if (copy_to_user(user_prog_tags[0],
3859 prog->tag, BPF_TAG_SIZE))
3860 return -EFAULT;
3861 }
3862 }
3863
1e270976
MKL
3864done:
3865 if (copy_to_user(uinfo, &info, info_len) ||
3866 put_user(info_len, &uattr->info.info_len))
3867 return -EFAULT;
3868
3869 return 0;
3870}
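
The variable-length arrays filled above (map_ids, func_info, line_info, and so on) all follow the same two-pass pattern from userspace: one call to learn the count, a second call with a buffer sized to match. A sketch for map_ids, under the same assumptions as the probing snippet earlier (raw syscall, hypothetical prog_fd):

#include <linux/bpf.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int dump_map_ids(int prog_fd)
{
	struct bpf_prog_info info;
	union bpf_attr attr;
	__u32 *ids, nr, i;

	/* pass 1: learn how many maps the program uses */
	memset(&info, 0, sizeof(info));
	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = prog_fd;
	attr.info.info_len = sizeof(info);
	attr.info.info = (__u64)(unsigned long)&info;
	if (sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
		return -1;

	nr = info.nr_map_ids;
	ids = calloc(nr, sizeof(*ids));
	if (nr && !ids)
		return -1;

	/* pass 2: the kernel copies min(nr_map_ids, used_map_cnt) ids */
	memset(&info, 0, sizeof(info));
	info.nr_map_ids = nr;
	info.map_ids = (__u64)(unsigned long)ids;
	if (sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr))) {
		free(ids);
		return -1;
	}

	for (i = 0; i < nr && i < info.nr_map_ids; i++)
		printf("map id %u\n", ids[i]);
	free(ids);
	return 0;
}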
3871
63960260
KC
3872static int bpf_map_get_info_by_fd(struct file *file,
3873 struct bpf_map *map,
1e270976
MKL
3874 const union bpf_attr *attr,
3875 union bpf_attr __user *uattr)
3876{
3877 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
5c6f2588 3878 struct bpf_map_info info;
1e270976
MKL
3879 u32 info_len = attr->info.info_len;
3880 int err;
3881
af2ac3e1 3882 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
1e270976
MKL
3883 if (err)
3884 return err;
3885 info_len = min_t(u32, sizeof(info), info_len);
3886
5c6f2588 3887 memset(&info, 0, sizeof(info));
1e270976
MKL
3888 info.type = map->map_type;
3889 info.id = map->id;
3890 info.key_size = map->key_size;
3891 info.value_size = map->value_size;
3892 info.max_entries = map->max_entries;
3893 info.map_flags = map->map_flags;
ad5b177b 3894 memcpy(info.name, map->name, sizeof(map->name));
1e270976 3895
78958fca 3896 if (map->btf) {
22dc4a0f 3897 info.btf_id = btf_obj_id(map->btf);
9b2cf328
MKL
3898 info.btf_key_type_id = map->btf_key_type_id;
3899 info.btf_value_type_id = map->btf_value_type_id;
78958fca 3900 }
85d33df3 3901 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
78958fca 3902
52775b33
JK
3903 if (bpf_map_is_dev_bound(map)) {
3904 err = bpf_map_offload_info_fill(&info, map);
3905 if (err)
3906 return err;
3907 }
3908
1e270976
MKL
3909 if (copy_to_user(uinfo, &info, info_len) ||
3910 put_user(info_len, &uattr->info.info_len))
3911 return -EFAULT;
3912
3913 return 0;
3914}
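
The map variant is simpler: every field is fixed-size, so one call fills the whole structure. A sketch along the same lines, with map_fd assumed to be an open map fd:

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int print_map_info(int map_fd)
{
	struct bpf_map_info info;
	union bpf_attr attr;

	memset(&info, 0, sizeof(info));
	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = map_fd;
	attr.info.info_len = sizeof(info);
	attr.info.info = (__u64)(unsigned long)&info;

	if (sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
		return -1;

	printf("%s: type=%u id=%u key=%uB value=%uB max_entries=%u\n",
	       info.name, info.type, info.id, info.key_size,
	       info.value_size, info.max_entries);
	return 0;
}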
3915
63960260
KC
3916static int bpf_btf_get_info_by_fd(struct file *file,
3917 struct btf *btf,
62dab84c
MKL
3918 const union bpf_attr *attr,
3919 union bpf_attr __user *uattr)
3920{
3921 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3922 u32 info_len = attr->info.info_len;
3923 int err;
3924
af2ac3e1 3925 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len);
62dab84c
MKL
3926 if (err)
3927 return err;
3928
3929 return btf_get_info_by_fd(btf, attr, uattr);
3930}
3931
63960260
KC
3932static int bpf_link_get_info_by_fd(struct file *file,
3933 struct bpf_link *link,
f2e10bff
AN
3934 const union bpf_attr *attr,
3935 union bpf_attr __user *uattr)
3936{
3937 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3938 struct bpf_link_info info;
3939 u32 info_len = attr->info.info_len;
3940 int err;
3941
af2ac3e1 3942 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
f2e10bff
AN
3943 if (err)
3944 return err;
3945 info_len = min_t(u32, sizeof(info), info_len);
3946
3947 memset(&info, 0, sizeof(info));
3948 if (copy_from_user(&info, uinfo, info_len))
3949 return -EFAULT;
3950
3951 info.type = link->type;
3952 info.id = link->id;
3953 info.prog_id = link->prog->aux->id;
3954
3955 if (link->ops->fill_link_info) {
3956 err = link->ops->fill_link_info(link, &info);
3957 if (err)
3958 return err;
3959 }
3960
3961 if (copy_to_user(uinfo, &info, info_len) ||
3962 put_user(info_len, &uattr->info.info_len))
3963 return -EFAULT;
3964
3965 return 0;
3966}
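
Link fds answer to the same command. Note the copy_from_user() above: unlike the map path, the user's info is read in first so that fill_link_info() implementations can honor user-supplied buffer pointers (e.g. for a raw_tracepoint name). A minimal sketch that reads only the fixed header fields:

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int print_link_info(int link_fd)
{
	struct bpf_link_info info;
	union bpf_attr attr;

	memset(&info, 0, sizeof(info));
	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = link_fd;
	attr.info.info_len = sizeof(info);
	attr.info.info = (__u64)(unsigned long)&info;

	if (sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
		return -1;

	printf("link id=%u type=%u prog_id=%u\n",
	       info.id, info.type, info.prog_id);
	return 0;
}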
3967
3968
1e270976
MKL
3969#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
3970
3971static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
3972 union bpf_attr __user *uattr)
3973{
3974 int ufd = attr->info.bpf_fd;
3975 struct fd f;
3976 int err;
3977
3978 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
3979 return -EINVAL;
3980
3981 f = fdget(ufd);
3982 if (!f.file)
3983 return -EBADFD;
3984
3985 if (f.file->f_op == &bpf_prog_fops)
63960260 3986 err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr,
1e270976
MKL
3987 uattr);
3988 else if (f.file->f_op == &bpf_map_fops)
63960260 3989 err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr,
1e270976 3990 uattr);
60197cfb 3991 else if (f.file->f_op == &btf_fops)
63960260 3992 err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
f2e10bff 3993 else if (f.file->f_op == &bpf_link_fops)
63960260 3994 err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
f2e10bff 3995 attr, uattr);
1e270976
MKL
3996 else
3997 err = -EINVAL;
3998
3999 fdput(f);
4000 return err;
4001}
4002
f56a653c
MKL
4003#define BPF_BTF_LOAD_LAST_FIELD btf_log_level
4004
c571bd75 4005static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr)
f56a653c
MKL
4006{
4007 if (CHECK_ATTR(BPF_BTF_LOAD))
4008 return -EINVAL;
4009
2c78ee89 4010 if (!bpf_capable())
f56a653c
MKL
4011 return -EPERM;
4012
c571bd75 4013 return btf_new_fd(attr, uattr);
f56a653c
MKL
4014}
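
From userspace the attribute carries a raw .BTF blob plus an optional verifier log buffer. A hedged sketch; btf_data/btf_size are assumed to hold a valid BTF section (for instance extracted from an ELF object), and the log is only interesting when loading fails or btf_log_level asks for output:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int load_btf(const void *btf_data, __u32 btf_size)
{
	static char log[64 * 1024];
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.btf = (__u64)(unsigned long)btf_data;
	attr.btf_size = btf_size;
	attr.btf_log_buf = (__u64)(unsigned long)log;
	attr.btf_log_size = sizeof(log);
	attr.btf_log_level = 1;

	/* on success the return value is a BTF fd */
	return sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
}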
4015
78958fca
MKL
4016#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
4017
4018static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
4019{
4020 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
4021 return -EINVAL;
4022
4023 if (!capable(CAP_SYS_ADMIN))
4024 return -EPERM;
4025
4026 return btf_get_fd_by_id(attr->btf_id);
4027}
4028
41bdc4b4
YS
4029static int bpf_task_fd_query_copy(const union bpf_attr *attr,
4030 union bpf_attr __user *uattr,
4031 u32 prog_id, u32 fd_type,
4032 const char *buf, u64 probe_offset,
4033 u64 probe_addr)
4034{
4035 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
4036 u32 len = buf ? strlen(buf) : 0, input_len;
4037 int err = 0;
4038
4039 if (put_user(len, &uattr->task_fd_query.buf_len))
4040 return -EFAULT;
4041 input_len = attr->task_fd_query.buf_len;
4042 if (input_len && ubuf) {
4043 if (!len) {
4044 /* nothing to copy, just make ubuf NULL terminated */
4045 char zero = '\0';
4046
4047 if (put_user(zero, ubuf))
4048 return -EFAULT;
4049 } else if (input_len >= len + 1) {
4050 /* ubuf can hold the string with NULL terminator */
4051 if (copy_to_user(ubuf, buf, len + 1))
4052 return -EFAULT;
4053 } else {
4054 /* ubuf cannot hold the string with NULL terminator,
4055 * do a partial copy with NULL terminator.
4056 */
4057 char zero = '\0';
4058
4059 err = -ENOSPC;
4060 if (copy_to_user(ubuf, buf, input_len - 1))
4061 return -EFAULT;
4062 if (put_user(zero, ubuf + input_len - 1))
4063 return -EFAULT;
4064 }
4065 }
4066
4067 if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
4068 put_user(fd_type, &uattr->task_fd_query.fd_type) ||
4069 put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
4070 put_user(probe_addr, &uattr->task_fd_query.probe_addr))
4071 return -EFAULT;
4072
4073 return err;
4074}
4075
4076#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
4077
4078static int bpf_task_fd_query(const union bpf_attr *attr,
4079 union bpf_attr __user *uattr)
4080{
4081 pid_t pid = attr->task_fd_query.pid;
4082 u32 fd = attr->task_fd_query.fd;
4083 const struct perf_event *event;
41bdc4b4
YS
4084 struct task_struct *task;
4085 struct file *file;
4086 int err;
4087
4088 if (CHECK_ATTR(BPF_TASK_FD_QUERY))
4089 return -EINVAL;
4090
4091 if (!capable(CAP_SYS_ADMIN))
4092 return -EPERM;
4093
4094 if (attr->task_fd_query.flags != 0)
4095 return -EINVAL;
4096
4097 task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
4098 if (!task)
4099 return -ENOENT;
4100
41bdc4b4 4101 err = 0;
b48845af
EB
4102 file = fget_task(task, fd);
4103 put_task_struct(task);
41bdc4b4 4104 if (!file)
b48845af 4105 return -EBADF;
41bdc4b4 4106
70ed506c
AN
4107 if (file->f_op == &bpf_link_fops) {
4108 struct bpf_link *link = file->private_data;
41bdc4b4 4109
a3b80e10 4110 if (link->ops == &bpf_raw_tp_link_lops) {
70ed506c
AN
4111 struct bpf_raw_tp_link *raw_tp =
4112 container_of(link, struct bpf_raw_tp_link, link);
4113 struct bpf_raw_event_map *btp = raw_tp->btp;
4114
4115 err = bpf_task_fd_query_copy(attr, uattr,
4116 raw_tp->link.prog->aux->id,
4117 BPF_FD_TYPE_RAW_TRACEPOINT,
4118 btp->tp->name, 0, 0);
4119 goto put_file;
4120 }
4121 goto out_not_supp;
41bdc4b4
YS
4122 }
4123
4124 event = perf_get_event(file);
4125 if (!IS_ERR(event)) {
4126 u64 probe_offset, probe_addr;
4127 u32 prog_id, fd_type;
4128 const char *buf;
4129
4130 err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
4131 &buf, &probe_offset,
4132 &probe_addr);
4133 if (!err)
4134 err = bpf_task_fd_query_copy(attr, uattr, prog_id,
4135 fd_type, buf,
4136 probe_offset,
4137 probe_addr);
4138 goto put_file;
4139 }
4140
70ed506c 4141out_not_supp:
41bdc4b4
YS
4142 err = -ENOTSUPP;
4143put_file:
4144 fput(file);
41bdc4b4
YS
4145 return err;
4146}
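
A sketch of the caller's side: asking which program hangs off a perf event fd in some process. pid and target_fd are assumed to be valid; on success the answers come back through the same attr that was passed in, plus the name buffer (an -ENOSPC return still leaves a truncated, NUL-terminated name behind):

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int query_perf_fd(pid_t pid, int target_fd)
{
	union bpf_attr attr;
	char buf[256];

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = target_fd;
	attr.task_fd_query.buf = (__u64)(unsigned long)buf;
	attr.task_fd_query.buf_len = sizeof(buf);

	if (sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)))
		return -1;

	printf("prog_id=%u fd_type=%u name=%s offset=%llu\n",
	       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type,
	       buf, (unsigned long long)attr.task_fd_query.probe_offset);
	return 0;
}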
4147
cb4d03ab
BV
4148#define BPF_MAP_BATCH_LAST_FIELD batch.flags
4149
4150#define BPF_DO_BATCH(fn) \
4151 do { \
4152 if (!fn) { \
4153 err = -ENOTSUPP; \
4154 goto err_put; \
4155 } \
4156 err = fn(map, attr, uattr); \
4157 } while (0)
4158
4159static int bpf_map_do_batch(const union bpf_attr *attr,
4160 union bpf_attr __user *uattr,
4161 int cmd)
4162{
e887677d
DB
4163 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH ||
4164 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
4165 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
cb4d03ab
BV
4166 struct bpf_map *map;
4167 int err, ufd;
4168 struct fd f;
4169
4170 if (CHECK_ATTR(BPF_MAP_BATCH))
4171 return -EINVAL;
4172
4173 ufd = attr->batch.map_fd;
4174 f = fdget(ufd);
4175 map = __bpf_map_get(f);
4176 if (IS_ERR(map))
4177 return PTR_ERR(map);
e887677d
DB
4178 if (has_write)
4179 bpf_map_write_active_inc(map);
4180 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
cb4d03ab
BV
4181 err = -EPERM;
4182 goto err_put;
4183 }
e887677d 4184 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
cb4d03ab
BV
4185 err = -EPERM;
4186 goto err_put;
4187 }
4188
4189 if (cmd == BPF_MAP_LOOKUP_BATCH)
4190 BPF_DO_BATCH(map->ops->map_lookup_batch);
05799638
YS
4191 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
4192 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
aa2e93b8
BV
4193 else if (cmd == BPF_MAP_UPDATE_BATCH)
4194 BPF_DO_BATCH(map->ops->map_update_batch);
4195 else
4196 BPF_DO_BATCH(map->ops->map_delete_batch);
cb4d03ab 4197err_put:
e887677d
DB
4198 if (has_write)
4199 bpf_map_write_active_dec(map);
cb4d03ab
BV
4200 fdput(f);
4201 return err;
4202}
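
From userspace the batch protocol is a cursor loop: in_batch points at the position to resume from (NULL on the first call), out_batch receives where the kernel stopped, and count is the buffer capacity on input and the number of elements actually copied on output. A sketch that drains a hash map, assuming 4-byte keys and 8-byte values; for hash maps the cursor happens to be a __u32 bucket index:

#include <errno.h>
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

#define BATCH_SZ 64

static int dump_map_batched(int map_fd)
{
	__u32 keys[BATCH_SZ], cur_in = 0, cur_out = 0;
	__u64 vals[BATCH_SZ];
	union bpf_attr attr;
	int err, first = 1;

	for (;;) {
		memset(&attr, 0, sizeof(attr));
		attr.batch.map_fd = map_fd;
		attr.batch.keys = (__u64)(unsigned long)keys;
		attr.batch.values = (__u64)(unsigned long)vals;
		attr.batch.count = BATCH_SZ;
		attr.batch.in_batch = first ? 0 : (__u64)(unsigned long)&cur_in;
		attr.batch.out_batch = (__u64)(unsigned long)&cur_out;

		err = sys_bpf(BPF_MAP_LOOKUP_BATCH, &attr, sizeof(attr));
		/* attr.batch.count pairs are valid here, even on the
		 * final, partial batch */
		if (err)
			return errno == ENOENT ? 0 : -1;	/* ENOENT: done */
		cur_in = cur_out;	/* resume where the kernel stopped */
		first = 0;
	}
}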
4203
af2ac3e1
AS
4204static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
4205 struct bpf_prog *prog)
de4e05ca 4206{
4a1e7c0c
THJ
4207 if (attr->link_create.attach_type != prog->expected_attach_type)
4208 return -EINVAL;
de4e05ca 4209
4a1e7c0c 4210 if (prog->expected_attach_type == BPF_TRACE_ITER)
af2ac3e1 4211 return bpf_iter_link_attach(attr, uattr, prog);
4a1e7c0c
THJ
4212 else if (prog->type == BPF_PROG_TYPE_EXT)
4213 return bpf_tracing_prog_attach(prog,
4214 attr->link_create.target_fd,
4215 attr->link_create.target_btf_id);
de4e05ca
YS
4216 return -EINVAL;
4217}
4218
5e7b3020 4219#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
af2ac3e1 4220static int link_create(union bpf_attr *attr, bpfptr_t uattr)
af6eea57
AN
4221{
4222 enum bpf_prog_type ptype;
4223 struct bpf_prog *prog;
4224 int ret;
4225
af6eea57
AN
4226 if (CHECK_ATTR(BPF_LINK_CREATE))
4227 return -EINVAL;
4228
4a1e7c0c 4229 prog = bpf_prog_get(attr->link_create.prog_fd);
af6eea57
AN
4230 if (IS_ERR(prog))
4231 return PTR_ERR(prog);
4232
4233 ret = bpf_prog_attach_check_attach_type(prog,
4234 attr->link_create.attach_type);
4235 if (ret)
4a1e7c0c
THJ
4236 goto out;
4237
b89fbfbb
AN
4238 switch (prog->type) {
4239 case BPF_PROG_TYPE_EXT:
af2ac3e1 4240 ret = tracing_bpf_link_attach(attr, uattr, prog);
4a1e7c0c 4241 goto out;
b89fbfbb
AN
4242 case BPF_PROG_TYPE_PERF_EVENT:
4243 case BPF_PROG_TYPE_KPROBE:
4244 case BPF_PROG_TYPE_TRACEPOINT:
4245 if (attr->link_create.attach_type != BPF_PERF_EVENT) {
4246 ret = -EINVAL;
4247 goto out;
4248 }
4249 ptype = prog->type;
4250 break;
4251 default:
4252 ptype = attach_type_to_prog_type(attr->link_create.attach_type);
4253 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
4254 ret = -EINVAL;
4255 goto out;
4256 }
4257 break;
4a1e7c0c 4258 }
af6eea57
AN
4259
4260 switch (ptype) {
4261 case BPF_PROG_TYPE_CGROUP_SKB:
4262 case BPF_PROG_TYPE_CGROUP_SOCK:
4263 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
4264 case BPF_PROG_TYPE_SOCK_OPS:
4265 case BPF_PROG_TYPE_CGROUP_DEVICE:
4266 case BPF_PROG_TYPE_CGROUP_SYSCTL:
4267 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4268 ret = cgroup_bpf_link_attach(attr, prog);
4269 break;
de4e05ca 4270 case BPF_PROG_TYPE_TRACING:
af2ac3e1 4271 ret = tracing_bpf_link_attach(attr, uattr, prog);
de4e05ca 4272 break;
7f045a49 4273 case BPF_PROG_TYPE_FLOW_DISSECTOR:
e9ddbb77 4274 case BPF_PROG_TYPE_SK_LOOKUP:
7f045a49
JS
4275 ret = netns_bpf_link_create(attr, prog);
4276 break;
310ad797 4277#ifdef CONFIG_NET
aa8d3a71
AN
4278 case BPF_PROG_TYPE_XDP:
4279 ret = bpf_xdp_link_attach(attr, prog);
4280 break;
b89fbfbb
AN
4281#endif
4282#ifdef CONFIG_PERF_EVENTS
4283 case BPF_PROG_TYPE_PERF_EVENT:
4284 case BPF_PROG_TYPE_TRACEPOINT:
4285 case BPF_PROG_TYPE_KPROBE:
4286 ret = bpf_perf_link_attach(attr, prog);
4287 break;
310ad797 4288#endif
af6eea57
AN
4289 default:
4290 ret = -EINVAL;
4291 }
4292
4a1e7c0c 4293out:
af6eea57
AN
4294 if (ret < 0)
4295 bpf_prog_put(prog);
4296 return ret;
4297}
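
A sketch of the simplest link_create user: attaching a cgroup program. prog_fd is assumed to be a BPF_PROG_TYPE_CGROUP_SKB program loaded with a matching expected_attach_type, and cgroup_fd an open fd of the target cgroup directory; the attachment lives exactly as long as the returned link fd (or a bpffs pin of it):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int attach_cgroup_link(int prog_fd, int cgroup_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.target_fd = cgroup_fd;
	attr.link_create.attach_type = BPF_CGROUP_INET_INGRESS;

	/* on success the return value is the new link fd */
	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
}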
4298
0c991ebc
AN
4299#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
4300
4301static int link_update(union bpf_attr *attr)
4302{
4303 struct bpf_prog *old_prog = NULL, *new_prog;
4304 struct bpf_link *link;
4305 u32 flags;
4306 int ret;
4307
0c991ebc
AN
4308 if (CHECK_ATTR(BPF_LINK_UPDATE))
4309 return -EINVAL;
4310
4311 flags = attr->link_update.flags;
4312 if (flags & ~BPF_F_REPLACE)
4313 return -EINVAL;
4314
4315 link = bpf_link_get_from_fd(attr->link_update.link_fd);
4316 if (IS_ERR(link))
4317 return PTR_ERR(link);
4318
4319 new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
4adb7a4a
AN
4320 if (IS_ERR(new_prog)) {
4321 ret = PTR_ERR(new_prog);
4322 goto out_put_link;
4323 }
0c991ebc
AN
4324
4325 if (flags & BPF_F_REPLACE) {
4326 old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
4327 if (IS_ERR(old_prog)) {
4328 ret = PTR_ERR(old_prog);
4329 old_prog = NULL;
4330 goto out_put_progs;
4331 }
4adb7a4a
AN
4332 } else if (attr->link_update.old_prog_fd) {
4333 ret = -EINVAL;
4334 goto out_put_progs;
0c991ebc
AN
4335 }
4336
f9d04127
AN
4337 if (link->ops->update_prog)
4338 ret = link->ops->update_prog(link, new_prog, old_prog);
4339 else
fe537393 4340 ret = -EINVAL;
0c991ebc
AN
4341
4342out_put_progs:
4343 if (old_prog)
4344 bpf_prog_put(old_prog);
4345 if (ret)
4346 bpf_prog_put(new_prog);
4adb7a4a
AN
4347out_put_link:
4348 bpf_link_put(link);
0c991ebc
AN
4349 return ret;
4350}
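
A sketch of the compare-and-swap replacement this enables: with BPF_F_REPLACE, the update only succeeds if the link is still running the program identified by old_prog_fd, so concurrent updaters cannot silently overwrite each other:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int swap_link_prog(int link_fd, int new_prog_fd, int old_prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_update.link_fd = link_fd;
	attr.link_update.new_prog_fd = new_prog_fd;
	attr.link_update.flags = BPF_F_REPLACE;
	attr.link_update.old_prog_fd = old_prog_fd;

	return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
}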
4351
73b11c2a
AN
4352#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
4353
4354static int link_detach(union bpf_attr *attr)
4355{
4356 struct bpf_link *link;
4357 int ret;
4358
4359 if (CHECK_ATTR(BPF_LINK_DETACH))
4360 return -EINVAL;
4361
4362 link = bpf_link_get_from_fd(attr->link_detach.link_fd);
4363 if (IS_ERR(link))
4364 return PTR_ERR(link);
4365
4366 if (link->ops->detach)
4367 ret = link->ops->detach(link);
4368 else
4369 ret = -EOPNOTSUPP;
4370
4371 bpf_link_put(link);
4372 return ret;
4373}
4374
005142b8 4375static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
2d602c8c 4376{
005142b8 4377 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
2d602c8c
AN
4378}
4379
005142b8 4380struct bpf_link *bpf_link_by_id(u32 id)
2d602c8c
AN
4381{
4382 struct bpf_link *link;
2d602c8c 4383
005142b8
AS
4384 if (!id)
4385 return ERR_PTR(-ENOENT);
2d602c8c
AN
4386
4387 spin_lock_bh(&link_idr_lock);
2d602c8c 4388 /* before link is "settled", ID is 0, pretend it doesn't exist yet */
005142b8 4389 link = idr_find(&link_idr, id);
2d602c8c
AN
4390 if (link) {
4391 if (link->id)
005142b8 4392 link = bpf_link_inc_not_zero(link);
2d602c8c 4393 else
005142b8 4394 link = ERR_PTR(-EAGAIN);
2d602c8c 4395 } else {
005142b8 4396 link = ERR_PTR(-ENOENT);
2d602c8c
AN
4397 }
4398 spin_unlock_bh(&link_idr_lock);
005142b8
AS
4399 return link;
4400}
2d602c8c 4401
005142b8
AS
4402#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
4403
4404static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
4405{
4406 struct bpf_link *link;
4407 u32 id = attr->link_id;
4408 int fd;
4409
4410 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
4411 return -EINVAL;
4412
4413 if (!capable(CAP_SYS_ADMIN))
4414 return -EPERM;
4415
4416 link = bpf_link_by_id(id);
4417 if (IS_ERR(link))
4418 return PTR_ERR(link);
2d602c8c
AN
4419
4420 fd = bpf_link_new_fd(link);
4421 if (fd < 0)
4422 bpf_link_put(link);
4423
4424 return fd;
4425}
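
Together with BPF_LINK_GET_NEXT_ID this is the building block for enumerating every link on the system (both commands require CAP_SYS_ADMIN). A sketch of the walk; tolerating a failed fd lookup is deliberate, since a link can disappear between the two calls:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static void walk_links(void)
{
	union bpf_attr attr;
	__u32 id = 0;
	int fd;

	for (;;) {
		memset(&attr, 0, sizeof(attr));
		attr.start_id = id;
		if (sys_bpf(BPF_LINK_GET_NEXT_ID, &attr, sizeof(attr)))
			break;		/* ENOENT: walked past the last id */
		id = attr.next_id;

		memset(&attr, 0, sizeof(attr));
		attr.link_id = id;
		fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
		if (fd < 0)
			continue;	/* link went away between the calls */
		/* ... BPF_OBJ_GET_INFO_BY_FD on fd, as sketched above ... */
		close(fd);
	}
}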
4426
d46edd67
SL
4427DEFINE_MUTEX(bpf_stats_enabled_mutex);
4428
4429static int bpf_stats_release(struct inode *inode, struct file *file)
4430{
4431 mutex_lock(&bpf_stats_enabled_mutex);
4432 static_key_slow_dec(&bpf_stats_enabled_key.key);
4433 mutex_unlock(&bpf_stats_enabled_mutex);
4434 return 0;
4435}
4436
4437static const struct file_operations bpf_stats_fops = {
4438 .release = bpf_stats_release,
4439};
4440
4441static int bpf_enable_runtime_stats(void)
4442{
4443 int fd;
4444
4445 mutex_lock(&bpf_stats_enabled_mutex);
4446
4447 /* Set a very high limit to avoid overflow */
4448 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
4449 mutex_unlock(&bpf_stats_enabled_mutex);
4450 return -EBUSY;
4451 }
4452
4453 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
4454 if (fd >= 0)
4455 static_key_slow_inc(&bpf_stats_enabled_key.key);
4456
4457 mutex_unlock(&bpf_stats_enabled_mutex);
4458 return fd;
4459}
4460
4461#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
4462
4463static int bpf_enable_stats(union bpf_attr *attr)
4464{
4466 if (CHECK_ATTR(BPF_ENABLE_STATS))
4467 return -EINVAL;
4468
4469 if (!capable(CAP_SYS_ADMIN))
4470 return -EPERM;
4471
4472 switch (attr->enable_stats.type) {
4473 case BPF_STATS_RUN_TIME:
4474 return bpf_enable_runtime_stats();
4475 default:
4476 break;
4477 }
4478 return -EINVAL;
4479}
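
A sketch of the userspace side. The interesting property is the lifetime rule encoded by bpf_stats_fops above: run_time_ns/run_cnt accounting stays enabled exactly as long as at least one fd returned here is open, so a monitoring tool simply holds the fd for the duration of its sampling window:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int enable_run_time_stats(void)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.enable_stats.type = BPF_STATS_RUN_TIME;

	/* close() on the returned fd disables stats again */
	return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
}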
4480
ac51d99b
YS
4481#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
4482
4483static int bpf_iter_create(union bpf_attr *attr)
4484{
4485 struct bpf_link *link;
4486 int err;
4487
4488 if (CHECK_ATTR(BPF_ITER_CREATE))
4489 return -EINVAL;
4490
4491 if (attr->iter_create.flags)
4492 return -EINVAL;
4493
4494 link = bpf_link_get_from_fd(attr->iter_create.link_fd);
4495 if (IS_ERR(link))
4496 return PTR_ERR(link);
4497
4498 err = bpf_iter_new_fd(link);
4499 bpf_link_put(link);
4500
4501 return err;
4502}
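
A sketch of consuming an iterator: iter_link_fd is assumed to be a link created from a BPF_TRACE_ITER program, and each read() drives another chunk of the iterator program's output until the walk is exhausted:

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int cat_iter(int iter_link_fd)
{
	union bpf_attr attr;
	char buf[4096];
	ssize_t n;
	int iter_fd;

	memset(&attr, 0, sizeof(attr));
	attr.iter_create.link_fd = iter_link_fd;
	iter_fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
	if (iter_fd < 0)
		return -1;

	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(iter_fd);
	return n < 0 ? -1 : 0;
}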
4503
ef15314a
YZ
4504#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
4505
4506static int bpf_prog_bind_map(union bpf_attr *attr)
4507{
4508 struct bpf_prog *prog;
4509 struct bpf_map *map;
4510 struct bpf_map **used_maps_old, **used_maps_new;
4511 int i, ret = 0;
4512
4513 if (CHECK_ATTR(BPF_PROG_BIND_MAP))
4514 return -EINVAL;
4515
4516 if (attr->prog_bind_map.flags)
4517 return -EINVAL;
4518
4519 prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
4520 if (IS_ERR(prog))
4521 return PTR_ERR(prog);
4522
4523 map = bpf_map_get(attr->prog_bind_map.map_fd);
4524 if (IS_ERR(map)) {
4525 ret = PTR_ERR(map);
4526 goto out_prog_put;
4527 }
4528
4529 mutex_lock(&prog->aux->used_maps_mutex);
4530
4531 used_maps_old = prog->aux->used_maps;
4532
4533 for (i = 0; i < prog->aux->used_map_cnt; i++)
1028ae40
SF
4534 if (used_maps_old[i] == map) {
4535 bpf_map_put(map);
ef15314a 4536 goto out_unlock;
1028ae40 4537 }
ef15314a
YZ
4538
4539 used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
4540 sizeof(used_maps_new[0]),
4541 GFP_KERNEL);
4542 if (!used_maps_new) {
4543 ret = -ENOMEM;
4544 goto out_unlock;
4545 }
4546
4547 memcpy(used_maps_new, used_maps_old,
4548 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
4549 used_maps_new[prog->aux->used_map_cnt] = map;
4550
4551 prog->aux->used_map_cnt++;
4552 prog->aux->used_maps = used_maps_new;
4553
4554 kfree(used_maps_old);
4555
4556out_unlock:
4557 mutex_unlock(&prog->aux->used_maps_mutex);
4558
4559 if (ret)
4560 bpf_map_put(map);
4561out_prog_put:
4562 bpf_prog_put(prog);
4563 return ret;
4564}
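
A sketch of the caller. The point of the command is to tie a map's lifetime to a program that never references it from its instructions (metadata maps are the canonical case); after a successful bind the map stays alive for as long as the program does:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static int bind_map_to_prog(int prog_fd, int map_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_bind_map.prog_fd = prog_fd;
	attr.prog_bind_map.map_fd = map_fd;

	return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
}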
4565
af2ac3e1 4566static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
99c55f7d 4567{
8096f229 4568 union bpf_attr attr;
99c55f7d
AS
4569 int err;
4570
2c78ee89 4571 if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
99c55f7d
AS
4572 return -EPERM;
4573
dcab51f1 4574 err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
1e270976
MKL
4575 if (err)
4576 return err;
4577 size = min_t(u32, size, sizeof(attr));
99c55f7d
AS
4578
4579 /* copy attributes from user space, may be less than sizeof(bpf_attr) */
8096f229 4580 memset(&attr, 0, sizeof(attr));
af2ac3e1 4581 if (copy_from_bpfptr(&attr, uattr, size) != 0)
99c55f7d
AS
4582 return -EFAULT;
4583
afdb09c7
CF
4584 err = security_bpf(cmd, &attr, size);
4585 if (err < 0)
4586 return err;
4587
99c55f7d
AS
4588 switch (cmd) {
4589 case BPF_MAP_CREATE:
4590 err = map_create(&attr);
4591 break;
db20fd2b
AS
4592 case BPF_MAP_LOOKUP_ELEM:
4593 err = map_lookup_elem(&attr);
4594 break;
4595 case BPF_MAP_UPDATE_ELEM:
af2ac3e1 4596 err = map_update_elem(&attr, uattr);
db20fd2b
AS
4597 break;
4598 case BPF_MAP_DELETE_ELEM:
4599 err = map_delete_elem(&attr);
4600 break;
4601 case BPF_MAP_GET_NEXT_KEY:
4602 err = map_get_next_key(&attr);
4603 break;
87df15de
DB
4604 case BPF_MAP_FREEZE:
4605 err = map_freeze(&attr);
4606 break;
09756af4 4607 case BPF_PROG_LOAD:
838e9690 4608 err = bpf_prog_load(&attr, uattr);
09756af4 4609 break;
b2197755
DB
4610 case BPF_OBJ_PIN:
4611 err = bpf_obj_pin(&attr);
4612 break;
4613 case BPF_OBJ_GET:
4614 err = bpf_obj_get(&attr);
4615 break;
f4324551
DM
4616 case BPF_PROG_ATTACH:
4617 err = bpf_prog_attach(&attr);
4618 break;
4619 case BPF_PROG_DETACH:
4620 err = bpf_prog_detach(&attr);
4621 break;
468e2f64 4622 case BPF_PROG_QUERY:
af2ac3e1 4623 err = bpf_prog_query(&attr, uattr.user);
468e2f64 4624 break;
1cf1cae9 4625 case BPF_PROG_TEST_RUN:
af2ac3e1 4626 err = bpf_prog_test_run(&attr, uattr.user);
1cf1cae9 4627 break;
34ad5580 4628 case BPF_PROG_GET_NEXT_ID:
af2ac3e1 4629 err = bpf_obj_get_next_id(&attr, uattr.user,
34ad5580
MKL
4630 &prog_idr, &prog_idr_lock);
4631 break;
4632 case BPF_MAP_GET_NEXT_ID:
af2ac3e1 4633 err = bpf_obj_get_next_id(&attr, uattr.user,
34ad5580
MKL
4634 &map_idr, &map_idr_lock);
4635 break;
1b9ed84e 4636 case BPF_BTF_GET_NEXT_ID:
af2ac3e1 4637 err = bpf_obj_get_next_id(&attr, uattr.user,
1b9ed84e
QM
4638 &btf_idr, &btf_idr_lock);
4639 break;
b16d9aa4
MKL
4640 case BPF_PROG_GET_FD_BY_ID:
4641 err = bpf_prog_get_fd_by_id(&attr);
4642 break;
bd5f5f4e
MKL
4643 case BPF_MAP_GET_FD_BY_ID:
4644 err = bpf_map_get_fd_by_id(&attr);
4645 break;
1e270976 4646 case BPF_OBJ_GET_INFO_BY_FD:
af2ac3e1 4647 err = bpf_obj_get_info_by_fd(&attr, uattr.user);
1e270976 4648 break;
c4f6699d
AS
4649 case BPF_RAW_TRACEPOINT_OPEN:
4650 err = bpf_raw_tracepoint_open(&attr);
4651 break;
f56a653c 4652 case BPF_BTF_LOAD:
c571bd75 4653 err = bpf_btf_load(&attr, uattr);
f56a653c 4654 break;
78958fca
MKL
4655 case BPF_BTF_GET_FD_BY_ID:
4656 err = bpf_btf_get_fd_by_id(&attr);
4657 break;
41bdc4b4 4658 case BPF_TASK_FD_QUERY:
af2ac3e1 4659 err = bpf_task_fd_query(&attr, uattr.user);
41bdc4b4 4660 break;
bd513cd0
MV
4661 case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
4662 err = map_lookup_and_delete_elem(&attr);
4663 break;
cb4d03ab 4664 case BPF_MAP_LOOKUP_BATCH:
af2ac3e1 4665 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
cb4d03ab 4666 break;
05799638 4667 case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
af2ac3e1 4668 err = bpf_map_do_batch(&attr, uattr.user,
05799638
YS
4669 BPF_MAP_LOOKUP_AND_DELETE_BATCH);
4670 break;
aa2e93b8 4671 case BPF_MAP_UPDATE_BATCH:
af2ac3e1 4672 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
aa2e93b8
BV
4673 break;
4674 case BPF_MAP_DELETE_BATCH:
af2ac3e1 4675 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
aa2e93b8 4676 break;
af6eea57 4677 case BPF_LINK_CREATE:
af2ac3e1 4678 err = link_create(&attr, uattr);
af6eea57 4679 break;
0c991ebc
AN
4680 case BPF_LINK_UPDATE:
4681 err = link_update(&attr);
4682 break;
2d602c8c
AN
4683 case BPF_LINK_GET_FD_BY_ID:
4684 err = bpf_link_get_fd_by_id(&attr);
4685 break;
4686 case BPF_LINK_GET_NEXT_ID:
af2ac3e1 4687 err = bpf_obj_get_next_id(&attr, uattr.user,
2d602c8c
AN
4688 &link_idr, &link_idr_lock);
4689 break;
d46edd67
SL
4690 case BPF_ENABLE_STATS:
4691 err = bpf_enable_stats(&attr);
4692 break;
ac51d99b
YS
4693 case BPF_ITER_CREATE:
4694 err = bpf_iter_create(&attr);
4695 break;
73b11c2a
AN
4696 case BPF_LINK_DETACH:
4697 err = link_detach(&attr);
4698 break;
ef15314a
YZ
4699 case BPF_PROG_BIND_MAP:
4700 err = bpf_prog_bind_map(&attr);
4701 break;
99c55f7d
AS
4702 default:
4703 err = -EINVAL;
4704 break;
4705 }
4706
4707 return err;
4708}
79a7f8bd 4709
af2ac3e1
AS
4710SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
4711{
4712 return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
4713}
4714
79a7f8bd
AS
4715static bool syscall_prog_is_valid_access(int off, int size,
4716 enum bpf_access_type type,
4717 const struct bpf_prog *prog,
4718 struct bpf_insn_access_aux *info)
4719{
4720 if (off < 0 || off >= U16_MAX)
4721 return false;
4722 if (off % size != 0)
4723 return false;
4724 return true;
4725}
4726
4727BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
4728{
af2ac3e1
AS
4729 switch (cmd) {
4730 case BPF_MAP_CREATE:
4731 case BPF_MAP_UPDATE_ELEM:
4732 case BPF_MAP_FREEZE:
4733 case BPF_PROG_LOAD:
c571bd75 4734 case BPF_BTF_LOAD:
af2ac3e1
AS
4735 break;
4736 /* case BPF_PROG_TEST_RUN:
4737 * is not part of this list to prevent recursive test_run
4738 */
4739 default:
4740 return -EINVAL;
4741 }
4742 return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
79a7f8bd
AS
4743}
4744
3a2daa72 4745static const struct bpf_func_proto bpf_sys_bpf_proto = {
79a7f8bd
AS
4746 .func = bpf_sys_bpf,
4747 .gpl_only = false,
4748 .ret_type = RET_INTEGER,
4749 .arg1_type = ARG_ANYTHING,
4750 .arg2_type = ARG_PTR_TO_MEM,
4751 .arg3_type = ARG_CONST_SIZE,
4752};
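
On the BPF side, this helper is what a BPF_PROG_TYPE_SYSCALL program calls. A sketch assuming libbpf's SEC("syscall") convention and the bpf_sys_bpf() declaration from bpf/bpf_helpers.h; such a program is itself executed through BPF_PROG_TEST_RUN, which is exactly why that command is left out of the allowlist above:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("syscall")
int create_array_map(void *ctx)
{
	union bpf_attr attr = {
		.map_type    = BPF_MAP_TYPE_ARRAY,
		.key_size    = sizeof(__u32),
		.value_size  = sizeof(__u64),
		.max_entries = 1,
	};

	/* only the commands allowlisted in bpf_sys_bpf() are accepted;
	 * the attr lives in program memory (ARG_PTR_TO_MEM above) */
	return bpf_sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}

char LICENSE[] SEC("license") = "GPL";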
4753
4754const struct bpf_func_proto * __weak
4755tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4756{
4757 return bpf_base_func_proto(func_id);
4758}
4759
3abea089
AS
4760BPF_CALL_1(bpf_sys_close, u32, fd)
4761{
 4762	/* When a bpf program calls this helper, there must not be
 4763	 * an fdget() without a matching completed fdput().
 4764	 * This helper is only allowed in the following call chain:
 4765	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
 4766	 */
4767 return close_fd(fd);
4768}
4769
3a2daa72 4770static const struct bpf_func_proto bpf_sys_close_proto = {
3abea089
AS
4771 .func = bpf_sys_close,
4772 .gpl_only = false,
4773 .ret_type = RET_INTEGER,
4774 .arg1_type = ARG_ANYTHING,
4775};
4776
79a7f8bd
AS
4777static const struct bpf_func_proto *
4778syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4779{
4780 switch (func_id) {
4781 case BPF_FUNC_sys_bpf:
4782 return &bpf_sys_bpf_proto;
3d78417b
AS
4783 case BPF_FUNC_btf_find_by_name_kind:
4784 return &bpf_btf_find_by_name_kind_proto;
3abea089
AS
4785 case BPF_FUNC_sys_close:
4786 return &bpf_sys_close_proto;
79a7f8bd
AS
4787 default:
4788 return tracing_prog_func_proto(func_id, prog);
4789 }
4790}
4791
4792const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
4793 .get_func_proto = syscall_prog_func_proto,
4794 .is_valid_access = syscall_prog_is_valid_access,
4795};
4796
4797const struct bpf_prog_ops bpf_syscall_prog_ops = {
4798 .test_run = bpf_prog_test_run_syscall,
4799};