]> git.proxmox.com Git - mirror_ubuntu-kernels.git/blame - kernel/bpf/syscall.c
Merge branch 'intel_pstate'
[mirror_ubuntu-kernels.git] / kernel / bpf / syscall.c
CommitLineData
99c55f7d
AS
1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
11 */
12#include <linux/bpf.h>
a67edbf4 13#include <linux/bpf_trace.h>
99c55f7d
AS
14#include <linux/syscalls.h>
15#include <linux/slab.h>
3f07c014 16#include <linux/sched/signal.h>
d407bd25
DB
17#include <linux/vmalloc.h>
18#include <linux/mmzone.h>
99c55f7d 19#include <linux/anon_inodes.h>
db20fd2b 20#include <linux/file.h>
09756af4
AS
21#include <linux/license.h>
22#include <linux/filter.h>
2541517c 23#include <linux/version.h>
535e7b4b 24#include <linux/kernel.h>
dc4bb0e2 25#include <linux/idr.h>
99c55f7d 26
14dc6f04
MKL
/* Map-type predicates used by the element lookup path in this file.
 * IS_FD_ARRAY() selects the types served by the bpf_fd_array_map_*()
 * helpers, IS_FD_HASH() the type served by the bpf_fd_htab_map_*()
 * helpers, and IS_FD_MAP() matches either group.
 */
#define IS_FD_ARRAY(map)						\
	((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY ||			\
	 (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||		\
	 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||		\
	 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)

#define IS_FD_HASH(map)							\
	((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)

#define IS_FD_MAP(map)							\
	(IS_FD_ARRAY(map) || IS_FD_HASH(map))
33
b121d1e7 34DEFINE_PER_CPU(int, bpf_prog_active);
dc4bb0e2
MKL
35static DEFINE_IDR(prog_idr);
36static DEFINE_SPINLOCK(prog_idr_lock);
f3f1c054
MKL
37static DEFINE_IDR(map_idr);
38static DEFINE_SPINLOCK(map_idr_lock);
b121d1e7 39
1be7f75d
AS
40int sysctl_unprivileged_bpf_disabled __read_mostly;
41
40077e0c
JB
42static const struct bpf_map_ops * const bpf_map_types[] = {
43#define BPF_PROG_TYPE(_id, _ops)
44#define BPF_MAP_TYPE(_id, _ops) \
45 [_id] = &_ops,
46#include <linux/bpf_types.h>
47#undef BPF_PROG_TYPE
48#undef BPF_MAP_TYPE
49};
99c55f7d
AS
50
51static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
52{
99c55f7d
AS
53 struct bpf_map *map;
54
40077e0c
JB
55 if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
56 !bpf_map_types[attr->map_type])
57 return ERR_PTR(-EINVAL);
99c55f7d 58
40077e0c
JB
59 map = bpf_map_types[attr->map_type]->map_alloc(attr);
60 if (IS_ERR(map))
61 return map;
62 map->ops = bpf_map_types[attr->map_type];
63 map->map_type = attr->map_type;
64 return map;
99c55f7d
AS
65}
66
d407bd25
DB
67void *bpf_map_area_alloc(size_t size)
68{
69 /* We definitely need __GFP_NORETRY, so OOM killer doesn't
70 * trigger under memory pressure as we really just want to
71 * fail instead.
72 */
73 const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
74 void *area;
75
76 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
77 area = kmalloc(size, GFP_USER | flags);
78 if (area != NULL)
79 return area;
80 }
81
19809c2d 82 return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
d407bd25
DB
83}
84
/* Release an area obtained from bpf_map_area_alloc().  kvfree() is used
 * because the area may have come from either kmalloc() or __vmalloc().
 */
void bpf_map_area_free(void *area)
{
	kvfree(area);
}
89
6c905981
AS
90int bpf_map_precharge_memlock(u32 pages)
91{
92 struct user_struct *user = get_current_user();
93 unsigned long memlock_limit, cur;
94
95 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
96 cur = atomic_long_read(&user->locked_vm);
97 free_uid(user);
98 if (cur + pages > memlock_limit)
99 return -EPERM;
100 return 0;
101}
102
aaac3ba9
AS
103static int bpf_map_charge_memlock(struct bpf_map *map)
104{
105 struct user_struct *user = get_current_user();
106 unsigned long memlock_limit;
107
108 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
109
110 atomic_long_add(map->pages, &user->locked_vm);
111
112 if (atomic_long_read(&user->locked_vm) > memlock_limit) {
113 atomic_long_sub(map->pages, &user->locked_vm);
114 free_uid(user);
115 return -EPERM;
116 }
117 map->user = user;
118 return 0;
119}
120
121static void bpf_map_uncharge_memlock(struct bpf_map *map)
122{
123 struct user_struct *user = map->user;
124
125 atomic_long_sub(map->pages, &user->locked_vm);
126 free_uid(user);
127}
128
f3f1c054
MKL
129static int bpf_map_alloc_id(struct bpf_map *map)
130{
131 int id;
132
133 spin_lock_bh(&map_idr_lock);
134 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
135 if (id > 0)
136 map->id = id;
137 spin_unlock_bh(&map_idr_lock);
138
139 if (WARN_ON_ONCE(!id))
140 return -ENOSPC;
141
142 return id > 0 ? 0 : id;
143}
144
bd5f5f4e 145static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
f3f1c054 146{
bd5f5f4e
MKL
147 if (do_idr_lock)
148 spin_lock_bh(&map_idr_lock);
149 else
150 __acquire(&map_idr_lock);
151
f3f1c054 152 idr_remove(&map_idr, map->id);
bd5f5f4e
MKL
153
154 if (do_idr_lock)
155 spin_unlock_bh(&map_idr_lock);
156 else
157 __release(&map_idr_lock);
f3f1c054
MKL
158}
159
99c55f7d
AS
160/* called from workqueue */
161static void bpf_map_free_deferred(struct work_struct *work)
162{
163 struct bpf_map *map = container_of(work, struct bpf_map, work);
164
aaac3ba9 165 bpf_map_uncharge_memlock(map);
99c55f7d
AS
166 /* implementation dependent freeing */
167 map->ops->map_free(map);
168}
169
c9da161c
DB
170static void bpf_map_put_uref(struct bpf_map *map)
171{
172 if (atomic_dec_and_test(&map->usercnt)) {
173 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
174 bpf_fd_array_map_clear(map);
175 }
176}
177
99c55f7d
AS
178/* decrement map refcnt and schedule it for freeing via workqueue
179 * (unrelying map implementation ops->map_free() might sleep)
180 */
bd5f5f4e 181static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
99c55f7d
AS
182{
183 if (atomic_dec_and_test(&map->refcnt)) {
34ad5580 184 /* bpf_map_free_id() must be called first */
bd5f5f4e 185 bpf_map_free_id(map, do_idr_lock);
99c55f7d
AS
186 INIT_WORK(&map->work, bpf_map_free_deferred);
187 schedule_work(&map->work);
188 }
189}
190
bd5f5f4e
MKL
191void bpf_map_put(struct bpf_map *map)
192{
193 __bpf_map_put(map, true);
194}
195
/* Drop both a user reference (usercnt) and a regular reference (refcnt)
 * on the map, in that order.
 */
void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}
201
202static int bpf_map_release(struct inode *inode, struct file *filp)
203{
61d1b6a4
DB
204 struct bpf_map *map = filp->private_data;
205
206 if (map->ops->map_release)
207 map->ops->map_release(map, filp);
208
209 bpf_map_put_with_uref(map);
99c55f7d
AS
210 return 0;
211}
212
f99bf205
DB
#ifdef CONFIG_PROC_FS
/* ->show_fdinfo() handler of bpf_map_fops: prints the map's type, key and
 * value sizes, max_entries, flags and memlock footprint (map->pages shifted
 * by PAGE_SHIFT).  For a BPF_MAP_TYPE_PROG_ARRAY with a non-zero
 * owner_prog_type, the owner's program type and jited state are appended.
 */
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	u32 owner_prog_type = 0, owner_jited = 0;
	unsigned long long memlock;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		const struct bpf_array *array;

		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	memlock = map->pages * 1ULL << PAGE_SHIFT;

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   memlock);

	if (!owner_prog_type)
		return;

	seq_printf(m, "owner_prog_type:\t%u\n", owner_prog_type);
	seq_printf(m, "owner_jited:\t%u\n", owner_jited);
}
#endif
249
99c55f7d 250static const struct file_operations bpf_map_fops = {
f99bf205
DB
251#ifdef CONFIG_PROC_FS
252 .show_fdinfo = bpf_map_show_fdinfo,
253#endif
254 .release = bpf_map_release,
99c55f7d
AS
255};
256
b2197755 257int bpf_map_new_fd(struct bpf_map *map)
aa79781b
DB
258{
259 return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
260 O_RDWR | O_CLOEXEC);
261}
262
99c55f7d
AS
/* helper macro to check that unused fields of 'union bpf_attr' are zero
 *
 * Expects a pointer named 'attr' in the calling scope and a
 * <CMD>_LAST_FIELD macro naming the last member the command uses.
 * Evaluates to true when any byte after that member is non-zero.
 *
 * The whole expansion is parenthesized so that the macro behaves as a
 * single expression under any operator (e.g. '!CHECK_ATTR(X)').
 */
#define CHECK_ATTR(CMD) \
	(memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		    sizeof(attr->CMD##_LAST_FIELD), 0, \
		    sizeof(*attr) - \
		    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		    sizeof(attr->CMD##_LAST_FIELD)) != NULL)
270
56f668df 271#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
99c55f7d
AS
272/* called via syscall */
273static int map_create(union bpf_attr *attr)
274{
275 struct bpf_map *map;
276 int err;
277
278 err = CHECK_ATTR(BPF_MAP_CREATE);
279 if (err)
280 return -EINVAL;
281
282 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
283 map = find_and_alloc_map(attr);
284 if (IS_ERR(map))
285 return PTR_ERR(map);
286
287 atomic_set(&map->refcnt, 1);
c9da161c 288 atomic_set(&map->usercnt, 1);
99c55f7d 289
aaac3ba9
AS
290 err = bpf_map_charge_memlock(map);
291 if (err)
20b2b24f 292 goto free_map_nouncharge;
aaac3ba9 293
f3f1c054
MKL
294 err = bpf_map_alloc_id(map);
295 if (err)
296 goto free_map;
297
aa79781b 298 err = bpf_map_new_fd(map);
bd5f5f4e
MKL
299 if (err < 0) {
300 /* failed to allocate fd.
301 * bpf_map_put() is needed because the above
302 * bpf_map_alloc_id() has published the map
303 * to the userspace and the userspace may
304 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
305 */
306 bpf_map_put(map);
307 return err;
308 }
99c55f7d 309
a67edbf4 310 trace_bpf_map_create(map, err);
99c55f7d
AS
311 return err;
312
313free_map:
20b2b24f
DB
314 bpf_map_uncharge_memlock(map);
315free_map_nouncharge:
99c55f7d
AS
316 map->ops->map_free(map);
317 return err;
318}
319
db20fd2b
AS
320/* if error is returned, fd is released.
321 * On success caller should complete fd access with matching fdput()
322 */
c2101297 323struct bpf_map *__bpf_map_get(struct fd f)
db20fd2b 324{
db20fd2b
AS
325 if (!f.file)
326 return ERR_PTR(-EBADF);
db20fd2b
AS
327 if (f.file->f_op != &bpf_map_fops) {
328 fdput(f);
329 return ERR_PTR(-EINVAL);
330 }
331
c2101297
DB
332 return f.file->private_data;
333}
334
92117d84
AS
335/* prog's and map's refcnt limit */
336#define BPF_MAX_REFCNT 32768
337
338struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
c9da161c 339{
92117d84
AS
340 if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
341 atomic_dec(&map->refcnt);
342 return ERR_PTR(-EBUSY);
343 }
c9da161c
DB
344 if (uref)
345 atomic_inc(&map->usercnt);
92117d84 346 return map;
c9da161c
DB
347}
348
349struct bpf_map *bpf_map_get_with_uref(u32 ufd)
c2101297
DB
350{
351 struct fd f = fdget(ufd);
352 struct bpf_map *map;
353
354 map = __bpf_map_get(f);
355 if (IS_ERR(map))
356 return map;
357
92117d84 358 map = bpf_map_inc(map, true);
c2101297 359 fdput(f);
db20fd2b
AS
360
361 return map;
362}
363
bd5f5f4e
MKL
364/* map_idr_lock should have been held */
365static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
366 bool uref)
367{
368 int refold;
369
370 refold = __atomic_add_unless(&map->refcnt, 1, 0);
371
372 if (refold >= BPF_MAX_REFCNT) {
373 __bpf_map_put(map, false);
374 return ERR_PTR(-EBUSY);
375 }
376
377 if (!refold)
378 return ERR_PTR(-ENOENT);
379
380 if (uref)
381 atomic_inc(&map->usercnt);
382
383 return map;
384}
385
b8cdc051
AS
386int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
387{
388 return -ENOTSUPP;
389}
390
db20fd2b
AS
391/* last field in 'union bpf_attr' used by this command */
392#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
393
394static int map_lookup_elem(union bpf_attr *attr)
395{
535e7b4b
MS
396 void __user *ukey = u64_to_user_ptr(attr->key);
397 void __user *uvalue = u64_to_user_ptr(attr->value);
db20fd2b 398 int ufd = attr->map_fd;
db20fd2b 399 struct bpf_map *map;
8ebe667c 400 void *key, *value, *ptr;
15a07b33 401 u32 value_size;
592867bf 402 struct fd f;
db20fd2b
AS
403 int err;
404
405 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
406 return -EINVAL;
407
592867bf 408 f = fdget(ufd);
c2101297 409 map = __bpf_map_get(f);
db20fd2b
AS
410 if (IS_ERR(map))
411 return PTR_ERR(map);
412
e4448ed8
AV
413 key = memdup_user(ukey, map->key_size);
414 if (IS_ERR(key)) {
415 err = PTR_ERR(key);
db20fd2b 416 goto err_put;
e4448ed8 417 }
db20fd2b 418
15a07b33 419 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
8f844938 420 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
15a07b33
AS
421 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
422 value_size = round_up(map->value_size, 8) * num_possible_cpus();
14dc6f04
MKL
423 else if (IS_FD_MAP(map))
424 value_size = sizeof(u32);
15a07b33
AS
425 else
426 value_size = map->value_size;
427
8ebe667c 428 err = -ENOMEM;
15a07b33 429 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
db20fd2b 430 if (!value)
8ebe667c
AS
431 goto free_key;
432
8f844938
MKL
433 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
434 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
15a07b33
AS
435 err = bpf_percpu_hash_copy(map, key, value);
436 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
437 err = bpf_percpu_array_copy(map, key, value);
557c0c6e
AS
438 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
439 err = bpf_stackmap_copy(map, key, value);
14dc6f04
MKL
440 } else if (IS_FD_ARRAY(map)) {
441 err = bpf_fd_array_map_lookup_elem(map, key, value);
442 } else if (IS_FD_HASH(map)) {
443 err = bpf_fd_htab_map_lookup_elem(map, key, value);
15a07b33
AS
444 } else {
445 rcu_read_lock();
446 ptr = map->ops->map_lookup_elem(map, key);
447 if (ptr)
448 memcpy(value, ptr, value_size);
449 rcu_read_unlock();
450 err = ptr ? 0 : -ENOENT;
451 }
8ebe667c 452
15a07b33 453 if (err)
8ebe667c 454 goto free_value;
db20fd2b
AS
455
456 err = -EFAULT;
15a07b33 457 if (copy_to_user(uvalue, value, value_size) != 0)
8ebe667c 458 goto free_value;
db20fd2b 459
a67edbf4 460 trace_bpf_map_lookup_elem(map, ufd, key, value);
db20fd2b
AS
461 err = 0;
462
8ebe667c
AS
463free_value:
464 kfree(value);
db20fd2b
AS
465free_key:
466 kfree(key);
467err_put:
468 fdput(f);
469 return err;
470}
471
3274f520 472#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
db20fd2b
AS
473
474static int map_update_elem(union bpf_attr *attr)
475{
535e7b4b
MS
476 void __user *ukey = u64_to_user_ptr(attr->key);
477 void __user *uvalue = u64_to_user_ptr(attr->value);
db20fd2b 478 int ufd = attr->map_fd;
db20fd2b
AS
479 struct bpf_map *map;
480 void *key, *value;
15a07b33 481 u32 value_size;
592867bf 482 struct fd f;
db20fd2b
AS
483 int err;
484
485 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
486 return -EINVAL;
487
592867bf 488 f = fdget(ufd);
c2101297 489 map = __bpf_map_get(f);
db20fd2b
AS
490 if (IS_ERR(map))
491 return PTR_ERR(map);
492
e4448ed8
AV
493 key = memdup_user(ukey, map->key_size);
494 if (IS_ERR(key)) {
495 err = PTR_ERR(key);
db20fd2b 496 goto err_put;
e4448ed8 497 }
db20fd2b 498
15a07b33 499 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
8f844938 500 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
15a07b33
AS
501 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
502 value_size = round_up(map->value_size, 8) * num_possible_cpus();
503 else
504 value_size = map->value_size;
505
db20fd2b 506 err = -ENOMEM;
15a07b33 507 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
db20fd2b
AS
508 if (!value)
509 goto free_key;
510
511 err = -EFAULT;
15a07b33 512 if (copy_from_user(value, uvalue, value_size) != 0)
db20fd2b
AS
513 goto free_value;
514
b121d1e7
AS
515 /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
516 * inside bpf map update or delete otherwise deadlocks are possible
517 */
518 preempt_disable();
519 __this_cpu_inc(bpf_prog_active);
8f844938
MKL
520 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
521 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
15a07b33
AS
522 err = bpf_percpu_hash_update(map, key, value, attr->flags);
523 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
524 err = bpf_percpu_array_update(map, key, value, attr->flags);
d056a788 525 } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
4ed8ec52 526 map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
56f668df
MKL
527 map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
528 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
d056a788
DB
529 rcu_read_lock();
530 err = bpf_fd_array_map_update_elem(map, f.file, key, value,
531 attr->flags);
532 rcu_read_unlock();
bcc6b1b7
MKL
533 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
534 rcu_read_lock();
535 err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
536 attr->flags);
537 rcu_read_unlock();
15a07b33
AS
538 } else {
539 rcu_read_lock();
540 err = map->ops->map_update_elem(map, key, value, attr->flags);
541 rcu_read_unlock();
542 }
b121d1e7
AS
543 __this_cpu_dec(bpf_prog_active);
544 preempt_enable();
db20fd2b 545
a67edbf4
DB
546 if (!err)
547 trace_bpf_map_update_elem(map, ufd, key, value);
db20fd2b
AS
548free_value:
549 kfree(value);
550free_key:
551 kfree(key);
552err_put:
553 fdput(f);
554 return err;
555}
556
557#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
558
559static int map_delete_elem(union bpf_attr *attr)
560{
535e7b4b 561 void __user *ukey = u64_to_user_ptr(attr->key);
db20fd2b 562 int ufd = attr->map_fd;
db20fd2b 563 struct bpf_map *map;
592867bf 564 struct fd f;
db20fd2b
AS
565 void *key;
566 int err;
567
568 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
569 return -EINVAL;
570
592867bf 571 f = fdget(ufd);
c2101297 572 map = __bpf_map_get(f);
db20fd2b
AS
573 if (IS_ERR(map))
574 return PTR_ERR(map);
575
e4448ed8
AV
576 key = memdup_user(ukey, map->key_size);
577 if (IS_ERR(key)) {
578 err = PTR_ERR(key);
db20fd2b 579 goto err_put;
e4448ed8 580 }
db20fd2b 581
b121d1e7
AS
582 preempt_disable();
583 __this_cpu_inc(bpf_prog_active);
db20fd2b
AS
584 rcu_read_lock();
585 err = map->ops->map_delete_elem(map, key);
586 rcu_read_unlock();
b121d1e7
AS
587 __this_cpu_dec(bpf_prog_active);
588 preempt_enable();
db20fd2b 589
a67edbf4
DB
590 if (!err)
591 trace_bpf_map_delete_elem(map, ufd, key);
db20fd2b
AS
592 kfree(key);
593err_put:
594 fdput(f);
595 return err;
596}
597
598/* last field in 'union bpf_attr' used by this command */
599#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
600
601static int map_get_next_key(union bpf_attr *attr)
602{
535e7b4b
MS
603 void __user *ukey = u64_to_user_ptr(attr->key);
604 void __user *unext_key = u64_to_user_ptr(attr->next_key);
db20fd2b 605 int ufd = attr->map_fd;
db20fd2b
AS
606 struct bpf_map *map;
607 void *key, *next_key;
592867bf 608 struct fd f;
db20fd2b
AS
609 int err;
610
611 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
612 return -EINVAL;
613
592867bf 614 f = fdget(ufd);
c2101297 615 map = __bpf_map_get(f);
db20fd2b
AS
616 if (IS_ERR(map))
617 return PTR_ERR(map);
618
8fe45924 619 if (ukey) {
e4448ed8
AV
620 key = memdup_user(ukey, map->key_size);
621 if (IS_ERR(key)) {
622 err = PTR_ERR(key);
8fe45924 623 goto err_put;
e4448ed8 624 }
8fe45924
TQ
625 } else {
626 key = NULL;
627 }
db20fd2b
AS
628
629 err = -ENOMEM;
630 next_key = kmalloc(map->key_size, GFP_USER);
631 if (!next_key)
632 goto free_key;
633
634 rcu_read_lock();
635 err = map->ops->map_get_next_key(map, key, next_key);
636 rcu_read_unlock();
637 if (err)
638 goto free_next_key;
639
640 err = -EFAULT;
641 if (copy_to_user(unext_key, next_key, map->key_size) != 0)
642 goto free_next_key;
643
a67edbf4 644 trace_bpf_map_next_key(map, ufd, key, next_key);
db20fd2b
AS
645 err = 0;
646
647free_next_key:
648 kfree(next_key);
649free_key:
650 kfree(key);
651err_put:
652 fdput(f);
653 return err;
654}
655
be9370a7
JB
656static const struct bpf_verifier_ops * const bpf_prog_types[] = {
657#define BPF_PROG_TYPE(_id, _ops) \
658 [_id] = &_ops,
40077e0c 659#define BPF_MAP_TYPE(_id, _ops)
be9370a7
JB
660#include <linux/bpf_types.h>
661#undef BPF_PROG_TYPE
40077e0c 662#undef BPF_MAP_TYPE
be9370a7 663};
09756af4
AS
664
665static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
666{
be9370a7
JB
667 if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
668 return -EINVAL;
09756af4 669
be9370a7
JB
670 prog->aux->ops = bpf_prog_types[type];
671 prog->type = type;
672 return 0;
09756af4
AS
673}
674
675/* drop refcnt on maps used by eBPF program and free auxilary data */
676static void free_used_maps(struct bpf_prog_aux *aux)
677{
678 int i;
679
680 for (i = 0; i < aux->used_map_cnt; i++)
681 bpf_map_put(aux->used_maps[i]);
682
683 kfree(aux->used_maps);
684}
685
5ccb071e
DB
686int __bpf_prog_charge(struct user_struct *user, u32 pages)
687{
688 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
689 unsigned long user_bufs;
690
691 if (user) {
692 user_bufs = atomic_long_add_return(pages, &user->locked_vm);
693 if (user_bufs > memlock_limit) {
694 atomic_long_sub(pages, &user->locked_vm);
695 return -EPERM;
696 }
697 }
698
699 return 0;
700}
701
702void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
703{
704 if (user)
705 atomic_long_sub(pages, &user->locked_vm);
706}
707
aaac3ba9
AS
708static int bpf_prog_charge_memlock(struct bpf_prog *prog)
709{
710 struct user_struct *user = get_current_user();
5ccb071e 711 int ret;
aaac3ba9 712
5ccb071e
DB
713 ret = __bpf_prog_charge(user, prog->pages);
714 if (ret) {
aaac3ba9 715 free_uid(user);
5ccb071e 716 return ret;
aaac3ba9 717 }
5ccb071e 718
aaac3ba9
AS
719 prog->aux->user = user;
720 return 0;
721}
722
723static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
724{
725 struct user_struct *user = prog->aux->user;
726
5ccb071e 727 __bpf_prog_uncharge(user, prog->pages);
aaac3ba9
AS
728 free_uid(user);
729}
730
dc4bb0e2
MKL
731static int bpf_prog_alloc_id(struct bpf_prog *prog)
732{
733 int id;
734
735 spin_lock_bh(&prog_idr_lock);
736 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
737 if (id > 0)
738 prog->aux->id = id;
739 spin_unlock_bh(&prog_idr_lock);
740
741 /* id is in [1, INT_MAX) */
742 if (WARN_ON_ONCE(!id))
743 return -ENOSPC;
744
745 return id > 0 ? 0 : id;
746}
747
b16d9aa4 748static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
dc4bb0e2
MKL
749{
750 /* cBPF to eBPF migrations are currently not in the idr store. */
751 if (!prog->aux->id)
752 return;
753
b16d9aa4
MKL
754 if (do_idr_lock)
755 spin_lock_bh(&prog_idr_lock);
756 else
757 __acquire(&prog_idr_lock);
758
dc4bb0e2 759 idr_remove(&prog_idr, prog->aux->id);
b16d9aa4
MKL
760
761 if (do_idr_lock)
762 spin_unlock_bh(&prog_idr_lock);
763 else
764 __release(&prog_idr_lock);
dc4bb0e2
MKL
765}
766
1aacde3d 767static void __bpf_prog_put_rcu(struct rcu_head *rcu)
abf2e7d6
AS
768{
769 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
770
771 free_used_maps(aux);
aaac3ba9 772 bpf_prog_uncharge_memlock(aux->prog);
abf2e7d6
AS
773 bpf_prog_free(aux->prog);
774}
775
b16d9aa4 776static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
09756af4 777{
a67edbf4
DB
778 if (atomic_dec_and_test(&prog->aux->refcnt)) {
779 trace_bpf_prog_put_rcu(prog);
34ad5580 780 /* bpf_prog_free_id() must be called first */
b16d9aa4 781 bpf_prog_free_id(prog, do_idr_lock);
74451e66 782 bpf_prog_kallsyms_del(prog);
1aacde3d 783 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
a67edbf4 784 }
09756af4 785}
b16d9aa4
MKL
786
787void bpf_prog_put(struct bpf_prog *prog)
788{
789 __bpf_prog_put(prog, true);
790}
e2e9b654 791EXPORT_SYMBOL_GPL(bpf_prog_put);
09756af4
AS
792
793static int bpf_prog_release(struct inode *inode, struct file *filp)
794{
795 struct bpf_prog *prog = filp->private_data;
796
1aacde3d 797 bpf_prog_put(prog);
09756af4
AS
798 return 0;
799}
800
7bd509e3
DB
#ifdef CONFIG_PROC_FS
/* ->show_fdinfo() handler of bpf_prog_fops: prints the program's type,
 * jited state, tag (as a hex string) and memlock footprint (prog->pages
 * shifted by PAGE_SHIFT).
 */
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	/* two hex digits per tag byte plus the terminating NUL */
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
	unsigned long long memlock;

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	memlock = prog->pages * 1ULL << PAGE_SHIFT;

	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   memlock);
}
#endif
819
09756af4 820static const struct file_operations bpf_prog_fops = {
7bd509e3
DB
821#ifdef CONFIG_PROC_FS
822 .show_fdinfo = bpf_prog_show_fdinfo,
823#endif
824 .release = bpf_prog_release,
09756af4
AS
825};
826
b2197755 827int bpf_prog_new_fd(struct bpf_prog *prog)
aa79781b
DB
828{
829 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
830 O_RDWR | O_CLOEXEC);
831}
832
113214be 833static struct bpf_prog *____bpf_prog_get(struct fd f)
09756af4 834{
09756af4
AS
835 if (!f.file)
836 return ERR_PTR(-EBADF);
09756af4
AS
837 if (f.file->f_op != &bpf_prog_fops) {
838 fdput(f);
839 return ERR_PTR(-EINVAL);
840 }
841
c2101297 842 return f.file->private_data;
09756af4
AS
843}
844
59d3656d 845struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
92117d84 846{
59d3656d
BB
847 if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
848 atomic_sub(i, &prog->aux->refcnt);
92117d84
AS
849 return ERR_PTR(-EBUSY);
850 }
851 return prog;
852}
59d3656d
BB
853EXPORT_SYMBOL_GPL(bpf_prog_add);
854
c540594f
DB
855void bpf_prog_sub(struct bpf_prog *prog, int i)
856{
857 /* Only to be used for undoing previous bpf_prog_add() in some
858 * error path. We still know that another entity in our call
859 * path holds a reference to the program, thus atomic_sub() can
860 * be safely used in such cases!
861 */
862 WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
863}
864EXPORT_SYMBOL_GPL(bpf_prog_sub);
865
59d3656d
BB
/* Take a single reference on @prog; see bpf_prog_add() for the return
 * value.
 */
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
92117d84 871
b16d9aa4
MKL
872/* prog_idr_lock should have been held */
873static struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
874{
875 int refold;
876
877 refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);
878
879 if (refold >= BPF_MAX_REFCNT) {
880 __bpf_prog_put(prog, false);
881 return ERR_PTR(-EBUSY);
882 }
883
884 if (!refold)
885 return ERR_PTR(-ENOENT);
886
887 return prog;
888}
889
113214be 890static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
09756af4
AS
891{
892 struct fd f = fdget(ufd);
893 struct bpf_prog *prog;
894
113214be 895 prog = ____bpf_prog_get(f);
09756af4
AS
896 if (IS_ERR(prog))
897 return prog;
113214be
DB
898 if (type && prog->type != *type) {
899 prog = ERR_PTR(-EINVAL);
900 goto out;
901 }
09756af4 902
92117d84 903 prog = bpf_prog_inc(prog);
113214be 904out:
09756af4
AS
905 fdput(f);
906 return prog;
907}
113214be
DB
908
909struct bpf_prog *bpf_prog_get(u32 ufd)
910{
911 return __bpf_prog_get(ufd, NULL);
912}
913
914struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
915{
a67edbf4
DB
916 struct bpf_prog *prog = __bpf_prog_get(ufd, &type);
917
918 if (!IS_ERR(prog))
919 trace_bpf_prog_get_type(prog);
920 return prog;
113214be
DB
921}
922EXPORT_SYMBOL_GPL(bpf_prog_get_type);
09756af4
AS
923
924/* last field in 'union bpf_attr' used by this command */
e07b98d9 925#define BPF_PROG_LOAD_LAST_FIELD prog_flags
09756af4
AS
926
927static int bpf_prog_load(union bpf_attr *attr)
928{
929 enum bpf_prog_type type = attr->prog_type;
930 struct bpf_prog *prog;
931 int err;
932 char license[128];
933 bool is_gpl;
934
935 if (CHECK_ATTR(BPF_PROG_LOAD))
936 return -EINVAL;
937
e07b98d9
DM
938 if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
939 return -EINVAL;
940
09756af4 941 /* copy eBPF program license from user space */
535e7b4b 942 if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
09756af4
AS
943 sizeof(license) - 1) < 0)
944 return -EFAULT;
945 license[sizeof(license) - 1] = 0;
946
947 /* eBPF programs must be GPL compatible to use GPL-ed functions */
948 is_gpl = license_is_gpl_compatible(license);
949
ef0915ca
DB
950 if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
951 return -E2BIG;
09756af4 952
2541517c
AS
953 if (type == BPF_PROG_TYPE_KPROBE &&
954 attr->kern_version != LINUX_VERSION_CODE)
955 return -EINVAL;
956
80b7d819
CF
957 if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
958 type != BPF_PROG_TYPE_CGROUP_SKB &&
959 !capable(CAP_SYS_ADMIN))
1be7f75d
AS
960 return -EPERM;
961
09756af4
AS
962 /* plain bpf_prog allocation */
963 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
964 if (!prog)
965 return -ENOMEM;
966
aaac3ba9
AS
967 err = bpf_prog_charge_memlock(prog);
968 if (err)
969 goto free_prog_nouncharge;
970
09756af4
AS
971 prog->len = attr->insn_cnt;
972
973 err = -EFAULT;
535e7b4b 974 if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
aafe6ae9 975 bpf_prog_insn_size(prog)) != 0)
09756af4
AS
976 goto free_prog;
977
978 prog->orig_prog = NULL;
a91263d5 979 prog->jited = 0;
09756af4
AS
980
981 atomic_set(&prog->aux->refcnt, 1);
a91263d5 982 prog->gpl_compatible = is_gpl ? 1 : 0;
09756af4
AS
983
984 /* find program type: socket_filter vs tracing_filter */
985 err = find_prog_type(type, prog);
986 if (err < 0)
987 goto free_prog;
988
989 /* run eBPF verifier */
9bac3d6d 990 err = bpf_check(&prog, attr);
09756af4
AS
991 if (err < 0)
992 goto free_used_maps;
993
994 /* eBPF program is ready to be JITed */
d1c55ab5 995 prog = bpf_prog_select_runtime(prog, &err);
04fd61ab
AS
996 if (err < 0)
997 goto free_used_maps;
09756af4 998
dc4bb0e2
MKL
999 err = bpf_prog_alloc_id(prog);
1000 if (err)
1001 goto free_used_maps;
1002
aa79781b 1003 err = bpf_prog_new_fd(prog);
b16d9aa4
MKL
1004 if (err < 0) {
1005 /* failed to allocate fd.
1006 * bpf_prog_put() is needed because the above
1007 * bpf_prog_alloc_id() has published the prog
1008 * to the userspace and the userspace may
1009 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
1010 */
1011 bpf_prog_put(prog);
1012 return err;
1013 }
09756af4 1014
74451e66 1015 bpf_prog_kallsyms_add(prog);
a67edbf4 1016 trace_bpf_prog_load(prog, err);
09756af4
AS
1017 return err;
1018
1019free_used_maps:
1020 free_used_maps(prog->aux);
1021free_prog:
aaac3ba9
AS
1022 bpf_prog_uncharge_memlock(prog);
1023free_prog_nouncharge:
09756af4
AS
1024 bpf_prog_free(prog);
1025 return err;
1026}
1027
b2197755
DB
1028#define BPF_OBJ_LAST_FIELD bpf_fd
1029
1030static int bpf_obj_pin(const union bpf_attr *attr)
1031{
1032 if (CHECK_ATTR(BPF_OBJ))
1033 return -EINVAL;
1034
535e7b4b 1035 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
b2197755
DB
1036}
1037
1038static int bpf_obj_get(const union bpf_attr *attr)
1039{
1040 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
1041 return -EINVAL;
1042
535e7b4b 1043 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
b2197755
DB
1044}
1045
f4324551
DM
1046#ifdef CONFIG_CGROUP_BPF
1047
7f677633 1048#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
f4324551
DM
1049
1050static int bpf_prog_attach(const union bpf_attr *attr)
1051{
7f677633 1052 enum bpf_prog_type ptype;
f4324551
DM
1053 struct bpf_prog *prog;
1054 struct cgroup *cgrp;
7f677633 1055 int ret;
f4324551
DM
1056
1057 if (!capable(CAP_NET_ADMIN))
1058 return -EPERM;
1059
1060 if (CHECK_ATTR(BPF_PROG_ATTACH))
1061 return -EINVAL;
1062
7f677633
AS
1063 if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
1064 return -EINVAL;
1065
f4324551
DM
1066 switch (attr->attach_type) {
1067 case BPF_CGROUP_INET_INGRESS:
1068 case BPF_CGROUP_INET_EGRESS:
b2cd1257 1069 ptype = BPF_PROG_TYPE_CGROUP_SKB;
f4324551 1070 break;
61023658
DA
1071 case BPF_CGROUP_INET_SOCK_CREATE:
1072 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1073 break;
40304b2a
LB
1074 case BPF_CGROUP_SOCK_OPS:
1075 ptype = BPF_PROG_TYPE_SOCK_OPS;
1076 break;
f4324551
DM
1077 default:
1078 return -EINVAL;
1079 }
1080
b2cd1257
DA
1081 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
1082 if (IS_ERR(prog))
1083 return PTR_ERR(prog);
1084
1085 cgrp = cgroup_get_from_fd(attr->target_fd);
1086 if (IS_ERR(cgrp)) {
1087 bpf_prog_put(prog);
1088 return PTR_ERR(cgrp);
1089 }
1090
7f677633
AS
1091 ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
1092 attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
1093 if (ret)
1094 bpf_prog_put(prog);
b2cd1257
DA
1095 cgroup_put(cgrp);
1096
7f677633 1097 return ret;
f4324551
DM
1098}
1099
1100#define BPF_PROG_DETACH_LAST_FIELD attach_type
1101
1102static int bpf_prog_detach(const union bpf_attr *attr)
1103{
1104 struct cgroup *cgrp;
7f677633 1105 int ret;
f4324551
DM
1106
1107 if (!capable(CAP_NET_ADMIN))
1108 return -EPERM;
1109
1110 if (CHECK_ATTR(BPF_PROG_DETACH))
1111 return -EINVAL;
1112
1113 switch (attr->attach_type) {
1114 case BPF_CGROUP_INET_INGRESS:
1115 case BPF_CGROUP_INET_EGRESS:
61023658 1116 case BPF_CGROUP_INET_SOCK_CREATE:
40304b2a 1117 case BPF_CGROUP_SOCK_OPS:
f4324551
DM
1118 cgrp = cgroup_get_from_fd(attr->target_fd);
1119 if (IS_ERR(cgrp))
1120 return PTR_ERR(cgrp);
1121
7f677633 1122 ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
f4324551
DM
1123 cgroup_put(cgrp);
1124 break;
1125
1126 default:
1127 return -EINVAL;
1128 }
1129
7f677633 1130 return ret;
f4324551 1131}
40304b2a 1132
f4324551
DM
1133#endif /* CONFIG_CGROUP_BPF */
1134
1cf1cae9
AS
1135#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
1136
1137static int bpf_prog_test_run(const union bpf_attr *attr,
1138 union bpf_attr __user *uattr)
1139{
1140 struct bpf_prog *prog;
1141 int ret = -ENOTSUPP;
1142
1143 if (CHECK_ATTR(BPF_PROG_TEST_RUN))
1144 return -EINVAL;
1145
1146 prog = bpf_prog_get(attr->test.prog_fd);
1147 if (IS_ERR(prog))
1148 return PTR_ERR(prog);
1149
1150 if (prog->aux->ops->test_run)
1151 ret = prog->aux->ops->test_run(prog, attr, uattr);
1152
1153 bpf_prog_put(prog);
1154 return ret;
1155}
1156
34ad5580
MKL
1157#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
1158
1159static int bpf_obj_get_next_id(const union bpf_attr *attr,
1160 union bpf_attr __user *uattr,
1161 struct idr *idr,
1162 spinlock_t *lock)
1163{
1164 u32 next_id = attr->start_id;
1165 int err = 0;
1166
1167 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
1168 return -EINVAL;
1169
1170 if (!capable(CAP_SYS_ADMIN))
1171 return -EPERM;
1172
1173 next_id++;
1174 spin_lock_bh(lock);
1175 if (!idr_get_next(idr, &next_id))
1176 err = -ENOENT;
1177 spin_unlock_bh(lock);
1178
1179 if (!err)
1180 err = put_user(next_id, &uattr->next_id);
1181
1182 return err;
1183}
1184
b16d9aa4
MKL
1185#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
1186
1187static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
1188{
1189 struct bpf_prog *prog;
1190 u32 id = attr->prog_id;
1191 int fd;
1192
1193 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
1194 return -EINVAL;
1195
1196 if (!capable(CAP_SYS_ADMIN))
1197 return -EPERM;
1198
1199 spin_lock_bh(&prog_idr_lock);
1200 prog = idr_find(&prog_idr, id);
1201 if (prog)
1202 prog = bpf_prog_inc_not_zero(prog);
1203 else
1204 prog = ERR_PTR(-ENOENT);
1205 spin_unlock_bh(&prog_idr_lock);
1206
1207 if (IS_ERR(prog))
1208 return PTR_ERR(prog);
1209
1210 fd = bpf_prog_new_fd(prog);
1211 if (fd < 0)
1212 bpf_prog_put(prog);
1213
1214 return fd;
1215}
1216
bd5f5f4e
MKL
1217#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id
1218
1219static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
1220{
1221 struct bpf_map *map;
1222 u32 id = attr->map_id;
1223 int fd;
1224
1225 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID))
1226 return -EINVAL;
1227
1228 if (!capable(CAP_SYS_ADMIN))
1229 return -EPERM;
1230
1231 spin_lock_bh(&map_idr_lock);
1232 map = idr_find(&map_idr, id);
1233 if (map)
1234 map = bpf_map_inc_not_zero(map, true);
1235 else
1236 map = ERR_PTR(-ENOENT);
1237 spin_unlock_bh(&map_idr_lock);
1238
1239 if (IS_ERR(map))
1240 return PTR_ERR(map);
1241
1242 fd = bpf_map_new_fd(map);
1243 if (fd < 0)
1244 bpf_map_put(map);
1245
1246 return fd;
1247}
1248
1e270976
MKL
1249static int check_uarg_tail_zero(void __user *uaddr,
1250 size_t expected_size,
1251 size_t actual_size)
1252{
1253 unsigned char __user *addr;
1254 unsigned char __user *end;
1255 unsigned char val;
1256 int err;
1257
1258 if (actual_size <= expected_size)
1259 return 0;
1260
1261 addr = uaddr + expected_size;
1262 end = uaddr + actual_size;
1263
1264 for (; addr < end; addr++) {
1265 err = get_user(val, addr);
1266 if (err)
1267 return err;
1268 if (val)
1269 return -E2BIG;
1270 }
1271
1272 return 0;
1273}
1274
1275static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
1276 const union bpf_attr *attr,
1277 union bpf_attr __user *uattr)
1278{
1279 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
1280 struct bpf_prog_info info = {};
1281 u32 info_len = attr->info.info_len;
1282 char __user *uinsns;
1283 u32 ulen;
1284 int err;
1285
1286 err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
1287 if (err)
1288 return err;
1289 info_len = min_t(u32, sizeof(info), info_len);
1290
1291 if (copy_from_user(&info, uinfo, info_len))
89b09689 1292 return -EFAULT;
1e270976
MKL
1293
1294 info.type = prog->type;
1295 info.id = prog->aux->id;
1296
1297 memcpy(info.tag, prog->tag, sizeof(prog->tag));
1298
1299 if (!capable(CAP_SYS_ADMIN)) {
1300 info.jited_prog_len = 0;
1301 info.xlated_prog_len = 0;
1302 goto done;
1303 }
1304
1305 ulen = info.jited_prog_len;
1306 info.jited_prog_len = prog->jited_len;
1307 if (info.jited_prog_len && ulen) {
1308 uinsns = u64_to_user_ptr(info.jited_prog_insns);
1309 ulen = min_t(u32, info.jited_prog_len, ulen);
1310 if (copy_to_user(uinsns, prog->bpf_func, ulen))
1311 return -EFAULT;
1312 }
1313
1314 ulen = info.xlated_prog_len;
9975a54b 1315 info.xlated_prog_len = bpf_prog_insn_size(prog);
1e270976
MKL
1316 if (info.xlated_prog_len && ulen) {
1317 uinsns = u64_to_user_ptr(info.xlated_prog_insns);
1318 ulen = min_t(u32, info.xlated_prog_len, ulen);
1319 if (copy_to_user(uinsns, prog->insnsi, ulen))
1320 return -EFAULT;
1321 }
1322
1323done:
1324 if (copy_to_user(uinfo, &info, info_len) ||
1325 put_user(info_len, &uattr->info.info_len))
1326 return -EFAULT;
1327
1328 return 0;
1329}
1330
1331static int bpf_map_get_info_by_fd(struct bpf_map *map,
1332 const union bpf_attr *attr,
1333 union bpf_attr __user *uattr)
1334{
1335 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
1336 struct bpf_map_info info = {};
1337 u32 info_len = attr->info.info_len;
1338 int err;
1339
1340 err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
1341 if (err)
1342 return err;
1343 info_len = min_t(u32, sizeof(info), info_len);
1344
1345 info.type = map->map_type;
1346 info.id = map->id;
1347 info.key_size = map->key_size;
1348 info.value_size = map->value_size;
1349 info.max_entries = map->max_entries;
1350 info.map_flags = map->map_flags;
1351
1352 if (copy_to_user(uinfo, &info, info_len) ||
1353 put_user(info_len, &uattr->info.info_len))
1354 return -EFAULT;
1355
1356 return 0;
1357}
1358
1359#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
1360
1361static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
1362 union bpf_attr __user *uattr)
1363{
1364 int ufd = attr->info.bpf_fd;
1365 struct fd f;
1366 int err;
1367
1368 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
1369 return -EINVAL;
1370
1371 f = fdget(ufd);
1372 if (!f.file)
1373 return -EBADFD;
1374
1375 if (f.file->f_op == &bpf_prog_fops)
1376 err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
1377 uattr);
1378 else if (f.file->f_op == &bpf_map_fops)
1379 err = bpf_map_get_info_by_fd(f.file->private_data, attr,
1380 uattr);
1381 else
1382 err = -EINVAL;
1383
1384 fdput(f);
1385 return err;
1386}
1387
99c55f7d
AS
1388SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
1389{
1390 union bpf_attr attr = {};
1391 int err;
1392
1be7f75d 1393 if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
99c55f7d
AS
1394 return -EPERM;
1395
1396 if (!access_ok(VERIFY_READ, uattr, 1))
1397 return -EFAULT;
1398
1399 if (size > PAGE_SIZE) /* silly large */
1400 return -E2BIG;
1401
1402 /* If we're handed a bigger struct than we know of,
1403 * ensure all the unknown bits are 0 - i.e. new
1404 * user-space does not rely on any kernel feature
1405 * extensions we dont know about yet.
1406 */
1e270976
MKL
1407 err = check_uarg_tail_zero(uattr, sizeof(attr), size);
1408 if (err)
1409 return err;
1410 size = min_t(u32, size, sizeof(attr));
99c55f7d
AS
1411
1412 /* copy attributes from user space, may be less than sizeof(bpf_attr) */
1413 if (copy_from_user(&attr, uattr, size) != 0)
1414 return -EFAULT;
1415
1416 switch (cmd) {
1417 case BPF_MAP_CREATE:
1418 err = map_create(&attr);
1419 break;
db20fd2b
AS
1420 case BPF_MAP_LOOKUP_ELEM:
1421 err = map_lookup_elem(&attr);
1422 break;
1423 case BPF_MAP_UPDATE_ELEM:
1424 err = map_update_elem(&attr);
1425 break;
1426 case BPF_MAP_DELETE_ELEM:
1427 err = map_delete_elem(&attr);
1428 break;
1429 case BPF_MAP_GET_NEXT_KEY:
1430 err = map_get_next_key(&attr);
1431 break;
09756af4
AS
1432 case BPF_PROG_LOAD:
1433 err = bpf_prog_load(&attr);
1434 break;
b2197755
DB
1435 case BPF_OBJ_PIN:
1436 err = bpf_obj_pin(&attr);
1437 break;
1438 case BPF_OBJ_GET:
1439 err = bpf_obj_get(&attr);
1440 break;
f4324551
DM
1441#ifdef CONFIG_CGROUP_BPF
1442 case BPF_PROG_ATTACH:
1443 err = bpf_prog_attach(&attr);
1444 break;
1445 case BPF_PROG_DETACH:
1446 err = bpf_prog_detach(&attr);
1447 break;
1448#endif
1cf1cae9
AS
1449 case BPF_PROG_TEST_RUN:
1450 err = bpf_prog_test_run(&attr, uattr);
1451 break;
34ad5580
MKL
1452 case BPF_PROG_GET_NEXT_ID:
1453 err = bpf_obj_get_next_id(&attr, uattr,
1454 &prog_idr, &prog_idr_lock);
1455 break;
1456 case BPF_MAP_GET_NEXT_ID:
1457 err = bpf_obj_get_next_id(&attr, uattr,
1458 &map_idr, &map_idr_lock);
1459 break;
b16d9aa4
MKL
1460 case BPF_PROG_GET_FD_BY_ID:
1461 err = bpf_prog_get_fd_by_id(&attr);
1462 break;
bd5f5f4e
MKL
1463 case BPF_MAP_GET_FD_BY_ID:
1464 err = bpf_map_get_fd_by_id(&attr);
1465 break;
1e270976
MKL
1466 case BPF_OBJ_GET_INFO_BY_FD:
1467 err = bpf_obj_get_info_by_fd(&attr, uattr);
1468 break;
99c55f7d
AS
1469 default:
1470 err = -EINVAL;
1471 break;
1472 }
1473
1474 return err;
1475}