/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

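/* Illustrative sketch (not part of this file): a map implementation
 * registers itself at boot roughly like this; the ops structure and
 * initcall names below are placeholders for the example:
 *
 *	static struct bpf_map_type_list example_map_type __read_mostly = {
 *		.ops	= &example_map_ops,
 *		.type	= BPF_MAP_TYPE_HASH,
 *	};
 *
 *	static int __init register_example_map(void)
 *	{
 *		bpf_register_map_type(&example_map_type);
 *		return 0;
 *	}
 *	late_initcall(register_example_map);
 */
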
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	/* implementation dependent freeing */
	map->ops->map_free(map);
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
		/* prog_array stores refcnt-ed bpf_prog pointers;
		 * release them all when user space closes prog_array_fd
		 */
		bpf_fd_array_map_clear(map);

	bpf_map_put(map);
	return 0;
}

static const struct file_operations bpf_map_fops = {
	.release = bpf_map_release,
};

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

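/* For example (illustrative expansion): with BPF_MAP_CREATE_LAST_FIELD
 * defined as max_entries below, CHECK_ATTR(BPF_MAP_CREATE) scans every
 * byte of the union past 'max_entries' and is true if any is non-zero:
 *
 *	memchr_inv((void *) &attr->max_entries + sizeof(attr->max_entries), 0,
 *		   sizeof(*attr) - offsetof(union bpf_attr, max_entries) -
 *		   sizeof(attr->max_entries)) != NULL
 */
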
#define BPF_MAP_CREATE_LAST_FIELD max_entries
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);

	err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);

	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	map->ops->map_free(map);
	return err;
}
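
/* Illustrative sketch (user space, not part of this file): creating a map
 * through the bpf(2) syscall; the concrete sizes here are made up:
 *
 *	union bpf_attr attr = {
 *		.map_type	= BPF_MAP_TYPE_HASH,
 *		.key_size	= sizeof(__u32),
 *		.value_size	= sizeof(__u64),
 *		.max_entries	= 1024,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */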

/* if an error is returned, the fd is released.
 * On success the caller should complete fd access with a matching fdput()
 */
struct bpf_map *bpf_map_get(struct fd f)
{
	struct bpf_map *map;

	if (!f.file)
		return ERR_PTR(-EBADF);

	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	map = f.file->private_data;

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER);
	if (!value)
		goto free_key;

	rcu_read_lock();
	ptr = map->ops->map_lookup_elem(map, key);
	if (ptr)
		memcpy(value, ptr, map->value_size);
	rcu_read_unlock();

	err = -ENOENT;
	if (!ptr)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, map->value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
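
/* Illustrative sketch (user space, not part of this file): looking up a
 * value by key in a map fd obtained from BPF_MAP_CREATE; 'map_fd', 'key'
 * and 'value' are placeholders:
 *
 *	union bpf_attr attr = {
 *		.map_fd	= map_fd,
 *		.key	= (__u64) (unsigned long) &key,
 *		.value	= (__u64) (unsigned long) &value,
 *	};
 *	int err = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 */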

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, map->value_size) != 0)
		goto free_value;

	/* eBPF programs that use maps are running under rcu_read_lock(),
	 * therefore all map accessors rely on this fact, so do the same here
	 */
	rcu_read_lock();
	err = map->ops->map_update_elem(map, key, value, attr->flags);
	rcu_read_unlock();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
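
/* Illustrative sketch (user space, not part of this file): inserting or
 * replacing an element; BPF_ANY means create the element if it does not
 * exist, otherwise update it ('map_fd', 'key', 'value' are placeholders):
 *
 *	union bpf_attr attr = {
 *		.map_fd	= map_fd,
 *		.key	= (__u64) (unsigned long) &key,
 *		.value	= (__u64) (unsigned long) &value,
 *		.flags	= BPF_ANY,
 *	};
 *	int err = syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 */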

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
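
/* Illustrative sketch (user space, not part of this file): walking all
 * keys of a map by repeatedly asking for the key after the current one
 * until the kernel reports ENOENT ('map_fd', 'key', 'next_key' are
 * placeholders):
 *
 *	union bpf_attr attr = {
 *		.map_fd		= map_fd,
 *		.key		= (__u64) (unsigned long) &key,
 *		.next_key	= (__u64) (unsigned long) &next_key,
 *	};
 *	while (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)) == 0) {
 *		// process next_key, then continue the walk from it
 *		key = next_key;
 *	}
 */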

static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after the eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when the program has bpf_call
			 * instructions and it passed bpf_check(), which means
			 * ops->get_func_proto must have been supplied; check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as a different opcode
				 * to avoid a conditional branch in the
				 * interpreter for every normal call
				 * and to prevent accidental JITing by a
				 * JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* all functions that have a prototype and that the
			 * verifier allowed programs to call must be real
			 * in-kernel functions
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

static void __prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_free(aux->prog);
}

/* version of bpf_prog_put() that is called after a grace period */
void bpf_prog_put_rcu(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		prog->aux->prog = prog;
		call_rcu(&prog->aux->rcu, __prog_put_rcu);
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		free_used_maps(prog->aux);
		bpf_prog_free(prog);
	}
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put_rcu(prog);
	return 0;
}

static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};

static struct bpf_prog *get_prog(struct fd f)
{
	struct bpf_prog *prog;

	if (!f.file)
		return ERR_PTR(-EBADF);

	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	prog = f.file->private_data;

	return prog;
}

/* called by sockets/tracing/seccomp before attaching a program to an event;
 * pairs with bpf_prog_put()
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = get_prog(f);

	if (IS_ERR(prog))
		return prog;

	atomic_inc(&prog->aux->refcnt);
	fdput(f);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get);

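/* Illustrative sketch (not part of this file): a consumer such as the
 * SO_ATTACH_BPF socket option handler pairs bpf_prog_get() with
 * bpf_prog_put() roughly like this; 'attach_prog_to_event' is a
 * placeholder for the example:
 *
 *	struct bpf_prog *prog = bpf_prog_get(ufd);
 *	if (IS_ERR(prog))
 *		return PTR_ERR(prog);
 *	err = attach_prog_to_event(prog);
 *	if (err)
 *		bpf_prog_put(prog);
 */
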
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	err = bpf_prog_select_runtime(prog);
	if (err < 0)
		goto free_used_maps;

	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_free(prog);
	return err;
}

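/* Illustrative sketch (user space, not part of this file): loading a
 * minimal socket filter that just does 'r0 = 0; return r0'; unset fields
 * rely on zero-initialization:
 *
 *	struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K },	// r0 = 0
 *		{ .code = BPF_JMP | BPF_EXIT },			// return r0
 *	};
 *	union bpf_attr attr = {
 *		.prog_type	= BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns		= (__u64) (unsigned long) insns,
 *		.insn_cnt	= 2,
 *		.license	= (__u64) (unsigned long) "GPL",
 *	};
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */
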
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}