/* kernel/bpf/syscall.c */
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

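/* Example: how a map implementation registers itself at boot. This is an
 * illustrative sketch of the pattern used by the individual map files
 * (e.g. kernel/bpf/arraymap.c in this kernel); the names array_ops and
 * register_array_map are taken from there, not from this file:
 *
 *	static struct bpf_map_type_list array_type __read_mostly = {
 *		.ops	= &array_ops,
 *		.type	= BPF_MAP_TYPE_ARRAY,
 *	};
 *
 *	static int __init register_array_map(void)
 *	{
 *		bpf_register_map_type(&array_type);
 *		return 0;
 *	}
 *	late_initcall(register_array_map);
 *
 * find_and_alloc_map() above then matches attr->map_type against this list.
 */
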
int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	const struct bpf_array *array;
	u32 owner_prog_type = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		array = container_of(map, struct bpf_array, map);
		owner_prog_type = array->owner_prog_type;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (owner_prog_type)
		seq_printf(m, "owner_prog_type:\t%u\n",
			   owner_prog_type);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

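/* For example, CHECK_ATTR(BPF_MAP_CREATE), with BPF_MAP_CREATE_LAST_FIELD
 * defined as map_flags just below, expands to a check that every byte of
 * *attr past 'map_flags' is still zero:
 *
 *	memchr_inv((void *) &attr->map_flags + sizeof(attr->map_flags), 0,
 *		   sizeof(*attr) - offsetof(union bpf_attr, map_flags) -
 *		   sizeof(attr->map_flags)) != NULL
 *
 * so a command rejects attribute fields it does not know how to handle
 * instead of silently ignoring them.
 */
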
#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	trace_bpf_map_create(map, err);
	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

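/* Example: creating a hash map from user space via this command. This is an
 * illustrative sketch, not part of this file; it assumes only the UAPI in
 * <linux/bpf.h> plus <unistd.h> and <sys/syscall.h> for syscall():
 *
 *	union bpf_attr attr = {};
 *	int map_fd;
 *
 *	attr.map_type    = BPF_MAP_TYPE_HASH;
 *	attr.key_size    = sizeof(__u32);
 *	attr.value_size  = sizeof(__u64);
 *	attr.max_entries = 256;
 *	attr.map_flags   = 0;
 *
 *	map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * On success the new anon-inode fd from bpf_map_new_fd() is returned; on
 * failure the syscall returns -1 with errno set from the errors above.
 */
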
/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	trace_bpf_map_lookup_elem(map, ufd, key, value);
	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

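/* Example: looking up an element from user space (illustrative sketch, not
 * part of this file). Note that for the per-cpu map types the value buffer
 * must hold round_up(value_size, 8) * num_possible_cpus() bytes, matching
 * the kernel-side sizing above; for ordinary maps value_size bytes suffice:
 *
 *	__u32 key = 1;
 *	__u64 value;
 *	union bpf_attr attr = {};
 *	int err;
 *
 *	attr.map_fd = map_fd;
 *	attr.key    = (__u64)(unsigned long)&key;
 *	attr.value  = (__u64)(unsigned long)&value;
 *
 *	err = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 *
 * A missing key shows up as -1 with errno set to ENOENT, per the -ENOENT
 * path above.
 */
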
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete; otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	if (!err)
		trace_bpf_map_delete_elem(map, ufd, key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *unext_key = u64_to_user_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	trace_bpf_map_next_key(map, ufd, key, next_key);
	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

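/* Example: walking all keys of a map from user space (illustrative sketch,
 * not part of this file). For hash maps in this kernel, passing a key that
 * is not present yields the first key, so iteration is commonly started
 * from a sentinel value; BPF_MAP_GET_NEXT_KEY returns -ENOENT once the last
 * key has been handed out:
 *
 *	__u32 key = (__u32)-1;	(assuming 0xffffffff is never used as a key)
 *	__u32 next_key;
 *	union bpf_attr attr = {};
 *
 *	attr.map_fd   = map_fd;
 *	attr.key      = (__u64)(unsigned long)&key;
 *	attr.next_key = (__u64)(unsigned long)&next_key;
 *
 *	while (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)) == 0) {
 *		... use next_key, e.g. BPF_MAP_LOOKUP_ELEM on it ...
 *		key = next_key;
 *	}
 */
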
static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when program has bpf_call instructions
			 * and it passed bpf_check(), means that
			 * ops->get_func_proto must have been supplied, check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_xdp_adjust_head)
				prog->xdp_adjust_head = 1;
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as different opcode
				 * to avoid conditional branch in
				 * interpreter for every normal call
				 * and to prevent accidental JITing by
				 * JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* all functions that have prototype and verifier allowed
			 * programs to call them, must be real in-kernel functions
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}

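/* After this rewrite the interpreter can dispatch a helper call through the
 * relative offset alone; the call handler in kernel/bpf/core.c does roughly
 * (sketch, see that file for the exact code):
 *
 *	BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
 *					       BPF_R4, BPF_R5);
 *
 * instead of translating BPF_FUNC_* ids into function pointers at run time.
 */
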
/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long user_bufs;

	if (user) {
		user_bufs = atomic_long_add_return(pages, &user->locked_vm);
		if (user_bufs > memlock_limit) {
			atomic_long_sub(pages, &user->locked_vm);
			return -EPERM;
		}
	}

	return 0;
}

void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (user)
		atomic_long_sub(pages, &user->locked_vm);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	int ret;

	ret = __bpf_prog_charge(user, prog->pages);
	if (ret) {
		free_uid(user);
		return ret;
	}

	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	__bpf_prog_uncharge(user, prog->pages);
	free_uid(user);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt)) {
		trace_bpf_prog_put_rcu(prog);
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	}
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif

static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_prog_show_fdinfo,
#endif
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

void bpf_prog_sub(struct bpf_prog *prog, int i)
{
	/* Only to be used for undoing previous bpf_prog_add() in some
	 * error path. We still know that another entity in our call
	 * path holds a reference to the program, thus atomic_sub() can
	 * be safely used in such cases!
	 */
	WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (!IS_ERR(prog))
		trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -E2BIG;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
			   bpf_prog_insn_size(prog)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	trace_bpf_prog_load(prog, err);
	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

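/* Example: loading a minimal socket filter from user space (illustrative
 * sketch, not part of this file). The two instructions set r0 to 0 and
 * exit, i.e. the filter accepts nothing; BPF_PROG_TYPE_SOCKET_FILTER is
 * used because it is the one type allowed without CAP_SYS_ADMIN above:
 *
 *	struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
 *		  .dst_reg = BPF_REG_0, .imm = 0 },
 *		{ .code = BPF_JMP | BPF_EXIT },
 *	};
 *	union bpf_attr attr = {};
 *	int prog_fd;
 *
 *	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *	attr.insns     = (__u64)(unsigned long)insns;
 *	attr.insn_cnt  = 2;
 *	attr.license   = (__u64)(unsigned long)"GPL";
 *
 *	prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *
 * kern_version only needs to be filled in for BPF_PROG_TYPE_KPROBE, per the
 * check above.
 */
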
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}

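/* Example: pinning a map fd into the bpf filesystem and reopening it later
 * (illustrative sketch, not part of this file; the path assumes bpffs is
 * mounted at /sys/fs/bpf). Note that BPF_OBJ_GET requires bpf_fd to be 0,
 * hence the memset between the two calls:
 *
 *	union bpf_attr attr = {};
 *	int new_fd;
 *
 *	attr.bpf_fd   = map_fd;
 *	attr.pathname = (__u64)(unsigned long)"/sys/fs/bpf/my_map";
 *	syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.pathname = (__u64)(unsigned long)"/sys/fs/bpf/my_map";
 *	new_fd = syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
 */
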
#ifdef CONFIG_CGROUP_BPF

#define BPF_PROG_ATTACH_LAST_FIELD attach_type

static int bpf_prog_attach(const union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	enum bpf_prog_type ptype;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_ATTACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
		ptype = BPF_PROG_TYPE_CGROUP_SKB;
		break;
	case BPF_CGROUP_INET_SOCK_CREATE:
		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
		break;
	default:
		return -EINVAL;
	}

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp)) {
		bpf_prog_put(prog);
		return PTR_ERR(cgrp);
	}

	cgroup_bpf_update(cgrp, prog, attr->attach_type);
	cgroup_put(cgrp);

	return 0;
}

#define BPF_PROG_DETACH_LAST_FIELD attach_type

static int bpf_prog_detach(const union bpf_attr *attr)
{
	struct cgroup *cgrp;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_PROG_DETACH))
		return -EINVAL;

	switch (attr->attach_type) {
	case BPF_CGROUP_INET_INGRESS:
	case BPF_CGROUP_INET_EGRESS:
	case BPF_CGROUP_INET_SOCK_CREATE:
		cgrp = cgroup_get_from_fd(attr->target_fd);
		if (IS_ERR(cgrp))
			return PTR_ERR(cgrp);

		cgroup_bpf_update(cgrp, NULL, attr->attach_type);
		cgroup_put(cgrp);
		break;

	default:
		return -EINVAL;
	}

	return 0;
}
#endif /* CONFIG_CGROUP_BPF */

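/* Example: attaching a BPF_PROG_TYPE_CGROUP_SKB program to a cgroup's
 * ingress hook (illustrative sketch, not part of this file; cgroup_fd is an
 * open fd of a cgroup v2 directory and prog_fd a loaded program of the
 * matching type):
 *
 *	union bpf_attr attr = {};
 *	int err;
 *
 *	attr.target_fd     = cgroup_fd;
 *	attr.attach_bpf_fd = prog_fd;
 *	attr.attach_type   = BPF_CGROUP_INET_INGRESS;
 *
 *	err = syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
 *
 * Detaching passes the same target_fd and attach_type with BPF_PROG_DETACH,
 * which installs a NULL program via cgroup_bpf_update() above.
 */
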
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;

#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
#endif

	default:
		err = -EINVAL;
		break;
	}

	return err;
}
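
/* Example: a minimal user-space wrapper around this syscall (illustrative
 * sketch; the name sys_bpf is hypothetical). Because a size larger than the
 * kernel's union bpf_attr is only accepted when every trailing byte is zero
 * (see the loop above), a wrapper can always pass sizeof(attr) of whatever
 * UAPI header it was built against:
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	static long sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 *			    unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 */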