bpf: Refactor cgroups code in prep for new type
[mirror_ubuntu-bionic-kernel.git] / kernel / bpf / syscall.c
1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
11 */
12 #include <linux/bpf.h>
13 #include <linux/syscalls.h>
14 #include <linux/slab.h>
15 #include <linux/anon_inodes.h>
16 #include <linux/file.h>
17 #include <linux/license.h>
18 #include <linux/filter.h>
19 #include <linux/version.h>
20 #include <linux/kernel.h>
21
22 DEFINE_PER_CPU(int, bpf_prog_active);
23
24 int sysctl_unprivileged_bpf_disabled __read_mostly;
25
26 static LIST_HEAD(bpf_map_types);
27
28 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
29 {
30 struct bpf_map_type_list *tl;
31 struct bpf_map *map;
32
33 list_for_each_entry(tl, &bpf_map_types, list_node) {
34 if (tl->type == attr->map_type) {
35 map = tl->ops->map_alloc(attr);
36 if (IS_ERR(map))
37 return map;
38 map->ops = tl->ops;
39 map->map_type = attr->map_type;
40 return map;
41 }
42 }
43 return ERR_PTR(-EINVAL);
44 }
45
46 /* boot time registration of different map implementations */
47 void bpf_register_map_type(struct bpf_map_type_list *tl)
48 {
49 list_add(&tl->list_node, &bpf_map_types);
50 }
51
52 int bpf_map_precharge_memlock(u32 pages)
53 {
54 struct user_struct *user = get_current_user();
55 unsigned long memlock_limit, cur;
56
57 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
58 cur = atomic_long_read(&user->locked_vm);
59 free_uid(user);
60 if (cur + pages > memlock_limit)
61 return -EPERM;
62 return 0;
63 }
64
65 static int bpf_map_charge_memlock(struct bpf_map *map)
66 {
67 struct user_struct *user = get_current_user();
68 unsigned long memlock_limit;
69
70 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
71
72 atomic_long_add(map->pages, &user->locked_vm);
73
74 if (atomic_long_read(&user->locked_vm) > memlock_limit) {
75 atomic_long_sub(map->pages, &user->locked_vm);
76 free_uid(user);
77 return -EPERM;
78 }
79 map->user = user;
80 return 0;
81 }
82
83 static void bpf_map_uncharge_memlock(struct bpf_map *map)
84 {
85 struct user_struct *user = map->user;
86
87 atomic_long_sub(map->pages, &user->locked_vm);
88 free_uid(user);
89 }
90
91 /* called from workqueue */
92 static void bpf_map_free_deferred(struct work_struct *work)
93 {
94 struct bpf_map *map = container_of(work, struct bpf_map, work);
95
96 bpf_map_uncharge_memlock(map);
97 /* implementation dependent freeing */
98 map->ops->map_free(map);
99 }
100
101 static void bpf_map_put_uref(struct bpf_map *map)
102 {
103 if (atomic_dec_and_test(&map->usercnt)) {
104 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
105 bpf_fd_array_map_clear(map);
106 }
107 }
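/* Note: a map carries two counters. 'refcnt' counts every holder of the
 * map, including loaded programs that reference it, while 'usercnt' counts
 * only user-facing holders such as map fds and bpffs pins. Clearing
 * prog-array maps once the last user reference is gone appears to be what
 * breaks the reference cycle between a prog array and the programs stored
 * in it, so that both can eventually be freed.
 */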
108
109 /* decrement map refcnt and schedule it for freeing via workqueue
110 * (underlying map implementation ops->map_free() might sleep)
111 */
112 void bpf_map_put(struct bpf_map *map)
113 {
114 if (atomic_dec_and_test(&map->refcnt)) {
115 INIT_WORK(&map->work, bpf_map_free_deferred);
116 schedule_work(&map->work);
117 }
118 }
119
120 void bpf_map_put_with_uref(struct bpf_map *map)
121 {
122 bpf_map_put_uref(map);
123 bpf_map_put(map);
124 }
125
126 static int bpf_map_release(struct inode *inode, struct file *filp)
127 {
128 struct bpf_map *map = filp->private_data;
129
130 if (map->ops->map_release)
131 map->ops->map_release(map, filp);
132
133 bpf_map_put_with_uref(map);
134 return 0;
135 }
136
137 #ifdef CONFIG_PROC_FS
138 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
139 {
140 const struct bpf_map *map = filp->private_data;
141 const struct bpf_array *array;
142 u32 owner_prog_type = 0;
143
144 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
145 array = container_of(map, struct bpf_array, map);
146 owner_prog_type = array->owner_prog_type;
147 }
148
149 seq_printf(m,
150 "map_type:\t%u\n"
151 "key_size:\t%u\n"
152 "value_size:\t%u\n"
153 "max_entries:\t%u\n"
154 "map_flags:\t%#x\n"
155 "memlock:\t%llu\n",
156 map->map_type,
157 map->key_size,
158 map->value_size,
159 map->max_entries,
160 map->map_flags,
161 map->pages * 1ULL << PAGE_SHIFT);
162
163 if (owner_prog_type)
164 seq_printf(m, "owner_prog_type:\t%u\n",
165 owner_prog_type);
166 }
167 #endif
168
169 static const struct file_operations bpf_map_fops = {
170 #ifdef CONFIG_PROC_FS
171 .show_fdinfo = bpf_map_show_fdinfo,
172 #endif
173 .release = bpf_map_release,
174 };
175
176 int bpf_map_new_fd(struct bpf_map *map)
177 {
178 return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
179 O_RDWR | O_CLOEXEC);
180 }
181
182 /* helper macro to check that unused fields of 'union bpf_attr' are zero */
183 #define CHECK_ATTR(CMD) \
184 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
185 sizeof(attr->CMD##_LAST_FIELD), 0, \
186 sizeof(*attr) - \
187 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
188 sizeof(attr->CMD##_LAST_FIELD)) != NULL
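/* Illustrative expansion: with BPF_MAP_CREATE_LAST_FIELD defined as
 * map_flags just below, CHECK_ATTR(BPF_MAP_CREATE) becomes roughly
 *
 *   memchr_inv((void *)&attr->map_flags + sizeof(attr->map_flags), 0,
 *              sizeof(*attr) - offsetof(union bpf_attr, map_flags) -
 *              sizeof(attr->map_flags)) != NULL
 *
 * i.e. it evaluates to true (reject the command) when any byte of the
 * union past the command's last used field is nonzero.
 */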
189
190 #define BPF_MAP_CREATE_LAST_FIELD map_flags
191 /* called via syscall */
192 static int map_create(union bpf_attr *attr)
193 {
194 struct bpf_map *map;
195 int err;
196
197 err = CHECK_ATTR(BPF_MAP_CREATE);
198 if (err)
199 return -EINVAL;
200
201 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
202 map = find_and_alloc_map(attr);
203 if (IS_ERR(map))
204 return PTR_ERR(map);
205
206 atomic_set(&map->refcnt, 1);
207 atomic_set(&map->usercnt, 1);
208
209 err = bpf_map_charge_memlock(map);
210 if (err)
211 goto free_map_nouncharge;
212
213 err = bpf_map_new_fd(map);
214 if (err < 0)
215 /* failed to allocate fd */
216 goto free_map;
217
218 return err;
219
220 free_map:
221 bpf_map_uncharge_memlock(map);
222 free_map_nouncharge:
223 map->ops->map_free(map);
224 return err;
225 }
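/* Illustrative user-space sketch (not kernel code): creating an array map
 * with the raw bpf(2) syscall, using only the uapi <linux/bpf.h> header.
 *
 *   #include <linux/bpf.h>
 *   #include <sys/syscall.h>
 *   #include <unistd.h>
 *   #include <string.h>
 *
 *   int create_array_map(void)
 *   {
 *           union bpf_attr attr;
 *
 *           // zero the whole union so CHECK_ATTR() sees no stray bytes
 *           memset(&attr, 0, sizeof(attr));
 *           attr.map_type    = BPF_MAP_TYPE_ARRAY;
 *           attr.key_size    = 4;        // array maps use 4-byte indices
 *           attr.value_size  = 8;
 *           attr.max_entries = 256;
 *
 *           return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *   }
 *
 * On success the return value is a new map fd, created with O_CLOEXEC as
 * bpf_map_new_fd() above requests.
 */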
226
227 /* if an error is returned, the fd is released.
228 * On success the caller should complete fd access with a matching fdput()
229 */
230 struct bpf_map *__bpf_map_get(struct fd f)
231 {
232 if (!f.file)
233 return ERR_PTR(-EBADF);
234 if (f.file->f_op != &bpf_map_fops) {
235 fdput(f);
236 return ERR_PTR(-EINVAL);
237 }
238
239 return f.file->private_data;
240 }
241
242 /* prog's and map's refcnt limit */
243 #define BPF_MAX_REFCNT 32768
244
245 struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
246 {
247 if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
248 atomic_dec(&map->refcnt);
249 return ERR_PTR(-EBUSY);
250 }
251 if (uref)
252 atomic_inc(&map->usercnt);
253 return map;
254 }
255
256 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
257 {
258 struct fd f = fdget(ufd);
259 struct bpf_map *map;
260
261 map = __bpf_map_get(f);
262 if (IS_ERR(map))
263 return map;
264
265 map = bpf_map_inc(map, true);
266 fdput(f);
267
268 return map;
269 }
270
271 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
272 {
273 return -ENOTSUPP;
274 }
275
276 /* last field in 'union bpf_attr' used by this command */
277 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
278
279 static int map_lookup_elem(union bpf_attr *attr)
280 {
281 void __user *ukey = u64_to_user_ptr(attr->key);
282 void __user *uvalue = u64_to_user_ptr(attr->value);
283 int ufd = attr->map_fd;
284 struct bpf_map *map;
285 void *key, *value, *ptr;
286 u32 value_size;
287 struct fd f;
288 int err;
289
290 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
291 return -EINVAL;
292
293 f = fdget(ufd);
294 map = __bpf_map_get(f);
295 if (IS_ERR(map))
296 return PTR_ERR(map);
297
298 err = -ENOMEM;
299 key = kmalloc(map->key_size, GFP_USER);
300 if (!key)
301 goto err_put;
302
303 err = -EFAULT;
304 if (copy_from_user(key, ukey, map->key_size) != 0)
305 goto free_key;
306
307 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
308 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
309 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
310 value_size = round_up(map->value_size, 8) * num_possible_cpus();
311 else
312 value_size = map->value_size;
313
314 err = -ENOMEM;
315 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
316 if (!value)
317 goto free_key;
318
319 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
320 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
321 err = bpf_percpu_hash_copy(map, key, value);
322 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
323 err = bpf_percpu_array_copy(map, key, value);
324 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
325 err = bpf_stackmap_copy(map, key, value);
326 } else {
327 rcu_read_lock();
328 ptr = map->ops->map_lookup_elem(map, key);
329 if (ptr)
330 memcpy(value, ptr, value_size);
331 rcu_read_unlock();
332 err = ptr ? 0 : -ENOENT;
333 }
334
335 if (err)
336 goto free_value;
337
338 err = -EFAULT;
339 if (copy_to_user(uvalue, value, value_size) != 0)
340 goto free_value;
341
342 err = 0;
343
344 free_value:
345 kfree(value);
346 free_key:
347 kfree(key);
348 err_put:
349 fdput(f);
350 return err;
351 }
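/* Illustrative user-space sketch (not kernel code): looking up one element
 * of an ordinary (non per-CPU) map; same headers as the creation sketch
 * above, and ptr_to_u64() is a hypothetical local helper, not a kernel API.
 * For the per-CPU map types the value buffer must instead hold
 * round_up(value_size, 8) * (number of possible CPUs) bytes, matching the
 * sizing logic above.
 *
 *   static __u64 ptr_to_u64(const void *p)
 *   {
 *           return (__u64)(unsigned long)p;   // hypothetical helper
 *   }
 *
 *   int lookup_u64(int map_fd, __u32 key, __u64 *value)
 *   {
 *           union bpf_attr attr;
 *
 *           memset(&attr, 0, sizeof(attr));
 *           attr.map_fd = map_fd;
 *           attr.key    = ptr_to_u64(&key);
 *           attr.value  = ptr_to_u64(value);
 *
 *           // 0 on success; -1 with errno == ENOENT if the key is absent
 *           return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 *   }
 */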
352
353 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
354
355 static int map_update_elem(union bpf_attr *attr)
356 {
357 void __user *ukey = u64_to_user_ptr(attr->key);
358 void __user *uvalue = u64_to_user_ptr(attr->value);
359 int ufd = attr->map_fd;
360 struct bpf_map *map;
361 void *key, *value;
362 u32 value_size;
363 struct fd f;
364 int err;
365
366 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
367 return -EINVAL;
368
369 f = fdget(ufd);
370 map = __bpf_map_get(f);
371 if (IS_ERR(map))
372 return PTR_ERR(map);
373
374 err = -ENOMEM;
375 key = kmalloc(map->key_size, GFP_USER);
376 if (!key)
377 goto err_put;
378
379 err = -EFAULT;
380 if (copy_from_user(key, ukey, map->key_size) != 0)
381 goto free_key;
382
383 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
384 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
385 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
386 value_size = round_up(map->value_size, 8) * num_possible_cpus();
387 else
388 value_size = map->value_size;
389
390 err = -ENOMEM;
391 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
392 if (!value)
393 goto free_key;
394
395 err = -EFAULT;
396 if (copy_from_user(value, uvalue, value_size) != 0)
397 goto free_value;
398
399 /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
400 * inside a bpf map update or delete; otherwise deadlocks are possible
401 */
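/* The guarded scenario is, for example, a kprobe placed on a function
 * reached from this very update/delete path whose attached program
 * modifies the same map: the tracing entry points check bpf_prog_active
 * and skip nested program invocations, so the map's internal locks are
 * not re-entered on the same CPU.
 */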
402 preempt_disable();
403 __this_cpu_inc(bpf_prog_active);
404 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
405 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
406 err = bpf_percpu_hash_update(map, key, value, attr->flags);
407 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
408 err = bpf_percpu_array_update(map, key, value, attr->flags);
409 } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
410 map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
411 map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
412 rcu_read_lock();
413 err = bpf_fd_array_map_update_elem(map, f.file, key, value,
414 attr->flags);
415 rcu_read_unlock();
416 } else {
417 rcu_read_lock();
418 err = map->ops->map_update_elem(map, key, value, attr->flags);
419 rcu_read_unlock();
420 }
421 __this_cpu_dec(bpf_prog_active);
422 preempt_enable();
423
424 free_value:
425 kfree(value);
426 free_key:
427 kfree(key);
428 err_put:
429 fdput(f);
430 return err;
431 }
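/* Illustrative user-space sketch (not kernel code): updating an element,
 * reusing the hypothetical ptr_to_u64() helper from the lookup sketch
 * above. attr->flags selects the update policy: BPF_ANY creates or
 * replaces, BPF_NOEXIST only creates, BPF_EXIST only replaces.
 *
 *   int update_u64(int map_fd, __u32 key, __u64 value)
 *   {
 *           union bpf_attr attr;
 *
 *           memset(&attr, 0, sizeof(attr));
 *           attr.map_fd = map_fd;
 *           attr.key    = ptr_to_u64(&key);
 *           attr.value  = ptr_to_u64(&value);
 *           attr.flags  = BPF_ANY;
 *
 *           return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 *   }
 */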
432
433 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
434
435 static int map_delete_elem(union bpf_attr *attr)
436 {
437 void __user *ukey = u64_to_user_ptr(attr->key);
438 int ufd = attr->map_fd;
439 struct bpf_map *map;
440 struct fd f;
441 void *key;
442 int err;
443
444 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
445 return -EINVAL;
446
447 f = fdget(ufd);
448 map = __bpf_map_get(f);
449 if (IS_ERR(map))
450 return PTR_ERR(map);
451
452 err = -ENOMEM;
453 key = kmalloc(map->key_size, GFP_USER);
454 if (!key)
455 goto err_put;
456
457 err = -EFAULT;
458 if (copy_from_user(key, ukey, map->key_size) != 0)
459 goto free_key;
460
461 preempt_disable();
462 __this_cpu_inc(bpf_prog_active);
463 rcu_read_lock();
464 err = map->ops->map_delete_elem(map, key);
465 rcu_read_unlock();
466 __this_cpu_dec(bpf_prog_active);
467 preempt_enable();
468
469 free_key:
470 kfree(key);
471 err_put:
472 fdput(f);
473 return err;
474 }
475
476 /* last field in 'union bpf_attr' used by this command */
477 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
478
479 static int map_get_next_key(union bpf_attr *attr)
480 {
481 void __user *ukey = u64_to_user_ptr(attr->key);
482 void __user *unext_key = u64_to_user_ptr(attr->next_key);
483 int ufd = attr->map_fd;
484 struct bpf_map *map;
485 void *key, *next_key;
486 struct fd f;
487 int err;
488
489 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
490 return -EINVAL;
491
492 f = fdget(ufd);
493 map = __bpf_map_get(f);
494 if (IS_ERR(map))
495 return PTR_ERR(map);
496
497 err = -ENOMEM;
498 key = kmalloc(map->key_size, GFP_USER);
499 if (!key)
500 goto err_put;
501
502 err = -EFAULT;
503 if (copy_from_user(key, ukey, map->key_size) != 0)
504 goto free_key;
505
506 err = -ENOMEM;
507 next_key = kmalloc(map->key_size, GFP_USER);
508 if (!next_key)
509 goto free_key;
510
511 rcu_read_lock();
512 err = map->ops->map_get_next_key(map, key, next_key);
513 rcu_read_unlock();
514 if (err)
515 goto free_next_key;
516
517 err = -EFAULT;
518 if (copy_to_user(unext_key, next_key, map->key_size) != 0)
519 goto free_next_key;
520
521 err = 0;
522
523 free_next_key:
524 kfree(next_key);
525 free_key:
526 kfree(key);
527 err_put:
528 fdput(f);
529 return err;
530 }
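/* The usual user-space iteration idiom (a sketch reusing the hypothetical
 * ptr_to_u64() helper from above) is to call BPF_MAP_GET_NEXT_KEY in a
 * loop, feeding each returned next_key back in as the key, until the call
 * fails with ENOENT. For hash-type maps, passing a key that is not present
 * yields the first key, which is how the walk is typically started.
 *
 *   memset(&attr, 0, sizeof(attr));
 *   attr.map_fd   = map_fd;
 *   attr.key      = ptr_to_u64(&key);      // start with a key not in the map
 *   attr.next_key = ptr_to_u64(&next_key);
 *   while (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)) == 0) {
 *           // ... use next_key ...
 *           key = next_key;
 *   }
 */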
531
532 static LIST_HEAD(bpf_prog_types);
533
534 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
535 {
536 struct bpf_prog_type_list *tl;
537
538 list_for_each_entry(tl, &bpf_prog_types, list_node) {
539 if (tl->type == type) {
540 prog->aux->ops = tl->ops;
541 prog->type = type;
542 return 0;
543 }
544 }
545
546 return -EINVAL;
547 }
548
549 void bpf_register_prog_type(struct bpf_prog_type_list *tl)
550 {
551 list_add(&tl->list_node, &bpf_prog_types);
552 }
553
554 /* fixup insn->imm field of bpf_call instructions:
555 * if (insn->imm == BPF_FUNC_map_lookup_elem)
556 * insn->imm = bpf_map_lookup_elem - __bpf_call_base;
557 * else if (insn->imm == BPF_FUNC_map_update_elem)
558 * insn->imm = bpf_map_update_elem - __bpf_call_base;
559 * else ...
560 *
561 * this function is called after eBPF program passed verification
562 */
563 static void fixup_bpf_calls(struct bpf_prog *prog)
564 {
565 const struct bpf_func_proto *fn;
566 int i;
567
568 for (i = 0; i < prog->len; i++) {
569 struct bpf_insn *insn = &prog->insnsi[i];
570
571 if (insn->code == (BPF_JMP | BPF_CALL)) {
572 /* we reach here when the program has bpf_call instructions
573 * and it passed bpf_check(), which means that
574 * ops->get_func_proto must have been supplied; check it
575 */
576 BUG_ON(!prog->aux->ops->get_func_proto);
577
578 if (insn->imm == BPF_FUNC_get_route_realm)
579 prog->dst_needed = 1;
580 if (insn->imm == BPF_FUNC_get_prandom_u32)
581 bpf_user_rnd_init_once();
582 if (insn->imm == BPF_FUNC_tail_call) {
583 /* mark bpf_tail_call as a different opcode
584 * to avoid a conditional branch in the
585 * interpreter for every normal call
586 * and to prevent accidental JITing by a
587 * JIT compiler that doesn't support
588 * bpf_tail_call yet
589 */
590 insn->imm = 0;
591 insn->code |= BPF_X;
592 continue;
593 }
594
595 fn = prog->aux->ops->get_func_proto(insn->imm);
596 /* all functions that have a prototype and that the verifier allowed
597 * programs to call must be real in-kernel functions
598 */
599 BUG_ON(!fn->func);
600 insn->imm = fn->func - __bpf_call_base;
601 }
602 }
603 }
604
605 /* drop refcnt on maps used by eBPF program and free auxiliary data */
606 static void free_used_maps(struct bpf_prog_aux *aux)
607 {
608 int i;
609
610 for (i = 0; i < aux->used_map_cnt; i++)
611 bpf_map_put(aux->used_maps[i]);
612
613 kfree(aux->used_maps);
614 }
615
616 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
617 {
618 struct user_struct *user = get_current_user();
619 unsigned long memlock_limit;
620
621 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
622
623 atomic_long_add(prog->pages, &user->locked_vm);
624 if (atomic_long_read(&user->locked_vm) > memlock_limit) {
625 atomic_long_sub(prog->pages, &user->locked_vm);
626 free_uid(user);
627 return -EPERM;
628 }
629 prog->aux->user = user;
630 return 0;
631 }
632
633 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
634 {
635 struct user_struct *user = prog->aux->user;
636
637 atomic_long_sub(prog->pages, &user->locked_vm);
638 free_uid(user);
639 }
640
641 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
642 {
643 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
644
645 free_used_maps(aux);
646 bpf_prog_uncharge_memlock(aux->prog);
647 bpf_prog_free(aux->prog);
648 }
649
650 void bpf_prog_put(struct bpf_prog *prog)
651 {
652 if (atomic_dec_and_test(&prog->aux->refcnt))
653 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
654 }
655 EXPORT_SYMBOL_GPL(bpf_prog_put);
656
657 static int bpf_prog_release(struct inode *inode, struct file *filp)
658 {
659 struct bpf_prog *prog = filp->private_data;
660
661 bpf_prog_put(prog);
662 return 0;
663 }
664
665 static const struct file_operations bpf_prog_fops = {
666 .release = bpf_prog_release,
667 };
668
669 int bpf_prog_new_fd(struct bpf_prog *prog)
670 {
671 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
672 O_RDWR | O_CLOEXEC);
673 }
674
675 static struct bpf_prog *____bpf_prog_get(struct fd f)
676 {
677 if (!f.file)
678 return ERR_PTR(-EBADF);
679 if (f.file->f_op != &bpf_prog_fops) {
680 fdput(f);
681 return ERR_PTR(-EINVAL);
682 }
683
684 return f.file->private_data;
685 }
686
687 struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
688 {
689 if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
690 atomic_sub(i, &prog->aux->refcnt);
691 return ERR_PTR(-EBUSY);
692 }
693 return prog;
694 }
695 EXPORT_SYMBOL_GPL(bpf_prog_add);
696
697 void bpf_prog_sub(struct bpf_prog *prog, int i)
698 {
699 /* Only to be used for undoing previous bpf_prog_add() in some
700 * error path. We still know that another entity in our call
701 * path holds a reference to the program, thus atomic_sub() can
702 * be safely used in such cases!
703 */
704 WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
705 }
706 EXPORT_SYMBOL_GPL(bpf_prog_sub);
707
708 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
709 {
710 return bpf_prog_add(prog, 1);
711 }
712 EXPORT_SYMBOL_GPL(bpf_prog_inc);
713
714 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
715 {
716 struct fd f = fdget(ufd);
717 struct bpf_prog *prog;
718
719 prog = ____bpf_prog_get(f);
720 if (IS_ERR(prog))
721 return prog;
722 if (type && prog->type != *type) {
723 prog = ERR_PTR(-EINVAL);
724 goto out;
725 }
726
727 prog = bpf_prog_inc(prog);
728 out:
729 fdput(f);
730 return prog;
731 }
732
733 struct bpf_prog *bpf_prog_get(u32 ufd)
734 {
735 return __bpf_prog_get(ufd, NULL);
736 }
737
738 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
739 {
740 return __bpf_prog_get(ufd, &type);
741 }
742 EXPORT_SYMBOL_GPL(bpf_prog_get_type);
743
744 /* last field in 'union bpf_attr' used by this command */
745 #define BPF_PROG_LOAD_LAST_FIELD kern_version
746
747 static int bpf_prog_load(union bpf_attr *attr)
748 {
749 enum bpf_prog_type type = attr->prog_type;
750 struct bpf_prog *prog;
751 int err;
752 char license[128];
753 bool is_gpl;
754
755 if (CHECK_ATTR(BPF_PROG_LOAD))
756 return -EINVAL;
757
758 /* copy eBPF program license from user space */
759 if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
760 sizeof(license) - 1) < 0)
761 return -EFAULT;
762 license[sizeof(license) - 1] = 0;
763
764 /* eBPF programs must be GPL compatible to use GPL-ed functions */
765 is_gpl = license_is_gpl_compatible(license);
766
767 if (attr->insn_cnt >= BPF_MAXINSNS)
768 return -EINVAL;
769
770 if (type == BPF_PROG_TYPE_KPROBE &&
771 attr->kern_version != LINUX_VERSION_CODE)
772 return -EINVAL;
773
774 if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
775 return -EPERM;
776
777 /* plain bpf_prog allocation */
778 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
779 if (!prog)
780 return -ENOMEM;
781
782 err = bpf_prog_charge_memlock(prog);
783 if (err)
784 goto free_prog_nouncharge;
785
786 prog->len = attr->insn_cnt;
787
788 err = -EFAULT;
789 if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
790 prog->len * sizeof(struct bpf_insn)) != 0)
791 goto free_prog;
792
793 prog->orig_prog = NULL;
794 prog->jited = 0;
795
796 atomic_set(&prog->aux->refcnt, 1);
797 prog->gpl_compatible = is_gpl ? 1 : 0;
798
799 /* find program type: socket_filter vs tracing_filter */
800 err = find_prog_type(type, prog);
801 if (err < 0)
802 goto free_prog;
803
804 /* run eBPF verifier */
805 err = bpf_check(&prog, attr);
806 if (err < 0)
807 goto free_used_maps;
808
809 /* fixup BPF_CALL->imm field */
810 fixup_bpf_calls(prog);
811
812 /* eBPF program is ready to be JITed */
813 prog = bpf_prog_select_runtime(prog, &err);
814 if (err < 0)
815 goto free_used_maps;
816
817 err = bpf_prog_new_fd(prog);
818 if (err < 0)
819 /* failed to allocate fd */
820 goto free_used_maps;
821
822 return err;
823
824 free_used_maps:
825 free_used_maps(prog->aux);
826 free_prog:
827 bpf_prog_uncharge_memlock(prog);
828 free_prog_nouncharge:
829 bpf_prog_free(prog);
830 return err;
831 }
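/* Illustrative user-space sketch (not kernel code): loading a minimal
 * socket-filter program ("r0 = 0; exit") with the raw syscall. The two
 * instructions are written with designated initializers so no helper
 * macros are needed; ptr_to_u64() is the hypothetical helper from the
 * earlier sketches.
 *
 *   int load_minimal_prog(void)
 *   {
 *           struct bpf_insn insns[] = {
 *                   // r0 = 0
 *                   { .code = BPF_ALU64 | BPF_MOV | BPF_K,
 *                     .dst_reg = BPF_REG_0, .imm = 0 },
 *                   // exit
 *                   { .code = BPF_JMP | BPF_EXIT },
 *           };
 *           union bpf_attr attr;
 *
 *           memset(&attr, 0, sizeof(attr));
 *           attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *           attr.insns     = ptr_to_u64(insns);
 *           attr.insn_cnt  = 2;
 *           attr.license   = ptr_to_u64("GPL");
 *
 *           return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *   }
 *
 * BPF_PROG_TYPE_SOCKET_FILTER is the one type that does not require
 * CAP_SYS_ADMIN here, subject to the sysctl_unprivileged_bpf_disabled
 * check in the syscall entry point below.
 */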
832
833 #define BPF_OBJ_LAST_FIELD bpf_fd
834
835 static int bpf_obj_pin(const union bpf_attr *attr)
836 {
837 if (CHECK_ATTR(BPF_OBJ))
838 return -EINVAL;
839
840 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
841 }
842
843 static int bpf_obj_get(const union bpf_attr *attr)
844 {
845 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
846 return -EINVAL;
847
848 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
849 }
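/* Illustrative user-space sketch (not kernel code): pinning an object and
 * re-opening it later; the path is only an example and assumes a bpffs
 * mount at /sys/fs/bpf. ptr_to_u64() is the hypothetical helper from the
 * earlier sketches.
 *
 *   memset(&attr, 0, sizeof(attr));
 *   attr.pathname = ptr_to_u64("/sys/fs/bpf/my_map");
 *   attr.bpf_fd   = map_fd;
 *   err = syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
 *
 *   memset(&attr, 0, sizeof(attr));
 *   attr.pathname = ptr_to_u64("/sys/fs/bpf/my_map");
 *   new_fd = syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
 *
 * bpf_obj_get() above insists on attr->bpf_fd being zero, which the
 * memset() takes care of.
 */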
850
851 #ifdef CONFIG_CGROUP_BPF
852
853 #define BPF_PROG_ATTACH_LAST_FIELD attach_type
854
855 static int bpf_prog_attach(const union bpf_attr *attr)
856 {
857 struct bpf_prog *prog;
858 struct cgroup *cgrp;
859 enum bpf_prog_type ptype;
860
861 if (!capable(CAP_NET_ADMIN))
862 return -EPERM;
863
864 if (CHECK_ATTR(BPF_PROG_ATTACH))
865 return -EINVAL;
866
867 switch (attr->attach_type) {
868 case BPF_CGROUP_INET_INGRESS:
869 case BPF_CGROUP_INET_EGRESS:
870 ptype = BPF_PROG_TYPE_CGROUP_SKB;
871 break;
872
873 default:
874 return -EINVAL;
875 }
876
877 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
878 if (IS_ERR(prog))
879 return PTR_ERR(prog);
880
881 cgrp = cgroup_get_from_fd(attr->target_fd);
882 if (IS_ERR(cgrp)) {
883 bpf_prog_put(prog);
884 return PTR_ERR(cgrp);
885 }
886
887 cgroup_bpf_update(cgrp, prog, attr->attach_type);
888 cgroup_put(cgrp);
889
890 return 0;
891 }
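/* Illustrative user-space sketch (not kernel code): attaching a
 * BPF_PROG_TYPE_CGROUP_SKB program to a cgroup's ingress hook. The cgroup
 * path is only an example; CAP_NET_ADMIN is required, as checked above.
 *
 *   #include <fcntl.h>
 *
 *   int cgroup_fd = open("/sys/fs/cgroup/unified/mygroup",
 *                        O_RDONLY | O_DIRECTORY);
 *   union bpf_attr attr;
 *
 *   memset(&attr, 0, sizeof(attr));
 *   attr.target_fd     = cgroup_fd;
 *   attr.attach_bpf_fd = prog_fd;
 *   attr.attach_type   = BPF_CGROUP_INET_INGRESS;
 *   err = syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
 */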
892
893 #define BPF_PROG_DETACH_LAST_FIELD attach_type
894
895 static int bpf_prog_detach(const union bpf_attr *attr)
896 {
897 struct cgroup *cgrp;
898
899 if (!capable(CAP_NET_ADMIN))
900 return -EPERM;
901
902 if (CHECK_ATTR(BPF_PROG_DETACH))
903 return -EINVAL;
904
905 switch (attr->attach_type) {
906 case BPF_CGROUP_INET_INGRESS:
907 case BPF_CGROUP_INET_EGRESS:
908 cgrp = cgroup_get_from_fd(attr->target_fd);
909 if (IS_ERR(cgrp))
910 return PTR_ERR(cgrp);
911
912 cgroup_bpf_update(cgrp, NULL, attr->attach_type);
913 cgroup_put(cgrp);
914 break;
915
916 default:
917 return -EINVAL;
918 }
919
920 return 0;
921 }
922 #endif /* CONFIG_CGROUP_BPF */
923
924 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
925 {
926 union bpf_attr attr = {};
927 int err;
928
929 if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
930 return -EPERM;
931
932 if (!access_ok(VERIFY_READ, uattr, 1))
933 return -EFAULT;
934
935 if (size > PAGE_SIZE) /* silly large */
936 return -E2BIG;
937
938 /* If we're handed a bigger struct than we know of,
939 * ensure all the unknown bits are 0 - i.e. new
940 * user-space does not rely on any kernel feature
941 * extensions we don't know about yet.
942 */
943 if (size > sizeof(attr)) {
944 unsigned char __user *addr;
945 unsigned char __user *end;
946 unsigned char val;
947
948 addr = (void __user *)uattr + sizeof(attr);
949 end = (void __user *)uattr + size;
950
951 for (; addr < end; addr++) {
952 err = get_user(val, addr);
953 if (err)
954 return err;
955 if (val)
956 return -E2BIG;
957 }
958 size = sizeof(attr);
959 }
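/* This keeps the interface forward compatible: a newer user space with a
 * larger union bpf_attr may still call into this kernel as long as the
 * bytes beyond what this kernel understands are all zero. A nonzero byte
 * means the caller relies on an attribute this kernel cannot honour, so
 * the command fails with -E2BIG instead of being silently misinterpreted.
 */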
960
961 /* copy attributes from user space, may be less than sizeof(bpf_attr) */
962 if (copy_from_user(&attr, uattr, size) != 0)
963 return -EFAULT;
964
965 switch (cmd) {
966 case BPF_MAP_CREATE:
967 err = map_create(&attr);
968 break;
969 case BPF_MAP_LOOKUP_ELEM:
970 err = map_lookup_elem(&attr);
971 break;
972 case BPF_MAP_UPDATE_ELEM:
973 err = map_update_elem(&attr);
974 break;
975 case BPF_MAP_DELETE_ELEM:
976 err = map_delete_elem(&attr);
977 break;
978 case BPF_MAP_GET_NEXT_KEY:
979 err = map_get_next_key(&attr);
980 break;
981 case BPF_PROG_LOAD:
982 err = bpf_prog_load(&attr);
983 break;
984 case BPF_OBJ_PIN:
985 err = bpf_obj_pin(&attr);
986 break;
987 case BPF_OBJ_GET:
988 err = bpf_obj_get(&attr);
989 break;
990
991 #ifdef CONFIG_CGROUP_BPF
992 case BPF_PROG_ATTACH:
993 err = bpf_prog_attach(&attr);
994 break;
995 case BPF_PROG_DETACH:
996 err = bpf_prog_detach(&attr);
997 break;
998 #endif
999
1000 default:
1001 err = -EINVAL;
1002 break;
1003 }
1004
1005 return err;
1006 }