kernel/bpf/syscall.c (mirror_ubuntu-bionic-kernel.git, blob at commit "bpf: Add hash of maps support")
1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
11 */
12 #include <linux/bpf.h>
13 #include <linux/bpf_trace.h>
14 #include <linux/syscalls.h>
15 #include <linux/slab.h>
16 #include <linux/sched/signal.h>
17 #include <linux/vmalloc.h>
18 #include <linux/mmzone.h>
19 #include <linux/anon_inodes.h>
20 #include <linux/file.h>
21 #include <linux/license.h>
22 #include <linux/filter.h>
23 #include <linux/version.h>
24 #include <linux/kernel.h>
25
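/* Non-zero on a CPU while it is inside a map update/delete path; used to
 * fence off kprobe-attached BPF programs so they cannot re-enter those
 * paths and deadlock (see map_update_elem() and map_delete_elem() below).
 */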
26 DEFINE_PER_CPU(int, bpf_prog_active);
27
28 int sysctl_unprivileged_bpf_disabled __read_mostly;
29
30 static LIST_HEAD(bpf_map_types);
31
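/* Look up the map type registered for attr->map_type on the bpf_map_types
 * list (populated at boot via bpf_register_map_type()) and let its ops
 * allocate and initialize the map.
 */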
32 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
33 {
34 struct bpf_map_type_list *tl;
35 struct bpf_map *map;
36
37 list_for_each_entry(tl, &bpf_map_types, list_node) {
38 if (tl->type == attr->map_type) {
39 map = tl->ops->map_alloc(attr);
40 if (IS_ERR(map))
41 return map;
42 map->ops = tl->ops;
43 map->map_type = attr->map_type;
44 return map;
45 }
46 }
47 return ERR_PTR(-EINVAL);
48 }
49
50 /* boot time registration of different map implementations */
51 void bpf_register_map_type(struct bpf_map_type_list *tl)
52 {
53 list_add(&tl->list_node, &bpf_map_types);
54 }
55
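/* Allocate backing memory for a map: small requests (up to the costly page
 * order) are tried with kmalloc() first; anything larger, or a failed
 * kmalloc(), falls back to __vmalloc(). The result is released with
 * kvfree() in bpf_map_area_free(), which handles both cases.
 */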
56 void *bpf_map_area_alloc(size_t size)
57 {
58      /* We definitely need __GFP_NORETRY, so the OOM killer doesn't
59 * trigger under memory pressure as we really just want to
60 * fail instead.
61 */
62 const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
63 void *area;
64
65 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
66 area = kmalloc(size, GFP_USER | flags);
67 if (area != NULL)
68 return area;
69 }
70
71 return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
72 PAGE_KERNEL);
73 }
74
75 void bpf_map_area_free(void *area)
76 {
77 kvfree(area);
78 }
79
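/* Pre-flight check used by map implementations: verify that charging
 * 'pages' against the current user's RLIMIT_MEMLOCK would not exceed the
 * limit, without actually committing the charge.
 */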
80 int bpf_map_precharge_memlock(u32 pages)
81 {
82 struct user_struct *user = get_current_user();
83 unsigned long memlock_limit, cur;
84
85 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
86 cur = atomic_long_read(&user->locked_vm);
87 free_uid(user);
88 if (cur + pages > memlock_limit)
89 return -EPERM;
90 return 0;
91 }
92
93 static int bpf_map_charge_memlock(struct bpf_map *map)
94 {
95 struct user_struct *user = get_current_user();
96 unsigned long memlock_limit;
97
98 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
99
100 atomic_long_add(map->pages, &user->locked_vm);
101
102 if (atomic_long_read(&user->locked_vm) > memlock_limit) {
103 atomic_long_sub(map->pages, &user->locked_vm);
104 free_uid(user);
105 return -EPERM;
106 }
107 map->user = user;
108 return 0;
109 }
110
111 static void bpf_map_uncharge_memlock(struct bpf_map *map)
112 {
113 struct user_struct *user = map->user;
114
115 atomic_long_sub(map->pages, &user->locked_vm);
116 free_uid(user);
117 }
118
119 /* called from workqueue */
120 static void bpf_map_free_deferred(struct work_struct *work)
121 {
122 struct bpf_map *map = container_of(work, struct bpf_map, work);
123
124 bpf_map_uncharge_memlock(map);
125 /* implementation dependent freeing */
126 map->ops->map_free(map);
127 }
128
129 static void bpf_map_put_uref(struct bpf_map *map)
130 {
131 if (atomic_dec_and_test(&map->usercnt)) {
132 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
133 bpf_fd_array_map_clear(map);
134 }
135 }
136
137 /* decrement map refcnt and schedule it for freeing via workqueue
138  * (underlying map implementation ops->map_free() might sleep)
139 */
140 void bpf_map_put(struct bpf_map *map)
141 {
142 if (atomic_dec_and_test(&map->refcnt)) {
143 INIT_WORK(&map->work, bpf_map_free_deferred);
144 schedule_work(&map->work);
145 }
146 }
147
148 void bpf_map_put_with_uref(struct bpf_map *map)
149 {
150 bpf_map_put_uref(map);
151 bpf_map_put(map);
152 }
153
154 static int bpf_map_release(struct inode *inode, struct file *filp)
155 {
156 struct bpf_map *map = filp->private_data;
157
158 if (map->ops->map_release)
159 map->ops->map_release(map, filp);
160
161 bpf_map_put_with_uref(map);
162 return 0;
163 }
164
165 #ifdef CONFIG_PROC_FS
166 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
167 {
168 const struct bpf_map *map = filp->private_data;
169 const struct bpf_array *array;
170 u32 owner_prog_type = 0;
171
172 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
173 array = container_of(map, struct bpf_array, map);
174 owner_prog_type = array->owner_prog_type;
175 }
176
177 seq_printf(m,
178 "map_type:\t%u\n"
179 "key_size:\t%u\n"
180 "value_size:\t%u\n"
181 "max_entries:\t%u\n"
182 "map_flags:\t%#x\n"
183 "memlock:\t%llu\n",
184 map->map_type,
185 map->key_size,
186 map->value_size,
187 map->max_entries,
188 map->map_flags,
189 map->pages * 1ULL << PAGE_SHIFT);
190
191 if (owner_prog_type)
192 seq_printf(m, "owner_prog_type:\t%u\n",
193 owner_prog_type);
194 }
195 #endif
196
197 static const struct file_operations bpf_map_fops = {
198 #ifdef CONFIG_PROC_FS
199 .show_fdinfo = bpf_map_show_fdinfo,
200 #endif
201 .release = bpf_map_release,
202 };
203
204 int bpf_map_new_fd(struct bpf_map *map)
205 {
206 return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
207 O_RDWR | O_CLOEXEC);
208 }
209
210 /* helper macro to check that unused fields of 'union bpf_attr' are zero */
211 #define CHECK_ATTR(CMD) \
212 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
213 sizeof(attr->CMD##_LAST_FIELD), 0, \
214 sizeof(*attr) - \
215 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
216 sizeof(attr->CMD##_LAST_FIELD)) != NULL
217
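/* For example, with BPF_MAP_CREATE_LAST_FIELD defined as inner_map_fd
 * below, CHECK_ATTR(BPF_MAP_CREATE) uses memchr_inv() to scan every byte
 * of 'union bpf_attr' that follows inner_map_fd and evaluates to true if
 * any of them is non-zero, so a command rejects attribute fields this
 * kernel does not know how to interpret.
 */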
218 #define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
219 /* called via syscall */
220 static int map_create(union bpf_attr *attr)
221 {
222 struct bpf_map *map;
223 int err;
224
225 err = CHECK_ATTR(BPF_MAP_CREATE);
226 if (err)
227 return -EINVAL;
228
229 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
230 map = find_and_alloc_map(attr);
231 if (IS_ERR(map))
232 return PTR_ERR(map);
233
234 atomic_set(&map->refcnt, 1);
235 atomic_set(&map->usercnt, 1);
236
237 err = bpf_map_charge_memlock(map);
238 if (err)
239 goto free_map_nouncharge;
240
241 err = bpf_map_new_fd(map);
242 if (err < 0)
243 /* failed to allocate fd */
244 goto free_map;
245
246 trace_bpf_map_create(map, err);
247 return err;
248
249 free_map:
250 bpf_map_uncharge_memlock(map);
251 free_map_nouncharge:
252 map->ops->map_free(map);
253 return err;
254 }
255
256 /* If an error is returned, the fd is released.
257  * On success the caller should complete fd access with a matching fdput()
258 */
259 struct bpf_map *__bpf_map_get(struct fd f)
260 {
261 if (!f.file)
262 return ERR_PTR(-EBADF);
263 if (f.file->f_op != &bpf_map_fops) {
264 fdput(f);
265 return ERR_PTR(-EINVAL);
266 }
267
268 return f.file->private_data;
269 }
270
271 /* prog's and map's refcnt limit */
272 #define BPF_MAX_REFCNT 32768
273
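/* Take an additional reference on a map, refusing to cross BPF_MAX_REFCNT
 * so the counter cannot overflow; 'uref' additionally counts references
 * held through user-space file descriptors, which is what
 * bpf_map_put_uref() drops on file release.
 */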
274 struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
275 {
276 if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
277 atomic_dec(&map->refcnt);
278 return ERR_PTR(-EBUSY);
279 }
280 if (uref)
281 atomic_inc(&map->usercnt);
282 return map;
283 }
284
285 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
286 {
287 struct fd f = fdget(ufd);
288 struct bpf_map *map;
289
290 map = __bpf_map_get(f);
291 if (IS_ERR(map))
292 return map;
293
294 map = bpf_map_inc(map, true);
295 fdput(f);
296
297 return map;
298 }
299
300 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
301 {
302 return -ENOTSUPP;
303 }
304
305 /* last field in 'union bpf_attr' used by this command */
306 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
307
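/* BPF_MAP_LOOKUP_ELEM handler. For the per-cpu map types the user-supplied
 * value buffer must hold one value per possible CPU, each slot rounded up
 * to 8 bytes, which is why value_size is recomputed below before copying
 * the result back to user space.
 */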
308 static int map_lookup_elem(union bpf_attr *attr)
309 {
310 void __user *ukey = u64_to_user_ptr(attr->key);
311 void __user *uvalue = u64_to_user_ptr(attr->value);
312 int ufd = attr->map_fd;
313 struct bpf_map *map;
314 void *key, *value, *ptr;
315 u32 value_size;
316 struct fd f;
317 int err;
318
319 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
320 return -EINVAL;
321
322 f = fdget(ufd);
323 map = __bpf_map_get(f);
324 if (IS_ERR(map))
325 return PTR_ERR(map);
326
327 err = -ENOMEM;
328 key = kmalloc(map->key_size, GFP_USER);
329 if (!key)
330 goto err_put;
331
332 err = -EFAULT;
333 if (copy_from_user(key, ukey, map->key_size) != 0)
334 goto free_key;
335
336 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
337 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
338 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
339 value_size = round_up(map->value_size, 8) * num_possible_cpus();
340 else
341 value_size = map->value_size;
342
343 err = -ENOMEM;
344 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
345 if (!value)
346 goto free_key;
347
348 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
349 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
350 err = bpf_percpu_hash_copy(map, key, value);
351 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
352 err = bpf_percpu_array_copy(map, key, value);
353 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
354 err = bpf_stackmap_copy(map, key, value);
355 } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
356 map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
357 err = -ENOTSUPP;
358 } else {
359 rcu_read_lock();
360 ptr = map->ops->map_lookup_elem(map, key);
361 if (ptr)
362 memcpy(value, ptr, value_size);
363 rcu_read_unlock();
364 err = ptr ? 0 : -ENOENT;
365 }
366
367 if (err)
368 goto free_value;
369
370 err = -EFAULT;
371 if (copy_to_user(uvalue, value, value_size) != 0)
372 goto free_value;
373
374 trace_bpf_map_lookup_elem(map, ufd, key, value);
375 err = 0;
376
377 free_value:
378 kfree(value);
379 free_key:
380 kfree(key);
381 err_put:
382 fdput(f);
383 return err;
384 }
385
386 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
387
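/* BPF_MAP_UPDATE_ELEM handler: copies key and value in from user space,
 * then dispatches to the per-cpu, fd-based (prog/perf-event/cgroup arrays
 * and map-in-map) or generic ops->map_update_elem() path under RCU.
 */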
388 static int map_update_elem(union bpf_attr *attr)
389 {
390 void __user *ukey = u64_to_user_ptr(attr->key);
391 void __user *uvalue = u64_to_user_ptr(attr->value);
392 int ufd = attr->map_fd;
393 struct bpf_map *map;
394 void *key, *value;
395 u32 value_size;
396 struct fd f;
397 int err;
398
399 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
400 return -EINVAL;
401
402 f = fdget(ufd);
403 map = __bpf_map_get(f);
404 if (IS_ERR(map))
405 return PTR_ERR(map);
406
407 err = -ENOMEM;
408 key = kmalloc(map->key_size, GFP_USER);
409 if (!key)
410 goto err_put;
411
412 err = -EFAULT;
413 if (copy_from_user(key, ukey, map->key_size) != 0)
414 goto free_key;
415
416 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
417 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
418 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
419 value_size = round_up(map->value_size, 8) * num_possible_cpus();
420 else
421 value_size = map->value_size;
422
423 err = -ENOMEM;
424 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
425 if (!value)
426 goto free_key;
427
428 err = -EFAULT;
429 if (copy_from_user(value, uvalue, value_size) != 0)
430 goto free_value;
431
432 /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
433  * inside bpf map update or delete, otherwise deadlocks are possible
434 */
435 preempt_disable();
436 __this_cpu_inc(bpf_prog_active);
437 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
438 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
439 err = bpf_percpu_hash_update(map, key, value, attr->flags);
440 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
441 err = bpf_percpu_array_update(map, key, value, attr->flags);
442 } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
443 map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
444 map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
445 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
446 rcu_read_lock();
447 err = bpf_fd_array_map_update_elem(map, f.file, key, value,
448 attr->flags);
449 rcu_read_unlock();
450 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
451 rcu_read_lock();
452 err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
453 attr->flags);
454 rcu_read_unlock();
455 } else {
456 rcu_read_lock();
457 err = map->ops->map_update_elem(map, key, value, attr->flags);
458 rcu_read_unlock();
459 }
460 __this_cpu_dec(bpf_prog_active);
461 preempt_enable();
462
463 if (!err)
464 trace_bpf_map_update_elem(map, ufd, key, value);
465 free_value:
466 kfree(value);
467 free_key:
468 kfree(key);
469 err_put:
470 fdput(f);
471 return err;
472 }
473
474 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
475
476 static int map_delete_elem(union bpf_attr *attr)
477 {
478 void __user *ukey = u64_to_user_ptr(attr->key);
479 int ufd = attr->map_fd;
480 struct bpf_map *map;
481 struct fd f;
482 void *key;
483 int err;
484
485 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
486 return -EINVAL;
487
488 f = fdget(ufd);
489 map = __bpf_map_get(f);
490 if (IS_ERR(map))
491 return PTR_ERR(map);
492
493 err = -ENOMEM;
494 key = kmalloc(map->key_size, GFP_USER);
495 if (!key)
496 goto err_put;
497
498 err = -EFAULT;
499 if (copy_from_user(key, ukey, map->key_size) != 0)
500 goto free_key;
501
502 preempt_disable();
503 __this_cpu_inc(bpf_prog_active);
504 rcu_read_lock();
505 err = map->ops->map_delete_elem(map, key);
506 rcu_read_unlock();
507 __this_cpu_dec(bpf_prog_active);
508 preempt_enable();
509
510 if (!err)
511 trace_bpf_map_delete_elem(map, ufd, key);
512 free_key:
513 kfree(key);
514 err_put:
515 fdput(f);
516 return err;
517 }
518
519 /* last field in 'union bpf_attr' used by this command */
520 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
521
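/* BPF_MAP_GET_NEXT_KEY handler. User space iterates a map by repeatedly
 * passing the previously returned key back in; iteration ends when the
 * map implementation reports that no further key exists (typically
 * -ENOENT).
 */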
522 static int map_get_next_key(union bpf_attr *attr)
523 {
524 void __user *ukey = u64_to_user_ptr(attr->key);
525 void __user *unext_key = u64_to_user_ptr(attr->next_key);
526 int ufd = attr->map_fd;
527 struct bpf_map *map;
528 void *key, *next_key;
529 struct fd f;
530 int err;
531
532 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
533 return -EINVAL;
534
535 f = fdget(ufd);
536 map = __bpf_map_get(f);
537 if (IS_ERR(map))
538 return PTR_ERR(map);
539
540 err = -ENOMEM;
541 key = kmalloc(map->key_size, GFP_USER);
542 if (!key)
543 goto err_put;
544
545 err = -EFAULT;
546 if (copy_from_user(key, ukey, map->key_size) != 0)
547 goto free_key;
548
549 err = -ENOMEM;
550 next_key = kmalloc(map->key_size, GFP_USER);
551 if (!next_key)
552 goto free_key;
553
554 rcu_read_lock();
555 err = map->ops->map_get_next_key(map, key, next_key);
556 rcu_read_unlock();
557 if (err)
558 goto free_next_key;
559
560 err = -EFAULT;
561 if (copy_to_user(unext_key, next_key, map->key_size) != 0)
562 goto free_next_key;
563
564 trace_bpf_map_next_key(map, ufd, key, next_key);
565 err = 0;
566
567 free_next_key:
568 kfree(next_key);
569 free_key:
570 kfree(key);
571 err_put:
572 fdput(f);
573 return err;
574 }
575
576 static LIST_HEAD(bpf_prog_types);
577
578 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
579 {
580 struct bpf_prog_type_list *tl;
581
582 list_for_each_entry(tl, &bpf_prog_types, list_node) {
583 if (tl->type == type) {
584 prog->aux->ops = tl->ops;
585 prog->type = type;
586 return 0;
587 }
588 }
589
590 return -EINVAL;
591 }
592
593 void bpf_register_prog_type(struct bpf_prog_type_list *tl)
594 {
595 list_add(&tl->list_node, &bpf_prog_types);
596 }
597
598 /* drop refcnt on maps used by eBPF program and free auxiliary data */
599 static void free_used_maps(struct bpf_prog_aux *aux)
600 {
601 int i;
602
603 for (i = 0; i < aux->used_map_cnt; i++)
604 bpf_map_put(aux->used_maps[i]);
605
606 kfree(aux->used_maps);
607 }
608
609 int __bpf_prog_charge(struct user_struct *user, u32 pages)
610 {
611 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
612 unsigned long user_bufs;
613
614 if (user) {
615 user_bufs = atomic_long_add_return(pages, &user->locked_vm);
616 if (user_bufs > memlock_limit) {
617 atomic_long_sub(pages, &user->locked_vm);
618 return -EPERM;
619 }
620 }
621
622 return 0;
623 }
624
625 void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
626 {
627 if (user)
628 atomic_long_sub(pages, &user->locked_vm);
629 }
630
631 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
632 {
633 struct user_struct *user = get_current_user();
634 int ret;
635
636 ret = __bpf_prog_charge(user, prog->pages);
637 if (ret) {
638 free_uid(user);
639 return ret;
640 }
641
642 prog->aux->user = user;
643 return 0;
644 }
645
646 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
647 {
648 struct user_struct *user = prog->aux->user;
649
650 __bpf_prog_uncharge(user, prog->pages);
651 free_uid(user);
652 }
653
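/* Final program teardown, run from call_rcu() in bpf_prog_put() so the
 * memory is only released after a grace period, i.e. after any concurrent
 * execution under rcu_read_lock() has finished.
 */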
654 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
655 {
656 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
657
658 free_used_maps(aux);
659 bpf_prog_uncharge_memlock(aux->prog);
660 bpf_prog_free(aux->prog);
661 }
662
663 void bpf_prog_put(struct bpf_prog *prog)
664 {
665 if (atomic_dec_and_test(&prog->aux->refcnt)) {
666 trace_bpf_prog_put_rcu(prog);
667 bpf_prog_kallsyms_del(prog);
668 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
669 }
670 }
671 EXPORT_SYMBOL_GPL(bpf_prog_put);
672
673 static int bpf_prog_release(struct inode *inode, struct file *filp)
674 {
675 struct bpf_prog *prog = filp->private_data;
676
677 bpf_prog_put(prog);
678 return 0;
679 }
680
681 #ifdef CONFIG_PROC_FS
682 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
683 {
684 const struct bpf_prog *prog = filp->private_data;
685 char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
686
687 bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
688 seq_printf(m,
689 "prog_type:\t%u\n"
690 "prog_jited:\t%u\n"
691 "prog_tag:\t%s\n"
692 "memlock:\t%llu\n",
693 prog->type,
694 prog->jited,
695 prog_tag,
696 prog->pages * 1ULL << PAGE_SHIFT);
697 }
698 #endif
699
700 static const struct file_operations bpf_prog_fops = {
701 #ifdef CONFIG_PROC_FS
702 .show_fdinfo = bpf_prog_show_fdinfo,
703 #endif
704 .release = bpf_prog_release,
705 };
706
707 int bpf_prog_new_fd(struct bpf_prog *prog)
708 {
709 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
710 O_RDWR | O_CLOEXEC);
711 }
712
713 static struct bpf_prog *____bpf_prog_get(struct fd f)
714 {
715 if (!f.file)
716 return ERR_PTR(-EBADF);
717 if (f.file->f_op != &bpf_prog_fops) {
718 fdput(f);
719 return ERR_PTR(-EINVAL);
720 }
721
722 return f.file->private_data;
723 }
724
725 struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
726 {
727 if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
728 atomic_sub(i, &prog->aux->refcnt);
729 return ERR_PTR(-EBUSY);
730 }
731 return prog;
732 }
733 EXPORT_SYMBOL_GPL(bpf_prog_add);
734
735 void bpf_prog_sub(struct bpf_prog *prog, int i)
736 {
737 /* Only to be used for undoing previous bpf_prog_add() in some
738 * error path. We still know that another entity in our call
739 * path holds a reference to the program, thus atomic_sub() can
740 * be safely used in such cases!
741 */
742 WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
743 }
744 EXPORT_SYMBOL_GPL(bpf_prog_sub);
745
746 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
747 {
748 return bpf_prog_add(prog, 1);
749 }
750 EXPORT_SYMBOL_GPL(bpf_prog_inc);
751
752 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
753 {
754 struct fd f = fdget(ufd);
755 struct bpf_prog *prog;
756
757 prog = ____bpf_prog_get(f);
758 if (IS_ERR(prog))
759 return prog;
760 if (type && prog->type != *type) {
761 prog = ERR_PTR(-EINVAL);
762 goto out;
763 }
764
765 prog = bpf_prog_inc(prog);
766 out:
767 fdput(f);
768 return prog;
769 }
770
771 struct bpf_prog *bpf_prog_get(u32 ufd)
772 {
773 return __bpf_prog_get(ufd, NULL);
774 }
775
776 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
777 {
778 struct bpf_prog *prog = __bpf_prog_get(ufd, &type);
779
780 if (!IS_ERR(prog))
781 trace_bpf_prog_get_type(prog);
782 return prog;
783 }
784 EXPORT_SYMBOL_GPL(bpf_prog_get_type);
785
786 /* last field in 'union bpf_attr' used by this command */
787 #define BPF_PROG_LOAD_LAST_FIELD kern_version
788
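/* BPF_PROG_LOAD handler: copy the license and instructions from user
 * space, charge the memlock rlimit, resolve the program type ops, run the
 * verifier, select the runtime (JIT where available) and finally return a
 * new fd wrapping the program.
 */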
789 static int bpf_prog_load(union bpf_attr *attr)
790 {
791 enum bpf_prog_type type = attr->prog_type;
792 struct bpf_prog *prog;
793 int err;
794 char license[128];
795 bool is_gpl;
796
797 if (CHECK_ATTR(BPF_PROG_LOAD))
798 return -EINVAL;
799
800 /* copy eBPF program license from user space */
801 if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
802 sizeof(license) - 1) < 0)
803 return -EFAULT;
804 license[sizeof(license) - 1] = 0;
805
806 /* eBPF programs must be GPL compatible to use GPL-ed functions */
807 is_gpl = license_is_gpl_compatible(license);
808
809 if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
810 return -E2BIG;
811
812 if (type == BPF_PROG_TYPE_KPROBE &&
813 attr->kern_version != LINUX_VERSION_CODE)
814 return -EINVAL;
815
816 if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
817 return -EPERM;
818
819 /* plain bpf_prog allocation */
820 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
821 if (!prog)
822 return -ENOMEM;
823
824 err = bpf_prog_charge_memlock(prog);
825 if (err)
826 goto free_prog_nouncharge;
827
828 prog->len = attr->insn_cnt;
829
830 err = -EFAULT;
831 if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
832 bpf_prog_insn_size(prog)) != 0)
833 goto free_prog;
834
835 prog->orig_prog = NULL;
836 prog->jited = 0;
837
838 atomic_set(&prog->aux->refcnt, 1);
839 prog->gpl_compatible = is_gpl ? 1 : 0;
840
841 /* find program type: socket_filter vs tracing_filter */
842 err = find_prog_type(type, prog);
843 if (err < 0)
844 goto free_prog;
845
846 /* run eBPF verifier */
847 err = bpf_check(&prog, attr);
848 if (err < 0)
849 goto free_used_maps;
850
851 /* eBPF program is ready to be JITed */
852 prog = bpf_prog_select_runtime(prog, &err);
853 if (err < 0)
854 goto free_used_maps;
855
856 err = bpf_prog_new_fd(prog);
857 if (err < 0)
858 /* failed to allocate fd */
859 goto free_used_maps;
860
861 bpf_prog_kallsyms_add(prog);
862 trace_bpf_prog_load(prog, err);
863 return err;
864
865 free_used_maps:
866 free_used_maps(prog->aux);
867 free_prog:
868 bpf_prog_uncharge_memlock(prog);
869 free_prog_nouncharge:
870 bpf_prog_free(prog);
871 return err;
872 }
873
874 #define BPF_OBJ_LAST_FIELD bpf_fd
875
876 static int bpf_obj_pin(const union bpf_attr *attr)
877 {
878 if (CHECK_ATTR(BPF_OBJ))
879 return -EINVAL;
880
881 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
882 }
883
884 static int bpf_obj_get(const union bpf_attr *attr)
885 {
886 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
887 return -EINVAL;
888
889 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
890 }
891
892 #ifdef CONFIG_CGROUP_BPF
893
894 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
895
896 static int bpf_prog_attach(const union bpf_attr *attr)
897 {
898 enum bpf_prog_type ptype;
899 struct bpf_prog *prog;
900 struct cgroup *cgrp;
901 int ret;
902
903 if (!capable(CAP_NET_ADMIN))
904 return -EPERM;
905
906 if (CHECK_ATTR(BPF_PROG_ATTACH))
907 return -EINVAL;
908
909 if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
910 return -EINVAL;
911
912 switch (attr->attach_type) {
913 case BPF_CGROUP_INET_INGRESS:
914 case BPF_CGROUP_INET_EGRESS:
915 ptype = BPF_PROG_TYPE_CGROUP_SKB;
916 break;
917 case BPF_CGROUP_INET_SOCK_CREATE:
918 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
919 break;
920 default:
921 return -EINVAL;
922 }
923
924 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
925 if (IS_ERR(prog))
926 return PTR_ERR(prog);
927
928 cgrp = cgroup_get_from_fd(attr->target_fd);
929 if (IS_ERR(cgrp)) {
930 bpf_prog_put(prog);
931 return PTR_ERR(cgrp);
932 }
933
934 ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
935 attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
936 if (ret)
937 bpf_prog_put(prog);
938 cgroup_put(cgrp);
939
940 return ret;
941 }
942
943 #define BPF_PROG_DETACH_LAST_FIELD attach_type
944
945 static int bpf_prog_detach(const union bpf_attr *attr)
946 {
947 struct cgroup *cgrp;
948 int ret;
949
950 if (!capable(CAP_NET_ADMIN))
951 return -EPERM;
952
953 if (CHECK_ATTR(BPF_PROG_DETACH))
954 return -EINVAL;
955
956 switch (attr->attach_type) {
957 case BPF_CGROUP_INET_INGRESS:
958 case BPF_CGROUP_INET_EGRESS:
959 case BPF_CGROUP_INET_SOCK_CREATE:
960 cgrp = cgroup_get_from_fd(attr->target_fd);
961 if (IS_ERR(cgrp))
962 return PTR_ERR(cgrp);
963
964 ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
965 cgroup_put(cgrp);
966 break;
967
968 default:
969 return -EINVAL;
970 }
971
972 return ret;
973 }
974 #endif /* CONFIG_CGROUP_BPF */
975
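/* Single entry point for all BPF commands. A minimal user-space sketch of
 * reaching this handler (illustrative only; the field values below are
 * assumptions, not part of this file):
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 1024,
 *	};
 *	int fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * If user space passes a larger attr than the kernel knows about, all
 * trailing bytes must be zero, as enforced below.
 */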
976 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
977 {
978 union bpf_attr attr = {};
979 int err;
980
981 if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
982 return -EPERM;
983
984 if (!access_ok(VERIFY_READ, uattr, 1))
985 return -EFAULT;
986
987 if (size > PAGE_SIZE) /* silly large */
988 return -E2BIG;
989
990 /* If we're handed a bigger struct than we know of,
991 * ensure all the unknown bits are 0 - i.e. new
992 * user-space does not rely on any kernel feature
993  * extensions we don't know about yet.
994 */
995 if (size > sizeof(attr)) {
996 unsigned char __user *addr;
997 unsigned char __user *end;
998 unsigned char val;
999
1000 addr = (void __user *)uattr + sizeof(attr);
1001 end = (void __user *)uattr + size;
1002
1003 for (; addr < end; addr++) {
1004 err = get_user(val, addr);
1005 if (err)
1006 return err;
1007 if (val)
1008 return -E2BIG;
1009 }
1010 size = sizeof(attr);
1011 }
1012
1013 /* copy attributes from user space, may be less than sizeof(bpf_attr) */
1014 if (copy_from_user(&attr, uattr, size) != 0)
1015 return -EFAULT;
1016
1017 switch (cmd) {
1018 case BPF_MAP_CREATE:
1019 err = map_create(&attr);
1020 break;
1021 case BPF_MAP_LOOKUP_ELEM:
1022 err = map_lookup_elem(&attr);
1023 break;
1024 case BPF_MAP_UPDATE_ELEM:
1025 err = map_update_elem(&attr);
1026 break;
1027 case BPF_MAP_DELETE_ELEM:
1028 err = map_delete_elem(&attr);
1029 break;
1030 case BPF_MAP_GET_NEXT_KEY:
1031 err = map_get_next_key(&attr);
1032 break;
1033 case BPF_PROG_LOAD:
1034 err = bpf_prog_load(&attr);
1035 break;
1036 case BPF_OBJ_PIN:
1037 err = bpf_obj_pin(&attr);
1038 break;
1039 case BPF_OBJ_GET:
1040 err = bpf_obj_get(&attr);
1041 break;
1042
1043 #ifdef CONFIG_CGROUP_BPF
1044 case BPF_PROG_ATTACH:
1045 err = bpf_prog_attach(&attr);
1046 break;
1047 case BPF_PROG_DETACH:
1048 err = bpf_prog_detach(&attr);
1049 break;
1050 #endif
1051
1052 default:
1053 err = -EINVAL;
1054 break;
1055 }
1056
1057 return err;
1058 }