fs/fcntl.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *  linux/fs/fcntl.c
   4  *
   5  *  Copyright (C) 1991, 1992  Linus Torvalds
   6  */
   7
   8 #include <linux/syscalls.h>
   9 #include <linux/init.h>
  10 #include <linux/mm.h>
  11 #include <linux/sched/task.h>
  12 #include <linux/fs.h>
  13 #include <linux/file.h>
  14 #include <linux/fdtable.h>
  15 #include <linux/capability.h>
  16 #include <linux/dnotify.h>
  17 #include <linux/slab.h>
  18 #include <linux/module.h>
  19 #include <linux/pipe_fs_i.h>
  20 #include <linux/security.h>
  21 #include <linux/ptrace.h>
  22 #include <linux/signal.h>
  23 #include <linux/rcupdate.h>
  24 #include <linux/pid_namespace.h>
  25 #include <linux/user_namespace.h>
  26 #include <linux/memfd.h>
  27 #include <linux/compat.h>
  28 #include <linux/mount.h>
  29
  30 #include <linux/poll.h>
  31 #include <asm/siginfo.h>
  32 #include <linux/uaccess.h>
  33
  /* File status flag bits that setfl() is allowed to change in f_flags. */
  #define SETFL_MASK \
          (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
  35
  36 int setfl(int fd, struct file *filp, unsigned long arg)
  37 {
  38         struct inode * inode = file_inode(filp);
  39         int error = 0;
  40
  41         /*
  42          * O_APPEND cannot be cleared if the file is marked as append-only
  43          * and the file is open for write.
  44          */
  45         if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
  46                 return -EPERM;
  47
  48         /* O_NOATIME can only be set by the owner or superuser */
  49         if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
  50                 if (!inode_owner_or_capable(file_mnt_user_ns(filp), inode))
  51                         return -EPERM;
  52
  53         /* required for strict SunOS emulation */
  54         if (O_NONBLOCK != O_NDELAY)
  55                if (arg & O_NDELAY)
  56                    arg |= O_NONBLOCK;
  57
  58         /* Pipe packetized mode is controlled by O_DIRECT flag */
  59         if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
  60                 if (!filp->f_mapping || !filp->f_mapping->a_ops ||
  61                         !filp->f_mapping->a_ops->direct_IO)
  62                                 return -EINVAL;
  63         }
  64
  65         if (filp->f_op->check_flags)
  66                 error = filp->f_op->check_flags(arg);
  67         if (!error && filp->f_op->setfl)
  68                 error = filp->f_op->setfl(filp, arg);
  69         if (error)
  70                 return error;
  71
  72         /*
  73          * ->fasync() is responsible for setting the FASYNC bit.
  74          */
  75         if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
  76                 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
  77                 if (error < 0)
  78                         goto out;
  79                 if (error > 0)
  80                         error = 0;
  81         }
  82         spin_lock(&filp->f_lock);
  83         filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
  84         spin_unlock(&filp->f_lock);
  85
  86  out:
  87         return error;
  88 }
  89 EXPORT_SYMBOL_GPL(setfl);
  90
  91 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
  92                      int force)
  93 {
  94         write_lock_irq(&filp->f_owner.lock);
  95         if (force || !filp->f_owner.pid) {
  96                 put_pid(filp->f_owner.pid);
  97                 filp->f_owner.pid = get_pid(pid);
  98                 filp->f_owner.pid_type = type;
  99
 100                 if (pid) {
 101                         const struct cred *cred = current_cred();
 102                         filp->f_owner.uid = cred->uid;
 103                         filp->f_owner.euid = cred->euid;
 104                 }
 105         }
 106         write_unlock_irq(&filp->f_owner.lock);
 107 }
 108
 109 void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 110                 int force)
 111 {
 112         security_file_set_fowner(filp);
 113         f_modown(filp, pid, type, force);
 114 }
 115 EXPORT_SYMBOL(__f_setown);
 116
 117 int f_setown(struct file *filp, unsigned long arg, int force)
 118 {
 119         enum pid_type type;
 120         struct pid *pid = NULL;
 121         int who = arg, ret = 0;
 122
 123         type = PIDTYPE_TGID;
 124         if (who < 0) {
 125                 /* avoid overflow below */
 126                 if (who == INT_MIN)
 127                         return -EINVAL;
 128
 129                 type = PIDTYPE_PGID;
 130                 who = -who;
 131         }
 132
 133         rcu_read_lock();
 134         if (who) {
 135                 pid = find_vpid(who);
 136                 if (!pid)
 137                         ret = -ESRCH;
 138         }
 139
 140         if (!ret)
 141                 __f_setown(filp, pid, type, force);
 142         rcu_read_unlock();
 143
 144         return ret;
 145 }
 146 EXPORT_SYMBOL(f_setown);
 147
 148 void f_delown(struct file *filp)
 149 {
 150         f_modown(filp, NULL, PIDTYPE_TGID, 1);
 151 }
 152
 153 pid_t f_getown(struct file *filp)
 154 {
 155         pid_t pid = 0;
 156
 157         read_lock_irq(&filp->f_owner.lock);
 158         rcu_read_lock();
 159         if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
 160                 pid = pid_vnr(filp->f_owner.pid);
 161                 if (filp->f_owner.pid_type == PIDTYPE_PGID)
 162                         pid = -pid;
 163         }
 164         rcu_read_unlock();
 165         read_unlock_irq(&filp->f_owner.lock);
 166         return pid;
 167 }
 168
 169 static int f_setown_ex(struct file *filp, unsigned long arg)
 170 {
 171         struct f_owner_ex __user *owner_p = (void __user *)arg;
 172         struct f_owner_ex owner;
 173         struct pid *pid;
 174         int type;
 175         int ret;
 176
 177         ret = copy_from_user(&owner, owner_p, sizeof(owner));
 178         if (ret)
 179                 return -EFAULT;
 180
 181         switch (owner.type) {
 182         case F_OWNER_TID:
 183                 type = PIDTYPE_PID;
 184                 break;
 185
 186         case F_OWNER_PID:
 187                 type = PIDTYPE_TGID;
 188                 break;
 189
 190         case F_OWNER_PGRP:
 191                 type = PIDTYPE_PGID;
 192                 break;
 193
 194         default:
 195                 return -EINVAL;
 196         }
 197
 198         rcu_read_lock();
 199         pid = find_vpid(owner.pid);
 200         if (owner.pid && !pid)
 201                 ret = -ESRCH;
 202         else
 203                  __f_setown(filp, pid, type, 1);
 204         rcu_read_unlock();
 205
 206         return ret;
 207 }
 208
 209 static int f_getown_ex(struct file *filp, unsigned long arg)
 210 {
 211         struct f_owner_ex __user *owner_p = (void __user *)arg;
 212         struct f_owner_ex owner = {};
 213         int ret = 0;
 214
 215         read_lock_irq(&filp->f_owner.lock);
 216         rcu_read_lock();
 217         if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
 218                 owner.pid = pid_vnr(filp->f_owner.pid);
 219         rcu_read_unlock();
 220         switch (filp->f_owner.pid_type) {
 221         case PIDTYPE_PID:
 222                 owner.type = F_OWNER_TID;
 223                 break;
 224
 225         case PIDTYPE_TGID:
 226                 owner.type = F_OWNER_PID;
 227                 break;
 228
 229         case PIDTYPE_PGID:
 230                 owner.type = F_OWNER_PGRP;
 231                 break;
 232
 233         default:
 234                 WARN_ON(1);
 235                 ret = -EINVAL;
 236                 break;
 237         }
 238         read_unlock_irq(&filp->f_owner.lock);
 239
 240         if (!ret) {
 241                 ret = copy_to_user(owner_p, &owner, sizeof(owner));
 242                 if (ret)
 243                         ret = -EFAULT;
 244         }
 245         return ret;
 246 }
 247
 248 #ifdef CONFIG_CHECKPOINT_RESTORE
 249 static int f_getowner_uids(struct file *filp, unsigned long arg)
 250 {
 251         struct user_namespace *user_ns = current_user_ns();
 252         uid_t __user *dst = (void __user *)arg;
 253         uid_t src[2];
 254         int err;
 255
 256         read_lock_irq(&filp->f_owner.lock);
 257         src[0] = from_kuid(user_ns, filp->f_owner.uid);
 258         src[1] = from_kuid(user_ns, filp->f_owner.euid);
 259         read_unlock_irq(&filp->f_owner.lock);
 260
 261         err  = put_user(src[0], &dst[0]);
 262         err |= put_user(src[1], &dst[1]);
 263
 264         return err;
 265 }
 266 #else
 267 static int f_getowner_uids(struct file *filp, unsigned long arg)
 268 {
 269         return -EINVAL;
 270 }
 271 #endif
 272
 273 static bool rw_hint_valid(enum rw_hint hint)
 274 {
 275         switch (hint) {
 276         case RWH_WRITE_LIFE_NOT_SET:
 277         case RWH_WRITE_LIFE_NONE:
 278         case RWH_WRITE_LIFE_SHORT:
 279         case RWH_WRITE_LIFE_MEDIUM:
 280         case RWH_WRITE_LIFE_LONG:
 281         case RWH_WRITE_LIFE_EXTREME:
 282                 return true;
 283         default:
 284                 return false;
 285         }
 286 }
 287
 288 static long fcntl_rw_hint(struct file *file, unsigned int cmd,
 289                           unsigned long arg)
 290 {
 291         struct inode *inode = file_inode(file);
 292         u64 __user *argp = (u64 __user *)arg;
 293         enum rw_hint hint;
 294         u64 h;
 295
 296         switch (cmd) {
 297         case F_GET_FILE_RW_HINT:
 298                 h = file_write_hint(file);
 299                 if (copy_to_user(argp, &h, sizeof(*argp)))
 300                         return -EFAULT;
 301                 return 0;
 302         case F_SET_FILE_RW_HINT:
 303                 if (copy_from_user(&h, argp, sizeof(h)))
 304                         return -EFAULT;
 305                 hint = (enum rw_hint) h;
 306                 if (!rw_hint_valid(hint))
 307                         return -EINVAL;
 308
 309                 spin_lock(&file->f_lock);
 310                 file->f_write_hint = hint;
 311                 spin_unlock(&file->f_lock);
 312                 return 0;
 313         case F_GET_RW_HINT:
 314                 h = inode->i_write_hint;
 315                 if (copy_to_user(argp, &h, sizeof(*argp)))
 316                         return -EFAULT;
 317                 return 0;
 318         case F_SET_RW_HINT:
 319                 if (copy_from_user(&h, argp, sizeof(h)))
 320                         return -EFAULT;
 321                 hint = (enum rw_hint) h;
 322                 if (!rw_hint_valid(hint))
 323                         return -EINVAL;
 324
 325                 inode_lock(inode);
 326                 inode->i_write_hint = hint;
 327                 inode_unlock(inode);
 328                 return 0;
 329         default:
 330                 return -EINVAL;
 331         }
 332 }
 333
 334 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 335                 struct file *filp)
 336 {
 337         void __user *argp = (void __user *)arg;
 338         struct flock flock;
 339         long err = -EINVAL;
 340
 341         switch (cmd) {
 342         case F_DUPFD:
 343                 err = f_dupfd(arg, filp, 0);
 344                 break;
 345         case F_DUPFD_CLOEXEC:
 346                 err = f_dupfd(arg, filp, O_CLOEXEC);
 347                 break;
 348         case F_GETFD:
 349                 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
 350                 break;
 351         case F_SETFD:
 352                 err = 0;
 353                 set_close_on_exec(fd, arg & FD_CLOEXEC);
 354                 break;
 355         case F_GETFL:
 356                 err = filp->f_flags;
 357                 break;
 358         case F_SETFL:
 359                 err = setfl(fd, filp, arg);
 360                 break;
 361 #if BITS_PER_LONG != 32
 362         /* 32-bit arches must use fcntl64() */
 363         case F_OFD_GETLK:
 364 #endif
 365         case F_GETLK:
 366                 if (copy_from_user(&flock, argp, sizeof(flock)))
 367                         return -EFAULT;
 368                 err = fcntl_getlk(filp, cmd, &flock);
 369                 if (!err && copy_to_user(argp, &flock, sizeof(flock)))
 370                         return -EFAULT;
 371                 break;
 372 #if BITS_PER_LONG != 32
 373         /* 32-bit arches must use fcntl64() */
 374         case F_OFD_SETLK:
 375         case F_OFD_SETLKW:
 376                 fallthrough;
 377 #endif
 378         case F_SETLK:
 379         case F_SETLKW:
 380                 if (copy_from_user(&flock, argp, sizeof(flock)))
 381                         return -EFAULT;
 382                 err = fcntl_setlk(fd, filp, cmd, &flock);
 383                 break;
 384         case F_GETOWN:
 385                 /*
 386                  * XXX If f_owner is a process group, the
 387                  * negative return value will get converted
 388                  * into an error.  Oops.  If we keep the
 389                  * current syscall conventions, the only way
 390                  * to fix this will be in libc.
 391                  */
 392                 err = f_getown(filp);
 393                 force_successful_syscall_return();
 394                 break;
 395         case F_SETOWN:
 396                 err = f_setown(filp, arg, 1);
 397                 break;
 398         case F_GETOWN_EX:
 399                 err = f_getown_ex(filp, arg);
 400                 break;
 401         case F_SETOWN_EX:
 402                 err = f_setown_ex(filp, arg);
 403                 break;
 404         case F_GETOWNER_UIDS:
 405                 err = f_getowner_uids(filp, arg);
 406                 break;
 407         case F_GETSIG:
 408                 err = filp->f_owner.signum;
 409                 break;
 410         case F_SETSIG:
 411                 /* arg == 0 restores default behaviour. */
 412                 if (!valid_signal(arg)) {
 413                         break;
 414                 }
 415                 err = 0;
 416                 filp->f_owner.signum = arg;
 417                 break;
 418         case F_GETLEASE:
 419                 err = fcntl_getlease(filp);
 420                 break;
 421         case F_SETLEASE:
 422                 err = fcntl_setlease(fd, filp, arg);
 423                 break;
 424         case F_NOTIFY:
 425                 err = fcntl_dirnotify(fd, filp, arg);
 426                 break;
 427         case F_SETPIPE_SZ:
 428         case F_GETPIPE_SZ:
 429                 err = pipe_fcntl(filp, cmd, arg);
 430                 break;
 431         case F_ADD_SEALS:
 432         case F_GET_SEALS:
 433                 err = memfd_fcntl(filp, cmd, arg);
 434                 break;
 435         case F_GET_RW_HINT:
 436         case F_SET_RW_HINT:
 437         case F_GET_FILE_RW_HINT:
 438         case F_SET_FILE_RW_HINT:
 439                 err = fcntl_rw_hint(filp, cmd, arg);
 440                 break;
 441         default:
 442                 break;
 443         }
 444         return err;
 445 }
 446
 /*
  * Return 1 when @cmd is one of the commands the syscall entry points
  * still allow on an FMODE_PATH file, 0 otherwise.
  */
 static int check_fcntl_cmd(unsigned cmd)
 {
         return cmd == F_DUPFD ||
                cmd == F_DUPFD_CLOEXEC ||
                cmd == F_GETFD ||
                cmd == F_SETFD ||
                cmd == F_GETFL;
 }
 459
 460 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
 461 {
 462         struct fd f = fdget_raw(fd);
 463         long err = -EBADF;
 464
 465         if (!f.file)
 466                 goto out;
 467
 468         if (unlikely(f.file->f_mode & FMODE_PATH)) {
 469                 if (!check_fcntl_cmd(cmd))
 470                         goto out1;
 471         }
 472
 473         err = security_file_fcntl(f.file, cmd, arg);
 474         if (!err)
 475                 err = do_fcntl(fd, cmd, arg, f.file);
 476
 477 out1:
 478         fdput(f);
 479 out:
 480         return err;
 481 }
 482
 483 #if BITS_PER_LONG == 32
 484 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 485                 unsigned long, arg)
 486 {
 487         void __user *argp = (void __user *)arg;
 488         struct fd f = fdget_raw(fd);
 489         struct flock64 flock;
 490         long err = -EBADF;
 491
 492         if (!f.file)
 493                 goto out;
 494
 495         if (unlikely(f.file->f_mode & FMODE_PATH)) {
 496                 if (!check_fcntl_cmd(cmd))
 497                         goto out1;
 498         }
 499
 500         err = security_file_fcntl(f.file, cmd, arg);
 501         if (err)
 502                 goto out1;
 503
 504         switch (cmd) {
 505         case F_GETLK64:
 506         case F_OFD_GETLK:
 507                 err = -EFAULT;
 508                 if (copy_from_user(&flock, argp, sizeof(flock)))
 509                         break;
 510                 err = fcntl_getlk64(f.file, cmd, &flock);
 511                 if (!err && copy_to_user(argp, &flock, sizeof(flock)))
 512                         err = -EFAULT;
 513                 break;
 514         case F_SETLK64:
 515         case F_SETLKW64:
 516         case F_OFD_SETLK:
 517         case F_OFD_SETLKW:
 518                 err = -EFAULT;
 519                 if (copy_from_user(&flock, argp, sizeof(flock)))
 520                         break;
 521                 err = fcntl_setlk64(fd, f.file, cmd, &flock);
 522                 break;
 523         default:
 524                 err = do_fcntl(fd, cmd, arg, f.file);
 525                 break;
 526         }
 527 out1:
 528         fdput(f);
 529 out:
 530         return err;
 531 }
 532 #endif
 533
 534 #ifdef CONFIG_COMPAT
 /*
  * Copy l_type, l_whence, l_start, l_len and l_pid from *src to *dst.
  *
  * careful - don't use anywhere else: both arguments are evaluated once
  * per field, so they must be free of side effects.  The body is wrapped
  * in do { } while (0) so the macro expands to a single statement and
  * stays correct inside an unbraced if/else.
  */
 #define copy_flock_fields(dst, src)                     \
         do {                                            \
                 (dst)->l_type = (src)->l_type;          \
                 (dst)->l_whence = (src)->l_whence;      \
                 (dst)->l_start = (src)->l_start;        \
                 (dst)->l_len = (src)->l_len;            \
                 (dst)->l_pid = (src)->l_pid;            \
         } while (0)
 542
 543 static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
 544 {
 545         struct compat_flock fl;
 546
 547         if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
 548                 return -EFAULT;
 549         copy_flock_fields(kfl, &fl);
 550         return 0;
 551 }
 552
 553 static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
 554 {
 555         struct compat_flock64 fl;
 556
 557         if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
 558                 return -EFAULT;
 559         copy_flock_fields(kfl, &fl);
 560         return 0;
 561 }
 562
 563 static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
 564 {
 565         struct compat_flock fl;
 566
 567         memset(&fl, 0, sizeof(struct compat_flock));
 568         copy_flock_fields(&fl, kfl);
 569         if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
 570                 return -EFAULT;
 571         return 0;
 572 }
 573
 574 static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
 575 {
 576         struct compat_flock64 fl;
 577
 578         BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
 579         BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));
 580
 581         memset(&fl, 0, sizeof(struct compat_flock64));
 582         copy_flock_fields(&fl, kfl);
 583         if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
 584                 return -EFAULT;
 585         return 0;
 586 }
 587 #undef copy_flock_fields
 588
 589 static unsigned int
 590 convert_fcntl_cmd(unsigned int cmd)
 591 {
 592         switch (cmd) {
 593         case F_GETLK64:
 594                 return F_GETLK;
 595         case F_SETLK64:
 596                 return F_SETLK;
 597         case F_SETLKW64:
 598                 return F_SETLKW;
 599         }
 600
 601         return cmd;
 602 }
 603
 604 /*
 605  * GETLK was successful and we need to return the data, but it needs to fit in
 606  * the compat structure.
 607  * l_start shouldn't be too big, unless the original start + end is greater than
 608  * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
 609  * -EOVERFLOW in that case.  l_len could be too big, in which case we just
 610  * truncate it, and only allow the app to see that part of the conflicting lock
 611  * that might make sense to it anyway
 612  */
 613 static int fixup_compat_flock(struct flock *flock)
 614 {
 615         if (flock->l_start > COMPAT_OFF_T_MAX)
 616                 return -EOVERFLOW;
 617         if (flock->l_len > COMPAT_OFF_T_MAX)
 618                 flock->l_len = COMPAT_OFF_T_MAX;
 619         return 0;
 620 }
 621
 622 static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
 623                              compat_ulong_t arg)
 624 {
 625         struct fd f = fdget_raw(fd);
 626         struct flock flock;
 627         long err = -EBADF;
 628
 629         if (!f.file)
 630                 return err;
 631
 632         if (unlikely(f.file->f_mode & FMODE_PATH)) {
 633                 if (!check_fcntl_cmd(cmd))
 634                         goto out_put;
 635         }
 636
 637         err = security_file_fcntl(f.file, cmd, arg);
 638         if (err)
 639                 goto out_put;
 640
 641         switch (cmd) {
 642         case F_GETLK:
 643                 err = get_compat_flock(&flock, compat_ptr(arg));
 644                 if (err)
 645                         break;
 646                 err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
 647                 if (err)
 648                         break;
 649                 err = fixup_compat_flock(&flock);
 650                 if (!err)
 651                         err = put_compat_flock(&flock, compat_ptr(arg));
 652                 break;
 653         case F_GETLK64:
 654         case F_OFD_GETLK:
 655                 err = get_compat_flock64(&flock, compat_ptr(arg));
 656                 if (err)
 657                         break;
 658                 err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
 659                 if (!err)
 660                         err = put_compat_flock64(&flock, compat_ptr(arg));
 661                 break;
 662         case F_SETLK:
 663         case F_SETLKW:
 664                 err = get_compat_flock(&flock, compat_ptr(arg));
 665                 if (err)
 666                         break;
 667                 err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
 668                 break;
 669         case F_SETLK64:
 670         case F_SETLKW64:
 671         case F_OFD_SETLK:
 672         case F_OFD_SETLKW:
 673                 err = get_compat_flock64(&flock, compat_ptr(arg));
 674                 if (err)
 675                         break;
 676                 err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
 677                 break;
 678         default:
 679                 err = do_fcntl(fd, cmd, arg, f.file);
 680                 break;
 681         }
 682 out_put:
 683         fdput(f);
 684         return err;
 685 }
 686
 687 COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 688                        compat_ulong_t, arg)
 689 {
 690         return do_compat_fcntl64(fd, cmd, arg);
 691 }
 692
 693 COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
 694                        compat_ulong_t, arg)
 695 {
 696         switch (cmd) {
 697         case F_GETLK64:
 698         case F_SETLK64:
 699         case F_SETLKW64:
 700         case F_OFD_GETLK:
 701         case F_OFD_SETLK:
 702         case F_OFD_SETLKW:
 703                 return -EINVAL;
 704         }
 705         return do_compat_fcntl64(fd, cmd, arg);
 706 }
 707 #endif
 708
 709 /* Table to convert sigio signal codes into poll band bitmaps */
 710
 711 static const __poll_t band_table[NSIGPOLL] = {
 712         EPOLLIN | EPOLLRDNORM,                  /* POLL_IN */
 713         EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,   /* POLL_OUT */
 714         EPOLLIN | EPOLLRDNORM | EPOLLMSG,               /* POLL_MSG */
 715         EPOLLERR,                               /* POLL_ERR */
 716         EPOLLPRI | EPOLLRDBAND,                 /* POLL_PRI */
 717         EPOLLHUP | EPOLLERR                     /* POLL_HUP */
 718 };
 719
 720 static inline int sigio_perm(struct task_struct *p,
 721                              struct fown_struct *fown, int sig)
 722 {
 723         const struct cred *cred;
 724         int ret;
 725
 726         rcu_read_lock();
 727         cred = __task_cred(p);
 728         ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
 729                 uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
 730                 uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
 731                !security_file_send_sigiotask(p, fown, sig));
 732         rcu_read_unlock();
 733         return ret;
 734 }
 735
 736 static void send_sigio_to_task(struct task_struct *p,
 737                                struct fown_struct *fown,
 738                                int fd, int reason, enum pid_type type)
 739 {
 740         /*
 741          * F_SETSIG can change ->signum lockless in parallel, make
 742          * sure we read it once and use the same value throughout.
 743          */
 744         int signum = READ_ONCE(fown->signum);
 745
 746         if (!sigio_perm(p, fown, signum))
 747                 return;
 748
 749         switch (signum) {
 750                 default: {
 751                         kernel_siginfo_t si;
 752
 753                         /* Queue a rt signal with the appropriate fd as its
 754                            value.  We use SI_SIGIO as the source, not
 755                            SI_KERNEL, since kernel signals always get
 756                            delivered even if we can't queue.  Failure to
 757                            queue in this case _should_ be reported; we fall
 758                            back to SIGIO in that case. --sct */
 759                         clear_siginfo(&si);
 760                         si.si_signo = signum;
 761                         si.si_errno = 0;
 762                         si.si_code  = reason;
 763                         /*
 764                          * Posix definies POLL_IN and friends to be signal
 765                          * specific si_codes for SIG_POLL.  Linux extended
 766                          * these si_codes to other signals in a way that is
 767                          * ambiguous if other signals also have signal
 768                          * specific si_codes.  In that case use SI_SIGIO instead
 769                          * to remove the ambiguity.
 770                          */
 771                         if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
 772                                 si.si_code = SI_SIGIO;
 773
 774                         /* Make sure we are called with one of the POLL_*
 775                            reasons, otherwise we could leak kernel stack into
 776                            userspace.  */
 777                         BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
 778                         if (reason - POLL_IN >= NSIGPOLL)
 779                                 si.si_band  = ~0L;
 780                         else
 781                                 si.si_band = mangle_poll(band_table[reason - POLL_IN]);
 782                         si.si_fd    = fd;
 783                         if (!do_send_sig_info(signum, &si, p, type))
 784                                 break;
 785                 }
 786                         fallthrough;    /* fall back on the old plain SIGIO signal */
 787                 case 0:
 788                         do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
 789         }
 790 }
 791
 792 void send_sigio(struct fown_struct *fown, int fd, int band)
 793 {
 794         struct task_struct *p;
 795         enum pid_type type;
 796         unsigned long flags;
 797         struct pid *pid;
 798
 799         read_lock_irqsave(&fown->lock, flags);
 800
 801         type = fown->pid_type;
 802         pid = fown->pid;
 803         if (!pid)
 804                 goto out_unlock_fown;
 805
 806         if (type <= PIDTYPE_TGID) {
 807                 rcu_read_lock();
 808                 p = pid_task(pid, PIDTYPE_PID);
 809                 if (p)
 810                         send_sigio_to_task(p, fown, fd, band, type);
 811                 rcu_read_unlock();
 812         } else {
 813                 read_lock(&tasklist_lock);
 814                 do_each_pid_task(pid, type, p) {
 815                         send_sigio_to_task(p, fown, fd, band, type);
 816                 } while_each_pid_task(pid, type, p);
 817                 read_unlock(&tasklist_lock);
 818         }
 819  out_unlock_fown:
 820         read_unlock_irqrestore(&fown->lock, flags);
 821 }
 822
 823 static void send_sigurg_to_task(struct task_struct *p,
 824                                 struct fown_struct *fown, enum pid_type type)
 825 {
 826         if (sigio_perm(p, fown, SIGURG))
 827                 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
 828 }
 829
 830 int send_sigurg(struct fown_struct *fown)
 831 {
 832         struct task_struct *p;
 833         enum pid_type type;
 834         struct pid *pid;
 835         unsigned long flags;
 836         int ret = 0;
 837
 838         read_lock_irqsave(&fown->lock, flags);
 839
 840         type = fown->pid_type;
 841         pid = fown->pid;
 842         if (!pid)
 843                 goto out_unlock_fown;
 844
 845         ret = 1;
 846
 847         if (type <= PIDTYPE_TGID) {
 848                 rcu_read_lock();
 849                 p = pid_task(pid, PIDTYPE_PID);
 850                 if (p)
 851                         send_sigurg_to_task(p, fown, type);
 852                 rcu_read_unlock();
 853         } else {
 854                 read_lock(&tasklist_lock);
 855                 do_each_pid_task(pid, type, p) {
 856                         send_sigurg_to_task(p, fown, type);
 857                 } while_each_pid_task(pid, type, p);
 858                 read_unlock(&tasklist_lock);
 859         }
 860  out_unlock_fown:
 861         read_unlock_irqrestore(&fown->lock, flags);
 862         return ret;
 863 }
 864
 865 static DEFINE_SPINLOCK(fasync_lock);
 866 static struct kmem_cache *fasync_cache __read_mostly;
 867
 868 static void fasync_free_rcu(struct rcu_head *head)
 869 {
 870         kmem_cache_free(fasync_cache,
 871                         container_of(head, struct fasync_struct, fa_rcu));
 872 }
 873
 874 /*
 875  * Remove a fasync entry. If successfully removed, return
 876  * positive and clear the FASYNC flag. If no entry exists,
 877  * do nothing and return 0.
 878  *
 879  * NOTE! It is very important that the FASYNC flag always
 880  * match the state "is the filp on a fasync list".
 881  *
 882  */
 883 int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 884 {
 885         struct fasync_struct *fa, **fp;
 886         int result = 0;
 887
 888         spin_lock(&filp->f_lock);
 889         spin_lock(&fasync_lock);
 890         for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 891                 if (fa->fa_file != filp)
 892                         continue;
 893
 894                 write_lock_irq(&fa->fa_lock);
 895                 fa->fa_file = NULL;
 896                 write_unlock_irq(&fa->fa_lock);
 897
 898                 *fp = fa->fa_next;
 899                 call_rcu(&fa->fa_rcu, fasync_free_rcu);
 900                 filp->f_flags &= ~FASYNC;
 901                 result = 1;
 902                 break;
 903         }
 904         spin_unlock(&fasync_lock);
 905         spin_unlock(&filp->f_lock);
 906         return result;
 907 }
 908
 909 struct fasync_struct *fasync_alloc(void)
 910 {
 911         return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
 912 }
 913
/*
 * Return @new to fasync_cache immediately, without waiting for an RCU
 * grace period.
 *
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
        kmem_cache_free(fasync_cache, new);
}
 923
 924 /*
 925  * Insert a new entry into the fasync list.  Return the pointer to the
 926  * old one if we didn't use the new one.
 927  *
 928  * NOTE! It is very important that the FASYNC flag always
 929  * match the state "is the filp on a fasync list".
 930  */
 931 struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
 932 {
 933         struct fasync_struct *fa, **fp;
 934
 935         spin_lock(&filp->f_lock);
 936         spin_lock(&fasync_lock);
 937         for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 938                 if (fa->fa_file != filp)
 939                         continue;
 940
 941                 write_lock_irq(&fa->fa_lock);
 942                 fa->fa_fd = fd;
 943                 write_unlock_irq(&fa->fa_lock);
 944                 goto out;
 945         }
 946
 947         rwlock_init(&new->fa_lock);
 948         new->magic = FASYNC_MAGIC;
 949         new->fa_file = filp;
 950         new->fa_fd = fd;
 951         new->fa_next = *fapp;
 952         rcu_assign_pointer(*fapp, new);
 953         filp->f_flags |= FASYNC;
 954
 955 out:
 956         spin_unlock(&fasync_lock);
 957         spin_unlock(&filp->f_lock);
 958         return fa;
 959 }
 960
 961 /*
 962  * Add a fasync entry. Return negative on error, positive if
 963  * added, and zero if did nothing but change an existing one.
 964  */
 965 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
 966 {
 967         struct fasync_struct *new;
 968
 969         new = fasync_alloc();
 970         if (!new)
 971                 return -ENOMEM;
 972
 973         /*
 974          * fasync_insert_entry() returns the old (update) entry if
 975          * it existed.
 976          *
 977          * So free the (unused) new entry and return 0 to let the
 978          * caller know that we didn't add any new fasync entries.
 979          */
 980         if (fasync_insert_entry(fd, filp, fapp, new)) {
 981                 fasync_free(new);
 982                 return 0;
 983         }
 984
 985         return 1;
 986 }
 987
 988 /*
 989  * fasync_helper() is used by almost all character device drivers
 990  * to set up the fasync queue, and for regular files by the file
 991  * lease code. It returns negative on error, 0 if it did no changes
 992  * and positive if it added/deleted the entry.
 993  */
 994 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
 995 {
 996         if (!on)
 997                 return fasync_remove_entry(filp, fapp);
 998         return fasync_add_entry(fd, filp, fapp);
 999 }
1000
1001 EXPORT_SYMBOL(fasync_helper);
1002
1003 /*
1004  * rcu_read_lock() is held
1005  */
1006 static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
1007 {
1008         while (fa) {
1009                 struct fown_struct *fown;
1010                 unsigned long flags;
1011
1012                 if (fa->magic != FASYNC_MAGIC) {
1013                         printk(KERN_ERR "kill_fasync: bad magic number in "
1014                                "fasync_struct!\n");
1015                         return;
1016                 }
1017                 read_lock_irqsave(&fa->fa_lock, flags);
1018                 if (fa->fa_file) {
1019                         fown = &fa->fa_file->f_owner;
1020                         /* Don't send SIGURG to processes which have not set a
1021                            queued signum: SIGURG has its own default signalling
1022                            mechanism. */
1023                         if (!(sig == SIGURG && fown->signum == 0))
1024                                 send_sigio(fown, fa->fa_fd, band);
1025                 }
1026                 read_unlock_irqrestore(&fa->fa_lock, flags);
1027                 fa = rcu_dereference(fa->fa_next);
1028         }
1029 }
1030
/*
 * Notify every entry on the fasync list whose head pointer is at @fp.
 * @sig and @band are passed through to kill_fasync_rcu(), which runs
 * inside an RCU read-side critical section.
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
        /* First a quick test without locking: usually
         * the list is empty.
         */
        if (!*fp)
                return;

        rcu_read_lock();
        kill_fasync_rcu(rcu_dereference(*fp), sig, band);
        rcu_read_unlock();
}
EXPORT_SYMBOL(kill_fasync);
1043
1044 static int __init fcntl_init(void)
1045 {
1046         /*
1047          * Please add new bits here to ensure allocation uniqueness.
1048          * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
1049          * is defined as O_NONBLOCK on some platforms and not on others.
1050          */
1051         BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
1052                 HWEIGHT32(
1053                         (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
1054                         __FMODE_EXEC | __FMODE_NONOTIFY));
1055
1056         fasync_cache = kmem_cache_create("fasync_cache",
1057                                          sizeof(struct fasync_struct), 0,
1058                                          SLAB_PANIC | SLAB_ACCOUNT, NULL);
1059         return 0;
1060 }
1061
1062 module_init(fcntl_init)