]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - fs/fcntl.c
313eba860346870265bf8a109a3a9c7091ca1864
[mirror_ubuntu-artful-kernel.git] / fs / fcntl.c
1 /*
2 * linux/fs/fcntl.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7 #include <linux/syscalls.h>
8 #include <linux/init.h>
9 #include <linux/mm.h>
10 #include <linux/sched/task.h>
11 #include <linux/fs.h>
12 #include <linux/file.h>
13 #include <linux/fdtable.h>
14 #include <linux/capability.h>
15 #include <linux/dnotify.h>
16 #include <linux/slab.h>
17 #include <linux/module.h>
18 #include <linux/pipe_fs_i.h>
19 #include <linux/security.h>
20 #include <linux/ptrace.h>
21 #include <linux/signal.h>
22 #include <linux/rcupdate.h>
23 #include <linux/pid_namespace.h>
24 #include <linux/user_namespace.h>
25 #include <linux/shmem_fs.h>
26 #include <linux/compat.h>
27
28 #include <asm/poll.h>
29 #include <asm/siginfo.h>
30 #include <linux/uaccess.h>
31
/* File status flags that setfl() copies from the F_SETFL argument into f_flags. */
#define SETFL_MASK (O_NOATIME | O_DIRECT | O_NDELAY | O_NONBLOCK | O_APPEND)
33
34 static int setfl(int fd, struct file * filp, unsigned long arg)
35 {
36 struct inode * inode = file_inode(filp);
37 int error = 0;
38
39 /*
40 * O_APPEND cannot be cleared if the file is marked as append-only
41 * and the file is open for write.
42 */
43 if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
44 return -EPERM;
45
46 /* O_NOATIME can only be set by the owner or superuser */
47 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
48 if (!inode_owner_or_capable(inode))
49 return -EPERM;
50
51 /* required for strict SunOS emulation */
52 if (O_NONBLOCK != O_NDELAY)
53 if (arg & O_NDELAY)
54 arg |= O_NONBLOCK;
55
56 /* Pipe packetized mode is controlled by O_DIRECT flag */
57 if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
58 if (!filp->f_mapping || !filp->f_mapping->a_ops ||
59 !filp->f_mapping->a_ops->direct_IO)
60 return -EINVAL;
61 }
62
63 if (filp->f_op->check_flags)
64 error = filp->f_op->check_flags(arg);
65 if (error)
66 return error;
67
68 /*
69 * ->fasync() is responsible for setting the FASYNC bit.
70 */
71 if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
72 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
73 if (error < 0)
74 goto out;
75 if (error > 0)
76 error = 0;
77 }
78 spin_lock(&filp->f_lock);
79 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
80 spin_unlock(&filp->f_lock);
81
82 out:
83 return error;
84 }
85
86 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
87 int force)
88 {
89 write_lock_irq(&filp->f_owner.lock);
90 if (force || !filp->f_owner.pid) {
91 put_pid(filp->f_owner.pid);
92 filp->f_owner.pid = get_pid(pid);
93 filp->f_owner.pid_type = type;
94
95 if (pid) {
96 const struct cred *cred = current_cred();
97 filp->f_owner.uid = cred->uid;
98 filp->f_owner.euid = cred->euid;
99 }
100 }
101 write_unlock_irq(&filp->f_owner.lock);
102 }
103
104 void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
105 int force)
106 {
107 security_file_set_fowner(filp);
108 f_modown(filp, pid, type, force);
109 }
110 EXPORT_SYMBOL(__f_setown);
111
112 int f_setown(struct file *filp, unsigned long arg, int force)
113 {
114 enum pid_type type;
115 struct pid *pid;
116 int who = arg;
117 type = PIDTYPE_PID;
118 if (who < 0) {
119 type = PIDTYPE_PGID;
120 who = -who;
121 }
122 rcu_read_lock();
123 pid = find_vpid(who);
124 __f_setown(filp, pid, type, force);
125 rcu_read_unlock();
126
127 return 0;
128 }
129 EXPORT_SYMBOL(f_setown);
130
131 void f_delown(struct file *filp)
132 {
133 f_modown(filp, NULL, PIDTYPE_PID, 1);
134 }
135
136 pid_t f_getown(struct file *filp)
137 {
138 pid_t pid;
139 read_lock(&filp->f_owner.lock);
140 pid = pid_vnr(filp->f_owner.pid);
141 if (filp->f_owner.pid_type == PIDTYPE_PGID)
142 pid = -pid;
143 read_unlock(&filp->f_owner.lock);
144 return pid;
145 }
146
147 static int f_setown_ex(struct file *filp, unsigned long arg)
148 {
149 struct f_owner_ex __user *owner_p = (void __user *)arg;
150 struct f_owner_ex owner;
151 struct pid *pid;
152 int type;
153 int ret;
154
155 ret = copy_from_user(&owner, owner_p, sizeof(owner));
156 if (ret)
157 return -EFAULT;
158
159 switch (owner.type) {
160 case F_OWNER_TID:
161 type = PIDTYPE_MAX;
162 break;
163
164 case F_OWNER_PID:
165 type = PIDTYPE_PID;
166 break;
167
168 case F_OWNER_PGRP:
169 type = PIDTYPE_PGID;
170 break;
171
172 default:
173 return -EINVAL;
174 }
175
176 rcu_read_lock();
177 pid = find_vpid(owner.pid);
178 if (owner.pid && !pid)
179 ret = -ESRCH;
180 else
181 __f_setown(filp, pid, type, 1);
182 rcu_read_unlock();
183
184 return ret;
185 }
186
187 static int f_getown_ex(struct file *filp, unsigned long arg)
188 {
189 struct f_owner_ex __user *owner_p = (void __user *)arg;
190 struct f_owner_ex owner;
191 int ret = 0;
192
193 read_lock(&filp->f_owner.lock);
194 owner.pid = pid_vnr(filp->f_owner.pid);
195 switch (filp->f_owner.pid_type) {
196 case PIDTYPE_MAX:
197 owner.type = F_OWNER_TID;
198 break;
199
200 case PIDTYPE_PID:
201 owner.type = F_OWNER_PID;
202 break;
203
204 case PIDTYPE_PGID:
205 owner.type = F_OWNER_PGRP;
206 break;
207
208 default:
209 WARN_ON(1);
210 ret = -EINVAL;
211 break;
212 }
213 read_unlock(&filp->f_owner.lock);
214
215 if (!ret) {
216 ret = copy_to_user(owner_p, &owner, sizeof(owner));
217 if (ret)
218 ret = -EFAULT;
219 }
220 return ret;
221 }
222
223 #ifdef CONFIG_CHECKPOINT_RESTORE
224 static int f_getowner_uids(struct file *filp, unsigned long arg)
225 {
226 struct user_namespace *user_ns = current_user_ns();
227 uid_t __user *dst = (void __user *)arg;
228 uid_t src[2];
229 int err;
230
231 read_lock(&filp->f_owner.lock);
232 src[0] = from_kuid(user_ns, filp->f_owner.uid);
233 src[1] = from_kuid(user_ns, filp->f_owner.euid);
234 read_unlock(&filp->f_owner.lock);
235
236 err = put_user(src[0], &dst[0]);
237 err |= put_user(src[1], &dst[1]);
238
239 return err;
240 }
241 #else
242 static int f_getowner_uids(struct file *filp, unsigned long arg)
243 {
244 return -EINVAL;
245 }
246 #endif
247
248 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
249 struct file *filp)
250 {
251 void __user *argp = (void __user *)arg;
252 struct flock flock;
253 long err = -EINVAL;
254
255 switch (cmd) {
256 case F_DUPFD:
257 err = f_dupfd(arg, filp, 0);
258 break;
259 case F_DUPFD_CLOEXEC:
260 err = f_dupfd(arg, filp, O_CLOEXEC);
261 break;
262 case F_GETFD:
263 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
264 break;
265 case F_SETFD:
266 err = 0;
267 set_close_on_exec(fd, arg & FD_CLOEXEC);
268 break;
269 case F_GETFL:
270 err = filp->f_flags;
271 break;
272 case F_SETFL:
273 err = setfl(fd, filp, arg);
274 break;
275 #if BITS_PER_LONG != 32
276 /* 32-bit arches must use fcntl64() */
277 case F_OFD_GETLK:
278 #endif
279 case F_GETLK:
280 if (copy_from_user(&flock, argp, sizeof(flock)))
281 return -EFAULT;
282 err = fcntl_getlk(filp, cmd, &flock);
283 if (!err && copy_to_user(argp, &flock, sizeof(flock)))
284 return -EFAULT;
285 break;
286 #if BITS_PER_LONG != 32
287 /* 32-bit arches must use fcntl64() */
288 case F_OFD_SETLK:
289 case F_OFD_SETLKW:
290 #endif
291 /* Fallthrough */
292 case F_SETLK:
293 case F_SETLKW:
294 if (copy_from_user(&flock, argp, sizeof(flock)))
295 return -EFAULT;
296 err = fcntl_setlk(fd, filp, cmd, &flock);
297 break;
298 case F_GETOWN:
299 /*
300 * XXX If f_owner is a process group, the
301 * negative return value will get converted
302 * into an error. Oops. If we keep the
303 * current syscall conventions, the only way
304 * to fix this will be in libc.
305 */
306 err = f_getown(filp);
307 force_successful_syscall_return();
308 break;
309 case F_SETOWN:
310 err = f_setown(filp, arg, 1);
311 break;
312 case F_GETOWN_EX:
313 err = f_getown_ex(filp, arg);
314 break;
315 case F_SETOWN_EX:
316 err = f_setown_ex(filp, arg);
317 break;
318 case F_GETOWNER_UIDS:
319 err = f_getowner_uids(filp, arg);
320 break;
321 case F_GETSIG:
322 err = filp->f_owner.signum;
323 break;
324 case F_SETSIG:
325 /* arg == 0 restores default behaviour. */
326 if (!valid_signal(arg)) {
327 break;
328 }
329 err = 0;
330 filp->f_owner.signum = arg;
331 break;
332 case F_GETLEASE:
333 err = fcntl_getlease(filp);
334 break;
335 case F_SETLEASE:
336 err = fcntl_setlease(fd, filp, arg);
337 break;
338 case F_NOTIFY:
339 err = fcntl_dirnotify(fd, filp, arg);
340 break;
341 case F_SETPIPE_SZ:
342 case F_GETPIPE_SZ:
343 err = pipe_fcntl(filp, cmd, arg);
344 break;
345 case F_ADD_SEALS:
346 case F_GET_SEALS:
347 err = shmem_fcntl(filp, cmd, arg);
348 break;
349 default:
350 break;
351 }
352 return err;
353 }
354
/*
 * check_fcntl_cmd - is @cmd one of the commands allowed on FMODE_PATH files?
 *
 * Returns 1 for F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_SETFD and F_GETFL,
 * 0 for every other command.
 */
static int check_fcntl_cmd(unsigned cmd)
{
	return cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC ||
	       cmd == F_GETFD || cmd == F_SETFD || cmd == F_GETFL;
}
367
368 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
369 {
370 struct fd f = fdget_raw(fd);
371 long err = -EBADF;
372
373 if (!f.file)
374 goto out;
375
376 if (unlikely(f.file->f_mode & FMODE_PATH)) {
377 if (!check_fcntl_cmd(cmd))
378 goto out1;
379 }
380
381 err = security_file_fcntl(f.file, cmd, arg);
382 if (!err)
383 err = do_fcntl(fd, cmd, arg, f.file);
384
385 out1:
386 fdput(f);
387 out:
388 return err;
389 }
390
#if BITS_PER_LONG == 32
/*
 * fcntl64(2) entry point, only built when BITS_PER_LONG is 32.
 *
 * The locking commands that take a struct flock64 are handled here;
 * every other command is forwarded to do_fcntl().
 */
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	void __user *uptr = (void __user *)arg;
	struct fd f = fdget_raw(fd);
	struct flock64 fl;
	long ret;

	if (!f.file)
		return -EBADF;

	/* O_PATH descriptors only accept the check_fcntl_cmd() commands. */
	ret = -EBADF;
	if (unlikely(f.file->f_mode & FMODE_PATH) && !check_fcntl_cmd(cmd))
		goto put;

	ret = security_file_fcntl(f.file, cmd, arg);
	if (ret)
		goto put;

	switch (cmd) {
	case F_GETLK64:
	case F_OFD_GETLK:
		if (copy_from_user(&fl, uptr, sizeof(fl))) {
			ret = -EFAULT;
			break;
		}
		ret = fcntl_getlk64(f.file, cmd, &fl);
		if (!ret && copy_to_user(uptr, &fl, sizeof(fl)))
			ret = -EFAULT;
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		if (copy_from_user(&fl, uptr, sizeof(fl))) {
			ret = -EFAULT;
			break;
		}
		ret = fcntl_setlk64(fd, f.file, cmd, &fl);
		break;
	default:
		ret = do_fcntl(fd, cmd, arg, f.file);
		break;
	}

put:
	fdput(f);
	return ret;
}
#endif
441
442 #ifdef CONFIG_COMPAT
443 static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
444 {
445 if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
446 __get_user(kfl->l_type, &ufl->l_type) ||
447 __get_user(kfl->l_whence, &ufl->l_whence) ||
448 __get_user(kfl->l_start, &ufl->l_start) ||
449 __get_user(kfl->l_len, &ufl->l_len) ||
450 __get_user(kfl->l_pid, &ufl->l_pid))
451 return -EFAULT;
452 return 0;
453 }
454
455 static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl)
456 {
457 if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) ||
458 __put_user(kfl->l_type, &ufl->l_type) ||
459 __put_user(kfl->l_whence, &ufl->l_whence) ||
460 __put_user(kfl->l_start, &ufl->l_start) ||
461 __put_user(kfl->l_len, &ufl->l_len) ||
462 __put_user(kfl->l_pid, &ufl->l_pid))
463 return -EFAULT;
464 return 0;
465 }
466
467 #ifndef HAVE_ARCH_GET_COMPAT_FLOCK64
468 static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
469 {
470 if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) ||
471 __get_user(kfl->l_type, &ufl->l_type) ||
472 __get_user(kfl->l_whence, &ufl->l_whence) ||
473 __get_user(kfl->l_start, &ufl->l_start) ||
474 __get_user(kfl->l_len, &ufl->l_len) ||
475 __get_user(kfl->l_pid, &ufl->l_pid))
476 return -EFAULT;
477 return 0;
478 }
479 #endif
480
481 #ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64
482 static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl)
483 {
484 if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) ||
485 __put_user(kfl->l_type, &ufl->l_type) ||
486 __put_user(kfl->l_whence, &ufl->l_whence) ||
487 __put_user(kfl->l_start, &ufl->l_start) ||
488 __put_user(kfl->l_len, &ufl->l_len) ||
489 __put_user(kfl->l_pid, &ufl->l_pid))
490 return -EFAULT;
491 return 0;
492 }
493 #endif
494
/*
 * convert_fcntl_cmd - map the *LK64 lock commands onto their plain forms.
 *
 * F_GETLK64, F_SETLK64 and F_SETLKW64 become F_GETLK, F_SETLK and
 * F_SETLKW respectively; any other command is returned unchanged.
 */
static unsigned int
convert_fcntl_cmd(unsigned int cmd)
{
	if (cmd == F_GETLK64)
		return F_GETLK;
	if (cmd == F_SETLK64)
		return F_SETLK;
	if (cmd == F_SETLKW64)
		return F_SETLKW;

	return cmd;
}
509
510 /*
511 * GETLK was successful and we need to return the data, but it needs to fit in
512 * the compat structure.
513 * l_start shouldn't be too big, unless the original start + end is greater than
514 * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
515 * -EOVERFLOW in that case. l_len could be too big, in which case we just
516 * truncate it, and only allow the app to see that part of the conflicting lock
517 * that might make sense to it anyway
518 */
519 static int fixup_compat_flock(struct flock *flock)
520 {
521 if (flock->l_start > COMPAT_OFF_T_MAX)
522 return -EOVERFLOW;
523 if (flock->l_len > COMPAT_OFF_T_MAX)
524 flock->l_len = COMPAT_OFF_T_MAX;
525 return 0;
526 }
527
528 COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
529 compat_ulong_t, arg)
530 {
531 struct fd f = fdget_raw(fd);
532 struct flock flock;
533 long err = -EBADF;
534
535 if (!f.file)
536 return err;
537
538 if (unlikely(f.file->f_mode & FMODE_PATH)) {
539 if (!check_fcntl_cmd(cmd))
540 goto out_put;
541 }
542
543 err = security_file_fcntl(f.file, cmd, arg);
544 if (err)
545 goto out_put;
546
547 switch (cmd) {
548 case F_GETLK:
549 err = get_compat_flock(&flock, compat_ptr(arg));
550 if (err)
551 break;
552 err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
553 if (err)
554 break;
555 err = fixup_compat_flock(&flock);
556 if (err)
557 return err;
558 err = put_compat_flock(&flock, compat_ptr(arg));
559 break;
560 case F_GETLK64:
561 case F_OFD_GETLK:
562 err = get_compat_flock64(&flock, compat_ptr(arg));
563 if (err)
564 break;
565 err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
566 if (err)
567 break;
568 err = fixup_compat_flock(&flock);
569 if (err)
570 return err;
571 err = put_compat_flock64(&flock, compat_ptr(arg));
572 break;
573 case F_SETLK:
574 case F_SETLKW:
575 err = get_compat_flock(&flock, compat_ptr(arg));
576 if (err)
577 break;
578 err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
579 break;
580 case F_SETLK64:
581 case F_SETLKW64:
582 case F_OFD_SETLK:
583 case F_OFD_SETLKW:
584 err = get_compat_flock64(&flock, compat_ptr(arg));
585 if (err)
586 break;
587 err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
588 break;
589 default:
590 err = do_fcntl(fd, cmd, arg, f.file);
591 break;
592 }
593 out_put:
594 fdput(f);
595 return err;
596 }
597
598 COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
599 compat_ulong_t, arg)
600 {
601 switch (cmd) {
602 case F_GETLK64:
603 case F_SETLK64:
604 case F_SETLKW64:
605 case F_OFD_GETLK:
606 case F_OFD_SETLK:
607 case F_OFD_SETLKW:
608 return -EINVAL;
609 }
610 return compat_sys_fcntl64(fd, cmd, arg);
611 }
612 #endif
613
614 /* Table to convert sigio signal codes into poll band bitmaps */
615
616 static const long band_table[NSIGPOLL] = {
617 POLLIN | POLLRDNORM, /* POLL_IN */
618 POLLOUT | POLLWRNORM | POLLWRBAND, /* POLL_OUT */
619 POLLIN | POLLRDNORM | POLLMSG, /* POLL_MSG */
620 POLLERR, /* POLL_ERR */
621 POLLPRI | POLLRDBAND, /* POLL_PRI */
622 POLLHUP | POLLERR /* POLL_HUP */
623 };
624
625 static inline int sigio_perm(struct task_struct *p,
626 struct fown_struct *fown, int sig)
627 {
628 const struct cred *cred;
629 int ret;
630
631 rcu_read_lock();
632 cred = __task_cred(p);
633 ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
634 uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
635 uid_eq(fown->uid, cred->suid) || uid_eq(fown->uid, cred->uid)) &&
636 !security_file_send_sigiotask(p, fown, sig));
637 rcu_read_unlock();
638 return ret;
639 }
640
641 static void send_sigio_to_task(struct task_struct *p,
642 struct fown_struct *fown,
643 int fd, int reason, int group)
644 {
645 /*
646 * F_SETSIG can change ->signum lockless in parallel, make
647 * sure we read it once and use the same value throughout.
648 */
649 int signum = ACCESS_ONCE(fown->signum);
650
651 if (!sigio_perm(p, fown, signum))
652 return;
653
654 switch (signum) {
655 siginfo_t si;
656 default:
657 /* Queue a rt signal with the appropriate fd as its
658 value. We use SI_SIGIO as the source, not
659 SI_KERNEL, since kernel signals always get
660 delivered even if we can't queue. Failure to
661 queue in this case _should_ be reported; we fall
662 back to SIGIO in that case. --sct */
663 si.si_signo = signum;
664 si.si_errno = 0;
665 si.si_code = reason;
666 /* Make sure we are called with one of the POLL_*
667 reasons, otherwise we could leak kernel stack into
668 userspace. */
669 BUG_ON((reason & __SI_MASK) != __SI_POLL);
670 if (reason - POLL_IN >= NSIGPOLL)
671 si.si_band = ~0L;
672 else
673 si.si_band = band_table[reason - POLL_IN];
674 si.si_fd = fd;
675 if (!do_send_sig_info(signum, &si, p, group))
676 break;
677 /* fall-through: fall back on the old plain SIGIO signal */
678 case 0:
679 do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
680 }
681 }
682
683 void send_sigio(struct fown_struct *fown, int fd, int band)
684 {
685 struct task_struct *p;
686 enum pid_type type;
687 struct pid *pid;
688 int group = 1;
689
690 read_lock(&fown->lock);
691
692 type = fown->pid_type;
693 if (type == PIDTYPE_MAX) {
694 group = 0;
695 type = PIDTYPE_PID;
696 }
697
698 pid = fown->pid;
699 if (!pid)
700 goto out_unlock_fown;
701
702 read_lock(&tasklist_lock);
703 do_each_pid_task(pid, type, p) {
704 send_sigio_to_task(p, fown, fd, band, group);
705 } while_each_pid_task(pid, type, p);
706 read_unlock(&tasklist_lock);
707 out_unlock_fown:
708 read_unlock(&fown->lock);
709 }
710
711 static void send_sigurg_to_task(struct task_struct *p,
712 struct fown_struct *fown, int group)
713 {
714 if (sigio_perm(p, fown, SIGURG))
715 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
716 }
717
718 int send_sigurg(struct fown_struct *fown)
719 {
720 struct task_struct *p;
721 enum pid_type type;
722 struct pid *pid;
723 int group = 1;
724 int ret = 0;
725
726 read_lock(&fown->lock);
727
728 type = fown->pid_type;
729 if (type == PIDTYPE_MAX) {
730 group = 0;
731 type = PIDTYPE_PID;
732 }
733
734 pid = fown->pid;
735 if (!pid)
736 goto out_unlock_fown;
737
738 ret = 1;
739
740 read_lock(&tasklist_lock);
741 do_each_pid_task(pid, type, p) {
742 send_sigurg_to_task(p, fown, group);
743 } while_each_pid_task(pid, type, p);
744 read_unlock(&tasklist_lock);
745 out_unlock_fown:
746 read_unlock(&fown->lock);
747 return ret;
748 }
749
750 static DEFINE_SPINLOCK(fasync_lock);
751 static struct kmem_cache *fasync_cache __read_mostly;
752
753 static void fasync_free_rcu(struct rcu_head *head)
754 {
755 kmem_cache_free(fasync_cache,
756 container_of(head, struct fasync_struct, fa_rcu));
757 }
758
759 /*
760 * Remove a fasync entry. If successfully removed, return
761 * positive and clear the FASYNC flag. If no entry exists,
762 * do nothing and return 0.
763 *
764 * NOTE! It is very important that the FASYNC flag always
765 * match the state "is the filp on a fasync list".
766 *
767 */
768 int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
769 {
770 struct fasync_struct *fa, **fp;
771 int result = 0;
772
773 spin_lock(&filp->f_lock);
774 spin_lock(&fasync_lock);
775 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
776 if (fa->fa_file != filp)
777 continue;
778
779 spin_lock_irq(&fa->fa_lock);
780 fa->fa_file = NULL;
781 spin_unlock_irq(&fa->fa_lock);
782
783 *fp = fa->fa_next;
784 call_rcu(&fa->fa_rcu, fasync_free_rcu);
785 filp->f_flags &= ~FASYNC;
786 result = 1;
787 break;
788 }
789 spin_unlock(&fasync_lock);
790 spin_unlock(&filp->f_lock);
791 return result;
792 }
793
794 struct fasync_struct *fasync_alloc(void)
795 {
796 return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
797 }
798
799 /*
800 * NOTE! This can be used only for unused fasync entries:
801 * entries that actually got inserted on the fasync list
802 * need to be released by rcu - see fasync_remove_entry.
803 */
804 void fasync_free(struct fasync_struct *new)
805 {
806 kmem_cache_free(fasync_cache, new);
807 }
808
809 /*
810 * Insert a new entry into the fasync list. Return the pointer to the
811 * old one if we didn't use the new one.
812 *
813 * NOTE! It is very important that the FASYNC flag always
814 * match the state "is the filp on a fasync list".
815 */
816 struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
817 {
818 struct fasync_struct *fa, **fp;
819
820 spin_lock(&filp->f_lock);
821 spin_lock(&fasync_lock);
822 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
823 if (fa->fa_file != filp)
824 continue;
825
826 spin_lock_irq(&fa->fa_lock);
827 fa->fa_fd = fd;
828 spin_unlock_irq(&fa->fa_lock);
829 goto out;
830 }
831
832 spin_lock_init(&new->fa_lock);
833 new->magic = FASYNC_MAGIC;
834 new->fa_file = filp;
835 new->fa_fd = fd;
836 new->fa_next = *fapp;
837 rcu_assign_pointer(*fapp, new);
838 filp->f_flags |= FASYNC;
839
840 out:
841 spin_unlock(&fasync_lock);
842 spin_unlock(&filp->f_lock);
843 return fa;
844 }
845
846 /*
847 * Add a fasync entry. Return negative on error, positive if
848 * added, and zero if did nothing but change an existing one.
849 */
850 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
851 {
852 struct fasync_struct *new;
853
854 new = fasync_alloc();
855 if (!new)
856 return -ENOMEM;
857
858 /*
859 * fasync_insert_entry() returns the old (update) entry if
860 * it existed.
861 *
862 * So free the (unused) new entry and return 0 to let the
863 * caller know that we didn't add any new fasync entries.
864 */
865 if (fasync_insert_entry(fd, filp, fapp, new)) {
866 fasync_free(new);
867 return 0;
868 }
869
870 return 1;
871 }
872
/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code.
 *
 * With @on set an entry is added (or updated), otherwise the entry is
 * removed.  Returns negative on error, 0 if nothing was changed and
 * positive if an entry was added or deleted.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	return on ? fasync_add_entry(fd, filp, fapp)
		  : fasync_remove_entry(filp, fapp);
}

EXPORT_SYMBOL(fasync_helper);
887
888 /*
889 * rcu_read_lock() is held
890 */
891 static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
892 {
893 while (fa) {
894 struct fown_struct *fown;
895 unsigned long flags;
896
897 if (fa->magic != FASYNC_MAGIC) {
898 printk(KERN_ERR "kill_fasync: bad magic number in "
899 "fasync_struct!\n");
900 return;
901 }
902 spin_lock_irqsave(&fa->fa_lock, flags);
903 if (fa->fa_file) {
904 fown = &fa->fa_file->f_owner;
905 /* Don't send SIGURG to processes which have not set a
906 queued signum: SIGURG has its own default signalling
907 mechanism. */
908 if (!(sig == SIGURG && fown->signum == 0))
909 send_sigio(fown, fa->fa_fd, band);
910 }
911 spin_unlock_irqrestore(&fa->fa_lock, flags);
912 fa = rcu_dereference(fa->fa_next);
913 }
914 }
915
/*
 * kill_fasync - signal the owners of every file on the list at @fp.
 * @fp:   head of a fasync list
 * @sig:  signal the caller is raising (SIGURG gets special treatment)
 * @band: reason code handed to send_sigio()
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/*
	 * Quick unlocked test first: usually the list is empty and
	 * there is nothing to do.
	 */
	if (!*fp)
		return;

	rcu_read_lock();
	kill_fasync_rcu(rcu_dereference(*fp), sig, band);
	rcu_read_unlock();
}
EXPORT_SYMBOL(kill_fasync);
928
929 static int __init fcntl_init(void)
930 {
931 /*
932 * Please add new bits here to ensure allocation uniqueness.
933 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
934 * is defined as O_NONBLOCK on some platforms and not on others.
935 */
936 BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
937 HWEIGHT32(
938 (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
939 __FMODE_EXEC | __FMODE_NONOTIFY));
940
941 fasync_cache = kmem_cache_create("fasync_cache",
942 sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
943 return 0;
944 }
945
946 module_init(fcntl_init)