1 /*
2 * linux/fs/pipe.c
3 *
4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
5 */
6
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/log2.h>
15 #include <linux/mount.h>
16 #include <linux/pipe_fs_i.h>
17 #include <linux/uio.h>
18 #include <linux/highmem.h>
19 #include <linux/pagemap.h>
20 #include <linux/audit.h>
21 #include <linux/syscalls.h>
22
23 #include <asm/uaccess.h>
24 #include <asm/ioctls.h>
25
26 /*
27 * We use a start+len construction, which provides full use of the
28 * allocated memory.
29 * -- Florian Coosmann (FGC)
30 *
31 * Reads with count = 0 should always return 0.
32 * -- Julian Bradfield 1999-06-07.
33 *
34 * FIFOs and Pipes now generate SIGIO for both readers and writers.
35 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
36 *
37 * pipe_read & write cleanup
38 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
39 */
40
41 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
42 {
43 if (pipe->inode)
44 mutex_lock_nested(&pipe->inode->i_mutex, subclass);
45 }
46
47 void pipe_lock(struct pipe_inode_info *pipe)
48 {
49 /*
50 * pipe_lock() nests non-pipe inode locks (for writing to a file)
51 */
52 pipe_lock_nested(pipe, I_MUTEX_PARENT);
53 }
54 EXPORT_SYMBOL(pipe_lock);
55
56 void pipe_unlock(struct pipe_inode_info *pipe)
57 {
58 if (pipe->inode)
59 mutex_unlock(&pipe->inode->i_mutex);
60 }
61 EXPORT_SYMBOL(pipe_unlock);
62
63 void pipe_double_lock(struct pipe_inode_info *pipe1,
64 struct pipe_inode_info *pipe2)
65 {
66 BUG_ON(pipe1 == pipe2);
67
68 if (pipe1 < pipe2) {
69 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
70 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
71 } else {
72 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
73 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
74 }
75 }
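
/*
* Callers that must hold two pipes at once (tee() and splicing between
* pipes, for example) go through pipe_double_lock() so that the two
* mutexes are always acquired in the same (address) order, which
* prevents an ABBA deadlock between two tasks working on the same pair
* of pipes in opposite directions.  A sketch of the usage:
*
*	pipe_double_lock(ipipe, opipe);
*	... move or duplicate buffers between the two pipes ...
*	pipe_unlock(ipipe);
*	pipe_unlock(opipe);
*/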
76
77 /* Drop the inode semaphore and wait for a pipe event, atomically */
78 void pipe_wait(struct pipe_inode_info *pipe)
79 {
80 DEFINE_WAIT(wait);
81
82 /*
83 * Pipes are system-local resources, so sleeping on them
84 * is considered a noninteractive wait:
85 */
86 prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
87 pipe_unlock(pipe);
88 schedule();
89 finish_wait(&pipe->wait, &wait);
90 pipe_lock(pipe);
91 }
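
/*
* A sketch of the usual calling pattern (pipe_read() and pipe_write()
* below are the real thing): the caller rechecks its condition with the
* lock held after every wakeup, and because prepare_to_wait() runs
* before the lock is dropped, a wakeup that arrives in the window
* between unlocking and sleeping is not lost.
*
*	mutex_lock(&inode->i_mutex);
*	while (!pipe->nrbufs) {
*		if (!pipe->writers)
*			break;
*		pipe_wait(pipe);
*	}
*	... consume the buffers ...
*	mutex_unlock(&inode->i_mutex);
*
* The real readers also handle signals and O_NONBLOCK, which this
* sketch leaves out.
*/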
92
93 static int
94 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
95 int atomic)
96 {
97 unsigned long copy;
98
99 while (len > 0) {
100 while (!iov->iov_len)
101 iov++;
102 copy = min_t(unsigned long, len, iov->iov_len);
103
104 if (atomic) {
105 if (__copy_from_user_inatomic(to, iov->iov_base, copy))
106 return -EFAULT;
107 } else {
108 if (copy_from_user(to, iov->iov_base, copy))
109 return -EFAULT;
110 }
111 to += copy;
112 len -= copy;
113 iov->iov_base += copy;
114 iov->iov_len -= copy;
115 }
116 return 0;
117 }
118
119 static int
120 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
121 int atomic)
122 {
123 unsigned long copy;
124
125 while (len > 0) {
126 while (!iov->iov_len)
127 iov++;
128 copy = min_t(unsigned long, len, iov->iov_len);
129
130 if (atomic) {
131 if (__copy_to_user_inatomic(iov->iov_base, from, copy))
132 return -EFAULT;
133 } else {
134 if (copy_to_user(iov->iov_base, from, copy))
135 return -EFAULT;
136 }
137 from += copy;
138 len -= copy;
139 iov->iov_base += copy;
140 iov->iov_len -= copy;
141 }
142 return 0;
143 }
144
145 /*
146 * Attempt to pre-fault in the user memory, so we can use atomic copies.
147 * Returns the number of bytes not faulted in.
148 */
149 static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
150 {
151 while (!iov->iov_len)
152 iov++;
153
154 while (len > 0) {
155 unsigned long this_len;
156
157 this_len = min_t(unsigned long, len, iov->iov_len);
158 if (fault_in_pages_writeable(iov->iov_base, this_len))
159 break;
160
161 len -= this_len;
162 iov++;
163 }
164
165 return len;
166 }
167
168 /*
169 * Pre-fault in the user memory, so we can use atomic copies.
170 */
171 static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
172 {
173 while (!iov->iov_len)
174 iov++;
175
176 while (len > 0) {
177 unsigned long this_len;
178
179 this_len = min_t(unsigned long, len, iov->iov_len);
180 fault_in_pages_readable(iov->iov_base, this_len);
181 len -= this_len;
182 iov++;
183 }
184 }
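
/*
* Both helpers exist to make the common case cheap: pre-fault the user
* pages, try the copy under an atomic kmap first, and only fall back to
* a sleeping kmap if the atomic copy still faults.  A condensed sketch
* of how pipe_read() below uses this:
*
*	atomic = !iov_fault_in_pages_write(iov, chars);
* redo:
*	addr = ops->map(pipe, buf, atomic);
*	error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
*	ops->unmap(pipe, buf, addr);
*	if (unlikely(error) && atomic) {
*		atomic = 0;
*		goto redo;
*	}
*/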
185
186 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
187 struct pipe_buffer *buf)
188 {
189 struct page *page = buf->page;
190
191 /*
192 * If nobody else uses this page, and we don't already have a
193 * temporary page, let's keep track of it as a one-deep
194 * allocation cache. (Otherwise just release our reference to it)
195 */
196 if (page_count(page) == 1 && !pipe->tmp_page)
197 pipe->tmp_page = page;
198 else
199 page_cache_release(page);
200 }
201
202 /**
203 * generic_pipe_buf_map - virtually map a pipe buffer
204 * @pipe: the pipe that the buffer belongs to
205 * @buf: the buffer that should be mapped
206 * @atomic: whether to use an atomic map
207 *
208 * Description:
209 * This function returns a kernel virtual address mapping for the
210 * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
211 * and the caller has to be careful not to fault before calling
212 * the unmap function.
213 *
214 * Note that this function occupies KM_USER0 if @atomic != 0.
215 */
216 void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
217 struct pipe_buffer *buf, int atomic)
218 {
219 if (atomic) {
220 buf->flags |= PIPE_BUF_FLAG_ATOMIC;
221 return kmap_atomic(buf->page, KM_USER0);
222 }
223
224 return kmap(buf->page);
225 }
226
227 /**
228 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
229 * @pipe: the pipe that the buffer belongs to
230 * @buf: the buffer that should be unmapped
231 * @map_data: the data that the mapping function returned
232 *
233 * Description:
234 * This function undoes the mapping that ->map() provided.
235 */
236 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
237 struct pipe_buffer *buf, void *map_data)
238 {
239 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
240 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
241 kunmap_atomic(map_data, KM_USER0);
242 } else
243 kunmap(buf->page);
244 }
245
246 /**
247 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
248 * @pipe: the pipe that the buffer belongs to
249 * @buf: the buffer to attempt to steal
250 *
251 * Description:
252 * This function attempts to steal the &struct page attached to
253 * @buf. If successful, this function returns 0 and returns with
254 * the page locked. The caller may then reuse the page for whatever
255 * he wishes; the typical use is insertion into a different file
256 * page cache.
257 */
258 int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
259 struct pipe_buffer *buf)
260 {
261 struct page *page = buf->page;
262
263 /*
264 * A reference count of one is golden: it means that the owner of this
265 * page is the only one holding a reference to it. Lock the page
266 * and return OK.
267 */
268 if (page_count(page) == 1) {
269 lock_page(page);
270 return 0;
271 }
272
273 return 1;
274 }
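
/*
* A caller-side sketch of the ->steal() contract (the caller here is
* hypothetical; splice's SPLICE_F_MOVE path is the real user): a zero
* return hands over the page, locked, for reuse; a non-zero return
* means the page is shared and its contents must be copied instead.
*
*	if (buf->ops->steal(pipe, buf) == 0) {
*		struct page *page = buf->page;
*		... add the locked page to another mapping ...
*	} else {
*		... allocate a new page and copy buf->len bytes ...
*	}
*/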
275
276 /**
277 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
278 * @pipe: the pipe that the buffer belongs to
279 * @buf: the buffer to get a reference to
280 *
281 * Description:
282 * This function grabs an extra reference to @buf. It's used in
283 * the tee() system call, when we duplicate the buffers in one
284 * pipe into another.
285 */
286 void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
287 {
288 page_cache_get(buf->page);
289 }
290
291 /**
292 * generic_pipe_buf_confirm - verify contents of the pipe buffer
293 * @info: the pipe that the buffer belongs to
294 * @buf: the buffer to confirm
295 *
296 * Description:
297 * This function does nothing, because the generic pipe code uses
298 * pages that are always good when inserted into the pipe.
299 */
300 int generic_pipe_buf_confirm(struct pipe_inode_info *info,
301 struct pipe_buffer *buf)
302 {
303 return 0;
304 }
305
306 /**
307 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
308 * @pipe: the pipe that the buffer belongs to
309 * @buf: the buffer to put a reference to
310 *
311 * Description:
312 * This function releases a reference to @buf.
313 */
314 void generic_pipe_buf_release(struct pipe_inode_info *pipe,
315 struct pipe_buffer *buf)
316 {
317 page_cache_release(buf->page);
318 }
319
320 static const struct pipe_buf_operations anon_pipe_buf_ops = {
321 .can_merge = 1,
322 .map = generic_pipe_buf_map,
323 .unmap = generic_pipe_buf_unmap,
324 .confirm = generic_pipe_buf_confirm,
325 .release = anon_pipe_buf_release,
326 .steal = generic_pipe_buf_steal,
327 .get = generic_pipe_buf_get,
328 };
329
330 static ssize_t
331 pipe_read(struct kiocb *iocb, const struct iovec *_iov,
332 unsigned long nr_segs, loff_t pos)
333 {
334 struct file *filp = iocb->ki_filp;
335 struct inode *inode = filp->f_path.dentry->d_inode;
336 struct pipe_inode_info *pipe;
337 int do_wakeup;
338 ssize_t ret;
339 struct iovec *iov = (struct iovec *)_iov;
340 size_t total_len;
341
342 total_len = iov_length(iov, nr_segs);
343 /* Null read succeeds. */
344 if (unlikely(total_len == 0))
345 return 0;
346
347 do_wakeup = 0;
348 ret = 0;
349 mutex_lock(&inode->i_mutex);
350 pipe = inode->i_pipe;
351 for (;;) {
352 int bufs = pipe->nrbufs;
353 if (bufs) {
354 int curbuf = pipe->curbuf;
355 struct pipe_buffer *buf = pipe->bufs + curbuf;
356 const struct pipe_buf_operations *ops = buf->ops;
357 void *addr;
358 size_t chars = buf->len;
359 int error, atomic;
360
361 if (chars > total_len)
362 chars = total_len;
363
364 error = ops->confirm(pipe, buf);
365 if (error) {
366 if (!ret)
367 ret = error;
368 break;
369 }
370
371 atomic = !iov_fault_in_pages_write(iov, chars);
372 redo:
373 addr = ops->map(pipe, buf, atomic);
374 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
375 ops->unmap(pipe, buf, addr);
376 if (unlikely(error)) {
377 /*
378 * Just retry with the slow path if we failed.
379 */
380 if (atomic) {
381 atomic = 0;
382 goto redo;
383 }
384 if (!ret)
385 ret = error;
386 break;
387 }
388 ret += chars;
389 buf->offset += chars;
390 buf->len -= chars;
391 if (!buf->len) {
392 buf->ops = NULL;
393 ops->release(pipe, buf);
394 curbuf = (curbuf + 1) & (pipe->buffers - 1);
395 pipe->curbuf = curbuf;
396 pipe->nrbufs = --bufs;
397 do_wakeup = 1;
398 }
399 total_len -= chars;
400 if (!total_len)
401 break; /* common path: read succeeded */
402 }
403 if (bufs) /* More to do? */
404 continue;
405 if (!pipe->writers)
406 break;
407 if (!pipe->waiting_writers) {
408 /* syscall merging: Usually we must not sleep
409 * if O_NONBLOCK is set, or if we got some data.
410 * But if a writer sleeps in kernel space, then
411 * we can wait for that data without violating POSIX.
412 */
413 if (ret)
414 break;
415 if (filp->f_flags & O_NONBLOCK) {
416 ret = -EAGAIN;
417 break;
418 }
419 }
420 if (signal_pending(current)) {
421 if (!ret)
422 ret = -ERESTARTSYS;
423 break;
424 }
425 if (do_wakeup) {
426 wake_up_interruptible_sync(&pipe->wait);
427 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
428 }
429 pipe_wait(pipe);
430 }
431 mutex_unlock(&inode->i_mutex);
432
433 /* Signal writers asynchronously that there is more room. */
434 if (do_wakeup) {
435 wake_up_interruptible_sync(&pipe->wait);
436 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
437 }
438 if (ret > 0)
439 file_accessed(filp);
440 return ret;
441 }
442
443 static ssize_t
444 pipe_write(struct kiocb *iocb, const struct iovec *_iov,
445 unsigned long nr_segs, loff_t ppos)
446 {
447 struct file *filp = iocb->ki_filp;
448 struct inode *inode = filp->f_path.dentry->d_inode;
449 struct pipe_inode_info *pipe;
450 ssize_t ret;
451 int do_wakeup;
452 struct iovec *iov = (struct iovec *)_iov;
453 size_t total_len;
454 ssize_t chars;
455
456 total_len = iov_length(iov, nr_segs);
457 /* Null write succeeds. */
458 if (unlikely(total_len == 0))
459 return 0;
460
461 do_wakeup = 0;
462 ret = 0;
463 mutex_lock(&inode->i_mutex);
464 pipe = inode->i_pipe;
465
466 if (!pipe->readers) {
467 send_sig(SIGPIPE, current, 0);
468 ret = -EPIPE;
469 goto out;
470 }
471
472 /* We try to merge small writes */
473 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
474 if (pipe->nrbufs && chars != 0) {
475 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
476 (pipe->buffers - 1);
477 struct pipe_buffer *buf = pipe->bufs + lastbuf;
478 const struct pipe_buf_operations *ops = buf->ops;
479 int offset = buf->offset + buf->len;
480
481 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
482 int error, atomic = 1;
483 void *addr;
484
485 error = ops->confirm(pipe, buf);
486 if (error)
487 goto out;
488
489 iov_fault_in_pages_read(iov, chars);
490 redo1:
491 addr = ops->map(pipe, buf, atomic);
492 error = pipe_iov_copy_from_user(offset + addr, iov,
493 chars, atomic);
494 ops->unmap(pipe, buf, addr);
495 ret = error;
496 do_wakeup = 1;
497 if (error) {
498 if (atomic) {
499 atomic = 0;
500 goto redo1;
501 }
502 goto out;
503 }
504 buf->len += chars;
505 total_len -= chars;
506 ret = chars;
507 if (!total_len)
508 goto out;
509 }
510 }
511
512 for (;;) {
513 int bufs;
514
515 if (!pipe->readers) {
516 send_sig(SIGPIPE, current, 0);
517 if (!ret)
518 ret = -EPIPE;
519 break;
520 }
521 bufs = pipe->nrbufs;
522 if (bufs < pipe->buffers) {
523 int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
524 struct pipe_buffer *buf = pipe->bufs + newbuf;
525 struct page *page = pipe->tmp_page;
526 char *src;
527 int error, atomic = 1;
528
529 if (!page) {
530 page = alloc_page(GFP_HIGHUSER);
531 if (unlikely(!page)) {
532 ret = ret ? : -ENOMEM;
533 break;
534 }
535 pipe->tmp_page = page;
536 }
537 /* Always wake up, even if the copy fails. Otherwise
538 * we lock up (O_NONBLOCK-)readers that sleep due to
539 * syscall merging.
540 * FIXME! Is this really true?
541 */
542 do_wakeup = 1;
543 chars = PAGE_SIZE;
544 if (chars > total_len)
545 chars = total_len;
546
547 iov_fault_in_pages_read(iov, chars);
548 redo2:
549 if (atomic)
550 src = kmap_atomic(page, KM_USER0);
551 else
552 src = kmap(page);
553
554 error = pipe_iov_copy_from_user(src, iov, chars,
555 atomic);
556 if (atomic)
557 kunmap_atomic(src, KM_USER0);
558 else
559 kunmap(page);
560
561 if (unlikely(error)) {
562 if (atomic) {
563 atomic = 0;
564 goto redo2;
565 }
566 if (!ret)
567 ret = error;
568 break;
569 }
570 ret += chars;
571
572 /* Insert it into the buffer array */
573 buf->page = page;
574 buf->ops = &anon_pipe_buf_ops;
575 buf->offset = 0;
576 buf->len = chars;
577 pipe->nrbufs = ++bufs;
578 pipe->tmp_page = NULL;
579
580 total_len -= chars;
581 if (!total_len)
582 break;
583 }
584 if (bufs < pipe->buffers)
585 continue;
586 if (filp->f_flags & O_NONBLOCK) {
587 if (!ret)
588 ret = -EAGAIN;
589 break;
590 }
591 if (signal_pending(current)) {
592 if (!ret)
593 ret = -ERESTARTSYS;
594 break;
595 }
596 if (do_wakeup) {
597 wake_up_interruptible_sync(&pipe->wait);
598 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
599 do_wakeup = 0;
600 }
601 pipe->waiting_writers++;
602 pipe_wait(pipe);
603 pipe->waiting_writers--;
604 }
605 out:
606 mutex_unlock(&inode->i_mutex);
607 if (do_wakeup) {
608 wake_up_interruptible_sync(&pipe->wait);
609 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
610 }
611 if (ret > 0)
612 file_update_time(filp);
613 return ret;
614 }
615
616 static ssize_t
617 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
618 {
619 return -EBADF;
620 }
621
622 static ssize_t
623 bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
624 loff_t *ppos)
625 {
626 return -EBADF;
627 }
628
629 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
630 {
631 struct inode *inode = filp->f_path.dentry->d_inode;
632 struct pipe_inode_info *pipe;
633 int count, buf, nrbufs;
634
635 switch (cmd) {
636 case FIONREAD:
637 mutex_lock(&inode->i_mutex);
638 pipe = inode->i_pipe;
639 count = 0;
640 buf = pipe->curbuf;
641 nrbufs = pipe->nrbufs;
642 while (--nrbufs >= 0) {
643 count += pipe->bufs[buf].len;
644 buf = (buf+1) & (pipe->buffers - 1);
645 }
646 mutex_unlock(&inode->i_mutex);
647
648 return put_user(count, (int __user *)arg);
649 default:
650 return -EINVAL;
651 }
652 }
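
/*
* Userspace sketch of the FIONREAD ioctl handled above (illustrative
* only, not part of this file): ask how many bytes are currently queued
* in a pipe.
*
*	#include <sys/ioctl.h>
*
*	int pipe_bytes_queued(int pipe_fd)
*	{
*		int count;
*
*		if (ioctl(pipe_fd, FIONREAD, &count) < 0)
*			return -1;
*		return count;
*	}
*/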
653
654 /* No kernel lock held - fine */
655 static unsigned int
656 pipe_poll(struct file *filp, poll_table *wait)
657 {
658 unsigned int mask;
659 struct inode *inode = filp->f_path.dentry->d_inode;
660 struct pipe_inode_info *pipe = inode->i_pipe;
661 int nrbufs;
662
663 poll_wait(filp, &pipe->wait, wait);
664
665 /* Reading only -- no need for acquiring the semaphore. */
666 nrbufs = pipe->nrbufs;
667 mask = 0;
668 if (filp->f_mode & FMODE_READ) {
669 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
670 if (!pipe->writers && filp->f_version != pipe->w_counter)
671 mask |= POLLHUP;
672 }
673
674 if (filp->f_mode & FMODE_WRITE) {
675 mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0;
676 /*
677 * Most Unices do not set POLLERR for FIFOs but on Linux they
678 * behave exactly like pipes for poll().
679 */
680 if (!pipe->readers)
681 mask |= POLLERR;
682 }
683
684 return mask;
685 }
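
/*
* Userspace sketch of the semantics implemented above (illustrative
* only): on the read end POLLIN means buffered data is available and
* POLLHUP means the last writer has gone away; on the write end POLLOUT
* means there is room for at least one more buffer and POLLERR means
* there are no readers left.
*
*	#include <poll.h>
*
*	int wait_for_pipe_data(int pipe_read_fd, int timeout_ms)
*	{
*		struct pollfd pfd = { .fd = pipe_read_fd, .events = POLLIN };
*		int n = poll(&pfd, 1, timeout_ms);
*
*		if (n <= 0)
*			return n;
*		return (pfd.revents & POLLIN) ? 1 : 0;
*	}
*/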
686
687 static int
688 pipe_release(struct inode *inode, int decr, int decw)
689 {
690 struct pipe_inode_info *pipe;
691
692 mutex_lock(&inode->i_mutex);
693 pipe = inode->i_pipe;
694 pipe->readers -= decr;
695 pipe->writers -= decw;
696
697 if (!pipe->readers && !pipe->writers) {
698 free_pipe_info(inode);
699 } else {
700 wake_up_interruptible_sync(&pipe->wait);
701 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
702 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
703 }
704 mutex_unlock(&inode->i_mutex);
705
706 return 0;
707 }
708
709 static int
710 pipe_read_fasync(int fd, struct file *filp, int on)
711 {
712 struct inode *inode = filp->f_path.dentry->d_inode;
713 int retval;
714
715 mutex_lock(&inode->i_mutex);
716 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
717 mutex_unlock(&inode->i_mutex);
718
719 return retval;
720 }
721
722
723 static int
724 pipe_write_fasync(int fd, struct file *filp, int on)
725 {
726 struct inode *inode = filp->f_path.dentry->d_inode;
727 int retval;
728
729 mutex_lock(&inode->i_mutex);
730 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
731 mutex_unlock(&inode->i_mutex);
732
733 return retval;
734 }
735
736
737 static int
738 pipe_rdwr_fasync(int fd, struct file *filp, int on)
739 {
740 struct inode *inode = filp->f_path.dentry->d_inode;
741 struct pipe_inode_info *pipe = inode->i_pipe;
742 int retval;
743
744 mutex_lock(&inode->i_mutex);
745 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
746 if (retval >= 0) {
747 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
748 if (retval < 0) /* this can happen only if on == T */
749 fasync_helper(-1, filp, 0, &pipe->fasync_readers);
750 }
751 mutex_unlock(&inode->i_mutex);
752 return retval;
753 }
754
755
756 static int
757 pipe_read_release(struct inode *inode, struct file *filp)
758 {
759 return pipe_release(inode, 1, 0);
760 }
761
762 static int
763 pipe_write_release(struct inode *inode, struct file *filp)
764 {
765 return pipe_release(inode, 0, 1);
766 }
767
768 static int
769 pipe_rdwr_release(struct inode *inode, struct file *filp)
770 {
771 int decr, decw;
772
773 decr = (filp->f_mode & FMODE_READ) != 0;
774 decw = (filp->f_mode & FMODE_WRITE) != 0;
775 return pipe_release(inode, decr, decw);
776 }
777
778 static int
779 pipe_read_open(struct inode *inode, struct file *filp)
780 {
781 int ret = -ENOENT;
782
783 mutex_lock(&inode->i_mutex);
784
785 if (inode->i_pipe) {
786 ret = 0;
787 inode->i_pipe->readers++;
788 }
789
790 mutex_unlock(&inode->i_mutex);
791
792 return ret;
793 }
794
795 static int
796 pipe_write_open(struct inode *inode, struct file *filp)
797 {
798 int ret = -ENOENT;
799
800 mutex_lock(&inode->i_mutex);
801
802 if (inode->i_pipe) {
803 ret = 0;
804 inode->i_pipe->writers++;
805 }
806
807 mutex_unlock(&inode->i_mutex);
808
809 return ret;
810 }
811
812 static int
813 pipe_rdwr_open(struct inode *inode, struct file *filp)
814 {
815 int ret = -ENOENT;
816
817 mutex_lock(&inode->i_mutex);
818
819 if (inode->i_pipe) {
820 ret = 0;
821 if (filp->f_mode & FMODE_READ)
822 inode->i_pipe->readers++;
823 if (filp->f_mode & FMODE_WRITE)
824 inode->i_pipe->writers++;
825 }
826
827 mutex_unlock(&inode->i_mutex);
828
829 return ret;
830 }
831
832 /*
833 * The file_operations structs are not static because they
834 * are also used in linux/fs/fifo.c to do operations on FIFOs.
835 *
836 * Pipes reuse fifos' file_operations structs.
837 */
838 const struct file_operations read_pipefifo_fops = {
839 .llseek = no_llseek,
840 .read = do_sync_read,
841 .aio_read = pipe_read,
842 .write = bad_pipe_w,
843 .poll = pipe_poll,
844 .unlocked_ioctl = pipe_ioctl,
845 .open = pipe_read_open,
846 .release = pipe_read_release,
847 .fasync = pipe_read_fasync,
848 };
849
850 const struct file_operations write_pipefifo_fops = {
851 .llseek = no_llseek,
852 .read = bad_pipe_r,
853 .write = do_sync_write,
854 .aio_write = pipe_write,
855 .poll = pipe_poll,
856 .unlocked_ioctl = pipe_ioctl,
857 .open = pipe_write_open,
858 .release = pipe_write_release,
859 .fasync = pipe_write_fasync,
860 };
861
862 const struct file_operations rdwr_pipefifo_fops = {
863 .llseek = no_llseek,
864 .read = do_sync_read,
865 .aio_read = pipe_read,
866 .write = do_sync_write,
867 .aio_write = pipe_write,
868 .poll = pipe_poll,
869 .unlocked_ioctl = pipe_ioctl,
870 .open = pipe_rdwr_open,
871 .release = pipe_rdwr_release,
872 .fasync = pipe_rdwr_fasync,
873 };
874
875 struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
876 {
877 struct pipe_inode_info *pipe;
878
879 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
880 if (pipe) {
881 pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
882 if (pipe->bufs) {
883 init_waitqueue_head(&pipe->wait);
884 pipe->r_counter = pipe->w_counter = 1;
885 pipe->inode = inode;
886 pipe->buffers = PIPE_DEF_BUFFERS;
887 return pipe;
888 }
889 kfree(pipe);
890 }
891
892 return NULL;
893 }
894
895 void __free_pipe_info(struct pipe_inode_info *pipe)
896 {
897 int i;
898
899 for (i = 0; i < pipe->buffers; i++) {
900 struct pipe_buffer *buf = pipe->bufs + i;
901 if (buf->ops)
902 buf->ops->release(pipe, buf);
903 }
904 if (pipe->tmp_page)
905 __free_page(pipe->tmp_page);
906 kfree(pipe->bufs);
907 kfree(pipe);
908 }
909
910 void free_pipe_info(struct inode *inode)
911 {
912 __free_pipe_info(inode->i_pipe);
913 inode->i_pipe = NULL;
914 }
915
916 static struct vfsmount *pipe_mnt __read_mostly;
917
918 /*
919 * pipefs_dname() is called from d_path().
920 */
921 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
922 {
923 return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
924 dentry->d_inode->i_ino);
925 }
926
927 static const struct dentry_operations pipefs_dentry_operations = {
928 .d_dname = pipefs_dname,
929 };
930
931 static struct inode * get_pipe_inode(void)
932 {
933 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
934 struct pipe_inode_info *pipe;
935
936 if (!inode)
937 goto fail_inode;
938
939 pipe = alloc_pipe_info(inode);
940 if (!pipe)
941 goto fail_iput;
942 inode->i_pipe = pipe;
943
944 pipe->readers = pipe->writers = 1;
945 inode->i_fop = &rdwr_pipefifo_fops;
946
947 /*
948 * Mark the inode dirty from the very beginning,
949 * that way it will never be moved to the dirty
950 * list because "mark_inode_dirty()" will think
951 * that it already _is_ on the dirty list.
952 */
953 inode->i_state = I_DIRTY;
954 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
955 inode->i_uid = current_fsuid();
956 inode->i_gid = current_fsgid();
957 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
958
959 return inode;
960
961 fail_iput:
962 iput(inode);
963
964 fail_inode:
965 return NULL;
966 }
967
968 struct file *create_write_pipe(int flags)
969 {
970 int err;
971 struct inode *inode;
972 struct file *f;
973 struct path path;
974 struct qstr name = { .name = "" };
975
976 err = -ENFILE;
977 inode = get_pipe_inode();
978 if (!inode)
979 goto err;
980
981 err = -ENOMEM;
982 path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
983 if (!path.dentry)
984 goto err_inode;
985 path.mnt = mntget(pipe_mnt);
986
987 path.dentry->d_op = &pipefs_dentry_operations;
988 d_instantiate(path.dentry, inode);
989
990 err = -ENFILE;
991 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
992 if (!f)
993 goto err_dentry;
994 f->f_mapping = inode->i_mapping;
995
996 f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
997 f->f_version = 0;
998
999 return f;
1000
1001 err_dentry:
1002 free_pipe_info(inode);
1003 path_put(&path);
1004 return ERR_PTR(err);
1005
1006 err_inode:
1007 free_pipe_info(inode);
1008 iput(inode);
1009 err:
1010 return ERR_PTR(err);
1011 }
1012
1013 void free_write_pipe(struct file *f)
1014 {
1015 free_pipe_info(f->f_dentry->d_inode);
1016 path_put(&f->f_path);
1017 put_filp(f);
1018 }
1019
1020 struct file *create_read_pipe(struct file *wrf, int flags)
1021 {
1022 /* Grab pipe from the writer */
1023 struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
1024 &read_pipefifo_fops);
1025 if (!f)
1026 return ERR_PTR(-ENFILE);
1027
1028 path_get(&wrf->f_path);
1029 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1030
1031 return f;
1032 }
1033
1034 int do_pipe_flags(int *fd, int flags)
1035 {
1036 struct file *fw, *fr;
1037 int error;
1038 int fdw, fdr;
1039
1040 if (flags & ~(O_CLOEXEC | O_NONBLOCK))
1041 return -EINVAL;
1042
1043 fw = create_write_pipe(flags);
1044 if (IS_ERR(fw))
1045 return PTR_ERR(fw);
1046 fr = create_read_pipe(fw, flags);
1047 error = PTR_ERR(fr);
1048 if (IS_ERR(fr))
1049 goto err_write_pipe;
1050
1051 error = get_unused_fd_flags(flags);
1052 if (error < 0)
1053 goto err_read_pipe;
1054 fdr = error;
1055
1056 error = get_unused_fd_flags(flags);
1057 if (error < 0)
1058 goto err_fdr;
1059 fdw = error;
1060
1061 audit_fd_pair(fdr, fdw);
1062 fd_install(fdr, fr);
1063 fd_install(fdw, fw);
1064 fd[0] = fdr;
1065 fd[1] = fdw;
1066
1067 return 0;
1068
1069 err_fdr:
1070 put_unused_fd(fdr);
1071 err_read_pipe:
1072 path_put(&fr->f_path);
1073 put_filp(fr);
1074 err_write_pipe:
1075 free_write_pipe(fw);
1076 return error;
1077 }
1078
1079 /*
1080 * sys_pipe() is the normal C calling standard for creating
1081 * a pipe. It's not the way Unix traditionally does this, though.
1082 */
1083 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
1084 {
1085 int fd[2];
1086 int error;
1087
1088 error = do_pipe_flags(fd, flags);
1089 if (!error) {
1090 if (copy_to_user(fildes, fd, sizeof(fd))) {
1091 sys_close(fd[0]);
1092 sys_close(fd[1]);
1093 error = -EFAULT;
1094 }
1095 }
1096 return error;
1097 }
1098
1099 SYSCALL_DEFINE1(pipe, int __user *, fildes)
1100 {
1101 return sys_pipe2(fildes, 0);
1102 }
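
/*
* Userspace sketch of the two system calls defined above (illustrative
* only): create a pipe whose descriptors are close-on-exec and
* non-blocking in one step, instead of pipe() followed by fcntl().
*
*	#define _GNU_SOURCE
*	#include <fcntl.h>
*	#include <unistd.h>
*
*	int make_pipe(int fds[2])
*	{
*		return pipe2(fds, O_CLOEXEC | O_NONBLOCK);
*	}
*
* fds[0] is the read end and fds[1] the write end, matching the
* fd[0]/fd[1] assignment in do_pipe_flags() above.
*/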
1103
1104 /*
1105 * Allocate a new array of pipe buffers and copy the info over. Returns the
1106 * new pipe size on success, or a negative error code on failure.
1107 */
1108 static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
1109 {
1110 struct pipe_buffer *bufs;
1111
1112 /*
1113 * Must be a power-of-2 currently
1114 */
1115 if (!is_power_of_2(arg))
1116 return -EINVAL;
1117
1118 /*
1119 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
1120 * expect a lot of shrink+grow operations, just free and allocate
1121 * again like we would do for growing. If the pipe currently
1122 * contains more buffers than arg, then return busy.
1123 */
1124 if (arg < pipe->nrbufs)
1125 return -EBUSY;
1126
1127 bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
1128 if (unlikely(!bufs))
1129 return -ENOMEM;
1130
1131 /*
1132 * The pipe array wraps around, so just start the new one at zero
1133 * and adjust the indexes.
1134 */
1135 if (pipe->nrbufs) {
1136 const unsigned int head = min_t(unsigned int, pipe->nrbufs, pipe->buffers - pipe->curbuf);
1137 const unsigned int tail = pipe->nrbufs - head;
1138
1139 if (head)
1140 memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
1141 if (tail)
1142 memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
1143 }
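
/*
* Example of the wrap-around copy above: with pipe->buffers == 8,
* pipe->curbuf == 6 and pipe->nrbufs == 3, the occupied slots are 6, 7
* and 0, so head is 2 (slots 6 and 7 go to bufs[0] and bufs[1]) and
* tail is 1 (slot 0 goes to bufs[2]); curbuf then restarts at zero in
* the new array.
*/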
1144
1145 pipe->curbuf = 0;
1146 kfree(pipe->bufs);
1147 pipe->bufs = bufs;
1148 pipe->buffers = arg;
1149 return arg;
1150 }
1151
1152 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1153 {
1154 struct pipe_inode_info *pipe;
1155 long ret;
1156
1157 pipe = file->f_path.dentry->d_inode->i_pipe;
1158 if (!pipe)
1159 return -EBADF;
1160
1161 mutex_lock(&pipe->inode->i_mutex);
1162
1163 switch (cmd) {
1164 case F_SETPIPE_SZ:
1165 ret = pipe_set_size(pipe, arg);
1166 break;
1167 case F_GETPIPE_SZ:
1168 ret = pipe->buffers;
1169 break;
1170 default:
1171 ret = -EINVAL;
1172 break;
1173 }
1174
1175 mutex_unlock(&pipe->inode->i_mutex);
1176 return ret;
1177 }
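
/*
* Userspace sketch of the fcntl commands handled above (illustrative
* only).  In this version of the code the argument and return value of
* F_SETPIPE_SZ/F_GETPIPE_SZ are a number of pipe buffers and must be a
* power of two; later kernels switched the interface to a byte count.
* The constants are assumed to be visible via <fcntl.h>.
*
*	#define _GNU_SOURCE
*	#include <fcntl.h>
*
*	long grow_pipe(int fd, long nr_bufs)
*	{
*		if (fcntl(fd, F_SETPIPE_SZ, nr_bufs) < 0)
*			return -1;
*		return fcntl(fd, F_GETPIPE_SZ);
*	}
*/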
1178
1179 /*
1180 * pipefs should _never_ be mounted by userland - too much of a security hassle,
1181 * no real gain from having the whole whorehouse mounted. So we don't need
1182 * any operations on the root directory. However, we need a non-trivial
1183 * d_name - pipe: will go nicely and kill the special-casing in procfs.
1184 */
1185 static int pipefs_get_sb(struct file_system_type *fs_type,
1186 int flags, const char *dev_name, void *data,
1187 struct vfsmount *mnt)
1188 {
1189 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
1190 }
1191
1192 static struct file_system_type pipe_fs_type = {
1193 .name = "pipefs",
1194 .get_sb = pipefs_get_sb,
1195 .kill_sb = kill_anon_super,
1196 };
1197
1198 static int __init init_pipe_fs(void)
1199 {
1200 int err = register_filesystem(&pipe_fs_type);
1201
1202 if (!err) {
1203 pipe_mnt = kern_mount(&pipe_fs_type);
1204 if (IS_ERR(pipe_mnt)) {
1205 err = PTR_ERR(pipe_mnt);
1206 unregister_filesystem(&pipe_fs_type);
1207 }
1208 }
1209 return err;
1210 }
1211
1212 static void __exit exit_pipe_fs(void)
1213 {
1214 unregister_filesystem(&pipe_fs_type);
1215 mntput(pipe_mnt);
1216 }
1217
1218 fs_initcall(init_pipe_fs);
1219 module_exit(exit_pipe_fs);