1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (C) 2017 Red Hat, Inc.
6 #include <linux/cred.h>
7 #include <linux/file.h>
8 #include <linux/mount.h>
9 #include <linux/xattr.h>
10 #include <linux/uio.h>
11 #include <linux/uaccess.h>
12 #include <linux/splice.h>
13 #include <linux/security.h>
16 #include "overlayfs.h"
/*
 * NOTE(review): the declaration below looks like a member of
 * struct ovl_aio_req (ovl_aio_cleanup_handler() reads aio_req->orig_iocb);
 * the struct's opening line is not visible in this extract.
 */
20 struct kiocb
*orig_iocb
;
/*
 * Slab cache for struct ovl_aio_req objects: created in
 * ovl_aio_request_cache_init(), allocated from in the async branches of
 * ovl_read_iter()/ovl_write_iter(), freed in ovl_aio_cleanup_handler().
 */
24 static struct kmem_cache
*ovl_aio_request_cachep
;
/*
 * Classify @realinode relative to overlay @inode as a single character;
 * the result is printed with %c by the pr_debug() in ovl_open_realfile().
 *
 * The visible checks compare @realinode against ovl_inode_upper(inode) and
 * test ovl_has_upperdata(inode).
 * NOTE(review): the characters actually returned are on lines missing from
 * this extract.
 */
26 static char ovl_whatisit(struct inode
*inode
, struct inode
*realinode
)
28 if (realinode
!= ovl_inode_upper(inode
))
30 if (ovl_has_upperdata(inode
))
/*
 * Extra bits OR-ed into the open flags by ovl_open_realfile() and masked
 * out of the flag comparison in ovl_real_fdget_meta().
 */
36 /* No atime modification nor notify on underlying */
37 #define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
/*
 * Open @realinode on behalf of overlay file @file.
 *
 * The open flags are @file->f_flags plus OVL_OPEN_FLAGS.  Between
 * ovl_override_creds() and revert_creds() the function runs
 * inode_permission() for MAY_OPEN plus the access mode derived from the
 * flags, and opens the file with open_with_fake_path() using the overlay
 * file's path.  A permission error is turned into an ERR_PTR().  The
 * outcome is traced with pr_debug().
 *
 * Callers treat the result as a struct file pointer or an ERR_PTR().
 * NOTE(review): the declaration of err, the branch structure and the return
 * statement are on lines missing from this extract.
 */
39 static struct file
*ovl_open_realfile(const struct file
*file
,
40 struct inode
*realinode
)
42 struct inode
*inode
= file_inode(file
);
43 struct file
*realfile
;
44 const struct cred
*old_cred
;
45 int flags
= file
->f_flags
| OVL_OPEN_FLAGS
;
46 int acc_mode
= ACC_MODE(flags
);
/* NOTE(review): the condition guarding this statement is not visible here. */
50 acc_mode
|= MAY_APPEND
;
52 old_cred
= ovl_override_creds(inode
->i_sb
);
53 err
= inode_permission(&init_user_ns
, realinode
, MAY_OPEN
| acc_mode
);
55 realfile
= ERR_PTR(err
);
/* NOTE(review): the statement controlled by this check is not visible here. */
57 if (!inode_owner_or_capable(&init_user_ns
, realinode
))
60 realfile
= open_with_fake_path(&file
->f_path
, flags
, realinode
,
63 revert_creds(old_cred
);
/* On error the real file's flags are reported as 0. */
65 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
66 file
, file
, ovl_whatisit(inode
, realinode
), file
->f_flags
,
67 realfile
, IS_ERR(realfile
) ? 0 : realfile
->f_flags
);
/* The only f_flags bits that ovl_change_flags() replaces on a file. */
72 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
/*
 * Replace the OVL_SETFL_MASK bits of @file->f_flags with those in @flags.
 *
 * @flags is first reduced to OVL_SETFL_MASK.  Checks visible before the
 * update:
 *  - an O_APPEND change on an inode for which IS_APPEND() is true;
 *  - O_DIRECT requested on a mapping with no a_ops or no ->direct_IO;
 *  - the file's optional ->check_flags() hook.
 * NOTE(review): what each check returns on failure, and the final return,
 * are on lines missing from this extract.
 *
 * The flag word itself is rewritten under file->f_lock.
 */
74 static int ovl_change_flags(struct file
*file
, unsigned int flags
)
76 struct inode
*inode
= file_inode(file
);
79 flags
&= OVL_SETFL_MASK
;
81 if (((flags
^ file
->f_flags
) & O_APPEND
) && IS_APPEND(inode
))
84 if (flags
& O_DIRECT
) {
85 if (!file
->f_mapping
->a_ops
||
86 !file
->f_mapping
->a_ops
->direct_IO
)
90 if (file
->f_op
->check_flags
) {
91 err
= file
->f_op
->check_flags(flags
);
96 spin_lock(&file
->f_lock
);
97 file
->f_flags
= (file
->f_flags
& ~OVL_SETFL_MASK
) | flags
;
98 spin_unlock(&file
->f_lock
);
/*
 * Fill @real with the real file that backs overlay file @file.
 *
 * @real->file starts out as the file cached in @file->private_data.  The
 * expected real inode comes from ovl_inode_real() or ovl_inode_realdata().
 * NOTE(review): the third parameter and the condition choosing between the
 * two are on lines missing from this extract; callers pass a boolean.
 *
 * If the cached file does not refer to the expected inode, a fresh real
 * file is opened with ovl_open_realfile(), @real->flags is set to
 * FDPUT_FPUT, and PTR_ERR_OR_ZERO() of the new file is returned.
 * Otherwise, if the overlay and real f_flags differ outside OVL_OPEN_FLAGS,
 * the result of ovl_change_flags() on the real file is returned.
 */
103 static int ovl_real_fdget_meta(const struct file
*file
, struct fd
*real
,
106 struct inode
*inode
= file_inode(file
);
107 struct inode
*realinode
;
110 real
->file
= file
->private_data
;
113 realinode
= ovl_inode_real(inode
);
115 realinode
= ovl_inode_realdata(inode
);
117 /* Has it been copied up since we'd opened it? */
118 if (unlikely(file_inode(real
->file
) != realinode
)) {
119 real
->flags
= FDPUT_FPUT
;
120 real
->file
= ovl_open_realfile(file
, realinode
);
122 return PTR_ERR_OR_ZERO(real
->file
);
125 /* Did the flags change since open? */
126 if (unlikely((file
->f_flags
^ real
->file
->f_flags
) & ~OVL_OPEN_FLAGS
))
127 return ovl_change_flags(real
->file
, file
->f_flags
);
/*
 * Resolve the real file for overlay file @file into @real.
 *
 * Directories are resolved with ovl_dir_real_file(file, false) and return
 * PTR_ERR_OR_ZERO() of that file.  Everything else is delegated to
 * ovl_real_fdget_meta() with its last argument false.
 */
132 static int ovl_real_fdget(const struct file
*file
, struct fd
*real
)
134 if (d_is_dir(file_dentry(file
))) {
136 real
->file
= ovl_dir_real_file(file
, false);
138 return PTR_ERR_OR_ZERO(real
->file
);
141 return ovl_real_fdget_meta(file
, real
, false);
/*
 * Open an overlay file.
 * NOTE(review): presumably the ->open file operation; its table entry is
 * not visible in this extract.
 *
 * Calls ovl_maybe_copy_up() with the open flags, strips the creation-time
 * flags from @file->f_flags, opens ovl_inode_realdata(inode) through
 * ovl_open_realfile() and caches the result in @file->private_data.
 * Returns PTR_ERR() of the real file if that open fails.
 * NOTE(review): handling of the ovl_maybe_copy_up() result and the final
 * return are on lines missing from this extract.
 */
144 static int ovl_open(struct inode
*inode
, struct file
*file
)
146 struct file
*realfile
;
149 err
= ovl_maybe_copy_up(file_dentry(file
), file
->f_flags
);
153 /* No longer need these flags, so don't pass them on to underlying fs */
154 file
->f_flags
&= ~(O_CREAT
| O_EXCL
| O_NOCTTY
| O_TRUNC
);
156 realfile
= ovl_open_realfile(file
, ovl_inode_realdata(inode
));
157 if (IS_ERR(realfile
))
158 return PTR_ERR(realfile
);
160 file
->private_data
= realfile
;
/*
 * .release handler in ovl_file_operations: drops the reference on the real
 * file that ovl_open() stored in @file->private_data.
 */
165 static int ovl_release(struct inode
*inode
, struct file
*file
)
167 fput(file
->private_data
);
/*
 * .llseek handler in ovl_file_operations.
 *
 * SEEK_CUR and SEEK_SET are special-cased up front, as described by the
 * first comment fragment below.
 * NOTE(review): the enclosing condition and part of those branches are on
 * lines missing from this extract.
 *
 * Otherwise the seek runs on the real file.  Under ovl_inode_lock() the
 * overlay f_pos is copied to the real file, vfs_llseek() is called with
 * overridden credentials, and the resulting f_pos is copied back.
 */
172 static loff_t
ovl_llseek(struct file
*file
, loff_t offset
, int whence
)
174 struct inode
*inode
= file_inode(file
);
176 const struct cred
*old_cred
;
180 * The two special cases below do not need to involve real fs,
181 * so we can optimizing concurrent callers.
184 if (whence
== SEEK_CUR
)
187 if (whence
== SEEK_SET
)
188 return vfs_setpos(file
, 0, 0);
191 ret
= ovl_real_fdget(file
, &real
);
196 * Overlay file f_pos is the master copy that is preserved
197 * through copy up and modified on read/write, but only real
198 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
199 * limitations that are more strict than ->s_maxbytes for specific
200 * files, so we use the real file to perform seeks.
202 ovl_inode_lock(inode
);
203 real
.file
->f_pos
= file
->f_pos
;
205 old_cred
= ovl_override_creds(inode
->i_sb
);
206 ret
= vfs_llseek(real
.file
, offset
, whence
);
207 revert_creds(old_cred
);
209 file
->f_pos
= real
.file
->f_pos
;
210 ovl_inode_unlock(inode
);
/*
 * Record an access to overlay file @file; called after reads and mmap.
 *
 * O_NOATIME on @file is tested first.
 * NOTE(review): the statement under that test, and any check on upperinode
 * before it is dereferenced, are on lines missing from this extract.
 *
 * If the overlay inode's i_mtime or i_ctime differs from the upper inode's,
 * both are copied from the upper inode.  touch_atime() is then called on
 * the overlay file's path.
 */
217 static void ovl_file_accessed(struct file
*file
)
219 struct inode
*inode
, *upperinode
;
221 if (file
->f_flags
& O_NOATIME
)
224 inode
= file_inode(file
);
225 upperinode
= ovl_inode_upper(inode
);
230 if ((!timespec64_equal(&inode
->i_mtime
, &upperinode
->i_mtime
) ||
231 !timespec64_equal(&inode
->i_ctime
, &upperinode
->i_ctime
))) {
232 inode
->i_mtime
= upperinode
->i_mtime
;
233 inode
->i_ctime
= upperinode
->i_ctime
;
236 touch_atime(&file
->f_path
);
/*
 * Convert the kiocb flag word @ifl into an rwf_t for vfs_iter_read() and
 * vfs_iter_write().  IOCB_NOWAIT, IOCB_HIPRI and IOCB_DSYNC are tested.
 * NOTE(review): the rwf_t bits set for each test, and any further tests,
 * are on lines missing from this extract.
 */
239 static rwf_t
ovl_iocb_to_rwf(int ifl
)
243 if (ifl
& IOCB_NOWAIT
)
245 if (ifl
& IOCB_HIPRI
)
247 if (ifl
& IOCB_DSYNC
)
/*
 * Tear down an overlay AIO request once the real I/O is finished or has
 * failed to queue.
 *
 * For writes (IOCB_WRITE on the cloned kiocb), this re-takes the sb-writers
 * lockdep annotation released in ovl_write_iter(), ends the write on the
 * real file and refreshes the overlay inode with ovl_copyattr().
 * In every case the final position is copied back to the original kiocb
 * and @aio_req is returned to ovl_aio_request_cachep.
 * NOTE(review): some statements (e.g. the second argument of
 * __sb_writers_acquired()) are on lines missing from this extract.
 */
255 static void ovl_aio_cleanup_handler(struct ovl_aio_req
*aio_req
)
257 struct kiocb
*iocb
= &aio_req
->iocb
;
258 struct kiocb
*orig_iocb
= aio_req
->orig_iocb
;
260 if (iocb
->ki_flags
& IOCB_WRITE
) {
261 struct inode
*inode
= file_inode(orig_iocb
->ki_filp
);
263 /* Actually acquired in ovl_write_iter() */
264 __sb_writers_acquired(file_inode(iocb
->ki_filp
)->i_sb
,
266 file_end_write(iocb
->ki_filp
);
267 ovl_copyattr(ovl_inode_real(inode
), inode
);
270 orig_iocb
->ki_pos
= iocb
->ki_pos
;
272 kmem_cache_free(ovl_aio_request_cachep
, aio_req
);
/*
 * Completion callback installed as ki_complete on the cloned kiocb by
 * ovl_read_iter() and ovl_write_iter().
 *
 * Recovers the enclosing ovl_aio_req with container_of(), cleans it up and
 * forwards @res and @res2 to the original kiocb's ki_complete.
 * orig_iocb is read before the cleanup call because that call frees
 * aio_req.
 */
275 static void ovl_aio_rw_complete(struct kiocb
*iocb
, long res
, long res2
)
277 struct ovl_aio_req
*aio_req
= container_of(iocb
,
278 struct ovl_aio_req
, iocb
);
279 struct kiocb
*orig_iocb
= aio_req
->orig_iocb
;
281 ovl_aio_cleanup_handler(aio_req
);
282 orig_iocb
->ki_complete(orig_iocb
, res
, res2
);
/*
 * .read_iter handler in ovl_file_operations.
 *
 * Visible flow:
 *  - test for an empty iterator;
 *  - resolve the real file with ovl_real_fdget();
 *  - for IOCB_DIRECT, test whether the real mapping has a ->direct_IO op;
 *  - with overridden credentials, either read synchronously through
 *    vfs_iter_read() at iocb->ki_pos, or allocate an ovl_aio_req, clone the
 *    kiocb onto the real file and submit with vfs_iocb_iter_read();
 *  - restore credentials and call ovl_file_accessed().
 * NOTE(review): error paths, handling of a failed allocation and the return
 * statement are on lines missing from this extract.
 */
285 static ssize_t
ovl_read_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
287 struct file
*file
= iocb
->ki_filp
;
289 const struct cred
*old_cred
;
292 if (!iov_iter_count(iter
))
295 ret
= ovl_real_fdget(file
, &real
);
300 if (iocb
->ki_flags
& IOCB_DIRECT
&&
301 (!real
.file
->f_mapping
->a_ops
||
302 !real
.file
->f_mapping
->a_ops
->direct_IO
))
305 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
306 if (is_sync_kiocb(iocb
)) {
307 ret
= vfs_iter_read(real
.file
, iter
, &iocb
->ki_pos
,
308 ovl_iocb_to_rwf(iocb
->ki_flags
));
310 struct ovl_aio_req
*aio_req
;
313 aio_req
= kmem_cache_zalloc(ovl_aio_request_cachep
, GFP_KERNEL
);
319 aio_req
->orig_iocb
= iocb
;
320 kiocb_clone(&aio_req
->iocb
, iocb
, real
.file
);
321 aio_req
->iocb
.ki_complete
= ovl_aio_rw_complete
;
322 ret
= vfs_iocb_iter_read(real
.file
, &aio_req
->iocb
, iter
);
/* Any result other than -EIOCBQUEUED is cleaned up here, not in ovl_aio_rw_complete(). */
323 if (ret
!= -EIOCBQUEUED
)
324 ovl_aio_cleanup_handler(aio_req
);
327 revert_creds(old_cred
);
328 ovl_file_accessed(file
);
/*
 * .write_iter handler in ovl_file_operations.
 *
 * Visible flow:
 *  - test for an empty iterator;
 *  - refresh the overlay inode with ovl_copyattr() and call
 *    file_remove_privs();
 *  - resolve the real file with ovl_real_fdget();
 *  - for IOCB_DIRECT, test whether the real mapping has a ->direct_IO op;
 *  - clear IOCB_DSYNC | IOCB_SYNC from the local flag copy when
 *    ovl_should_sync() is false;
 *  - with overridden credentials, either write synchronously through
 *    vfs_iter_write() bracketed by file_start_write()/file_end_write() and
 *    followed by ovl_copyattr(), or allocate an ovl_aio_req, clone the
 *    kiocb onto the real file with the adjusted flags and submit with
 *    vfs_iocb_iter_write().
 * NOTE(review): locking, error paths, handling of a failed allocation and
 * the return statement are on lines missing from this extract.
 */
335 static ssize_t
ovl_write_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
337 struct file
*file
= iocb
->ki_filp
;
338 struct inode
*inode
= file_inode(file
);
340 const struct cred
*old_cred
;
342 int ifl
= iocb
->ki_flags
;
344 if (!iov_iter_count(iter
))
349 ovl_copyattr(ovl_inode_real(inode
), inode
);
350 ret
= file_remove_privs(file
);
354 ret
= ovl_real_fdget(file
, &real
);
359 if (iocb
->ki_flags
& IOCB_DIRECT
&&
360 (!real
.file
->f_mapping
->a_ops
||
361 !real
.file
->f_mapping
->a_ops
->direct_IO
))
364 if (!ovl_should_sync(OVL_FS(inode
->i_sb
)))
365 ifl
&= ~(IOCB_DSYNC
| IOCB_SYNC
);
367 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
368 if (is_sync_kiocb(iocb
)) {
369 file_start_write(real
.file
);
370 ret
= vfs_iter_write(real
.file
, iter
, &iocb
->ki_pos
,
371 ovl_iocb_to_rwf(ifl
));
372 file_end_write(real
.file
);
374 ovl_copyattr(ovl_inode_real(inode
), inode
);
376 struct ovl_aio_req
*aio_req
;
379 aio_req
= kmem_cache_zalloc(ovl_aio_request_cachep
, GFP_KERNEL
);
/* The matching file_end_write() for the async path is in ovl_aio_cleanup_handler(). */
383 file_start_write(real
.file
);
384 /* Pacify lockdep, same trick as done in aio_write() */
385 __sb_writers_release(file_inode(real
.file
)->i_sb
,
389 aio_req
->orig_iocb
= iocb
;
390 kiocb_clone(&aio_req
->iocb
, iocb
, real
.file
);
391 aio_req
->iocb
.ki_flags
= ifl
;
392 aio_req
->iocb
.ki_complete
= ovl_aio_rw_complete
;
393 ret
= vfs_iocb_iter_write(real
.file
, &aio_req
->iocb
, iter
);
/* Any result other than -EIOCBQUEUED is cleaned up here, not in ovl_aio_rw_complete(). */
394 if (ret
!= -EIOCBQUEUED
)
395 ovl_aio_cleanup_handler(aio_req
);
398 revert_creds(old_cred
);
409 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
410 * due to lock order inversion between pipe->mutex in iter_file_splice_write()
411 * and file_start_write(real.file) in ovl_write_iter().
413 * So do everything ovl_write_iter() does and call iter_file_splice_write() on
/*
 * .splice_write handler in ovl_file_operations.
 *
 * Mirrors the preparation done in ovl_write_iter(): ovl_copyattr() plus
 * file_remove_privs() on the overlay file, then ovl_real_fdget().  With
 * overridden credentials it calls iter_file_splice_write() on the real
 * file between file_start_write() and file_end_write(), then refreshes the
 * overlay inode attributes again.
 * NOTE(review): locking, error paths and the return statement are on lines
 * missing from this extract.
 */
416 static ssize_t
ovl_splice_write(struct pipe_inode_info
*pipe
, struct file
*out
,
417 loff_t
*ppos
, size_t len
, unsigned int flags
)
420 const struct cred
*old_cred
;
421 struct inode
*inode
= file_inode(out
);
422 struct inode
*realinode
= ovl_inode_real(inode
);
427 ovl_copyattr(realinode
, inode
);
428 ret
= file_remove_privs(out
);
432 ret
= ovl_real_fdget(out
, &real
);
436 old_cred
= ovl_override_creds(inode
->i_sb
);
437 file_start_write(real
.file
);
439 ret
= iter_file_splice_write(pipe
, real
.file
, ppos
, len
, flags
);
441 file_end_write(real
.file
);
443 ovl_copyattr(realinode
, inode
);
444 revert_creds(old_cred
);
/*
 * Sync an overlay file.
 *
 * Queries ovl_sync_status() for the filesystem first, then resolves the
 * real file with ovl_real_fdget_meta(), passing !datasync as its last
 * argument.  vfs_fsync_range() is called, with overridden credentials, only
 * when the real file's inode is the overlay inode's upper inode.
 * NOTE(review): handling of the ovl_sync_status() result, error paths and
 * the return statement are on lines missing from this extract.
 */
453 static int ovl_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
456 const struct cred
*old_cred
;
459 ret
= ovl_sync_status(OVL_FS(file_inode(file
)->i_sb
));
463 ret
= ovl_real_fdget_meta(file
, &real
, !datasync
);
467 /* Don't sync lower file for fear of receiving EROFS error */
468 if (file_inode(real
.file
) == ovl_inode_upper(file_inode(file
))) {
469 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
470 ret
= vfs_fsync_range(real
.file
, start
, end
, datasync
);
471 revert_creds(old_cred
);
/*
 * Map an overlay file by redirecting the VMA to the real file.
 *
 * Uses the real file cached in @file->private_data.  Tests that the real
 * file has an ->mmap op and that @vma->vm_file is @file (WARN_ON
 * otherwise), switches the VMA to the real file with vma_set_file(), calls
 * call_mmap() with overridden credentials, then ovl_file_accessed().
 * NOTE(review): the results of the two tests and the return statement are
 * on lines missing from this extract.
 */
479 static int ovl_mmap(struct file
*file
, struct vm_area_struct
*vma
)
481 struct file
*realfile
= file
->private_data
;
482 const struct cred
*old_cred
;
485 if (!realfile
->f_op
->mmap
)
488 if (WARN_ON(file
!= vma
->vm_file
))
491 vma_set_file(vma
, realfile
);
493 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
/* vma->vm_file was set to realfile by vma_set_file() above. */
494 ret
= call_mmap(vma
->vm_file
, vma
);
495 revert_creds(old_cred
);
496 ovl_file_accessed(file
);
/*
 * .fallocate handler in ovl_file_operations.
 *
 * Resolves the real file, calls vfs_fallocate() on it with overridden
 * credentials, then refreshes the overlay inode with ovl_copyattr().
 * NOTE(review): error paths and the return statement are on lines missing
 * from this extract.
 */
501 static long ovl_fallocate(struct file
*file
, int mode
, loff_t offset
, loff_t len
)
503 struct inode
*inode
= file_inode(file
);
505 const struct cred
*old_cred
;
508 ret
= ovl_real_fdget(file
, &real
);
512 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
513 ret
= vfs_fallocate(real
.file
, mode
, offset
, len
);
514 revert_creds(old_cred
);
517 ovl_copyattr(ovl_inode_real(inode
), inode
);
/*
 * .fadvise handler in ovl_file_operations.
 *
 * Resolves the real file and forwards the advice to vfs_fadvise() on it
 * with overridden credentials.
 * NOTE(review): error paths and the return statement are on lines missing
 * from this extract.
 */
524 static int ovl_fadvise(struct file
*file
, loff_t offset
, loff_t len
, int advice
)
527 const struct cred
*old_cred
;
530 ret
= ovl_real_fdget(file
, &real
);
534 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
535 ret
= vfs_fadvise(real
.file
, offset
, len
, advice
);
536 revert_creds(old_cred
);
/*
 * Shared backend for ovl_copy_file_range() and ovl_remap_file_range().
 *
 * Resolves the real output file and then the real input file.  With
 * overridden credentials it calls one of vfs_copy_file_range(),
 * vfs_clone_file_range() or vfs_dedupe_file_range_one() on the real files,
 * then refreshes the output overlay inode with ovl_copyattr().
 * NOTE(review): the call is presumably selected by @op, but the dispatch,
 * error paths and the return statement are on lines missing from this
 * extract.
 */
549 static loff_t
ovl_copyfile(struct file
*file_in
, loff_t pos_in
,
550 struct file
*file_out
, loff_t pos_out
,
551 loff_t len
, unsigned int flags
, enum ovl_copyop op
)
553 struct inode
*inode_out
= file_inode(file_out
);
554 struct fd real_in
, real_out
;
555 const struct cred
*old_cred
;
558 ret
= ovl_real_fdget(file_out
, &real_out
);
562 ret
= ovl_real_fdget(file_in
, &real_in
);
568 old_cred
= ovl_override_creds(file_inode(file_out
)->i_sb
);
571 ret
= vfs_copy_file_range(real_in
.file
, pos_in
,
572 real_out
.file
, pos_out
, len
, flags
);
576 ret
= vfs_clone_file_range(real_in
.file
, pos_in
,
577 real_out
.file
, pos_out
, len
, flags
);
581 ret
= vfs_dedupe_file_range_one(real_in
.file
, pos_in
,
582 real_out
.file
, pos_out
, len
,
586 revert_creds(old_cred
);
589 ovl_copyattr(ovl_inode_real(inode_out
), inode_out
);
/*
 * .copy_file_range handler in ovl_file_operations; a thin wrapper that
 * forwards its arguments to ovl_copyfile().
 * NOTE(review): the final (op) argument of that call is on a line missing
 * from this extract.
 */
597 static ssize_t
ovl_copy_file_range(struct file
*file_in
, loff_t pos_in
,
598 struct file
*file_out
, loff_t pos_out
,
599 size_t len
, unsigned int flags
)
601 return ovl_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
, flags
,
/*
 * .remap_file_range handler in ovl_file_operations.
 *
 * Tests @remap_flags for bits outside REMAP_FILE_DEDUP |
 * REMAP_FILE_ADVISORY, and for REMAP_FILE_DEDUP.  For OVL_DEDUPE it also
 * tests whether either file lacks an upper inode (see the comment fragment
 * below for the rationale), then hands off to ovl_copyfile().
 * NOTE(review): the declaration of op and the outcome of each test are on
 * lines missing from this extract.
 */
605 static loff_t
ovl_remap_file_range(struct file
*file_in
, loff_t pos_in
,
606 struct file
*file_out
, loff_t pos_out
,
607 loff_t len
, unsigned int remap_flags
)
611 if (remap_flags
& ~(REMAP_FILE_DEDUP
| REMAP_FILE_ADVISORY
))
614 if (remap_flags
& REMAP_FILE_DEDUP
)
620 * Don't copy up because of a dedupe request, this wouldn't make sense
621 * most of the time (data would be duplicated instead of deduplicated).
623 if (op
== OVL_DEDUPE
&&
624 (!ovl_inode_upper(file_inode(file_in
)) ||
625 !ovl_inode_upper(file_inode(file_out
))))
628 return ovl_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
,
/*
 * Flush an overlay file by forwarding to the real file.
 *
 * Resolves the real file and, if it provides a ->flush op, calls that op
 * with @id under overridden credentials.
 * NOTE(review): error paths and the return statement are on lines missing
 * from this extract.
 */
632 static int ovl_flush(struct file
*file
, fl_owner_t id
)
635 const struct cred
*old_cred
;
638 err
= ovl_real_fdget(file
, &real
);
642 if (real
.file
->f_op
->flush
) {
643 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
644 err
= real
.file
->f_op
->flush(real
.file
, id
);
645 revert_creds(old_cred
);
/*
 * File operations table for overlay files.  Apart from .splice_read, which
 * uses generic_file_splice_read, every visible entry points at an ovl_*
 * handler in this file.
 * NOTE(review): the embedded line numbers skip 653, 658-659, 662 and 665,
 * so some initializers are missing from this extract.
 */
652 const struct file_operations ovl_file_operations
= {
654 .release
= ovl_release
,
655 .llseek
= ovl_llseek
,
656 .read_iter
= ovl_read_iter
,
657 .write_iter
= ovl_write_iter
,
660 .fallocate
= ovl_fallocate
,
661 .fadvise
= ovl_fadvise
,
663 .splice_read
= generic_file_splice_read
,
664 .splice_write
= ovl_splice_write
,
666 .copy_file_range
= ovl_copy_file_range
,
667 .remap_file_range
= ovl_remap_file_range
,
/*
 * Create the "ovl_aio_req" slab cache, sized for struct ovl_aio_req and
 * using SLAB_HWCACHE_ALIGN, and store it in ovl_aio_request_cachep.
 * NOTE(review): the values returned on success and on a NULL cache are on
 * lines missing from this extract.
 */
670 int __init
ovl_aio_request_cache_init(void)
672 ovl_aio_request_cachep
= kmem_cache_create("ovl_aio_req",
673 sizeof(struct ovl_aio_req
),
674 0, SLAB_HWCACHE_ALIGN
, NULL
);
675 if (!ovl_aio_request_cachep
)
/* Destroy the slab cache created by ovl_aio_request_cache_init(). */
681 void ovl_aio_request_cache_destroy(void)
683 kmem_cache_destroy(ovl_aio_request_cachep
);