/*
 * Copyright (C) 2017 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 */
9 #include <linux/cred.h>
10 #include <linux/file.h>
11 #include <linux/mount.h>
12 #include <linux/xattr.h>
13 #include <linux/uio.h>
14 #include <linux/uaccess.h>
15 #include "overlayfs.h"
/*
 * Single-character tag describing @realinode relative to the overlay @inode,
 * used in the debug message of ovl_open_realfile():
 *   'l' - @realinode is not the upper inode
 *   'u' - @realinode is the upper inode and ovl_has_upperdata() is true
 *   'm' - @realinode is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode == ovl_inode_upper(inode))
		return ovl_has_upperdata(inode) ? 'u' : 'm';

	return 'l';
}
27 static struct file
*ovl_open_realfile(const struct file
*file
,
28 struct inode
*realinode
)
30 struct inode
*inode
= file_inode(file
);
31 struct file
*realfile
;
32 const struct cred
*old_cred
;
33 int flags
= file
->f_flags
| O_NOATIME
| FMODE_NONOTIFY
;
35 old_cred
= ovl_override_creds(inode
->i_sb
);
36 realfile
= open_with_fake_path(&file
->f_path
, flags
, realinode
,
38 revert_creds(old_cred
);
40 pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
41 file
, file
, ovl_whatisit(inode
, realinode
), file
->f_flags
,
42 realfile
, IS_ERR(realfile
) ? 0 : realfile
->f_flags
);
47 #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
49 static int ovl_change_flags(struct file
*file
, unsigned int flags
)
51 struct inode
*inode
= file_inode(file
);
54 /* No atime modificaton on underlying */
55 flags
|= O_NOATIME
| FMODE_NONOTIFY
;
57 /* If some flag changed that cannot be changed then something's amiss */
58 if (WARN_ON((file
->f_flags
^ flags
) & ~OVL_SETFL_MASK
))
61 flags
&= OVL_SETFL_MASK
;
63 if (((flags
^ file
->f_flags
) & O_APPEND
) && IS_APPEND(inode
))
66 if (flags
& O_DIRECT
) {
67 if (!file
->f_mapping
->a_ops
||
68 !file
->f_mapping
->a_ops
->direct_IO
)
72 if (file
->f_op
->check_flags
) {
73 err
= file
->f_op
->check_flags(flags
);
78 spin_lock(&file
->f_lock
);
79 file
->f_flags
= (file
->f_flags
& ~OVL_SETFL_MASK
) | flags
;
80 spin_unlock(&file
->f_lock
);
85 static int ovl_real_fdget_meta(const struct file
*file
, struct fd
*real
,
88 struct inode
*inode
= file_inode(file
);
89 struct inode
*realinode
;
92 real
->file
= file
->private_data
;
95 realinode
= ovl_inode_real(inode
);
97 realinode
= ovl_inode_realdata(inode
);
99 /* Has it been copied up since we'd opened it? */
100 if (unlikely(file_inode(real
->file
) != realinode
)) {
101 real
->flags
= FDPUT_FPUT
;
102 real
->file
= ovl_open_realfile(file
, realinode
);
104 return PTR_ERR_OR_ZERO(real
->file
);
107 /* Did the flags change since open? */
108 if (unlikely((file
->f_flags
^ real
->file
->f_flags
) & ~O_NOATIME
))
109 return ovl_change_flags(real
->file
, file
->f_flags
);
114 static int ovl_real_fdget(const struct file
*file
, struct fd
*real
)
116 return ovl_real_fdget_meta(file
, real
, false);
119 static int ovl_open(struct inode
*inode
, struct file
*file
)
121 struct file
*realfile
;
124 err
= ovl_maybe_copy_up(file_dentry(file
), file
->f_flags
);
128 /* No longer need these flags, so don't pass them on to underlying fs */
129 file
->f_flags
&= ~(O_CREAT
| O_EXCL
| O_NOCTTY
| O_TRUNC
);
131 realfile
= ovl_open_realfile(file
, ovl_inode_realdata(inode
));
132 if (IS_ERR(realfile
))
133 return PTR_ERR(realfile
);
135 file
->private_data
= realfile
;
140 static int ovl_release(struct inode
*inode
, struct file
*file
)
142 fput(file
->private_data
);
147 static loff_t
ovl_llseek(struct file
*file
, loff_t offset
, int whence
)
149 struct inode
*inode
= file_inode(file
);
151 const struct cred
*old_cred
;
155 * The two special cases below do not need to involve real fs,
156 * so we can optimizing concurrent callers.
159 if (whence
== SEEK_CUR
)
162 if (whence
== SEEK_SET
)
163 return vfs_setpos(file
, 0, 0);
166 ret
= ovl_real_fdget(file
, &real
);
171 * Overlay file f_pos is the master copy that is preserved
172 * through copy up and modified on read/write, but only real
173 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
174 * limitations that are more strict than ->s_maxbytes for specific
175 * files, so we use the real file to perform seeks.
178 real
.file
->f_pos
= file
->f_pos
;
180 old_cred
= ovl_override_creds(inode
->i_sb
);
181 ret
= vfs_llseek(real
.file
, offset
, whence
);
182 revert_creds(old_cred
);
184 file
->f_pos
= real
.file
->f_pos
;
192 static void ovl_file_accessed(struct file
*file
)
194 struct inode
*inode
, *upperinode
;
196 if (file
->f_flags
& O_NOATIME
)
199 inode
= file_inode(file
);
200 upperinode
= ovl_inode_upper(inode
);
205 if ((!timespec64_equal(&inode
->i_mtime
, &upperinode
->i_mtime
) ||
206 !timespec64_equal(&inode
->i_ctime
, &upperinode
->i_ctime
))) {
207 inode
->i_mtime
= upperinode
->i_mtime
;
208 inode
->i_ctime
= upperinode
->i_ctime
;
211 touch_atime(&file
->f_path
);
214 static rwf_t
ovl_iocb_to_rwf(struct kiocb
*iocb
)
216 int ifl
= iocb
->ki_flags
;
219 if (ifl
& IOCB_NOWAIT
)
221 if (ifl
& IOCB_HIPRI
)
223 if (ifl
& IOCB_DSYNC
)
231 static ssize_t
ovl_read_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
233 struct file
*file
= iocb
->ki_filp
;
235 const struct cred
*old_cred
;
238 if (!iov_iter_count(iter
))
241 ret
= ovl_real_fdget(file
, &real
);
245 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
246 ret
= vfs_iter_read(real
.file
, iter
, &iocb
->ki_pos
,
247 ovl_iocb_to_rwf(iocb
));
248 revert_creds(old_cred
);
250 ovl_file_accessed(file
);
257 static ssize_t
ovl_write_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
259 struct file
*file
= iocb
->ki_filp
;
260 struct inode
*inode
= file_inode(file
);
262 const struct cred
*old_cred
;
265 if (!iov_iter_count(iter
))
270 ovl_copyattr(ovl_inode_real(inode
), inode
);
271 ret
= file_remove_privs(file
);
275 ret
= ovl_real_fdget(file
, &real
);
279 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
280 file_start_write(real
.file
);
281 ret
= vfs_iter_write(real
.file
, iter
, &iocb
->ki_pos
,
282 ovl_iocb_to_rwf(iocb
));
283 file_end_write(real
.file
);
284 revert_creds(old_cred
);
287 ovl_copyattr(ovl_inode_real(inode
), inode
);
297 static int ovl_fsync(struct file
*file
, loff_t start
, loff_t end
, int datasync
)
300 const struct cred
*old_cred
;
303 ret
= ovl_real_fdget_meta(file
, &real
, !datasync
);
307 /* Don't sync lower file for fear of receiving EROFS error */
308 if (file_inode(real
.file
) == ovl_inode_upper(file_inode(file
))) {
309 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
310 ret
= vfs_fsync_range(real
.file
, start
, end
, datasync
);
311 revert_creds(old_cred
);
319 static int ovl_mmap(struct file
*file
, struct vm_area_struct
*vma
)
321 struct file
*realfile
= file
->private_data
;
322 const struct cred
*old_cred
;
325 if (!realfile
->f_op
->mmap
)
328 if (WARN_ON(file
!= vma
->vm_file
))
331 vma
->vm_file
= get_file(realfile
);
333 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
334 ret
= call_mmap(vma
->vm_file
, vma
);
335 revert_creds(old_cred
);
338 /* Drop reference count from new vm_file value */
341 /* Drop reference count from previous vm_file value */
345 ovl_file_accessed(file
);
350 static long ovl_fallocate(struct file
*file
, int mode
, loff_t offset
, loff_t len
)
352 struct inode
*inode
= file_inode(file
);
354 const struct cred
*old_cred
;
357 ret
= ovl_real_fdget(file
, &real
);
361 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
362 ret
= vfs_fallocate(real
.file
, mode
, offset
, len
);
363 revert_creds(old_cred
);
366 ovl_copyattr(ovl_inode_real(inode
), inode
);
373 static int ovl_fadvise(struct file
*file
, loff_t offset
, loff_t len
, int advice
)
376 const struct cred
*old_cred
;
379 ret
= ovl_real_fdget(file
, &real
);
383 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
384 ret
= vfs_fadvise(real
.file
, offset
, len
, advice
);
385 revert_creds(old_cred
);
392 static long ovl_real_ioctl(struct file
*file
, unsigned int cmd
,
396 const struct cred
*old_cred
;
399 ret
= ovl_real_fdget(file
, &real
);
403 old_cred
= ovl_override_creds(file_inode(file
)->i_sb
);
404 ret
= vfs_ioctl(real
.file
, cmd
, arg
);
405 revert_creds(old_cred
);
412 static unsigned int ovl_get_inode_flags(struct inode
*inode
)
414 unsigned int flags
= READ_ONCE(inode
->i_flags
);
415 unsigned int ovl_iflags
= 0;
418 ovl_iflags
|= FS_SYNC_FL
;
419 if (flags
& S_APPEND
)
420 ovl_iflags
|= FS_APPEND_FL
;
421 if (flags
& S_IMMUTABLE
)
422 ovl_iflags
|= FS_IMMUTABLE_FL
;
423 if (flags
& S_NOATIME
)
424 ovl_iflags
|= FS_NOATIME_FL
;
429 static long ovl_ioctl_set_flags(struct file
*file
, unsigned int cmd
,
433 struct inode
*inode
= file_inode(file
);
435 unsigned int old_flags
;
437 if (!inode_owner_or_capable(inode
))
440 if (get_user(flags
, (int __user
*) arg
))
443 ret
= mnt_want_write_file(file
);
449 /* Check the capability before cred override */
451 old_flags
= ovl_get_inode_flags(inode
);
452 if (((flags
^ old_flags
) & (FS_APPEND_FL
| FS_IMMUTABLE_FL
)) &&
453 !capable(CAP_LINUX_IMMUTABLE
))
456 ret
= ovl_maybe_copy_up(file_dentry(file
), O_WRONLY
);
460 ret
= ovl_real_ioctl(file
, cmd
, arg
);
462 ovl_copyflags(ovl_inode_real(inode
), inode
);
466 mnt_drop_write_file(file
);
472 static long ovl_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
477 case FS_IOC_GETFLAGS
:
478 case FS_IOC_FSGETXATTR
:
479 ret
= ovl_real_ioctl(file
, cmd
, arg
);
482 case FS_IOC_SETFLAGS
:
483 case FS_IOC_FSSETXATTR
:
484 ret
= ovl_ioctl_set_flags(file
, cmd
, arg
);
494 static long ovl_compat_ioctl(struct file
*file
, unsigned int cmd
,
498 case FS_IOC32_GETFLAGS
:
499 cmd
= FS_IOC_GETFLAGS
;
502 case FS_IOC32_SETFLAGS
:
503 cmd
= FS_IOC_SETFLAGS
;
510 return ovl_ioctl(file
, cmd
, arg
);
519 static loff_t
ovl_copyfile(struct file
*file_in
, loff_t pos_in
,
520 struct file
*file_out
, loff_t pos_out
,
521 loff_t len
, unsigned int flags
, enum ovl_copyop op
)
523 struct inode
*inode_out
= file_inode(file_out
);
524 struct fd real_in
, real_out
;
525 const struct cred
*old_cred
;
528 ret
= ovl_real_fdget(file_out
, &real_out
);
532 ret
= ovl_real_fdget(file_in
, &real_in
);
538 old_cred
= ovl_override_creds(file_inode(file_out
)->i_sb
);
541 ret
= vfs_copy_file_range(real_in
.file
, pos_in
,
542 real_out
.file
, pos_out
, len
, flags
);
546 ret
= vfs_clone_file_range(real_in
.file
, pos_in
,
547 real_out
.file
, pos_out
, len
, flags
);
551 ret
= vfs_dedupe_file_range_one(real_in
.file
, pos_in
,
552 real_out
.file
, pos_out
, len
,
556 revert_creds(old_cred
);
559 ovl_copyattr(ovl_inode_real(inode_out
), inode_out
);
567 static ssize_t
ovl_copy_file_range(struct file
*file_in
, loff_t pos_in
,
568 struct file
*file_out
, loff_t pos_out
,
569 size_t len
, unsigned int flags
)
571 return ovl_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
, flags
,
575 static loff_t
ovl_remap_file_range(struct file
*file_in
, loff_t pos_in
,
576 struct file
*file_out
, loff_t pos_out
,
577 loff_t len
, unsigned int remap_flags
)
581 if (remap_flags
& ~(REMAP_FILE_DEDUP
| REMAP_FILE_ADVISORY
))
584 if (remap_flags
& REMAP_FILE_DEDUP
)
590 * Don't copy up because of a dedupe request, this wouldn't make sense
591 * most of the time (data would be duplicated instead of deduplicated).
593 if (op
== OVL_DEDUPE
&&
594 (!ovl_inode_upper(file_inode(file_in
)) ||
595 !ovl_inode_upper(file_inode(file_out
))))
598 return ovl_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
,
602 const struct file_operations ovl_file_operations
= {
604 .release
= ovl_release
,
605 .llseek
= ovl_llseek
,
606 .read_iter
= ovl_read_iter
,
607 .write_iter
= ovl_write_iter
,
610 .fallocate
= ovl_fallocate
,
611 .fadvise
= ovl_fadvise
,
612 .unlocked_ioctl
= ovl_ioctl
,
613 .compat_ioctl
= ovl_compat_ioctl
,
615 .copy_file_range
= ovl_copy_file_range
,
616 .remap_file_range
= ovl_remap_file_range
,