1 #include <linux/btrfs.h>
2 #include <linux/capability.h>
3 #include <linux/cred.h>
4 #include <linux/mount.h>
5 #include <linux/fdtable.h>
6 #include <linux/file.h>
8 #include <linux/namei.h>
9 #include <linux/module.h>
10 #include <linux/kernel.h>
11 #include <linux/magic.h>
12 #include <linux/parser.h>
13 #include <linux/security.h>
14 #include <linux/seq_file.h>
15 #include <linux/statfs.h>
16 #include <linux/slab.h>
17 #include <linux/user_namespace.h>
18 #include <linux/uidgid.h>
19 #include <linux/xattr.h>
20 #include <linux/posix_acl.h>
21 #include <linux/posix_acl_xattr.h>
22 #include <linux/uio.h>
23 #include <linux/fiemap.h>
24 #include <linux/pagemap.h>
26 struct shiftfs_super_info
{
28 struct user_namespace
*userns
;
29 /* creds of process who created the super block */
30 const struct cred
*creator_cred
;
32 unsigned int passthrough
;
33 unsigned int passthrough_mark
;
36 static void shiftfs_fill_inode(struct inode
*inode
, unsigned long ino
,
37 umode_t mode
, dev_t dev
, struct dentry
*dentry
);
/*
 * Bit values for the "passthrough=%u" mount option.  The parsed value is
 * stored in shiftfs_super_info::passthrough and tested bit-by-bit by the
 * shiftfs_passthrough_*() helpers.
 */
#define SHIFTFS_PASSTHROUGH_NONE	0
#define SHIFTFS_PASSTHROUGH_STAT	1
#define SHIFTFS_PASSTHROUGH_IOCTL	2
/* Every defined passthrough bit; the option parser masks user input with it. */
#define SHIFTFS_PASSTHROUGH_ALL	(SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
45 static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info
*info
)
47 if (!(info
->passthrough
& SHIFTFS_PASSTHROUGH_IOCTL
))
53 static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info
*info
)
55 if (!(info
->passthrough
& SHIFTFS_PASSTHROUGH_STAT
))
67 /* global filesystem options */
68 static const match_table_t tokens
= {
70 { OPT_PASSTHROUGH
, "passthrough=%u" },
74 static const struct cred
*shiftfs_override_creds(const struct super_block
*sb
)
76 struct shiftfs_super_info
*sbinfo
= sb
->s_fs_info
;
78 return override_creds(sbinfo
->creator_cred
);
81 static inline void shiftfs_revert_object_creds(const struct cred
*oldcred
,
84 revert_creds(oldcred
);
88 static kuid_t
shift_kuid(struct user_namespace
*from
, struct user_namespace
*to
,
91 uid_t uid
= from_kuid(from
, kuid
);
92 return make_kuid(to
, uid
);
95 static kgid_t
shift_kgid(struct user_namespace
*from
, struct user_namespace
*to
,
98 gid_t gid
= from_kgid(from
, kgid
);
99 return make_kgid(to
, gid
);
102 static int shiftfs_override_object_creds(const struct super_block
*sb
,
103 const struct cred
**oldcred
,
104 struct cred
**newcred
,
105 struct dentry
*dentry
, umode_t mode
,
108 struct shiftfs_super_info
*sbinfo
= sb
->s_fs_info
;
109 kuid_t fsuid
= current_fsuid();
110 kgid_t fsgid
= current_fsgid();
112 *oldcred
= shiftfs_override_creds(sb
);
114 *newcred
= prepare_creds();
116 revert_creds(*oldcred
);
120 (*newcred
)->fsuid
= shift_kuid(sb
->s_user_ns
, sbinfo
->userns
, fsuid
);
121 (*newcred
)->fsgid
= shift_kgid(sb
->s_user_ns
, sbinfo
->userns
, fsgid
);
124 int err
= security_dentry_create_files_as(dentry
, mode
,
128 shiftfs_revert_object_creds(*oldcred
, *newcred
);
133 put_cred(override_creds(*newcred
));
137 static void shiftfs_copyattr(struct inode
*from
, struct inode
*to
)
139 struct user_namespace
*from_ns
= from
->i_sb
->s_user_ns
;
140 struct user_namespace
*to_ns
= to
->i_sb
->s_user_ns
;
142 to
->i_uid
= shift_kuid(from_ns
, to_ns
, from
->i_uid
);
143 to
->i_gid
= shift_kgid(from_ns
, to_ns
, from
->i_gid
);
144 to
->i_mode
= from
->i_mode
;
145 to
->i_atime
= from
->i_atime
;
146 to
->i_mtime
= from
->i_mtime
;
147 to
->i_ctime
= from
->i_ctime
;
148 i_size_write(to
, i_size_read(from
));
151 static void shiftfs_copyflags(struct inode
*from
, struct inode
*to
)
153 unsigned int mask
= S_SYNC
| S_IMMUTABLE
| S_APPEND
| S_NOATIME
;
155 inode_set_flags(to
, from
->i_flags
& mask
, mask
);
158 static void shiftfs_file_accessed(struct file
*file
)
160 struct inode
*upperi
, *loweri
;
162 if (file
->f_flags
& O_NOATIME
)
165 upperi
= file_inode(file
);
166 loweri
= upperi
->i_private
;
171 upperi
->i_mtime
= loweri
->i_mtime
;
172 upperi
->i_ctime
= loweri
->i_ctime
;
174 touch_atime(&file
->f_path
);
177 static int shiftfs_parse_mount_options(struct shiftfs_super_info
*sbinfo
,
181 substring_t args
[MAX_OPT_ARGS
];
183 sbinfo
->mark
= false;
184 sbinfo
->passthrough
= 0;
186 while ((p
= strsep(&options
, ",")) != NULL
) {
187 int err
, intarg
, token
;
192 token
= match_token(p
, tokens
, args
);
197 case OPT_PASSTHROUGH
:
198 err
= match_int(&args
[0], &intarg
);
202 if (intarg
& ~SHIFTFS_PASSTHROUGH_ALL
)
205 sbinfo
->passthrough
= intarg
;
215 static void shiftfs_d_release(struct dentry
*dentry
)
217 struct dentry
*lowerd
= dentry
->d_fsdata
;
223 static struct dentry
*shiftfs_d_real(struct dentry
*dentry
,
224 const struct inode
*inode
)
226 struct dentry
*lowerd
= dentry
->d_fsdata
;
228 if (inode
&& d_inode(dentry
) == inode
)
231 lowerd
= d_real(lowerd
, inode
);
232 if (lowerd
&& (!inode
|| inode
== d_inode(lowerd
)))
235 WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry
,
236 inode
? inode
->i_sb
->s_id
: "NULL", inode
? inode
->i_ino
: 0);
240 static int shiftfs_d_weak_revalidate(struct dentry
*dentry
, unsigned int flags
)
243 struct dentry
*lowerd
= dentry
->d_fsdata
;
245 if (d_is_negative(lowerd
) != d_is_negative(dentry
))
248 if ((lowerd
->d_flags
& DCACHE_OP_WEAK_REVALIDATE
))
249 err
= lowerd
->d_op
->d_weak_revalidate(lowerd
, flags
);
251 if (d_really_is_positive(dentry
)) {
252 struct inode
*inode
= d_inode(dentry
);
253 struct inode
*loweri
= d_inode(lowerd
);
255 shiftfs_copyattr(loweri
, inode
);
261 static int shiftfs_d_revalidate(struct dentry
*dentry
, unsigned int flags
)
264 struct dentry
*lowerd
= dentry
->d_fsdata
;
266 if (d_unhashed(lowerd
) ||
267 ((d_is_negative(lowerd
) != d_is_negative(dentry
))))
270 if (flags
& LOOKUP_RCU
)
273 if ((lowerd
->d_flags
& DCACHE_OP_REVALIDATE
))
274 err
= lowerd
->d_op
->d_revalidate(lowerd
, flags
);
276 if (d_really_is_positive(dentry
)) {
277 struct inode
*inode
= d_inode(dentry
);
278 struct inode
*loweri
= d_inode(lowerd
);
280 shiftfs_copyattr(loweri
, inode
);
286 static const struct dentry_operations shiftfs_dentry_ops
= {
287 .d_release
= shiftfs_d_release
,
288 .d_real
= shiftfs_d_real
,
289 .d_revalidate
= shiftfs_d_revalidate
,
290 .d_weak_revalidate
= shiftfs_d_weak_revalidate
,
293 static const char *shiftfs_get_link(struct dentry
*dentry
, struct inode
*inode
,
294 struct delayed_call
*done
)
297 const struct cred
*oldcred
;
298 struct dentry
*lowerd
;
300 /* RCU lookup not supported */
302 return ERR_PTR(-ECHILD
);
304 lowerd
= dentry
->d_fsdata
;
305 oldcred
= shiftfs_override_creds(dentry
->d_sb
);
306 p
= vfs_get_link(lowerd
, done
);
307 revert_creds(oldcred
);
312 static int shiftfs_setxattr(struct dentry
*dentry
, struct inode
*inode
,
313 const char *name
, const void *value
,
314 size_t size
, int flags
)
316 struct dentry
*lowerd
= dentry
->d_fsdata
;
318 const struct cred
*oldcred
;
320 oldcred
= shiftfs_override_creds(dentry
->d_sb
);
321 err
= vfs_setxattr(&init_user_ns
, lowerd
, name
, value
, size
, flags
);
322 revert_creds(oldcred
);
324 shiftfs_copyattr(lowerd
->d_inode
, inode
);
329 static int shiftfs_xattr_get(const struct xattr_handler
*handler
,
330 struct dentry
*dentry
, struct inode
*inode
,
331 const char *name
, void *value
, size_t size
)
333 struct dentry
*lowerd
= dentry
->d_fsdata
;
335 const struct cred
*oldcred
;
337 oldcred
= shiftfs_override_creds(dentry
->d_sb
);
338 err
= vfs_getxattr(&init_user_ns
, lowerd
, name
, value
, size
);
339 revert_creds(oldcred
);
344 static ssize_t
shiftfs_listxattr(struct dentry
*dentry
, char *list
,
347 struct dentry
*lowerd
= dentry
->d_fsdata
;
349 const struct cred
*oldcred
;
351 oldcred
= shiftfs_override_creds(dentry
->d_sb
);
352 err
= vfs_listxattr(lowerd
, list
, size
);
353 revert_creds(oldcred
);
358 static int shiftfs_removexattr(struct user_namespace
*ns
,
359 struct dentry
*dentry
, const char *name
)
361 struct dentry
*lowerd
= dentry
->d_fsdata
;
363 const struct cred
*oldcred
;
365 oldcred
= shiftfs_override_creds(dentry
->d_sb
);
366 err
= vfs_removexattr(&init_user_ns
, lowerd
, name
);
367 revert_creds(oldcred
);
370 shiftfs_copyattr(lowerd
->d_inode
, d_inode(dentry
));
375 static int shiftfs_xattr_set(const struct xattr_handler
*handler
,
376 struct user_namespace
*ns
,
377 struct dentry
*dentry
, struct inode
*inode
,
378 const char *name
, const void *value
, size_t size
,
382 return shiftfs_removexattr(&init_user_ns
, dentry
, name
);
383 return shiftfs_setxattr(dentry
, inode
, name
, value
, size
, flags
);
386 static int shiftfs_inode_test(struct inode
*inode
, void *data
)
388 return inode
->i_private
== data
;
391 static int shiftfs_inode_set(struct inode
*inode
, void *data
)
393 inode
->i_private
= data
;
397 static int shiftfs_create_object(struct inode
*diri
, struct dentry
*dentry
,
398 umode_t mode
, const char *symlink
,
399 struct dentry
*hardlink
, bool excl
)
402 const struct cred
*oldcred
;
403 struct cred
*newcred
;
404 void *loweri_iop_ptr
= NULL
;
405 umode_t modei
= mode
;
406 struct super_block
*dir_sb
= diri
->i_sb
;
407 struct dentry
*lowerd_new
= dentry
->d_fsdata
;
408 struct inode
*inode
= NULL
, *loweri_dir
= diri
->i_private
;
409 const struct inode_operations
*loweri_dir_iop
= loweri_dir
->i_op
;
410 struct dentry
*lowerd_link
= NULL
;
413 loweri_iop_ptr
= loweri_dir_iop
->link
;
415 switch (mode
& S_IFMT
) {
417 loweri_iop_ptr
= loweri_dir_iop
->mkdir
;
420 loweri_iop_ptr
= loweri_dir_iop
->create
;
423 loweri_iop_ptr
= loweri_dir_iop
->symlink
;
428 loweri_iop_ptr
= loweri_dir_iop
->mknod
;
432 if (!loweri_iop_ptr
) {
437 inode_lock_nested(loweri_dir
, I_MUTEX_PARENT
);
440 inode
= new_inode(dir_sb
);
447 * new_inode() will have added the new inode to the super
448 * block's list of inodes. Further below we will call
449 * inode_insert5(), which would perform the same operation again,
450 * thereby corrupting the list. To avoid this, raise I_CREATING
451 * in i_state, which will cause inode_insert5() to skip this
452 * step. I_CREATING will be cleared by d_instantiate_new()
455 spin_lock(&inode
->i_lock
);
456 inode
->i_state
|= I_CREATING
;
457 spin_unlock(&inode
->i_lock
);
459 inode_init_owner(&init_user_ns
, inode
, diri
, mode
);
460 modei
= inode
->i_mode
;
463 err
= shiftfs_override_object_creds(dentry
->d_sb
, &oldcred
, &newcred
,
464 dentry
, modei
, hardlink
!= NULL
);
469 lowerd_link
= hardlink
->d_fsdata
;
470 err
= vfs_link(lowerd_link
, &init_user_ns
, loweri_dir
, lowerd_new
, NULL
);
472 switch (modei
& S_IFMT
) {
474 err
= vfs_mkdir(&init_user_ns
, loweri_dir
, lowerd_new
, modei
);
477 err
= vfs_create(&init_user_ns
, loweri_dir
, lowerd_new
, modei
, excl
);
480 err
= vfs_symlink(&init_user_ns
, loweri_dir
, lowerd_new
, symlink
);
485 err
= vfs_mknod(&init_user_ns
, loweri_dir
, lowerd_new
, modei
, 0);
493 shiftfs_revert_object_creds(oldcred
, newcred
);
495 if (!err
&& WARN_ON(!lowerd_new
->d_inode
))
501 inode
= d_inode(hardlink
);
504 /* copy up times from lower inode */
505 shiftfs_copyattr(d_inode(lowerd_link
), inode
);
506 set_nlink(d_inode(hardlink
), d_inode(lowerd_link
)->i_nlink
);
507 d_instantiate(dentry
, inode
);
509 struct inode
*inode_tmp
;
510 struct inode
*loweri_new
= d_inode(lowerd_new
);
512 inode_tmp
= inode_insert5(inode
, (unsigned long)loweri_new
,
513 shiftfs_inode_test
, shiftfs_inode_set
,
515 if (unlikely(inode_tmp
!= inode
)) {
516 pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
523 shiftfs_fill_inode(inode
, loweri_new
->i_ino
, loweri_new
->i_mode
,
525 d_instantiate_new(dentry
, inode
);
528 shiftfs_copyattr(loweri_dir
, diri
);
529 if (loweri_iop_ptr
== loweri_dir_iop
->mkdir
)
530 set_nlink(diri
, loweri_dir
->i_nlink
);
536 inode_unlock(loweri_dir
);
541 static int shiftfs_create(struct user_namespace
*ns
,
542 struct inode
*dir
, struct dentry
*dentry
,
543 umode_t mode
, bool excl
)
547 return shiftfs_create_object(dir
, dentry
, mode
, NULL
, NULL
, excl
);
550 static int shiftfs_mkdir(struct user_namespace
*ns
, struct inode
*dir
, struct dentry
*dentry
,
555 return shiftfs_create_object(dir
, dentry
, mode
, NULL
, NULL
, false);
558 static int shiftfs_link(struct dentry
*hardlink
, struct inode
*dir
,
559 struct dentry
*dentry
)
561 return shiftfs_create_object(dir
, dentry
, 0, NULL
, hardlink
, false);
564 static int shiftfs_mknod(struct user_namespace
*ns
,
565 struct inode
*dir
, struct dentry
*dentry
, umode_t mode
,
568 if (!S_ISFIFO(mode
) && !S_ISSOCK(mode
))
571 return shiftfs_create_object(dir
, dentry
, mode
, NULL
, NULL
, false);
574 static int shiftfs_symlink(struct user_namespace
*ns
, struct inode
*dir
, struct dentry
*dentry
,
577 return shiftfs_create_object(dir
, dentry
, S_IFLNK
, symlink
, NULL
, false);
580 static int shiftfs_rm(struct inode
*dir
, struct dentry
*dentry
, bool rmdir
)
582 struct dentry
*lowerd
= dentry
->d_fsdata
;
583 struct inode
*loweri
= dir
->i_private
;
584 struct inode
*inode
= d_inode(dentry
);
586 const struct cred
*oldcred
;
589 oldcred
= shiftfs_override_creds(dentry
->d_sb
);
590 inode_lock_nested(loweri
, I_MUTEX_PARENT
);
592 err
= vfs_rmdir(&init_user_ns
, loweri
, lowerd
);
594 err
= vfs_unlink(&init_user_ns
, loweri
, lowerd
, NULL
);
595 revert_creds(oldcred
);
605 inode_unlock(loweri
);
607 shiftfs_copyattr(loweri
, dir
);
613 static int shiftfs_unlink(struct inode
*dir
, struct dentry
*dentry
)
615 return shiftfs_rm(dir
, dentry
, false);
618 static int shiftfs_rmdir(struct inode
*dir
, struct dentry
*dentry
)
620 return shiftfs_rm(dir
, dentry
, true);
623 static int shiftfs_rename(struct user_namespace
*ns
,
624 struct inode
*olddir
, struct dentry
*old
,
625 struct inode
*newdir
, struct dentry
*new,
628 struct dentry
*lowerd_dir_old
= old
->d_parent
->d_fsdata
,
629 *lowerd_dir_new
= new->d_parent
->d_fsdata
,
630 *lowerd_old
= old
->d_fsdata
, *lowerd_new
= new->d_fsdata
,
632 struct inode
*loweri_dir_old
= lowerd_dir_old
->d_inode
,
633 *loweri_dir_new
= lowerd_dir_new
->d_inode
;
634 struct renamedata rd
= {
635 .old_mnt_userns
= &init_user_ns
,
636 .old_dir
= loweri_dir_old
,
637 .old_dentry
= lowerd_old
,
638 .new_mnt_userns
= &init_user_ns
,
639 .new_dir
= loweri_dir_new
,
640 .new_dentry
= lowerd_new
,
643 const struct cred
*oldcred
;
645 trapd
= lock_rename(lowerd_dir_new
, lowerd_dir_old
);
647 if (trapd
== lowerd_old
|| trapd
== lowerd_new
)
650 oldcred
= shiftfs_override_creds(old
->d_sb
);
651 err
= vfs_rename(&rd
);
652 revert_creds(oldcred
);
654 shiftfs_copyattr(loweri_dir_old
, olddir
);
655 shiftfs_copyattr(loweri_dir_new
, newdir
);
658 unlock_rename(lowerd_dir_new
, lowerd_dir_old
);
663 static struct dentry
*shiftfs_lookup(struct inode
*dir
, struct dentry
*dentry
,
668 const struct cred
*oldcred
;
669 struct dentry
*lowerd
= dentry
->d_parent
->d_fsdata
;
670 struct inode
*inode
= NULL
, *loweri
= lowerd
->d_inode
;
673 oldcred
= shiftfs_override_creds(dentry
->d_sb
);
674 new = lookup_one_len(dentry
->d_name
.name
, lowerd
, dentry
->d_name
.len
);
675 revert_creds(oldcred
);
676 inode_unlock(loweri
);
681 dentry
->d_fsdata
= new;
687 inode
= iget5_locked(dentry
->d_sb
, (unsigned long)newi
,
688 shiftfs_inode_test
, shiftfs_inode_set
, newi
);
691 return ERR_PTR(-ENOMEM
);
693 if (inode
->i_state
& I_NEW
) {
695 * inode->i_private set by shiftfs_inode_set(), but we still
696 * need to take a reference
699 shiftfs_fill_inode(inode
, newi
->i_ino
, newi
->i_mode
, 0, new);
700 unlock_new_inode(inode
);
704 return d_splice_alias(inode
, dentry
);
707 static int shiftfs_permission(struct user_namespace
*ns
, struct inode
*inode
, int mask
)
710 const struct cred
*oldcred
;
711 struct inode
*loweri
= inode
->i_private
;
714 WARN_ON(!(mask
& MAY_NOT_BLOCK
));
718 err
= generic_permission(&init_user_ns
, inode
, mask
);
722 oldcred
= shiftfs_override_creds(inode
->i_sb
);
723 err
= inode_permission(&init_user_ns
, loweri
, mask
);
724 revert_creds(oldcred
);
729 static int shiftfs_fiemap(struct inode
*inode
,
730 struct fiemap_extent_info
*fieinfo
, u64 start
,
734 const struct cred
*oldcred
;
735 struct inode
*loweri
= inode
->i_private
;
737 if (!loweri
->i_op
->fiemap
)
740 oldcred
= shiftfs_override_creds(inode
->i_sb
);
741 if (fieinfo
->fi_flags
& FIEMAP_FLAG_SYNC
)
742 filemap_write_and_wait(loweri
->i_mapping
);
743 err
= loweri
->i_op
->fiemap(loweri
, fieinfo
, start
, len
);
744 revert_creds(oldcred
);
749 static int shiftfs_tmpfile(struct user_namespace
*ns
,
750 struct inode
*dir
, struct file
*file
,
754 const struct cred
*oldcred
;
755 struct inode
*loweri
= dir
->i_private
;
757 if (!loweri
->i_op
->tmpfile
)
760 oldcred
= shiftfs_override_creds(dir
->i_sb
);
761 err
= loweri
->i_op
->tmpfile(&init_user_ns
, loweri
, file
, mode
);
762 revert_creds(oldcred
);
767 static int shiftfs_setattr(struct user_namespace
*ns
, struct dentry
*dentry
, struct iattr
*attr
)
769 struct dentry
*lowerd
= dentry
->d_fsdata
;
770 struct inode
*loweri
= lowerd
->d_inode
;
771 struct iattr newattr
;
772 const struct cred
*oldcred
;
773 struct super_block
*sb
= dentry
->d_sb
;
774 struct shiftfs_super_info
*sbinfo
= sb
->s_fs_info
;
777 err
= setattr_prepare(&init_user_ns
, dentry
, attr
);
782 newattr
.ia_uid
= shift_kuid(sb
->s_user_ns
, sbinfo
->userns
, attr
->ia_uid
);
783 newattr
.ia_gid
= shift_kgid(sb
->s_user_ns
, sbinfo
->userns
, attr
->ia_gid
);
786 * mode change is for clearing setuid/setgid bits. Allow lower fs
787 * to interpret this in its own way.
789 if (newattr
.ia_valid
& (ATTR_KILL_SUID
|ATTR_KILL_SGID
))
790 newattr
.ia_valid
&= ~ATTR_MODE
;
793 oldcred
= shiftfs_override_creds(dentry
->d_sb
);
794 err
= notify_change(&init_user_ns
, lowerd
, &newattr
, NULL
);
795 revert_creds(oldcred
);
796 inode_unlock(loweri
);
798 shiftfs_copyattr(loweri
, d_inode(dentry
));
803 static int shiftfs_getattr(struct user_namespace
*ns
,
804 const struct path
*path
, struct kstat
*stat
,
805 u32 request_mask
, unsigned int query_flags
)
807 struct inode
*inode
= path
->dentry
->d_inode
;
808 struct dentry
*lowerd
= path
->dentry
->d_fsdata
;
809 struct inode
*loweri
= lowerd
->d_inode
;
810 struct shiftfs_super_info
*info
= path
->dentry
->d_sb
->s_fs_info
;
811 struct path newpath
= { .mnt
= info
->mnt
, .dentry
= lowerd
};
812 struct user_namespace
*from_ns
= loweri
->i_sb
->s_user_ns
;
813 struct user_namespace
*to_ns
= inode
->i_sb
->s_user_ns
;
814 const struct cred
*oldcred
;
817 oldcred
= shiftfs_override_creds(inode
->i_sb
);
818 err
= vfs_getattr(&newpath
, stat
, request_mask
, query_flags
);
819 revert_creds(oldcred
);
824 /* transform the underlying id */
825 stat
->uid
= shift_kuid(from_ns
, to_ns
, stat
->uid
);
826 stat
->gid
= shift_kgid(from_ns
, to_ns
, stat
->gid
);
830 #ifdef CONFIG_SHIFT_FS_POSIX_ACL
833 shift_acl_ids(struct user_namespace
*from
, struct user_namespace
*to
,
834 struct posix_acl
*acl
)
838 for (i
= 0; i
< acl
->a_count
; i
++) {
839 struct posix_acl_entry
*e
= &acl
->a_entries
[i
];
842 e
->e_uid
= shift_kuid(from
, to
, e
->e_uid
);
843 if (!uid_valid(e
->e_uid
))
847 e
->e_gid
= shift_kgid(from
, to
, e
->e_gid
);
848 if (!gid_valid(e
->e_gid
))
857 shift_acl_xattr_ids(struct user_namespace
*from
, struct user_namespace
*to
,
858 void *value
, size_t size
)
860 struct posix_acl_xattr_header
*header
= value
;
861 struct posix_acl_xattr_entry
*entry
= (void *)(header
+ 1), *end
;
868 if (size
< sizeof(struct posix_acl_xattr_header
))
870 if (header
->a_version
!= cpu_to_le32(POSIX_ACL_XATTR_VERSION
))
873 count
= posix_acl_xattr_count(size
);
879 for (end
= entry
+ count
; entry
!= end
; entry
++) {
880 switch(le16_to_cpu(entry
->e_tag
)) {
882 kuid
= make_kuid(&init_user_ns
, le32_to_cpu(entry
->e_id
));
883 kuid
= shift_kuid(from
, to
, kuid
);
884 entry
->e_id
= cpu_to_le32(from_kuid(&init_user_ns
, kuid
));
887 kgid
= make_kgid(from
, le32_to_cpu(entry
->e_id
));
888 kgid
= shift_kgid(from
, to
, kgid
);
889 entry
->e_id
= cpu_to_le32(from_kgid(from
, kgid
));
897 static struct posix_acl
*
898 shiftfs_get_acl(struct inode
*inode
, int type
, bool rcu
)
900 struct inode
*loweri
= inode
->i_private
;
901 const struct cred
*oldcred
;
902 struct posix_acl
*lower_acl
, *acl
= NULL
;
903 struct user_namespace
*from_ns
= loweri
->i_sb
->s_user_ns
;
904 struct user_namespace
*to_ns
= inode
->i_sb
->s_user_ns
;
909 return ERR_PTR(-ECHILD
);
911 if (!IS_POSIXACL(loweri
))
914 oldcred
= shiftfs_override_creds(inode
->i_sb
);
915 lower_acl
= get_inode_acl(loweri
, type
);
916 revert_creds(oldcred
);
918 if (lower_acl
&& !IS_ERR(lower_acl
)) {
919 /* XXX: export posix_acl_clone? */
920 size
= sizeof(struct posix_acl
) +
921 lower_acl
->a_count
* sizeof(struct posix_acl_entry
);
922 acl
= kmemdup(lower_acl
, size
, GFP_KERNEL
);
923 posix_acl_release(lower_acl
);
926 return ERR_PTR(-ENOMEM
);
928 refcount_set(&acl
->a_refcount
, 1);
930 err
= shift_acl_ids(from_ns
, to_ns
, acl
);
941 shiftfs_posix_acl_xattr_get(const struct xattr_handler
*handler
,
942 struct dentry
*dentry
, struct inode
*inode
,
943 const char *name
, void *buffer
, size_t size
)
945 struct inode
*loweri
= inode
->i_private
;
948 ret
= shiftfs_xattr_get(NULL
, dentry
, inode
, handler
->name
,
954 shift_acl_xattr_ids(loweri
->i_sb
->s_user_ns
, inode
->i_sb
->s_user_ns
,
956 inode_unlock(loweri
);
961 shiftfs_posix_acl_xattr_set(const struct xattr_handler
*handler
,
962 struct user_namespace
*ns
,
963 struct dentry
*dentry
, struct inode
*inode
,
964 const char *name
, const void *value
,
965 size_t size
, int flags
)
967 struct inode
*loweri
= inode
->i_private
;
970 if (!IS_POSIXACL(loweri
) || !loweri
->i_op
->set_acl
)
972 if (handler
->flags
== ACL_TYPE_DEFAULT
&& !S_ISDIR(inode
->i_mode
))
973 return value
? -EACCES
: 0;
974 if (!inode_owner_or_capable(&init_user_ns
, inode
))
978 shift_acl_xattr_ids(inode
->i_sb
->s_user_ns
,
979 loweri
->i_sb
->s_user_ns
,
980 (void *)value
, size
);
981 err
= shiftfs_setxattr(dentry
, inode
, handler
->name
, value
,
984 err
= shiftfs_removexattr(&init_user_ns
, dentry
, handler
->name
);
988 shiftfs_copyattr(loweri
, inode
);
993 static const struct xattr_handler
994 shiftfs_posix_acl_access_xattr_handler
= {
995 .name
= XATTR_NAME_POSIX_ACL_ACCESS
,
996 .flags
= ACL_TYPE_ACCESS
,
997 .get
= shiftfs_posix_acl_xattr_get
,
998 .set
= shiftfs_posix_acl_xattr_set
,
1001 static const struct xattr_handler
1002 shiftfs_posix_acl_default_xattr_handler
= {
1003 .name
= XATTR_NAME_POSIX_ACL_DEFAULT
,
1004 .flags
= ACL_TYPE_DEFAULT
,
1005 .get
= shiftfs_posix_acl_xattr_get
,
1006 .set
= shiftfs_posix_acl_xattr_set
,
1009 #else /* !CONFIG_SHIFT_FS_POSIX_ACL */
1011 #define shiftfs_get_acl NULL
1013 #endif /* CONFIG_SHIFT_FS_POSIX_ACL */
1015 static const struct inode_operations shiftfs_dir_inode_operations
= {
1016 .lookup
= shiftfs_lookup
,
1017 .mkdir
= shiftfs_mkdir
,
1018 .symlink
= shiftfs_symlink
,
1019 .unlink
= shiftfs_unlink
,
1020 .rmdir
= shiftfs_rmdir
,
1021 .rename
= shiftfs_rename
,
1022 .link
= shiftfs_link
,
1023 .setattr
= shiftfs_setattr
,
1024 .create
= shiftfs_create
,
1025 .mknod
= shiftfs_mknod
,
1026 .permission
= shiftfs_permission
,
1027 .getattr
= shiftfs_getattr
,
1028 .listxattr
= shiftfs_listxattr
,
1029 .get_inode_acl
= shiftfs_get_acl
,
1032 static const struct inode_operations shiftfs_file_inode_operations
= {
1033 .fiemap
= shiftfs_fiemap
,
1034 .getattr
= shiftfs_getattr
,
1035 .get_inode_acl
= shiftfs_get_acl
,
1036 .listxattr
= shiftfs_listxattr
,
1037 .permission
= shiftfs_permission
,
1038 .setattr
= shiftfs_setattr
,
1039 .tmpfile
= shiftfs_tmpfile
,
1042 static const struct inode_operations shiftfs_special_inode_operations
= {
1043 .getattr
= shiftfs_getattr
,
1044 .get_inode_acl
= shiftfs_get_acl
,
1045 .listxattr
= shiftfs_listxattr
,
1046 .permission
= shiftfs_permission
,
1047 .setattr
= shiftfs_setattr
,
1050 static const struct inode_operations shiftfs_symlink_inode_operations
= {
1051 .getattr
= shiftfs_getattr
,
1052 .get_link
= shiftfs_get_link
,
1053 .listxattr
= shiftfs_listxattr
,
1054 .setattr
= shiftfs_setattr
,
1057 static struct file
*shiftfs_open_realfile(const struct file
*file
,
1058 struct inode
*realinode
)
1060 struct file
*realfile
;
1061 const struct cred
*old_cred
;
1062 struct inode
*inode
= file_inode(file
);
1063 struct dentry
*lowerd
= file
->f_path
.dentry
->d_fsdata
;
1064 struct shiftfs_super_info
*info
= inode
->i_sb
->s_fs_info
;
1065 struct path realpath
= { .mnt
= info
->mnt
, .dentry
= lowerd
};
1067 old_cred
= shiftfs_override_creds(inode
->i_sb
);
1068 realfile
= open_with_fake_path(&realpath
, file
->f_flags
, realinode
,
1069 info
->creator_cred
);
1070 revert_creds(old_cred
);
1075 #define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1077 static int shiftfs_change_flags(struct file
*file
, unsigned int flags
)
1079 struct inode
*inode
= file_inode(file
);
1082 /* if some flag changed that cannot be changed then something's amiss */
1083 if (WARN_ON((file
->f_flags
^ flags
) & ~SHIFTFS_SETFL_MASK
))
1086 flags
&= SHIFTFS_SETFL_MASK
;
1088 if (((flags
^ file
->f_flags
) & O_APPEND
) && IS_APPEND(inode
))
1091 if (flags
& O_DIRECT
) {
1092 if (!file
->f_mapping
->a_ops
||
1093 !file
->f_mapping
->a_ops
->direct_IO
)
1097 if (file
->f_op
->check_flags
) {
1098 err
= file
->f_op
->check_flags(flags
);
1103 spin_lock(&file
->f_lock
);
1104 file
->f_flags
= (file
->f_flags
& ~SHIFTFS_SETFL_MASK
) | flags
;
1105 spin_unlock(&file
->f_lock
);
1110 static int shiftfs_open(struct inode
*inode
, struct file
*file
)
1112 struct file
*realfile
;
1114 realfile
= shiftfs_open_realfile(file
, inode
->i_private
);
1115 if (IS_ERR(realfile
))
1116 return PTR_ERR(realfile
);
1118 file
->private_data
= realfile
;
1119 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1120 file
->f_mapping
= realfile
->f_mapping
;
1125 static int shiftfs_dir_open(struct inode
*inode
, struct file
*file
)
1127 struct file
*realfile
;
1128 const struct cred
*oldcred
;
1129 struct dentry
*lowerd
= file
->f_path
.dentry
->d_fsdata
;
1130 struct shiftfs_super_info
*info
= inode
->i_sb
->s_fs_info
;
1131 struct path realpath
= { .mnt
= info
->mnt
, .dentry
= lowerd
};
1133 oldcred
= shiftfs_override_creds(file
->f_path
.dentry
->d_sb
);
1134 realfile
= dentry_open(&realpath
, file
->f_flags
| O_NOATIME
,
1135 info
->creator_cred
);
1136 revert_creds(oldcred
);
1137 if (IS_ERR(realfile
))
1138 return PTR_ERR(realfile
);
1140 file
->private_data
= realfile
;
1145 static int shiftfs_release(struct inode
*inode
, struct file
*file
)
1147 struct file
*realfile
= file
->private_data
;
/* ->release() for directories: identical to the regular-file release path. */
static int shiftfs_dir_release(struct inode *inode, struct file *file)
{
	int ret = shiftfs_release(inode, file);

	return ret;
}
1160 static loff_t
shiftfs_dir_llseek(struct file
*file
, loff_t offset
, int whence
)
1162 struct file
*realfile
= file
->private_data
;
1164 return vfs_llseek(realfile
, offset
, whence
);
1167 static loff_t
shiftfs_file_llseek(struct file
*file
, loff_t offset
, int whence
)
1169 struct inode
*realinode
= file_inode(file
)->i_private
;
1171 return generic_file_llseek_size(file
, offset
, whence
,
1172 realinode
->i_sb
->s_maxbytes
,
1173 i_size_read(realinode
));
1176 /* XXX: Need to figure out what to do about atime updates, maybe other
1177 * timestamps too ... ref. ovl_file_accessed() */
1179 static rwf_t
shiftfs_iocb_to_rwf(struct kiocb
*iocb
)
1181 int ifl
= iocb
->ki_flags
;
1184 if (ifl
& IOCB_NOWAIT
)
1185 flags
|= RWF_NOWAIT
;
1186 if (ifl
& IOCB_HIPRI
)
1188 if (ifl
& IOCB_DSYNC
)
1190 if (ifl
& IOCB_SYNC
)
1196 static int shiftfs_real_fdget(const struct file
*file
, struct fd
*lowerfd
)
1198 struct file
*realfile
;
1200 if (file
->f_op
->open
!= shiftfs_open
&&
1201 file
->f_op
->open
!= shiftfs_dir_open
)
1204 realfile
= file
->private_data
;
1206 lowerfd
->file
= realfile
;
1208 /* Did the flags change since open? */
1209 if (unlikely(file
->f_flags
& ~lowerfd
->file
->f_flags
))
1210 return shiftfs_change_flags(lowerfd
->file
, file
->f_flags
);
1215 static ssize_t
shiftfs_read_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
1217 struct file
*file
= iocb
->ki_filp
;
1219 const struct cred
*oldcred
;
1222 if (!iov_iter_count(iter
))
1225 ret
= shiftfs_real_fdget(file
, &lowerfd
);
1229 oldcred
= shiftfs_override_creds(file
->f_path
.dentry
->d_sb
);
1230 ret
= vfs_iter_read(lowerfd
.file
, iter
, &iocb
->ki_pos
,
1231 shiftfs_iocb_to_rwf(iocb
));
1232 revert_creds(oldcred
);
1234 shiftfs_file_accessed(file
);
1240 static ssize_t
shiftfs_write_iter(struct kiocb
*iocb
, struct iov_iter
*iter
)
1242 struct file
*file
= iocb
->ki_filp
;
1243 struct inode
*inode
= file_inode(file
);
1245 const struct cred
*oldcred
;
1248 if (!iov_iter_count(iter
))
1253 shiftfs_copyattr(inode
->i_private
, inode
);
1254 ret
= file_remove_privs(file
);
1258 ret
= shiftfs_real_fdget(file
, &lowerfd
);
1262 oldcred
= shiftfs_override_creds(file
->f_path
.dentry
->d_sb
);
1263 file_start_write(lowerfd
.file
);
1264 ret
= vfs_iter_write(lowerfd
.file
, iter
, &iocb
->ki_pos
,
1265 shiftfs_iocb_to_rwf(iocb
));
1266 file_end_write(lowerfd
.file
);
1267 revert_creds(oldcred
);
1270 shiftfs_copyattr(inode
->i_private
, inode
);
1275 inode_unlock(inode
);
1279 static int shiftfs_fsync(struct file
*file
, loff_t start
, loff_t end
,
1283 const struct cred
*oldcred
;
1286 ret
= shiftfs_real_fdget(file
, &lowerfd
);
1290 oldcred
= shiftfs_override_creds(file
->f_path
.dentry
->d_sb
);
1291 ret
= vfs_fsync_range(lowerfd
.file
, start
, end
, datasync
);
1292 revert_creds(oldcred
);
1298 static int shiftfs_mmap(struct file
*file
, struct vm_area_struct
*vma
)
1300 struct file
*realfile
= file
->private_data
;
1301 const struct cred
*oldcred
;
1304 if (!realfile
->f_op
->mmap
)
1307 if (WARN_ON(file
!= vma
->vm_file
))
1310 oldcred
= shiftfs_override_creds(file
->f_path
.dentry
->d_sb
);
1311 vma
->vm_file
= get_file(realfile
);
1312 ret
= call_mmap(vma
->vm_file
, vma
);
1313 revert_creds(oldcred
);
1315 shiftfs_file_accessed(file
);
1319 * Drop refcount from new vm_file value and restore original
1322 vma
->vm_file
= file
;
1325 /* Drop refcount from previous vm_file value */
1332 static long shiftfs_fallocate(struct file
*file
, int mode
, loff_t offset
,
1335 struct inode
*inode
= file_inode(file
);
1336 struct inode
*loweri
= inode
->i_private
;
1338 const struct cred
*oldcred
;
1341 ret
= shiftfs_real_fdget(file
, &lowerfd
);
1345 oldcred
= shiftfs_override_creds(file
->f_path
.dentry
->d_sb
);
1346 ret
= vfs_fallocate(lowerfd
.file
, mode
, offset
, len
);
1347 revert_creds(oldcred
);
1350 shiftfs_copyattr(loweri
, inode
);
1356 static int shiftfs_fadvise(struct file
*file
, loff_t offset
, loff_t len
,
1360 const struct cred
*oldcred
;
1363 ret
= shiftfs_real_fdget(file
, &lowerfd
);
1367 oldcred
= shiftfs_override_creds(file
->f_path
.dentry
->d_sb
);
1368 ret
= vfs_fadvise(lowerfd
.file
, offset
, len
, advice
);
1369 revert_creds(oldcred
);
1375 static int shiftfs_override_ioctl_creds(int cmd
, const struct super_block
*sb
,
1376 const struct cred
**oldcred
,
1377 struct cred
**newcred
)
1379 struct shiftfs_super_info
*sbinfo
= sb
->s_fs_info
;
1380 kuid_t fsuid
= current_fsuid();
1381 kgid_t fsgid
= current_fsgid();
1383 *oldcred
= shiftfs_override_creds(sb
);
1385 *newcred
= prepare_creds();
1387 revert_creds(*oldcred
);
1391 (*newcred
)->fsuid
= shift_kuid(sb
->s_user_ns
, sbinfo
->userns
, fsuid
);
1392 (*newcred
)->fsgid
= shift_kgid(sb
->s_user_ns
, sbinfo
->userns
, fsgid
);
1394 /* clear all caps to prevent bypassing capable() checks */
1395 cap_clear((*newcred
)->cap_bset
);
1396 cap_clear((*newcred
)->cap_effective
);
1397 cap_clear((*newcred
)->cap_inheritable
);
1398 cap_clear((*newcred
)->cap_permitted
);
1400 if (cmd
== BTRFS_IOC_SNAP_DESTROY
) {
1401 kuid_t kuid_root
= make_kuid(sb
->s_user_ns
, 0);
1403 * Allow the root user in the container to remove subvolumes
1406 if (uid_valid(kuid_root
) && uid_eq(fsuid
, kuid_root
))
1407 cap_raise((*newcred
)->cap_effective
, CAP_DAC_OVERRIDE
);
1410 put_cred(override_creds(*newcred
));
/*
 * Undo shiftfs_override_ioctl_creds().
 *
 * @oldcred: credentials that were in effect before the override
 * @newcred: temporary credentials prepared for the ioctl
 *
 * Thin alias for shiftfs_revert_object_creds(), which performs the revert.
 * The callee returns void, so it is called as a plain statement:
 * "return <void expression>;" in a void function is not valid ISO C and is
 * only accepted as a compiler extension.
 */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
/*
 * is_btrfs_snap_ioctl - test whether @cmd is a btrfs snapshot-create ioctl.
 *
 * The visible condition matches BTRFS_IOC_SNAP_CREATE and
 * BTRFS_IOC_SNAP_CREATE_V2.
 *
 * NOTE(review): the return statements are not visible; presumably true on a
 * match and false otherwise.
 */
1420 static inline bool is_btrfs_snap_ioctl(int cmd
)
1422 if ((cmd
== BTRFS_IOC_SNAP_CREATE
) || (cmd
== BTRFS_IOC_SNAP_CREATE_V2
))
/*
 * shiftfs_btrfs_ioctl_fd_restore - write the saved btrfs ioctl argument back.
 *
 * @cmd: ioctl command
 * @fd:  file descriptor associated with the replacement (its use is not
 *       visible in this view)
 * @arg: user pointer to the ioctl argument
 * @v1:  kernel copy used for BTRFS_IOC_SNAP_CREATE
 * @v2:  kernel copy used otherwise
 *
 * For BTRFS_IOC_SNAP_CREATE the visible code copies *v1 to @arg with
 * copy_to_user(); the other visible copy writes *v2. Returns -EFAULT when
 * the copy result is non-zero and 0 otherwise.
 *
 * NOTE(review): the action taken when is_btrfs_snap_ioctl() is false, and
 * any cleanup between the copy and the return, are not visible here.
 */
1428 static int shiftfs_btrfs_ioctl_fd_restore(int cmd
, int fd
, void __user
*arg
,
1429 struct btrfs_ioctl_vol_args
*v1
,
1430 struct btrfs_ioctl_vol_args_v2
*v2
)
1434 if (!is_btrfs_snap_ioctl(cmd
))
1437 if (cmd
== BTRFS_IOC_SNAP_CREATE
)
1438 ret
= copy_to_user(arg
, v1
, sizeof(*v1
));
1440 ret
= copy_to_user(arg
, v2
, sizeof(*v2
));
1446 return ret
? -EFAULT
: 0;
/*
 * shiftfs_btrfs_ioctl_fd_replace - prepare a btrfs snapshot ioctl argument
 * that refers to a lower file.
 *
 * @cmd: ioctl command
 * @arg: user pointer to the ioctl argument
 * @b1:  out parameter for the struct btrfs_ioctl_vol_args copy
 * @b2:  out parameter for the struct btrfs_ioctl_vol_args_v2 copy
 * (remaining parameters are not visible in this view; *newfd is written
 * below)
 *
 * Visible steps: duplicate the user argument with memdup_user() (v1 for
 * BTRFS_IOC_SNAP_CREATE, v2 otherwise), resolve the lower file of src.file
 * with shiftfs_real_fdget(), mark lfd with FDPUT_FPUT, reserve a descriptor
 * with get_unused_fd_flags() using the lower file f_flags, install the lower
 * file on it with fd_install(), and copy the v1 or v2 structure back to user
 * space with copy_to_user(). shiftfs_btrfs_ioctl_fd_restore() is also called
 * from this function.
 *
 * NOTE(review): how src is obtained, where the new descriptor is stored in
 * v1/v2, and on which path the restore call runs (presumably an error path)
 * are not visible here.
 */
1449 static int shiftfs_btrfs_ioctl_fd_replace(int cmd
, void __user
*arg
,
1450 struct btrfs_ioctl_vol_args
**b1
,
1451 struct btrfs_ioctl_vol_args_v2
**b2
,
1457 struct btrfs_ioctl_vol_args
*v1
= NULL
;
1458 struct btrfs_ioctl_vol_args_v2
*v2
= NULL
;
1463 if (!is_btrfs_snap_ioctl(cmd
))
1466 if (cmd
== BTRFS_IOC_SNAP_CREATE
) {
1467 v1
= memdup_user(arg
, sizeof(*v1
));
1472 v2
= memdup_user(arg
, sizeof(*v2
));
1484 ret
= shiftfs_real_fdget(src
.file
, &lfd
);
1491 * shiftfs_real_fdget() does not take a reference to lfd.file, so
1492 * take a reference here to offset the one which will be put by
1493 * close_fd(), and make sure that reference is put on fdput(lfd).
1496 lfd
.flags
|= FDPUT_FPUT
;
1499 *newfd
= get_unused_fd_flags(lfd
.file
->f_flags
);
1506 fd_install(*newfd
, lfd
.file
);
1508 if (cmd
== BTRFS_IOC_SNAP_CREATE
) {
1510 ret
= copy_to_user(arg
, v1
, sizeof(*v1
));
1514 ret
= copy_to_user(arg
, v2
, sizeof(*v2
));
1522 shiftfs_btrfs_ioctl_fd_restore(cmd
, *newfd
, arg
, v1
, v2
);
/*
 * shiftfs_real_ioctl - forward an ioctl to the lower file.
 *
 * Visible sequence:
 *  1. shiftfs_btrfs_ioctl_fd_replace() is given the command, the user
 *     argument and the addresses of btrfs_v1/btrfs_v2,
 *  2. the lower file is resolved with shiftfs_real_fdget(),
 *  3. shiftfs_override_ioctl_creds() installs the ioctl credentials,
 *  4. vfs_ioctl() runs on the lower file with the original cmd/arg,
 *  5. shiftfs_revert_ioctl_creds() restores the credentials,
 *  6. shiftfs_copyattr() and shiftfs_copyflags() are called with the lower
 *     inode first and the shiftfs inode second,
 *  7. shiftfs_btrfs_ioctl_fd_restore() is called and its result is kept in
 *     err, separate from ret.
 *
 * NOTE(review): error checks, labels, frees and the final return value are
 * not visible in this view.
 */
1535 static long shiftfs_real_ioctl(struct file
*file
, unsigned int cmd
,
1539 struct cred
*newcred
;
1540 const struct cred
*oldcred
;
1542 long err
= 0, ret
= 0;
1543 void __user
*argp
= (void __user
*)arg
;
1544 struct super_block
*sb
= file
->f_path
.dentry
->d_sb
;
1545 struct btrfs_ioctl_vol_args
*btrfs_v1
= NULL
;
1546 struct btrfs_ioctl_vol_args_v2
*btrfs_v2
= NULL
;
1548 ret
= shiftfs_btrfs_ioctl_fd_replace(cmd
, argp
, &btrfs_v1
, &btrfs_v2
,
1553 ret
= shiftfs_real_fdget(file
, &lowerfd
);
1557 ret
= shiftfs_override_ioctl_creds(cmd
, sb
, &oldcred
, &newcred
);
1561 ret
= vfs_ioctl(lowerfd
.file
, cmd
, arg
);
1563 shiftfs_revert_ioctl_creds(oldcred
, newcred
);
1565 shiftfs_copyattr(file_inode(lowerfd
.file
), file_inode(file
));
1566 shiftfs_copyflags(file_inode(lowerfd
.file
), file_inode(file
));
1572 err
= shiftfs_btrfs_ioctl_fd_restore(cmd
, newfd
, argp
,
1573 btrfs_v1
, btrfs_v2
);
/*
 * in_ioctl_whitelist - decide whether an ioctl may be passed through.
 *
 * @flag: ioctl command being tested
 * @arg:  raw ioctl argument, used as a user pointer
 *
 * The visible case labels are BTRFS_IOC_FS_INFO, BTRFS_IOC_SNAP_CREATE,
 * BTRFS_IOC_SNAP_CREATE_V2, BTRFS_IOC_SUBVOL_CREATE,
 * BTRFS_IOC_SUBVOL_CREATE_V2, BTRFS_IOC_SUBVOL_GETFLAGS,
 * BTRFS_IOC_SUBVOL_SETFLAGS and BTRFS_IOC_SNAP_DESTROY. For
 * BTRFS_IOC_SUBVOL_SETFLAGS the flags word is read from user space with
 * copy_from_user() and tested for any bit other than BTRFS_SUBVOL_RDONLY.
 *
 * NOTE(review): the value returned from each case is not visible in this
 * view; presumably listed commands are accepted and the SETFLAGS case is
 * rejected when the copy fails or extra bits are set.
 */
1580 static bool in_ioctl_whitelist(int flag
, unsigned long arg
)
1582 void __user
*argp
= (void __user
*)arg
;
1586 case BTRFS_IOC_FS_INFO
:
1588 case BTRFS_IOC_SNAP_CREATE
:
1590 case BTRFS_IOC_SNAP_CREATE_V2
:
1592 case BTRFS_IOC_SUBVOL_CREATE
:
1594 case BTRFS_IOC_SUBVOL_CREATE_V2
:
1596 case BTRFS_IOC_SUBVOL_GETFLAGS
:
1598 case BTRFS_IOC_SUBVOL_SETFLAGS
:
1599 if (copy_from_user(&flags
, argp
, sizeof(flags
)))
1602 if (flags
& ~BTRFS_SUBVOL_RDONLY
)
1606 case BTRFS_IOC_SNAP_DESTROY
:
/*
 * shiftfs_ioctl - unlocked_ioctl entry point.
 *
 * FS_IOC_GETVERSION, FS_IOC_GETFLAGS and FS_IOC_SETFLAGS have explicit case
 * labels. For other commands the visible test requires both
 * in_ioctl_whitelist() and shiftfs_passthrough_ioctls() on the superblock
 * info to succeed. The command is then handed to shiftfs_real_ioctl().
 *
 * NOTE(review): the value returned when the whitelist/passthrough test fails
 * is not visible in this view.
 */
1613 static long shiftfs_ioctl(struct file
*file
, unsigned int cmd
,
1617 case FS_IOC_GETVERSION
:
1619 case FS_IOC_GETFLAGS
:
1621 case FS_IOC_SETFLAGS
:
1624 if (!in_ioctl_whitelist(cmd
, arg
) ||
1625 !shiftfs_passthrough_ioctls(file
->f_path
.dentry
->d_sb
->s_fs_info
))
1629 return shiftfs_real_ioctl(file
, cmd
, arg
);
/*
 * shiftfs_compat_ioctl - compat_ioctl entry point.
 *
 * FS_IOC32_GETVERSION, FS_IOC32_GETFLAGS and FS_IOC32_SETFLAGS have explicit
 * case labels. For other commands -ENOIOCTLCMD is returned unless both
 * in_ioctl_whitelist() and shiftfs_passthrough_ioctls() on the superblock
 * info succeed. The command is then handed to shiftfs_real_ioctl().
 */
1632 static long shiftfs_compat_ioctl(struct file
*file
, unsigned int cmd
,
1636 case FS_IOC32_GETVERSION
:
1638 case FS_IOC32_GETFLAGS
:
1640 case FS_IOC32_SETFLAGS
:
1643 if (!in_ioctl_whitelist(cmd
, arg
) ||
1644 !shiftfs_passthrough_ioctls(file
->f_path
.dentry
->d_sb
->s_fs_info
))
1645 return -ENOIOCTLCMD
;
1648 return shiftfs_real_ioctl(file
, cmd
, arg
);
/*
 * Selects which lower-level copy primitive shiftfs_copyfile() runs.
 * NOTE(review): the enumerators are not visible in this view; SHIFTFS_DEDUPE
 * is the only one referenced by the visible code.
 */
1651 enum shiftfs_copyop
{
/*
 * shiftfs_copyfile - common helper for copy, clone and dedupe requests.
 *
 * @file_in/@pos_in:   source file and offset
 * @file_out/@pos_out: destination file and offset
 * @len:               number of bytes
 * @flags:             passed unchanged to the lower primitive
 * @op:                which primitive to run
 *
 * The visible code resolves the lower files for @file_out and then @file_in
 * with shiftfs_real_fdget(), installs the creator credentials of the
 * destination superblock, runs one of vfs_copy_file_range(),
 * vfs_clone_file_range() or vfs_dedupe_file_range_one() (the latter under
 * case SHIFTFS_DEDUPE) on the lower files, restores the credentials, and
 * calls shiftfs_copyattr(loweri, inode_out), where loweri is read from
 * inode_out->i_private.
 *
 * NOTE(review): the case labels for the copy and clone calls, error
 * handling, fdput and the return statement are not visible in this view.
 */
1657 static ssize_t
shiftfs_copyfile(struct file
*file_in
, loff_t pos_in
,
1658 struct file
*file_out
, loff_t pos_out
, u64 len
,
1659 unsigned int flags
, enum shiftfs_copyop op
)
1662 struct fd real_in
, real_out
;
1663 const struct cred
*oldcred
;
1664 struct inode
*inode_out
= file_inode(file_out
);
1665 struct inode
*loweri
= inode_out
->i_private
;
1667 ret
= shiftfs_real_fdget(file_out
, &real_out
);
1671 ret
= shiftfs_real_fdget(file_in
, &real_in
);
1677 oldcred
= shiftfs_override_creds(inode_out
->i_sb
);
1680 ret
= vfs_copy_file_range(real_in
.file
, pos_in
, real_out
.file
,
1681 pos_out
, len
, flags
);
1685 ret
= vfs_clone_file_range(real_in
.file
, pos_in
, real_out
.file
,
1686 pos_out
, len
, flags
);
1689 case SHIFTFS_DEDUPE
:
1690 ret
= vfs_dedupe_file_range_one(real_in
.file
, pos_in
,
1691 real_out
.file
, pos_out
, len
,
1695 revert_creds(oldcred
);
1698 shiftfs_copyattr(loweri
, inode_out
);
/*
 * shiftfs_copy_file_range - copy_file_range handler.
 *
 * Forwards all of its arguments to shiftfs_copyfile().
 * NOTE(review): the final argument (the copy operation selector) is not
 * visible in this view.
 */
1706 static ssize_t
shiftfs_copy_file_range(struct file
*file_in
, loff_t pos_in
,
1707 struct file
*file_out
, loff_t pos_out
,
1708 size_t len
, unsigned int flags
)
1710 return shiftfs_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
, flags
,
/*
 * shiftfs_remap_file_range - remap_file_range handler.
 *
 * The visible code tests @remap_flags for any bit outside
 * REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY, selects SHIFTFS_DEDUPE when
 * REMAP_FILE_DEDUP is set, and forwards the request to shiftfs_copyfile().
 *
 * NOTE(review): the result of the unsupported-flag test, the operation
 * chosen when REMAP_FILE_DEDUP is clear, and the trailing arguments of the
 * shiftfs_copyfile() call are not visible in this view.
 */
1714 static loff_t
shiftfs_remap_file_range(struct file
*file_in
, loff_t pos_in
,
1715 struct file
*file_out
, loff_t pos_out
,
1716 loff_t len
, unsigned int remap_flags
)
1718 enum shiftfs_copyop op
;
1720 if (remap_flags
& ~(REMAP_FILE_DEDUP
| REMAP_FILE_ADVISORY
))
1723 if (remap_flags
& REMAP_FILE_DEDUP
)
1724 op
= SHIFTFS_DEDUPE
;
1728 return shiftfs_copyfile(file_in
, pos_in
, file_out
, pos_out
, len
,
/*
 * shiftfs_iterate_shared - directory iteration handler.
 *
 * Takes the lower directory file from file->private_data and runs
 * iterate_dir() on it with @ctx while the superblock creator credentials are
 * installed; the previous credentials are restored afterwards.
 *
 * NOTE(review): the return statement is not visible in this view.
 */
1732 static int shiftfs_iterate_shared(struct file
*file
, struct dir_context
*ctx
)
1734 const struct cred
*oldcred
;
1736 struct file
*realfile
= file
->private_data
;
1738 oldcred
= shiftfs_override_creds(file
->f_path
.dentry
->d_sb
);
1739 err
= iterate_dir(realfile
, ctx
);
1740 revert_creds(oldcred
);
/*
 * File operations table; shiftfs_fill_inode() assigns it to i_fop in the
 * same branch that installs shiftfs_file_inode_operations. Splice read and
 * write use the generic helpers generic_file_splice_read and
 * iter_file_splice_write; every other entry is a shiftfs handler.
 */
1745 const struct file_operations shiftfs_file_operations
= {
1746 .open
= shiftfs_open
,
1747 .release
= shiftfs_release
,
1748 .llseek
= shiftfs_file_llseek
,
1749 .read_iter
= shiftfs_read_iter
,
1750 .write_iter
= shiftfs_write_iter
,
1751 .fsync
= shiftfs_fsync
,
1752 .mmap
= shiftfs_mmap
,
1753 .fallocate
= shiftfs_fallocate
,
1754 .fadvise
= shiftfs_fadvise
,
1755 .unlocked_ioctl
= shiftfs_ioctl
,
1756 .compat_ioctl
= shiftfs_compat_ioctl
,
1757 .copy_file_range
= shiftfs_copy_file_range
,
1758 .remap_file_range
= shiftfs_remap_file_range
,
1759 .splice_read
= generic_file_splice_read
,
1760 .splice_write
= iter_file_splice_write
,
/*
 * File operations table; shiftfs_fill_inode() assigns it to i_fop in the
 * same branch that installs shiftfs_dir_inode_operations. It shares the
 * fsync and ioctl handlers with shiftfs_file_operations and uses
 * generic_read_dir for .read.
 */
1763 const struct file_operations shiftfs_dir_operations
= {
1764 .open
= shiftfs_dir_open
,
1765 .release
= shiftfs_dir_release
,
1766 .compat_ioctl
= shiftfs_compat_ioctl
,
1767 .fsync
= shiftfs_fsync
,
1768 .iterate_shared
= shiftfs_iterate_shared
,
1769 .llseek
= shiftfs_dir_llseek
,
1770 .read
= generic_read_dir
,
1771 .unlocked_ioctl
= shiftfs_ioctl
,
/*
 * Address space operations installed on i_mapping by shiftfs_fill_inode().
 * Only direct_IO is set here, to the no-op helper.
 */
1774 static const struct address_space_operations shiftfs_aops
= {
1775 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1776 .direct_IO
= noop_direct_IO
,
/*
 * shiftfs_fill_inode - initialise a shiftfs inode.
 *
 * @inode:  shiftfs inode to set up
 * @ino:    inode number (its use is not visible in this view)
 * @mode:   file mode; its S_IFMT bits select the operation tables
 * @dev:    device number passed to init_special_inode()
 * @dentry: dentry whose d_inode supplies the lower inode
 *
 * The visible code sets S_NOCMTIME, stores @mode, and switches on the file
 * type to install one of four sets of operations: directory (i_op and
 * i_fop), symlink (i_op), regular file (i_op, i_fop and shiftfs_aops on
 * i_mapping), or special (i_op plus init_special_inode()). It then reads the
 * lower inode from dentry->d_inode, sets IOP_NOFOLLOW when that inode has no
 * get_link operation, copies attributes and flags from the lower inode, and
 * mirrors its link count with set_nlink().
 *
 * NOTE(review): the case labels and any early return (for example for a
 * missing dentry) are not visible in this view.
 */
1779 static void shiftfs_fill_inode(struct inode
*inode
, unsigned long ino
,
1780 umode_t mode
, dev_t dev
, struct dentry
*dentry
)
1782 struct inode
*loweri
;
1785 inode
->i_flags
|= S_NOCMTIME
;
1788 inode
->i_mode
= mode
;
1789 switch (mode
& S_IFMT
) {
1791 inode
->i_op
= &shiftfs_dir_inode_operations
;
1792 inode
->i_fop
= &shiftfs_dir_operations
;
1795 inode
->i_op
= &shiftfs_symlink_inode_operations
;
1798 inode
->i_op
= &shiftfs_file_inode_operations
;
1799 inode
->i_fop
= &shiftfs_file_operations
;
1800 inode
->i_mapping
->a_ops
= &shiftfs_aops
;
1803 inode
->i_op
= &shiftfs_special_inode_operations
;
1804 init_special_inode(inode
, mode
, dev
);
1811 loweri
= dentry
->d_inode
;
1812 if (!loweri
->i_op
->get_link
)
1813 inode
->i_opflags
|= IOP_NOFOLLOW
;
1815 shiftfs_copyattr(loweri
, inode
);
1816 shiftfs_copyflags(loweri
, inode
);
1817 set_nlink(inode
, loweri
->i_nlink
);
/*
 * shiftfs_show_options - emit mount options into the seq_file @m.
 *
 * The visible code emits the "mark" option with seq_show_option() and, when
 * sbinfo->passthrough is non-zero, appends ",passthrough=<value>".
 *
 * NOTE(review): the condition guarding the "mark" output and the return
 * statement are not visible in this view.
 */
1820 static int shiftfs_show_options(struct seq_file
*m
, struct dentry
*dentry
)
1822 struct super_block
*sb
= dentry
->d_sb
;
1823 struct shiftfs_super_info
*sbinfo
= sb
->s_fs_info
;
1826 seq_show_option(m
, "mark", NULL
);
1828 if (sbinfo
->passthrough
)
1829 seq_printf(m
, ",passthrough=%u", sbinfo
->passthrough
);
/*
 * shiftfs_statfs - statfs handler.
 *
 * Builds a path from the stored lower mount (sbinfo->mnt) and the lower root
 * dentry kept in sb->s_root->d_fsdata, and queries it with vfs_statfs().
 * Unless stat passthrough is enabled, f_type is overwritten with the magic
 * number of this superblock.
 *
 * NOTE(review): the error check after vfs_statfs() and the return statement
 * are not visible in this view.
 */
1834 static int shiftfs_statfs(struct dentry
*dentry
, struct kstatfs
*buf
)
1836 struct super_block
*sb
= dentry
->d_sb
;
1837 struct shiftfs_super_info
*sbinfo
= sb
->s_fs_info
;
1838 struct dentry
*root
= sb
->s_root
;
1839 struct dentry
*realroot
= root
->d_fsdata
;
1840 struct path realpath
= { .mnt
= sbinfo
->mnt
, .dentry
= realroot
};
1843 err
= vfs_statfs(&realpath
, buf
);
1847 if (!shiftfs_passthrough_statfs(sbinfo
))
1848 buf
->f_type
= sb
->s_magic
;
/*
 * shiftfs_evict_inode - evict_inode handler.
 *
 * Reads the lower inode pointer from inode->i_private.
 * NOTE(review): everything done with that pointer is outside this view.
 */
1853 static void shiftfs_evict_inode(struct inode
*inode
)
1855 struct inode
*loweri
= inode
->i_private
;
/*
 * shiftfs_put_super - release per-superblock resources.
 *
 * The visible code drops the reference on the lower mount with mntput() and
 * on the creator credentials with put_cred().
 *
 * NOTE(review): any release of sbinfo->userns or of sbinfo itself is not
 * visible in this view.
 */
1863 static void shiftfs_put_super(struct super_block
*sb
)
1865 struct shiftfs_super_info
*sbinfo
= sb
->s_fs_info
;
1868 mntput(sbinfo
->mnt
);
1869 put_cred(sbinfo
->creator_cred
);
/*
 * Generic xattr handler routing get/set to the shiftfs implementations.
 * NOTE(review): the prefix/name member is not visible in this view.
 */
1874 static const struct xattr_handler shiftfs_xattr_handler
= {
1876 .get
= shiftfs_xattr_get
,
1877 .set
= shiftfs_xattr_set
,
/*
 * xattr handler list installed as sb->s_xattr in shiftfs_fill_super(). The
 * POSIX ACL handlers are listed only when CONFIG_SHIFT_FS_POSIX_ACL is
 * defined; the generic shiftfs handler follows them.
 */
1880 const struct xattr_handler
*shiftfs_xattr_handlers
[] = {
1881 #ifdef CONFIG_SHIFT_FS_POSIX_ACL
1882 &shiftfs_posix_acl_access_xattr_handler
,
1883 &shiftfs_posix_acl_default_xattr_handler
,
1885 &shiftfs_xattr_handler
,
/*
 * passthrough_is_subset - check that @new_flags sets no bit missing from
 * @old_flags.
 *
 * The visible test is true when @new_flags contains a bit that @old_flags
 * lacks.
 * NOTE(review): the return statements are not visible; presumably false in
 * that case and true otherwise.
 */
1889 static inline bool passthrough_is_subset(int old_flags
, int new_flags
)
1891 if ((new_flags
& old_flags
) != new_flags
)
/*
 * shiftfs_super_check_flags - compare old and new superblock flags.
 *
 * The first six visible tests each detect a flag (SB_RDONLY, SB_NOSUID,
 * SB_NODEV, SB_NOEXEC, SB_NOATIME, SB_NODIRATIME) that is set in @old_flags
 * but cleared in @new_flags. The last test detects SB_POSIXACL being set in
 * @new_flags while absent from @old_flags.
 *
 * NOTE(review): the value returned when a test matches is not visible in
 * this view; presumably an error code, with 0 when nothing matches.
 */
1897 static int shiftfs_super_check_flags(unsigned long old_flags
,
1898 unsigned long new_flags
)
1900 if ((old_flags
& SB_RDONLY
) && !(new_flags
& SB_RDONLY
))
1903 if ((old_flags
& SB_NOSUID
) && !(new_flags
& SB_NOSUID
))
1906 if ((old_flags
& SB_NODEV
) && !(new_flags
& SB_NODEV
))
1909 if ((old_flags
& SB_NOEXEC
) && !(new_flags
& SB_NOEXEC
))
1912 if ((old_flags
& SB_NOATIME
) && !(new_flags
& SB_NOATIME
))
1915 if ((old_flags
& SB_NODIRATIME
) && !(new_flags
& SB_NODIRATIME
))
1918 if (!(old_flags
& SB_POSIXACL
) && (new_flags
& SB_POSIXACL
))
/*
 * shiftfs_remount - remount_fs handler.
 *
 * Parses @data into a zero-initialised temporary shiftfs_super_info,
 * validates the requested flags against sb->s_flags with
 * shiftfs_super_check_flags(), tests the mark option, and, when the
 * passthrough value differs, checks it against info->passthrough_mark with
 * passthrough_is_subset() before storing it in info->passthrough.
 *
 * NOTE(review): the visible mark condition holds whenever info->mark is set,
 * regardless of new.mark; confirm this matches the intent of the adjacent
 * comment. Error returns are not visible in this view.
 */
1924 static int shiftfs_remount(struct super_block
*sb
, int *flags
, char *data
)
1927 struct shiftfs_super_info
new = {};
1928 struct shiftfs_super_info
*info
= sb
->s_fs_info
;
1930 err
= shiftfs_parse_mount_options(&new, data
);
1934 err
= shiftfs_super_check_flags(sb
->s_flags
, *flags
);
1938 /* Mark mount option cannot be changed. */
1939 if (info
->mark
|| (info
->mark
!= new.mark
))
1942 if (info
->passthrough
!= new.passthrough
) {
1943 /* Don't allow exceeding passthrough options of mark mount. */
1944 if (!passthrough_is_subset(info
->passthrough_mark
,
1948 info
->passthrough
= new.passthrough
;
/* Superblock operations installed as sb->s_op in shiftfs_fill_super(). */
1954 static const struct super_operations shiftfs_super_ops
= {
1955 .put_super
= shiftfs_put_super
,
1956 .show_options
= shiftfs_show_options
,
1957 .statfs
= shiftfs_statfs
,
1958 .remount_fs
= shiftfs_remount
,
1959 .evict_inode
= shiftfs_evict_inode
,
/*
 * Argument bundle handed from shiftfs_mount() to shiftfs_fill_super().
 * NOTE(review): the members are not visible in this view; the visible users
 * initialise it with { data, dev_name } and read ->data and ->path.
 */
1962 struct shiftfs_data
{
/*
 * shiftfs_super_force_flags - inherit restrictive flags from the lower
 * superblock.
 *
 * ORs into sb->s_flags whichever of SB_RDONLY, SB_NOSUID, SB_NODEV,
 * SB_NOEXEC, SB_NOATIME and SB_NODIRATIME are set in @lower_flags, and
 * clears SB_POSIXACL on @sb when @lower_flags does not have it.
 */
1967 static void shiftfs_super_force_flags(struct super_block
*sb
,
1968 unsigned long lower_flags
)
1970 sb
->s_flags
|= lower_flags
& (SB_RDONLY
| SB_NOSUID
| SB_NODEV
|
1971 SB_NOEXEC
| SB_NOATIME
| SB_NODIRATIME
);
1973 if (!(lower_flags
& SB_POSIXACL
))
1974 sb
->s_flags
&= ~SB_POSIXACL
;
/*
 * shiftfs_fill_super - populate a new shiftfs superblock.
 *
 * @sb:       superblock being set up
 * @raw_data: struct shiftfs_data carrying the option string (->data) and the
 *            lower path (->path)
 *
 * Visible steps, in order:
 *  - allocate the zeroed shiftfs_super_info and parse the mount options,
 *  - perform a CAP_SYS_ADMIN check in the current user namespace,
 *  - duplicate the path string, resolve it with kern_path(LOOKUP_FOLLOW),
 *    test that it is a directory and that its mount is not idmapped,
 *  - set SB_POSIXACL on the new superblock,
 *  - first leg (declares lower_sb; judging by its comments, the mark mount
 *    case): require CAP_SYS_ADMIN in the lower superblock user namespace,
 *    set SB_I_NOEXEC, inherit restrictive flags from the lower superblock,
 *    and either refer a nested shiftfs mount back to the original mark mount
 *    (reusing its mnt, creator credentials and passthrough_mark) or take
 *    references on the resolved path and build creator credentials from
 *    prepare_creds() with CAP_SYS_RESOURCE lowered,
 *  - second leg: test that the path is on a shiftfs superblock with the mark
 *    flag and that the requested passthrough passes
 *    passthrough_is_subset(), then take references on the mark mount mnt,
 *    credentials and lower dentry and copy its passthrough value into
 *    passthrough_mark,
 *  - set s_stack_depth to the lower depth plus one and compare it with
 *    FILESYSTEM_MAX_STACK_DEPTH,
 *  - allocate the root inode, fill it as a directory from the lower dentry,
 *    hold the lower inode in i_private, install the superblock, xattr and
 *    dentry operation tables, create the root dentry with d_make_root(),
 *    store the lower dentry in its d_fsdata, take a reference on the lower
 *    user namespace and copy attributes to the root inode.
 *
 * NOTE(review): the branch conditions selecting the two legs, all error
 * paths and labels, and the return statement are not visible in this view.
 * NOTE(review): the first capability check is written with !sbinfo->mark
 * while the comment next to it speaks about mounting a mark; confirm the
 * intended polarity.
 */
1977 static int shiftfs_fill_super(struct super_block
*sb
, void *raw_data
,
1981 struct path path
= {};
1982 struct shiftfs_super_info
*sbinfo_mp
;
1984 struct inode
*inode
= NULL
;
1985 struct dentry
*dentry
= NULL
;
1986 struct shiftfs_data
*data
= raw_data
;
1987 struct shiftfs_super_info
*sbinfo
= NULL
;
1992 sb
->s_fs_info
= kzalloc(sizeof(*sbinfo
), GFP_KERNEL
);
1995 sbinfo
= sb
->s_fs_info
;
1997 err
= shiftfs_parse_mount_options(sbinfo
, data
->data
);
2001 /* to mount a mark, must be userns admin */
2002 if (!sbinfo
->mark
&& !ns_capable(current_user_ns(), CAP_SYS_ADMIN
))
2005 name
= kstrdup(data
->path
, GFP_KERNEL
);
2009 err
= kern_path(name
, LOOKUP_FOLLOW
, &path
);
2013 if (!S_ISDIR(path
.dentry
->d_inode
->i_mode
)) {
2019 * It makes no sense to handle idmapped layers from shiftfs.
2020 * And we didn't support it properly anyway.
2022 if (is_idmapped_mnt(path
.mnt
)) {
2024 pr_err("idmapped layers are currently not supported\n");
2028 sb
->s_flags
|= SB_POSIXACL
;
2031 struct cred
*cred_tmp
;
2032 struct super_block
*lower_sb
= path
.mnt
->mnt_sb
;
2034 /* to mark a mount point, must root wrt lower s_user_ns */
2035 if (!ns_capable(lower_sb
->s_user_ns
, CAP_SYS_ADMIN
)) {
2041 * this part is visible unshifted, so make sure no
2042 * executables that could be used to give suid
2045 sb
->s_iflags
= SB_I_NOEXEC
;
2047 shiftfs_super_force_flags(sb
, lower_sb
->s_flags
);
2050 * Handle nesting of shiftfs mounts by referring this mark
2051 * mount back to the original mark mount. This is more
2052 * efficient and alleviates concerns about stack depth.
2054 if (lower_sb
->s_magic
== SHIFTFS_MAGIC
) {
2055 sbinfo_mp
= lower_sb
->s_fs_info
;
2057 /* Doesn't make sense to mark a mark mount */
2058 if (sbinfo_mp
->mark
) {
2063 if (!passthrough_is_subset(sbinfo_mp
->passthrough
,
2064 sbinfo
->passthrough
)) {
2069 sbinfo
->mnt
= mntget(sbinfo_mp
->mnt
);
2070 dentry
= dget(path
.dentry
->d_fsdata
);
2072 * Copy up the passthrough mount options from the
2073 * parent mark mountpoint.
2075 sbinfo
->passthrough_mark
= sbinfo_mp
->passthrough_mark
;
2076 sbinfo
->creator_cred
= get_cred(sbinfo_mp
->creator_cred
);
2078 sbinfo
->mnt
= mntget(path
.mnt
);
2079 dentry
= dget(path
.dentry
);
2081 * For a new mark passthrough_mark and passthrough
2084 sbinfo
->passthrough_mark
= sbinfo
->passthrough
;
2086 cred_tmp
= prepare_creds();
2091 /* Don't override disk quota limits or use reserved space. */
2092 cap_lower(cred_tmp
->cap_effective
, CAP_SYS_RESOURCE
);
2093 sbinfo
->creator_cred
= cred_tmp
;
2097 * This leg executes if we're admin capable in the namespace,
2098 * so be very careful.
2101 if (path
.dentry
->d_sb
->s_magic
!= SHIFTFS_MAGIC
)
2104 sbinfo_mp
= path
.dentry
->d_sb
->s_fs_info
;
2105 if (!sbinfo_mp
->mark
)
2108 if (!passthrough_is_subset(sbinfo_mp
->passthrough
,
2109 sbinfo
->passthrough
))
2112 sbinfo
->mnt
= mntget(sbinfo_mp
->mnt
);
2113 sbinfo
->creator_cred
= get_cred(sbinfo_mp
->creator_cred
);
2114 dentry
= dget(path
.dentry
->d_fsdata
);
2116 * Copy up passthrough settings from mark mountpoint so we can
2117 * verify when the overlay wants to remount with different
2118 * passthrough settings.
2120 sbinfo
->passthrough_mark
= sbinfo_mp
->passthrough
;
2121 shiftfs_super_force_flags(sb
, path
.mnt
->mnt_sb
->s_flags
);
2124 sb
->s_stack_depth
= dentry
->d_sb
->s_stack_depth
+ 1;
2125 if (sb
->s_stack_depth
> FILESYSTEM_MAX_STACK_DEPTH
) {
2126 printk(KERN_ERR
"shiftfs: maximum stacking depth exceeded\n");
2131 inode
= new_inode(sb
);
2136 shiftfs_fill_inode(inode
, dentry
->d_inode
->i_ino
, S_IFDIR
, 0, dentry
);
2138 ihold(dentry
->d_inode
);
2139 inode
->i_private
= dentry
->d_inode
;
2141 sb
->s_magic
= SHIFTFS_MAGIC
;
2142 sb
->s_maxbytes
= MAX_LFS_FILESIZE
;
2143 sb
->s_op
= &shiftfs_super_ops
;
2144 sb
->s_xattr
= shiftfs_xattr_handlers
;
2145 sb
->s_d_op
= &shiftfs_dentry_ops
;
2146 sb
->s_root
= d_make_root(inode
);
2152 sb
->s_root
->d_fsdata
= dentry
;
2153 sbinfo
->userns
= get_user_ns(dentry
->d_sb
->s_user_ns
);
2154 shiftfs_copyattr(dentry
->d_inode
, sb
->s_root
->d_inode
);
/*
 * shiftfs_mount - mount entry point.
 *
 * Packs the option string (@data) and @dev_name into a struct shiftfs_data
 * on the stack and hands it to mount_nodev() with shiftfs_fill_super() as
 * the fill callback.
 */
2170 static struct dentry
*shiftfs_mount(struct file_system_type
*fs_type
,
2171 int flags
, const char *dev_name
, void *data
)
2173 struct shiftfs_data d
= { data
, dev_name
};
2175 return mount_nodev(fs_type
, flags
, &d
, shiftfs_fill_super
);
/*
 * Filesystem type registered by shiftfs_init(). It carries FS_USERNS_MOUNT
 * and uses kill_anon_super for teardown.
 * NOTE(review): the .name member is not visible in this view.
 */
2178 static struct file_system_type shiftfs_type
= {
2179 .owner
= THIS_MODULE
,
2181 .mount
= shiftfs_mount
,
2182 .kill_sb
= kill_anon_super
,
2183 .fs_flags
= FS_USERNS_MOUNT
,
/* Module init: register the shiftfs filesystem type and return the result. */
2186 static int __init
shiftfs_init(void)
2188 return register_filesystem(&shiftfs_type
);
/* Module exit: unregister the shiftfs filesystem type. */
2191 static void __exit
shiftfs_exit(void)
2193 unregister_filesystem(&shiftfs_type
);
/* Module metadata and registration of the init/exit entry points. */
2196 MODULE_ALIAS_FS("shiftfs");
2197 MODULE_AUTHOR("James Bottomley");
2198 MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>");
2199 MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>");
2200 MODULE_DESCRIPTION("id shifting filesystem");
2201 MODULE_LICENSE("GPL v2");
2202 module_init(shiftfs_init
)
2203 module_exit(shiftfs_exit
)