]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - fs/shiftfs.c
KVM: x86/speculation: Disable Fill buffer clear within guests
[mirror_ubuntu-jammy-kernel.git] / fs / shiftfs.c
CommitLineData
88f5bff3 1#include <linux/btrfs.h>
3ded6d6b 2#include <linux/capability.h>
aa269008
JB
3#include <linux/cred.h>
4#include <linux/mount.h>
88f5bff3 5#include <linux/fdtable.h>
aa269008
JB
6#include <linux/file.h>
7#include <linux/fs.h>
8#include <linux/namei.h>
9#include <linux/module.h>
10#include <linux/kernel.h>
11#include <linux/magic.h>
12#include <linux/parser.h>
3ded6d6b 13#include <linux/security.h>
aa269008
JB
14#include <linux/seq_file.h>
15#include <linux/statfs.h>
16#include <linux/slab.h>
17#include <linux/user_namespace.h>
18#include <linux/uidgid.h>
19#include <linux/xattr.h>
3ded6d6b
CB
20#include <linux/posix_acl.h>
21#include <linux/posix_acl_xattr.h>
22#include <linux/uio.h>
5b0d135d 23#include <linux/fiemap.h>
aa269008
JB
24
25struct shiftfs_super_info {
26 struct vfsmount *mnt;
27 struct user_namespace *userns;
3ded6d6b
CB
28 /* creds of process who created the super block */
29 const struct cred *creator_cred;
aa269008 30 bool mark;
3ded6d6b 31 unsigned int passthrough;
14ba9a56 32 unsigned int passthrough_mark;
aa269008
JB
33};
34
3ded6d6b
CB
35static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
36 umode_t mode, dev_t dev, struct dentry *dentry);
37
38#define SHIFTFS_PASSTHROUGH_NONE 0
39#define SHIFTFS_PASSTHROUGH_STAT 1
88f5bff3
CB
40#define SHIFTFS_PASSTHROUGH_IOCTL 2
41#define SHIFTFS_PASSTHROUGH_ALL \
42 (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
43
44static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
45{
46 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
47 return false;
48
88f5bff3
CB
49 return true;
50}
3ded6d6b
CB
51
52static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
53{
54 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
55 return false;
56
3ded6d6b
CB
57 return true;
58}
aa269008
JB
59
60enum {
61 OPT_MARK,
3ded6d6b 62 OPT_PASSTHROUGH,
aa269008
JB
63 OPT_LAST,
64};
65
66/* global filesystem options */
67static const match_table_t tokens = {
68 { OPT_MARK, "mark" },
3ded6d6b 69 { OPT_PASSTHROUGH, "passthrough=%u" },
aa269008
JB
70 { OPT_LAST, NULL }
71};
72
3ded6d6b 73static const struct cred *shiftfs_override_creds(const struct super_block *sb)
aa269008 74{
3ded6d6b 75 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
aa269008 76
3ded6d6b
CB
77 return override_creds(sbinfo->creator_cred);
78}
79
/*
 * Counterpart of shiftfs_override_object_creds(): reinstate @oldcred on the
 * current task, then drop the reference held on @newcred.
 */
static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
					       struct cred *newcred)
{
	revert_creds(oldcred);
	put_cred(newcred);
}
86
ef687aea
SF
87static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
88 kuid_t kuid)
89{
90 uid_t uid = from_kuid(from, kuid);
91 return make_kuid(to, uid);
92}
93
94static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
95 kgid_t kgid)
96{
97 gid_t gid = from_kgid(from, kgid);
98 return make_kgid(to, gid);
99}
100
3ded6d6b
CB
101static int shiftfs_override_object_creds(const struct super_block *sb,
102 const struct cred **oldcred,
103 struct cred **newcred,
104 struct dentry *dentry, umode_t mode,
105 bool hardlink)
106{
ef687aea 107 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
3ded6d6b
CB
108 kuid_t fsuid = current_fsuid();
109 kgid_t fsgid = current_fsgid();
110
111 *oldcred = shiftfs_override_creds(sb);
112
113 *newcred = prepare_creds();
114 if (!*newcred) {
115 revert_creds(*oldcred);
116 return -ENOMEM;
117 }
118
ef687aea
SF
119 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
120 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
3ded6d6b
CB
121
122 if (!hardlink) {
123 int err = security_dentry_create_files_as(dentry, mode,
124 &dentry->d_name,
125 *oldcred, *newcred);
126 if (err) {
127 shiftfs_revert_object_creds(*oldcred, *newcred);
128 return err;
129 }
130 }
aa269008 131
3ded6d6b
CB
132 put_cred(override_creds(*newcred));
133 return 0;
134}
aa269008 135
3ded6d6b
CB
136static void shiftfs_copyattr(struct inode *from, struct inode *to)
137{
138 struct user_namespace *from_ns = from->i_sb->s_user_ns;
139 struct user_namespace *to_ns = to->i_sb->s_user_ns;
140
141 to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
142 to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
143 to->i_mode = from->i_mode;
144 to->i_atime = from->i_atime;
145 to->i_mtime = from->i_mtime;
146 to->i_ctime = from->i_ctime;
147 i_size_write(to, i_size_read(from));
148}
aa269008 149
3ded6d6b
CB
150static void shiftfs_copyflags(struct inode *from, struct inode *to)
151{
152 unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
aa269008 153
3ded6d6b 154 inode_set_flags(to, from->i_flags & mask, mask);
aa269008
JB
155}
156
3ded6d6b 157static void shiftfs_file_accessed(struct file *file)
aa269008 158{
3ded6d6b
CB
159 struct inode *upperi, *loweri;
160
161 if (file->f_flags & O_NOATIME)
aa269008
JB
162 return;
163
3ded6d6b
CB
164 upperi = file_inode(file);
165 loweri = upperi->i_private;
166
167 if (!loweri)
168 return;
169
170 upperi->i_mtime = loweri->i_mtime;
171 upperi->i_ctime = loweri->i_ctime;
172
173 touch_atime(&file->f_path);
aa269008
JB
174}
175
3ded6d6b
CB
176static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
177 char *options)
aa269008
JB
178{
179 char *p;
180 substring_t args[MAX_OPT_ARGS];
181
3ded6d6b
CB
182 sbinfo->mark = false;
183 sbinfo->passthrough = 0;
aa269008
JB
184
185 while ((p = strsep(&options, ",")) != NULL) {
3ded6d6b 186 int err, intarg, token;
aa269008
JB
187
188 if (!*p)
189 continue;
190
191 token = match_token(p, tokens, args);
192 switch (token) {
193 case OPT_MARK:
3ded6d6b
CB
194 sbinfo->mark = true;
195 break;
196 case OPT_PASSTHROUGH:
197 err = match_int(&args[0], &intarg);
198 if (err)
199 return err;
200
201 if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
202 return -EINVAL;
203
204 sbinfo->passthrough = intarg;
aa269008
JB
205 break;
206 default:
207 return -EINVAL;
208 }
209 }
3ded6d6b 210
aa269008
JB
211 return 0;
212}
213
214static void shiftfs_d_release(struct dentry *dentry)
215{
3ded6d6b 216 struct dentry *lowerd = dentry->d_fsdata;
aa269008 217
3ded6d6b
CB
218 if (lowerd)
219 dput(lowerd);
aa269008
JB
220}
221
222static struct dentry *shiftfs_d_real(struct dentry *dentry,
223 const struct inode *inode)
224{
3ded6d6b
CB
225 struct dentry *lowerd = dentry->d_fsdata;
226
227 if (inode && d_inode(dentry) == inode)
228 return dentry;
aa269008 229
3ded6d6b
CB
230 lowerd = d_real(lowerd, inode);
231 if (lowerd && (!inode || inode == d_inode(lowerd)))
232 return lowerd;
aa269008 233
3ded6d6b
CB
234 WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
235 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
236 return dentry;
aa269008
JB
237}
238
239static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
240{
3ded6d6b
CB
241 int err = 1;
242 struct dentry *lowerd = dentry->d_fsdata;
aa269008 243
3ded6d6b 244 if (d_is_negative(lowerd) != d_is_negative(dentry))
aa269008
JB
245 return 0;
246
3ded6d6b
CB
247 if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
248 err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
aa269008 249
3ded6d6b
CB
250 if (d_really_is_positive(dentry)) {
251 struct inode *inode = d_inode(dentry);
252 struct inode *loweri = d_inode(lowerd);
253
254 shiftfs_copyattr(loweri, inode);
3ded6d6b
CB
255 }
256
257 return err;
aa269008
JB
258}
259
260static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
261{
3ded6d6b
CB
262 int err = 1;
263 struct dentry *lowerd = dentry->d_fsdata;
aa269008 264
3ded6d6b
CB
265 if (d_unhashed(lowerd) ||
266 ((d_is_negative(lowerd) != d_is_negative(dentry))))
aa269008
JB
267 return 0;
268
3ded6d6b
CB
269 if (flags & LOOKUP_RCU)
270 return -ECHILD;
aa269008 271
3ded6d6b
CB
272 if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
273 err = lowerd->d_op->d_revalidate(lowerd, flags);
aa269008 274
3ded6d6b
CB
275 if (d_really_is_positive(dentry)) {
276 struct inode *inode = d_inode(dentry);
277 struct inode *loweri = d_inode(lowerd);
aa269008 278
3ded6d6b 279 shiftfs_copyattr(loweri, inode);
3ded6d6b 280 }
aa269008 281
3ded6d6b 282 return err;
aa269008
JB
283}
284
285static const struct dentry_operations shiftfs_dentry_ops = {
3ded6d6b
CB
286 .d_release = shiftfs_d_release,
287 .d_real = shiftfs_d_real,
288 .d_revalidate = shiftfs_d_revalidate,
aa269008
JB
289 .d_weak_revalidate = shiftfs_d_weak_revalidate,
290};
291
aa269008
JB
292static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
293 struct delayed_call *done)
294{
3ded6d6b
CB
295 const char *p;
296 const struct cred *oldcred;
297 struct dentry *lowerd;
aa269008 298
3ded6d6b
CB
299 /* RCU lookup not supported */
300 if (!dentry)
aa269008 301 return ERR_PTR(-ECHILD);
3ded6d6b
CB
302
303 lowerd = dentry->d_fsdata;
304 oldcred = shiftfs_override_creds(dentry->d_sb);
305 p = vfs_get_link(lowerd, done);
306 revert_creds(oldcred);
307
308 return p;
aa269008
JB
309}
310
d347e71d
AR
311static int shiftfs_setxattr(struct user_namespace *ns,
312 struct dentry *dentry, struct inode *inode,
aa269008
JB
313 const char *name, const void *value,
314 size_t size, int flags)
315{
3ded6d6b
CB
316 struct dentry *lowerd = dentry->d_fsdata;
317 int err;
318 const struct cred *oldcred;
319
320 oldcred = shiftfs_override_creds(dentry->d_sb);
d347e71d 321 err = vfs_setxattr(ns, lowerd, name, value, size, flags);
3ded6d6b 322 revert_creds(oldcred);
aa269008 323
3ded6d6b 324 shiftfs_copyattr(lowerd->d_inode, inode);
aa269008
JB
325
326 return err;
327}
328
329static int shiftfs_xattr_get(const struct xattr_handler *handler,
330 struct dentry *dentry, struct inode *inode,
331 const char *name, void *value, size_t size)
332{
3ded6d6b 333 struct dentry *lowerd = dentry->d_fsdata;
aa269008 334 int err;
3ded6d6b 335 const struct cred *oldcred;
aa269008 336
3ded6d6b 337 oldcred = shiftfs_override_creds(dentry->d_sb);
d347e71d 338 err = vfs_getxattr(&init_user_ns, lowerd, name, value, size);
3ded6d6b 339 revert_creds(oldcred);
aa269008
JB
340
341 return err;
342}
343
344static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
345 size_t size)
346{
3ded6d6b 347 struct dentry *lowerd = dentry->d_fsdata;
aa269008 348 int err;
3ded6d6b 349 const struct cred *oldcred;
aa269008 350
3ded6d6b
CB
351 oldcred = shiftfs_override_creds(dentry->d_sb);
352 err = vfs_listxattr(lowerd, list, size);
353 revert_creds(oldcred);
aa269008
JB
354
355 return err;
356}
357
d347e71d
AR
358static int shiftfs_removexattr(struct user_namespace *ns,
359 struct dentry *dentry, const char *name)
aa269008 360{
3ded6d6b 361 struct dentry *lowerd = dentry->d_fsdata;
aa269008 362 int err;
3ded6d6b
CB
363 const struct cred *oldcred;
364
365 oldcred = shiftfs_override_creds(dentry->d_sb);
d347e71d 366 err = vfs_removexattr(ns, lowerd, name);
3ded6d6b 367 revert_creds(oldcred);
aa269008 368
3ded6d6b
CB
369 /* update c/mtime */
370 shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
aa269008
JB
371
372 return err;
373}
374
375static int shiftfs_xattr_set(const struct xattr_handler *handler,
d347e71d 376 struct user_namespace *ns,
aa269008
JB
377 struct dentry *dentry, struct inode *inode,
378 const char *name, const void *value, size_t size,
379 int flags)
380{
381 if (!value)
d347e71d
AR
382 return shiftfs_removexattr(ns, dentry, name);
383 return shiftfs_setxattr(ns, dentry, inode, name, value, size, flags);
aa269008
JB
384}
385
3ded6d6b 386static int shiftfs_inode_test(struct inode *inode, void *data)
aa269008 387{
3ded6d6b
CB
388 return inode->i_private == data;
389}
aa269008 390
3ded6d6b
CB
391static int shiftfs_inode_set(struct inode *inode, void *data)
392{
393 inode->i_private = data;
394 return 0;
aa269008
JB
395}
396
d347e71d
AR
397static int shiftfs_create_object(struct user_namespace *ns,
398 struct inode *diri, struct dentry *dentry,
3ded6d6b
CB
399 umode_t mode, const char *symlink,
400 struct dentry *hardlink, bool excl)
aa269008 401{
aa269008 402 int err;
3ded6d6b
CB
403 const struct cred *oldcred;
404 struct cred *newcred;
405 void *loweri_iop_ptr = NULL;
406 umode_t modei = mode;
407 struct super_block *dir_sb = diri->i_sb;
408 struct dentry *lowerd_new = dentry->d_fsdata;
409 struct inode *inode = NULL, *loweri_dir = diri->i_private;
410 const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
411 struct dentry *lowerd_link = NULL;
aa269008
JB
412
413 if (hardlink) {
3ded6d6b 414 loweri_iop_ptr = loweri_dir_iop->link;
aa269008
JB
415 } else {
416 switch (mode & S_IFMT) {
417 case S_IFDIR:
3ded6d6b 418 loweri_iop_ptr = loweri_dir_iop->mkdir;
aa269008
JB
419 break;
420 case S_IFREG:
3ded6d6b 421 loweri_iop_ptr = loweri_dir_iop->create;
aa269008
JB
422 break;
423 case S_IFLNK:
3ded6d6b
CB
424 loweri_iop_ptr = loweri_dir_iop->symlink;
425 break;
426 case S_IFSOCK:
427 /* fall through */
428 case S_IFIFO:
429 loweri_iop_ptr = loweri_dir_iop->mknod;
430 break;
aa269008
JB
431 }
432 }
3ded6d6b
CB
433 if (!loweri_iop_ptr) {
434 err = -EINVAL;
435 goto out_iput;
436 }
aa269008 437
3ded6d6b 438 inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
aa269008 439
3ded6d6b
CB
440 if (!hardlink) {
441 inode = new_inode(dir_sb);
442 if (!inode) {
443 err = -ENOMEM;
444 goto out_iput;
445 }
446
447 /*
448 * new_inode() will have added the new inode to the super
449 * block's list of inodes. Further below we will call
450 * inode_insert5() Which would perform the same operation again
451 * thereby corrupting the list. To avoid this raise I_CREATING
452 * in i_state which will cause inode_insert5() to skip this
453 * step. I_CREATING will be cleared by d_instantiate_new()
454 * below.
455 */
456 spin_lock(&inode->i_lock);
457 inode->i_state |= I_CREATING;
458 spin_unlock(&inode->i_lock);
aa269008 459
d347e71d 460 inode_init_owner(ns, inode, diri, mode);
3ded6d6b
CB
461 modei = inode->i_mode;
462 }
aa269008 463
3ded6d6b
CB
464 err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
465 dentry, modei, hardlink != NULL);
466 if (err)
467 goto out_iput;
aa269008 468
aa269008 469 if (hardlink) {
3ded6d6b 470 lowerd_link = hardlink->d_fsdata;
d347e71d 471 err = vfs_link(lowerd_link, ns, loweri_dir, lowerd_new, NULL);
aa269008 472 } else {
3ded6d6b 473 switch (modei & S_IFMT) {
aa269008 474 case S_IFDIR:
d347e71d 475 err = vfs_mkdir(ns, loweri_dir, lowerd_new, modei);
aa269008
JB
476 break;
477 case S_IFREG:
d347e71d 478 err = vfs_create(ns, loweri_dir, lowerd_new, modei, excl);
aa269008
JB
479 break;
480 case S_IFLNK:
d347e71d 481 err = vfs_symlink(ns, loweri_dir, lowerd_new, symlink);
3ded6d6b
CB
482 break;
483 case S_IFSOCK:
484 /* fall through */
485 case S_IFIFO:
d347e71d 486 err = vfs_mknod(ns, loweri_dir, lowerd_new, modei, 0);
3ded6d6b
CB
487 break;
488 default:
489 err = -EINVAL;
490 break;
aa269008
JB
491 }
492 }
493
3ded6d6b 494 shiftfs_revert_object_creds(oldcred, newcred);
aa269008 495
3ded6d6b
CB
496 if (!err && WARN_ON(!lowerd_new->d_inode))
497 err = -EIO;
aa269008 498 if (err)
3ded6d6b
CB
499 goto out_iput;
500
501 if (hardlink) {
502 inode = d_inode(hardlink);
503 ihold(inode);
504
505 /* copy up times from lower inode */
506 shiftfs_copyattr(d_inode(lowerd_link), inode);
507 set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
508 d_instantiate(dentry, inode);
509 } else {
510 struct inode *inode_tmp;
511 struct inode *loweri_new = d_inode(lowerd_new);
512
513 inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
514 shiftfs_inode_test, shiftfs_inode_set,
515 loweri_new);
516 if (unlikely(inode_tmp != inode)) {
517 pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
518 iput(inode_tmp);
519 err = -EINVAL;
520 goto out_iput;
521 }
aa269008 522
3ded6d6b
CB
523 ihold(loweri_new);
524 shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
525 0, lowerd_new);
526 d_instantiate_new(dentry, inode);
527 }
aa269008 528
3ded6d6b
CB
529 shiftfs_copyattr(loweri_dir, diri);
530 if (loweri_iop_ptr == loweri_dir_iop->mkdir)
531 set_nlink(diri, loweri_dir->i_nlink);
aa269008 532
3ded6d6b 533 inode = NULL;
aa269008 534
3ded6d6b
CB
535out_iput:
536 iput(inode);
537 inode_unlock(loweri_dir);
aa269008
JB
538
539 return err;
540}
541
d347e71d
AR
542static int shiftfs_create(struct user_namespace *ns,
543 struct inode *dir, struct dentry *dentry,
aa269008
JB
544 umode_t mode, bool excl)
545{
546 mode |= S_IFREG;
547
d347e71d 548 return shiftfs_create_object(ns, dir, dentry, mode, NULL, NULL, excl);
aa269008
JB
549}
550
d347e71d 551static int shiftfs_mkdir(struct user_namespace *ns, struct inode *dir, struct dentry *dentry,
aa269008
JB
552 umode_t mode)
553{
554 mode |= S_IFDIR;
555
d347e71d 556 return shiftfs_create_object(ns, dir, dentry, mode, NULL, NULL, false);
aa269008
JB
557}
558
559static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
560 struct dentry *dentry)
561{
d347e71d 562 return shiftfs_create_object(&init_user_ns, dir, dentry, 0, NULL, hardlink, false);
3ded6d6b
CB
563}
564
d347e71d
AR
565static int shiftfs_mknod(struct user_namespace *ns,
566 struct inode *dir, struct dentry *dentry, umode_t mode,
3ded6d6b
CB
567 dev_t rdev)
568{
569 if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
570 return -EPERM;
571
d347e71d 572 return shiftfs_create_object(ns, dir, dentry, mode, NULL, NULL, false);
aa269008
JB
573}
574
d347e71d 575static int shiftfs_symlink(struct user_namespace *ns, struct inode *dir, struct dentry *dentry,
aa269008
JB
576 const char *symlink)
577{
d347e71d 578 return shiftfs_create_object(ns, dir, dentry, S_IFLNK, symlink, NULL, false);
aa269008
JB
579}
580
581static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
582{
3ded6d6b
CB
583 struct dentry *lowerd = dentry->d_fsdata;
584 struct inode *loweri = dir->i_private;
b8abaae8 585 struct inode *inode = d_inode(dentry);
aa269008 586 int err;
3ded6d6b 587 const struct cred *oldcred;
aa269008 588
a7a5232f 589 dget(lowerd);
3ded6d6b
CB
590 oldcred = shiftfs_override_creds(dentry->d_sb);
591 inode_lock_nested(loweri, I_MUTEX_PARENT);
aa269008 592 if (rmdir)
d347e71d 593 err = vfs_rmdir(&init_user_ns, loweri, lowerd);
aa269008 594 else
d347e71d 595 err = vfs_unlink(&init_user_ns, loweri, lowerd, NULL);
3ded6d6b 596 revert_creds(oldcred);
aa269008 597
b8abaae8 598 if (!err) {
3ded6d6b
CB
599 d_drop(dentry);
600
b8abaae8
CB
601 if (rmdir)
602 clear_nlink(inode);
603 else
604 drop_nlink(inode);
605 }
606 inode_unlock(loweri);
607
608 shiftfs_copyattr(loweri, dir);
a7a5232f 609 dput(lowerd);
aa269008
JB
610
611 return err;
612}
613
614static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
615{
616 return shiftfs_rm(dir, dentry, false);
617}
618
619static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
620{
621 return shiftfs_rm(dir, dentry, true);
622}
623
d347e71d
AR
624static int shiftfs_rename(struct user_namespace *ns,
625 struct inode *olddir, struct dentry *old,
aa269008
JB
626 struct inode *newdir, struct dentry *new,
627 unsigned int flags)
628{
3ded6d6b
CB
629 struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
630 *lowerd_dir_new = new->d_parent->d_fsdata,
631 *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
632 *trapd;
633 struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
634 *loweri_dir_new = lowerd_dir_new->d_inode;
d347e71d
AR
635 struct renamedata rd = {
636 .old_mnt_userns = ns,
637 .old_dir = loweri_dir_old,
638 .old_dentry = lowerd_old,
639 .new_mnt_userns = ns,
640 .new_dir = loweri_dir_new,
641 .new_dentry = lowerd_new,
642 };
aa269008 643 int err = -EINVAL;
3ded6d6b 644 const struct cred *oldcred;
aa269008 645
3ded6d6b 646 trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
aa269008 647
3ded6d6b 648 if (trapd == lowerd_old || trapd == lowerd_new)
aa269008
JB
649 goto out_unlock;
650
3ded6d6b 651 oldcred = shiftfs_override_creds(old->d_sb);
d347e71d 652 err = vfs_rename(&rd);
3ded6d6b 653 revert_creds(oldcred);
aa269008 654
3ded6d6b
CB
655 shiftfs_copyattr(loweri_dir_old, olddir);
656 shiftfs_copyattr(loweri_dir_new, newdir);
aa269008 657
3ded6d6b
CB
658out_unlock:
659 unlock_rename(lowerd_dir_new, lowerd_dir_old);
aa269008
JB
660
661 return err;
662}
663
664static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
665 unsigned int flags)
666{
3ded6d6b
CB
667 struct dentry *new;
668 struct inode *newi;
669 const struct cred *oldcred;
670 struct dentry *lowerd = dentry->d_parent->d_fsdata;
671 struct inode *inode = NULL, *loweri = lowerd->d_inode;
672
673 inode_lock(loweri);
674 oldcred = shiftfs_override_creds(dentry->d_sb);
675 new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
676 revert_creds(oldcred);
677 inode_unlock(loweri);
aa269008
JB
678
679 if (IS_ERR(new))
680 return new;
681
682 dentry->d_fsdata = new;
683
3ded6d6b
CB
684 newi = new->d_inode;
685 if (!newi)
aa269008
JB
686 goto out;
687
3ded6d6b
CB
688 inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
689 shiftfs_inode_test, shiftfs_inode_set, newi);
690 if (!inode) {
aa269008
JB
691 dput(new);
692 return ERR_PTR(-ENOMEM);
693 }
3ded6d6b
CB
694 if (inode->i_state & I_NEW) {
695 /*
696 * inode->i_private set by shiftfs_inode_set(), but we still
697 * need to take a reference
698 */
699 ihold(newi);
700 shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
701 unlock_new_inode(inode);
702 }
aa269008 703
3ded6d6b
CB
704out:
705 return d_splice_alias(inode, dentry);
aa269008
JB
706}
707
d347e71d 708static int shiftfs_permission(struct user_namespace *ns, struct inode *inode, int mask)
aa269008 709{
aa269008 710 int err;
3ded6d6b
CB
711 const struct cred *oldcred;
712 struct inode *loweri = inode->i_private;
aa269008 713
3ded6d6b
CB
714 if (!loweri) {
715 WARN_ON(!(mask & MAY_NOT_BLOCK));
aa269008 716 return -ECHILD;
3ded6d6b 717 }
aa269008 718
d347e71d 719 err = generic_permission(ns, inode, mask);
3ded6d6b
CB
720 if (err)
721 return err;
722
723 oldcred = shiftfs_override_creds(inode->i_sb);
d347e71d 724 err = inode_permission(ns, loweri, mask);
3ded6d6b
CB
725 revert_creds(oldcred);
726
727 return err;
728}
729
730static int shiftfs_fiemap(struct inode *inode,
731 struct fiemap_extent_info *fieinfo, u64 start,
732 u64 len)
733{
734 int err;
735 const struct cred *oldcred;
736 struct inode *loweri = inode->i_private;
737
738 if (!loweri->i_op->fiemap)
739 return -EOPNOTSUPP;
740
741 oldcred = shiftfs_override_creds(inode->i_sb);
742 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
743 filemap_write_and_wait(loweri->i_mapping);
744 err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
745 revert_creds(oldcred);
746
747 return err;
748}
749
d347e71d
AR
750static int shiftfs_tmpfile(struct user_namespace *ns,
751 struct inode *dir, struct dentry *dentry,
3ded6d6b
CB
752 umode_t mode)
753{
754 int err;
755 const struct cred *oldcred;
756 struct dentry *lowerd = dentry->d_fsdata;
757 struct inode *loweri = dir->i_private;
758
759 if (!loweri->i_op->tmpfile)
760 return -EOPNOTSUPP;
761
762 oldcred = shiftfs_override_creds(dir->i_sb);
d347e71d 763 err = loweri->i_op->tmpfile(ns, loweri, lowerd, mode);
3ded6d6b 764 revert_creds(oldcred);
aa269008
JB
765
766 return err;
767}
768
d347e71d 769static int shiftfs_setattr(struct user_namespace *ns, struct dentry *dentry, struct iattr *attr)
aa269008 770{
3ded6d6b
CB
771 struct dentry *lowerd = dentry->d_fsdata;
772 struct inode *loweri = lowerd->d_inode;
25af0f23 773 struct iattr newattr;
3ded6d6b 774 const struct cred *oldcred;
aa269008 775 struct super_block *sb = dentry->d_sb;
ef687aea 776 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
aa269008
JB
777 int err;
778
d347e71d 779 err = setattr_prepare(ns, dentry, attr);
3ded6d6b
CB
780 if (err)
781 return err;
782
25af0f23 783 newattr = *attr;
ef687aea
SF
784 newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
785 newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
aa269008 786
25af0f23
SF
787 /*
788 * mode change is for clearing setuid/setgid bits. Allow lower fs
789 * to interpret this in its own way.
790 */
791 if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
792 newattr.ia_valid &= ~ATTR_MODE;
793
3ded6d6b
CB
794 inode_lock(loweri);
795 oldcred = shiftfs_override_creds(dentry->d_sb);
d347e71d 796 err = notify_change(ns, lowerd, &newattr, NULL);
3ded6d6b
CB
797 revert_creds(oldcred);
798 inode_unlock(loweri);
aa269008 799
3ded6d6b 800 shiftfs_copyattr(loweri, d_inode(dentry));
aa269008 801
3ded6d6b 802 return err;
aa269008
JB
803}
804
d347e71d
AR
805static int shiftfs_getattr(struct user_namespace *ns,
806 const struct path *path, struct kstat *stat,
aa269008
JB
807 u32 request_mask, unsigned int query_flags)
808{
809 struct inode *inode = path->dentry->d_inode;
3ded6d6b
CB
810 struct dentry *lowerd = path->dentry->d_fsdata;
811 struct inode *loweri = lowerd->d_inode;
812 struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
813 struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
814 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
815 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
816 const struct cred *oldcred;
817 int err;
818
819 oldcred = shiftfs_override_creds(inode->i_sb);
820 err = vfs_getattr(&newpath, stat, request_mask, query_flags);
821 revert_creds(oldcred);
aa269008
JB
822
823 if (err)
824 return err;
825
826 /* transform the underlying id */
3ded6d6b
CB
827 stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
828 stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
aa269008
JB
829 return 0;
830}
831
3ded6d6b 832#ifdef CONFIG_SHIFT_FS_POSIX_ACL
aa269008 833
3ded6d6b
CB
834static int
835shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
836 struct posix_acl *acl)
aa269008 837{
3ded6d6b
CB
838 int i;
839
840 for (i = 0; i < acl->a_count; i++) {
841 struct posix_acl_entry *e = &acl->a_entries[i];
842 switch(e->e_tag) {
843 case ACL_USER:
844 e->e_uid = shift_kuid(from, to, e->e_uid);
845 if (!uid_valid(e->e_uid))
846 return -EOVERFLOW;
847 break;
848 case ACL_GROUP:
849 e->e_gid = shift_kgid(from, to, e->e_gid);
850 if (!gid_valid(e->e_gid))
851 return -EOVERFLOW;
852 break;
853 }
854 }
855 return 0;
856}
aa269008 857
3ded6d6b
CB
858static void
859shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
860 void *value, size_t size)
861{
862 struct posix_acl_xattr_header *header = value;
863 struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
864 int count;
865 kuid_t kuid;
866 kgid_t kgid;
aa269008 867
3ded6d6b
CB
868 if (!value)
869 return;
870 if (size < sizeof(struct posix_acl_xattr_header))
871 return;
872 if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
873 return;
aa269008 874
3ded6d6b
CB
875 count = posix_acl_xattr_count(size);
876 if (count < 0)
877 return;
878 if (count == 0)
879 return;
aa269008 880
3ded6d6b
CB
881 for (end = entry + count; entry != end; entry++) {
882 switch(le16_to_cpu(entry->e_tag)) {
883 case ACL_USER:
884 kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
885 kuid = shift_kuid(from, to, kuid);
886 entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
887 break;
888 case ACL_GROUP:
d347e71d 889 kgid = make_kgid(from, le32_to_cpu(entry->e_id));
3ded6d6b 890 kgid = shift_kgid(from, to, kgid);
d347e71d 891 entry->e_id = cpu_to_le32(from_kgid(from, kgid));
3ded6d6b
CB
892 break;
893 default:
894 break;
895 }
896 }
aa269008
JB
897}
898
d347e71d
AR
899static struct posix_acl *
900shiftfs_get_acl(struct inode *inode, int type, bool rcu)
aa269008 901{
3ded6d6b
CB
902 struct inode *loweri = inode->i_private;
903 const struct cred *oldcred;
904 struct posix_acl *lower_acl, *acl = NULL;
905 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
906 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
907 int size;
908 int err;
aa269008 909
d347e71d
AR
910 if (rcu)
911 return ERR_PTR(-ECHILD);
912
3ded6d6b
CB
913 if (!IS_POSIXACL(loweri))
914 return NULL;
aa269008 915
3ded6d6b
CB
916 oldcred = shiftfs_override_creds(inode->i_sb);
917 lower_acl = get_acl(loweri, type);
918 revert_creds(oldcred);
aa269008 919
3ded6d6b
CB
920 if (lower_acl && !IS_ERR(lower_acl)) {
921 /* XXX: export posix_acl_clone? */
922 size = sizeof(struct posix_acl) +
923 lower_acl->a_count * sizeof(struct posix_acl_entry);
924 acl = kmemdup(lower_acl, size, GFP_KERNEL);
925 posix_acl_release(lower_acl);
aa269008 926
3ded6d6b
CB
927 if (!acl)
928 return ERR_PTR(-ENOMEM);
aa269008 929
3ded6d6b 930 refcount_set(&acl->a_refcount, 1);
aa269008 931
3ded6d6b
CB
932 err = shift_acl_ids(from_ns, to_ns, acl);
933 if (err) {
934 kfree(acl);
935 return ERR_PTR(err);
936 }
937 }
938
939 return acl;
aa269008
JB
940}
941
3ded6d6b
CB
942static int
943shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
944 struct dentry *dentry, struct inode *inode,
945 const char *name, void *buffer, size_t size)
aa269008 946{
3ded6d6b
CB
947 struct inode *loweri = inode->i_private;
948 int ret;
949
950 ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
951 buffer, size);
952 if (ret < 0)
953 return ret;
aa269008 954
3ded6d6b
CB
955 inode_lock(loweri);
956 shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
957 buffer, size);
958 inode_unlock(loweri);
959 return ret;
aa269008
JB
960}
961
3ded6d6b
CB
962static int
963shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
d347e71d 964 struct user_namespace *ns,
3ded6d6b
CB
965 struct dentry *dentry, struct inode *inode,
966 const char *name, const void *value,
967 size_t size, int flags)
968{
969 struct inode *loweri = inode->i_private;
970 int err;
aa269008 971
3ded6d6b
CB
972 if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
973 return -EOPNOTSUPP;
974 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
975 return value ? -EACCES : 0;
d347e71d 976 if (!inode_owner_or_capable(ns, inode))
3ded6d6b
CB
977 return -EPERM;
978
979 if (value) {
980 shift_acl_xattr_ids(inode->i_sb->s_user_ns,
981 loweri->i_sb->s_user_ns,
982 (void *)value, size);
d347e71d 983 err = shiftfs_setxattr(ns, dentry, inode, handler->name, value,
3ded6d6b
CB
984 size, flags);
985 } else {
d347e71d 986 err = shiftfs_removexattr(ns, dentry, handler->name);
3ded6d6b 987 }
aa269008 988
3ded6d6b
CB
989 if (!err)
990 shiftfs_copyattr(loweri, inode);
991
992 return err;
993}
994
995static const struct xattr_handler
996shiftfs_posix_acl_access_xattr_handler = {
997 .name = XATTR_NAME_POSIX_ACL_ACCESS,
998 .flags = ACL_TYPE_ACCESS,
999 .get = shiftfs_posix_acl_xattr_get,
1000 .set = shiftfs_posix_acl_xattr_set,
aa269008
JB
1001};
1002
3ded6d6b
CB
1003static const struct xattr_handler
1004shiftfs_posix_acl_default_xattr_handler = {
1005 .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1006 .flags = ACL_TYPE_DEFAULT,
1007 .get = shiftfs_posix_acl_xattr_get,
1008 .set = shiftfs_posix_acl_xattr_set,
aa269008
JB
1009};
1010
3ded6d6b 1011#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
aa269008 1012
3ded6d6b 1013#define shiftfs_get_acl NULL
aa269008 1014
3ded6d6b 1015#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
aa269008 1016
3ded6d6b
CB
1017static const struct inode_operations shiftfs_dir_inode_operations = {
1018 .lookup = shiftfs_lookup,
1019 .mkdir = shiftfs_mkdir,
1020 .symlink = shiftfs_symlink,
1021 .unlink = shiftfs_unlink,
1022 .rmdir = shiftfs_rmdir,
1023 .rename = shiftfs_rename,
1024 .link = shiftfs_link,
1025 .setattr = shiftfs_setattr,
1026 .create = shiftfs_create,
1027 .mknod = shiftfs_mknod,
1028 .permission = shiftfs_permission,
1029 .getattr = shiftfs_getattr,
1030 .listxattr = shiftfs_listxattr,
1031 .get_acl = shiftfs_get_acl,
1032};
1033
1034static const struct inode_operations shiftfs_file_inode_operations = {
1035 .fiemap = shiftfs_fiemap,
1036 .getattr = shiftfs_getattr,
1037 .get_acl = shiftfs_get_acl,
1038 .listxattr = shiftfs_listxattr,
1039 .permission = shiftfs_permission,
1040 .setattr = shiftfs_setattr,
1041 .tmpfile = shiftfs_tmpfile,
1042};
1043
1044static const struct inode_operations shiftfs_special_inode_operations = {
1045 .getattr = shiftfs_getattr,
1046 .get_acl = shiftfs_get_acl,
1047 .listxattr = shiftfs_listxattr,
1048 .permission = shiftfs_permission,
1049 .setattr = shiftfs_setattr,
1050};
1051
1052static const struct inode_operations shiftfs_symlink_inode_operations = {
1053 .getattr = shiftfs_getattr,
1054 .get_link = shiftfs_get_link,
1055 .listxattr = shiftfs_listxattr,
1056 .setattr = shiftfs_setattr,
1057};
1058
1059static struct file *shiftfs_open_realfile(const struct file *file,
b4de76a8 1060 struct inode *realinode)
3ded6d6b 1061{
b4de76a8
CB
1062 struct file *realfile;
1063 const struct cred *old_cred;
3ded6d6b 1064 struct inode *inode = file_inode(file);
b4de76a8 1065 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
3ded6d6b 1066 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
b4de76a8 1067 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
3ded6d6b 1068
b4de76a8
CB
1069 old_cred = shiftfs_override_creds(inode->i_sb);
1070 realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1071 info->creator_cred);
1072 revert_creds(old_cred);
3ded6d6b 1073
b4de76a8 1074 return realfile;
3ded6d6b
CB
1075}
1076
1077#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1078
1079static int shiftfs_change_flags(struct file *file, unsigned int flags)
1080{
1081 struct inode *inode = file_inode(file);
1082 int err;
1083
1084 /* if some flag changed that cannot be changed then something's amiss */
1085 if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1086 return -EIO;
1087
1088 flags &= SHIFTFS_SETFL_MASK;
1089
1090 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1091 return -EPERM;
1092
1093 if (flags & O_DIRECT) {
1094 if (!file->f_mapping->a_ops ||
1095 !file->f_mapping->a_ops->direct_IO)
1096 return -EINVAL;
1097 }
1098
1099 if (file->f_op->check_flags) {
1100 err = file->f_op->check_flags(flags);
1101 if (err)
1102 return err;
1103 }
1104
1105 spin_lock(&file->f_lock);
1106 file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1107 spin_unlock(&file->f_lock);
1108
1109 return 0;
1110}
1111
3ded6d6b
CB
1112static int shiftfs_open(struct inode *inode, struct file *file)
1113{
3ded6d6b 1114 struct file *realfile;
3ded6d6b 1115
b4de76a8
CB
1116 realfile = shiftfs_open_realfile(file, inode->i_private);
1117 if (IS_ERR(realfile))
3ded6d6b 1118 return PTR_ERR(realfile);
3ded6d6b 1119
b4de76a8 1120 file->private_data = realfile;
71d7f9d9
CB
1121 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1122 file->f_mapping = realfile->f_mapping;
1123
3ded6d6b
CB
1124 return 0;
1125}
1126
b4de76a8 1127static int shiftfs_dir_open(struct inode *inode, struct file *file)
3ded6d6b 1128{
b4de76a8
CB
1129 struct file *realfile;
1130 const struct cred *oldcred;
1131 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1132 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1133 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1134
1135 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1136 realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1137 info->creator_cred);
1138 revert_creds(oldcred);
1139 if (IS_ERR(realfile))
1140 return PTR_ERR(realfile);
3ded6d6b 1141
b4de76a8 1142 file->private_data = realfile;
3ded6d6b 1143
b4de76a8
CB
1144 return 0;
1145}
1146
1147static int shiftfs_release(struct inode *inode, struct file *file)
1148{
1149 struct file *realfile = file->private_data;
1150
1151 if (realfile)
1152 fput(realfile);
3ded6d6b
CB
1153
1154 return 0;
1155}
1156
b4de76a8
CB
1157static int shiftfs_dir_release(struct inode *inode, struct file *file)
1158{
1159 return shiftfs_release(inode, file);
1160}
1161
958069e8
CB
1162static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1163{
b4de76a8 1164 struct file *realfile = file->private_data;
958069e8
CB
1165
1166 return vfs_llseek(realfile, offset, whence);
1167}
1168
1169static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
3ded6d6b
CB
1170{
1171 struct inode *realinode = file_inode(file)->i_private;
1172
1173 return generic_file_llseek_size(file, offset, whence,
1174 realinode->i_sb->s_maxbytes,
1175 i_size_read(realinode));
1176}
1177
/* XXX: Need to figure out what to do about atime updates, maybe other
 * timestamps too ... ref. ovl_file_accessed() */
1180
1181static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1182{
1183 int ifl = iocb->ki_flags;
1184 rwf_t flags = 0;
1185
1186 if (ifl & IOCB_NOWAIT)
1187 flags |= RWF_NOWAIT;
1188 if (ifl & IOCB_HIPRI)
1189 flags |= RWF_HIPRI;
1190 if (ifl & IOCB_DSYNC)
1191 flags |= RWF_DSYNC;
1192 if (ifl & IOCB_SYNC)
1193 flags |= RWF_SYNC;
1194
1195 return flags;
1196}
1197
392d05f6
CB
1198static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1199{
1200 struct file *realfile;
1201
1202 if (file->f_op->open != shiftfs_open &&
1203 file->f_op->open != shiftfs_dir_open)
1204 return -EINVAL;
1205
1206 realfile = file->private_data;
1207 lowerfd->flags = 0;
1208 lowerfd->file = realfile;
1209
1210 /* Did the flags change since open? */
1211 if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1212 return shiftfs_change_flags(lowerfd->file, file->f_flags);
1213
1214 return 0;
1215}
1216
3ded6d6b
CB
1217static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1218{
1219 struct file *file = iocb->ki_filp;
1220 struct fd lowerfd;
1221 const struct cred *oldcred;
1222 ssize_t ret;
1223
1224 if (!iov_iter_count(iter))
1225 return 0;
1226
1227 ret = shiftfs_real_fdget(file, &lowerfd);
1228 if (ret)
1229 return ret;
1230
1231 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1232 ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1233 shiftfs_iocb_to_rwf(iocb));
1234 revert_creds(oldcred);
1235
1236 shiftfs_file_accessed(file);
1237
1238 fdput(lowerfd);
1239 return ret;
1240}
1241
1242static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1243{
1244 struct file *file = iocb->ki_filp;
1245 struct inode *inode = file_inode(file);
1246 struct fd lowerfd;
1247 const struct cred *oldcred;
1248 ssize_t ret;
1249
1250 if (!iov_iter_count(iter))
1251 return 0;
1252
1253 inode_lock(inode);
1254 /* Update mode */
1255 shiftfs_copyattr(inode->i_private, inode);
1256 ret = file_remove_privs(file);
1257 if (ret)
1258 goto out_unlock;
1259
1260 ret = shiftfs_real_fdget(file, &lowerfd);
1261 if (ret)
1262 goto out_unlock;
1263
1264 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1265 file_start_write(lowerfd.file);
1266 ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1267 shiftfs_iocb_to_rwf(iocb));
1268 file_end_write(lowerfd.file);
1269 revert_creds(oldcred);
1270
1271 /* Update size */
1272 shiftfs_copyattr(inode->i_private, inode);
1273
1274 fdput(lowerfd);
1275
1276out_unlock:
1277 inode_unlock(inode);
1278 return ret;
1279}
1280
1281static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1282 int datasync)
1283{
1284 struct fd lowerfd;
1285 const struct cred *oldcred;
1286 int ret;
1287
1288 ret = shiftfs_real_fdget(file, &lowerfd);
1289 if (ret)
1290 return ret;
1291
1292 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1293 ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1294 revert_creds(oldcred);
1295
1296 fdput(lowerfd);
1297 return ret;
1298}
1299
1300static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1301{
b4de76a8 1302 struct file *realfile = file->private_data;
3ded6d6b
CB
1303 const struct cred *oldcred;
1304 int ret;
1305
1306 if (!realfile->f_op->mmap)
1307 return -ENODEV;
1308
1309 if (WARN_ON(file != vma->vm_file))
1310 return -EIO;
1311
1312 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1313 vma->vm_file = get_file(realfile);
1314 ret = call_mmap(vma->vm_file, vma);
1315 revert_creds(oldcred);
1316
1317 shiftfs_file_accessed(file);
1318
3a60b1d0
SF
1319 if (ret) {
1320 /*
1321 * Drop refcount from new vm_file value and restore original
1322 * vm_file value
1323 */
1324 vma->vm_file = file;
1325 fput(realfile);
1326 } else {
1327 /* Drop refcount from previous vm_file value */
1328 fput(file);
1329 }
3ded6d6b
CB
1330
1331 return ret;
1332}
1333
1334static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1335 loff_t len)
1336{
1337 struct inode *inode = file_inode(file);
1338 struct inode *loweri = inode->i_private;
1339 struct fd lowerfd;
1340 const struct cred *oldcred;
1341 int ret;
1342
1343 ret = shiftfs_real_fdget(file, &lowerfd);
1344 if (ret)
1345 return ret;
1346
1347 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1348 ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1349 revert_creds(oldcred);
1350
1351 /* Update size */
1352 shiftfs_copyattr(loweri, inode);
1353
1354 fdput(lowerfd);
1355 return ret;
1356}
1357
1358static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1359 int advice)
1360{
1361 struct fd lowerfd;
1362 const struct cred *oldcred;
1363 int ret;
1364
1365 ret = shiftfs_real_fdget(file, &lowerfd);
1366 if (ret)
1367 return ret;
1368
1369 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1370 ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1371 revert_creds(oldcred);
1372
1373 fdput(lowerfd);
1374 return ret;
1375}
1376
87a7b993 1377static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
3ded6d6b
CB
1378 const struct cred **oldcred,
1379 struct cred **newcred)
1380{
ef687aea 1381 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
3ded6d6b
CB
1382 kuid_t fsuid = current_fsuid();
1383 kgid_t fsgid = current_fsgid();
1384
1385 *oldcred = shiftfs_override_creds(sb);
1386
1387 *newcred = prepare_creds();
1388 if (!*newcred) {
1389 revert_creds(*oldcred);
1390 return -ENOMEM;
1391 }
1392
ef687aea
SF
1393 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1394 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
3ded6d6b
CB
1395
1396 /* clear all caps to prevent bypassing capable() checks */
1397 cap_clear((*newcred)->cap_bset);
1398 cap_clear((*newcred)->cap_effective);
1399 cap_clear((*newcred)->cap_inheritable);
1400 cap_clear((*newcred)->cap_permitted);
1401
87a7b993
CB
1402 if (cmd == BTRFS_IOC_SNAP_DESTROY) {
1403 kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
1404 /*
1405 * Allow the root user in the container to remove subvolumes
1406 * from other users.
1407 */
1408 if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
1409 cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
1410 }
1411
3ded6d6b
CB
1412 put_cred(override_creds(*newcred));
1413 return 0;
1414}
1415
/* Undo shiftfs_override_ioctl_creds(); thin wrapper for the object variant. */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1421
88f5bff3
CB
1422static inline bool is_btrfs_snap_ioctl(int cmd)
1423{
1424 if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1425 return true;
1426
1427 return false;
1428}
1429
89126b92 1430static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
88f5bff3
CB
1431 struct btrfs_ioctl_vol_args *v1,
1432 struct btrfs_ioctl_vol_args_v2 *v2)
1433{
1434 int ret;
1435
1436 if (!is_btrfs_snap_ioctl(cmd))
1437 return 0;
1438
1439 if (cmd == BTRFS_IOC_SNAP_CREATE)
1440 ret = copy_to_user(arg, v1, sizeof(*v1));
1441 else
1442 ret = copy_to_user(arg, v2, sizeof(*v2));
1443
a6e9d80a 1444 close_fd(fd);
88f5bff3
CB
1445 kfree(v1);
1446 kfree(v2);
1447
902ca980 1448 return ret ? -EFAULT: 0;
88f5bff3
CB
1449}
1450
1451static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1452 struct btrfs_ioctl_vol_args **b1,
1453 struct btrfs_ioctl_vol_args_v2 **b2,
88f5bff3
CB
1454 int *newfd)
1455{
1456 int oldfd, ret;
1457 struct fd src;
89126b92 1458 struct fd lfd = {};
88f5bff3
CB
1459 struct btrfs_ioctl_vol_args *v1 = NULL;
1460 struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1461
c88d1b2c
SF
1462 *b1 = NULL;
1463 *b2 = NULL;
1464
88f5bff3
CB
1465 if (!is_btrfs_snap_ioctl(cmd))
1466 return 0;
1467
1468 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1469 v1 = memdup_user(arg, sizeof(*v1));
1470 if (IS_ERR(v1))
1471 return PTR_ERR(v1);
1472 oldfd = v1->fd;
88f5bff3
CB
1473 } else {
1474 v2 = memdup_user(arg, sizeof(*v2));
1475 if (IS_ERR(v2))
1476 return PTR_ERR(v2);
1477 oldfd = v2->fd;
88f5bff3
CB
1478 }
1479
1480 src = fdget(oldfd);
c88d1b2c
SF
1481 if (!src.file) {
1482 ret = -EINVAL;
1483 goto err_free;
1484 }
88f5bff3 1485
89126b92
SF
1486 ret = shiftfs_real_fdget(src.file, &lfd);
1487 if (ret) {
1488 fdput(src);
c88d1b2c 1489 goto err_free;
89126b92
SF
1490 }
1491
1492 /*
1493 * shiftfs_real_fdget() does not take a reference to lfd.file, so
1494 * take a reference here to offset the one which will be put by
a6e9d80a 1495 * close_fd(), and make sure that reference is put on fdput(lfd).
89126b92
SF
1496 */
1497 get_file(lfd.file);
1498 lfd.flags |= FDPUT_FPUT;
1499 fdput(src);
88f5bff3 1500
89126b92 1501 *newfd = get_unused_fd_flags(lfd.file->f_flags);
88f5bff3 1502 if (*newfd < 0) {
89126b92 1503 fdput(lfd);
c88d1b2c
SF
1504 ret = *newfd;
1505 goto err_free;
88f5bff3
CB
1506 }
1507
89126b92 1508 fd_install(*newfd, lfd.file);
88f5bff3
CB
1509
1510 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1511 v1->fd = *newfd;
1512 ret = copy_to_user(arg, v1, sizeof(*v1));
1513 v1->fd = oldfd;
1514 } else {
1515 v2->fd = *newfd;
1516 ret = copy_to_user(arg, v2, sizeof(*v2));
1517 v2->fd = oldfd;
1518 }
1519
c88d1b2c
SF
1520 if (!ret) {
1521 *b1 = v1;
1522 *b2 = v2;
1523 } else {
89126b92 1524 shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
902ca980 1525 ret = -EFAULT;
c88d1b2c
SF
1526 }
1527
1528 return ret;
1529
1530err_free:
1531 kfree(v1);
1532 kfree(v2);
88f5bff3
CB
1533
1534 return ret;
1535}
1536
3ded6d6b
CB
1537static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1538 unsigned long arg)
1539{
3ded6d6b
CB
1540 struct fd lowerfd;
1541 struct cred *newcred;
1542 const struct cred *oldcred;
88f5bff3
CB
1543 int newfd = -EBADF;
1544 long err = 0, ret = 0;
1545 void __user *argp = (void __user *)arg;
3ded6d6b 1546 struct super_block *sb = file->f_path.dentry->d_sb;
88f5bff3
CB
1547 struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1548 struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1549
1550 ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
89126b92 1551 &newfd);
88f5bff3
CB
1552 if (ret < 0)
1553 return ret;
3ded6d6b
CB
1554
1555 ret = shiftfs_real_fdget(file, &lowerfd);
1556 if (ret)
88f5bff3 1557 goto out_restore;
3ded6d6b 1558
87a7b993 1559 ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
3ded6d6b
CB
1560 if (ret)
1561 goto out_fdput;
1562
1563 ret = vfs_ioctl(lowerfd.file, cmd, arg);
1564
1565 shiftfs_revert_ioctl_creds(oldcred, newcred);
1566
1567 shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1568 shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1569
1570out_fdput:
1571 fdput(lowerfd);
1572
88f5bff3 1573out_restore:
89126b92 1574 err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
88f5bff3
CB
1575 btrfs_v1, btrfs_v2);
1576 if (!ret)
1577 ret = err;
1578
3ded6d6b
CB
1579 return ret;
1580}
1581
4a60080e 1582static bool in_ioctl_whitelist(int flag, unsigned long arg)
88f5bff3 1583{
4a60080e
CB
1584 void __user *argp = (void __user *)arg;
1585 u64 flags = 0;
1586
88f5bff3 1587 switch (flag) {
4a60080e
CB
1588 case BTRFS_IOC_FS_INFO:
1589 return true;
88f5bff3
CB
1590 case BTRFS_IOC_SNAP_CREATE:
1591 return true;
1592 case BTRFS_IOC_SNAP_CREATE_V2:
1593 return true;
1594 case BTRFS_IOC_SUBVOL_CREATE:
1595 return true;
1596 case BTRFS_IOC_SUBVOL_CREATE_V2:
4a60080e
CB
1597 return true;
1598 case BTRFS_IOC_SUBVOL_GETFLAGS:
1599 return true;
1600 case BTRFS_IOC_SUBVOL_SETFLAGS:
26d1a2e4 1601 if (copy_from_user(&flags, argp, sizeof(flags)))
4a60080e
CB
1602 return false;
1603
1604 if (flags & ~BTRFS_SUBVOL_RDONLY)
1605 return false;
1606
88f5bff3
CB
1607 return true;
1608 case BTRFS_IOC_SNAP_DESTROY:
1609 return true;
1610 }
1611
1612 return false;
1613}
1614
3ded6d6b
CB
1615static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1616 unsigned long arg)
1617{
1618 switch (cmd) {
1619 case FS_IOC_GETVERSION:
1620 /* fall through */
1621 case FS_IOC_GETFLAGS:
1622 /* fall through */
1623 case FS_IOC_SETFLAGS:
1624 break;
1625 default:
4a60080e 1626 if (!in_ioctl_whitelist(cmd, arg) ||
88f5bff3
CB
1627 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1628 return -ENOTTY;
3ded6d6b
CB
1629 }
1630
1631 return shiftfs_real_ioctl(file, cmd, arg);
1632}
1633
1634static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1635 unsigned long arg)
1636{
1637 switch (cmd) {
1638 case FS_IOC32_GETVERSION:
1639 /* fall through */
1640 case FS_IOC32_GETFLAGS:
1641 /* fall through */
1642 case FS_IOC32_SETFLAGS:
1643 break;
1644 default:
4a60080e 1645 if (!in_ioctl_whitelist(cmd, arg) ||
88f5bff3
CB
1646 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1647 return -ENOIOCTLCMD;
3ded6d6b
CB
1648 }
1649
1650 return shiftfs_real_ioctl(file, cmd, arg);
1651}
1652
/* Selects which VFS range helper shiftfs_copyfile() invokes. */
enum shiftfs_copyop {
	SHIFTFS_COPY,	/* vfs_copy_file_range() */
	SHIFTFS_CLONE,	/* vfs_clone_file_range() */
	SHIFTFS_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
1658
1659static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1660 struct file *file_out, loff_t pos_out, u64 len,
1661 unsigned int flags, enum shiftfs_copyop op)
1662{
1663 ssize_t ret;
1664 struct fd real_in, real_out;
1665 const struct cred *oldcred;
1666 struct inode *inode_out = file_inode(file_out);
1667 struct inode *loweri = inode_out->i_private;
1668
1669 ret = shiftfs_real_fdget(file_out, &real_out);
1670 if (ret)
1671 return ret;
1672
1673 ret = shiftfs_real_fdget(file_in, &real_in);
1674 if (ret) {
1675 fdput(real_out);
1676 return ret;
1677 }
1678
1679 oldcred = shiftfs_override_creds(inode_out->i_sb);
1680 switch (op) {
1681 case SHIFTFS_COPY:
1682 ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1683 pos_out, len, flags);
1684 break;
1685
1686 case SHIFTFS_CLONE:
1687 ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1688 pos_out, len, flags);
1689 break;
1690
1691 case SHIFTFS_DEDUPE:
1692 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1693 real_out.file, pos_out, len,
1694 flags);
1695 break;
1696 }
1697 revert_creds(oldcred);
1698
1699 /* Update size */
1700 shiftfs_copyattr(loweri, inode_out);
1701
1702 fdput(real_in);
1703 fdput(real_out);
1704
1705 return ret;
1706}
1707
1708static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1709 struct file *file_out, loff_t pos_out,
1710 size_t len, unsigned int flags)
1711{
1712 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1713 SHIFTFS_COPY);
1714}
1715
1716static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1717 struct file *file_out, loff_t pos_out,
1718 loff_t len, unsigned int remap_flags)
1719{
1720 enum shiftfs_copyop op;
1721
1722 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1723 return -EINVAL;
1724
1725 if (remap_flags & REMAP_FILE_DEDUP)
1726 op = SHIFTFS_DEDUPE;
1727 else
1728 op = SHIFTFS_CLONE;
1729
1730 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1731 remap_flags, op);
1732}
1733
1734static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1735{
1736 const struct cred *oldcred;
1737 int err = -ENOTDIR;
b4de76a8 1738 struct file *realfile = file->private_data;
3ded6d6b
CB
1739
1740 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1741 err = iterate_dir(realfile, ctx);
1742 revert_creds(oldcred);
1743
1744 return err;
1745}
1746
1747const struct file_operations shiftfs_file_operations = {
1748 .open = shiftfs_open,
1749 .release = shiftfs_release,
958069e8 1750 .llseek = shiftfs_file_llseek,
3ded6d6b
CB
1751 .read_iter = shiftfs_read_iter,
1752 .write_iter = shiftfs_write_iter,
1753 .fsync = shiftfs_fsync,
1754 .mmap = shiftfs_mmap,
1755 .fallocate = shiftfs_fallocate,
1756 .fadvise = shiftfs_fadvise,
1757 .unlocked_ioctl = shiftfs_ioctl,
1758 .compat_ioctl = shiftfs_compat_ioctl,
1759 .copy_file_range = shiftfs_copy_file_range,
1760 .remap_file_range = shiftfs_remap_file_range,
37c48396
CB
1761 .splice_read = generic_file_splice_read,
1762 .splice_write = iter_file_splice_write,
3ded6d6b
CB
1763};
1764
1765const struct file_operations shiftfs_dir_operations = {
b4de76a8
CB
1766 .open = shiftfs_dir_open,
1767 .release = shiftfs_dir_release,
3ded6d6b
CB
1768 .compat_ioctl = shiftfs_compat_ioctl,
1769 .fsync = shiftfs_fsync,
1770 .iterate_shared = shiftfs_iterate_shared,
958069e8 1771 .llseek = shiftfs_dir_llseek,
3ded6d6b 1772 .read = generic_read_dir,
3ded6d6b
CB
1773 .unlocked_ioctl = shiftfs_ioctl,
1774};
1775
1776static const struct address_space_operations shiftfs_aops = {
1777 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1778 .direct_IO = noop_direct_IO,
1779};
1780
1781static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1782 umode_t mode, dev_t dev, struct dentry *dentry)
1783{
1784 struct inode *loweri;
1785
1786 inode->i_ino = ino;
1787 inode->i_flags |= S_NOCMTIME;
1788
1789 mode &= S_IFMT;
1790 inode->i_mode = mode;
1791 switch (mode & S_IFMT) {
1792 case S_IFDIR:
1793 inode->i_op = &shiftfs_dir_inode_operations;
1794 inode->i_fop = &shiftfs_dir_operations;
1795 break;
1796 case S_IFLNK:
1797 inode->i_op = &shiftfs_symlink_inode_operations;
1798 break;
1799 case S_IFREG:
1800 inode->i_op = &shiftfs_file_inode_operations;
1801 inode->i_fop = &shiftfs_file_operations;
1802 inode->i_mapping->a_ops = &shiftfs_aops;
1803 break;
1804 default:
1805 inode->i_op = &shiftfs_special_inode_operations;
1806 init_special_inode(inode, mode, dev);
1807 break;
1808 }
1809
1810 if (!dentry)
1811 return;
1812
1813 loweri = dentry->d_inode;
1814 if (!loweri->i_op->get_link)
1815 inode->i_opflags |= IOP_NOFOLLOW;
1816
1817 shiftfs_copyattr(loweri, inode);
1818 shiftfs_copyflags(loweri, inode);
1819 set_nlink(inode, loweri->i_nlink);
1820}
1821
1822static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1823{
1824 struct super_block *sb = dentry->d_sb;
1825 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1826
1827 if (sbinfo->mark)
1828 seq_show_option(m, "mark", NULL);
1829
1830 if (sbinfo->passthrough)
1831 seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1832
1833 return 0;
1834}
1835
1836static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1837{
1838 struct super_block *sb = dentry->d_sb;
1839 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1840 struct dentry *root = sb->s_root;
1841 struct dentry *realroot = root->d_fsdata;
1842 struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1843 int err;
1844
1845 err = vfs_statfs(&realpath, buf);
aa269008 1846 if (err)
3ded6d6b 1847 return err;
aa269008 1848
3ded6d6b
CB
1849 if (!shiftfs_passthrough_statfs(sbinfo))
1850 buf->f_type = sb->s_magic;
aa269008 1851
3ded6d6b
CB
1852 return 0;
1853}
aa269008 1854
3ded6d6b
CB
1855static void shiftfs_evict_inode(struct inode *inode)
1856{
1857 struct inode *loweri = inode->i_private;
1858
1859 clear_inode(inode);
1860
1861 if (loweri)
1862 iput(loweri);
1863}
1864
1865static void shiftfs_put_super(struct super_block *sb)
1866{
1867 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1868
1869 if (sbinfo) {
1870 mntput(sbinfo->mnt);
1871 put_cred(sbinfo->creator_cred);
1872 kfree(sbinfo);
1873 }
1874}
1875
1876static const struct xattr_handler shiftfs_xattr_handler = {
1877 .prefix = "",
1878 .get = shiftfs_xattr_get,
1879 .set = shiftfs_xattr_set,
1880};
1881
1882const struct xattr_handler *shiftfs_xattr_handlers[] = {
1883#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1884 &shiftfs_posix_acl_access_xattr_handler,
1885 &shiftfs_posix_acl_default_xattr_handler,
1886#endif
1887 &shiftfs_xattr_handler,
1888 NULL
1889};
1890
/* True when every bit set in @new_flags is also set in @old_flags. */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	return (old_flags & new_flags) == new_flags;
}
1898
40b373d6
CB
1899static int shiftfs_super_check_flags(unsigned long old_flags,
1900 unsigned long new_flags)
1901{
1902 if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1903 return -EPERM;
1904
1905 if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1906 return -EPERM;
1907
1908 if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1909 return -EPERM;
1910
1911 if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1912 return -EPERM;
1913
1914 if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1915 return -EPERM;
1916
1917 if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1918 return -EPERM;
1919
1920 if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1921 return -EPERM;
1922
1923 return 0;
1924}
1925
3ded6d6b
CB
1926static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1927{
1928 int err;
1929 struct shiftfs_super_info new = {};
1930 struct shiftfs_super_info *info = sb->s_fs_info;
1931
1932 err = shiftfs_parse_mount_options(&new, data);
aa269008 1933 if (err)
3ded6d6b
CB
1934 return err;
1935
40b373d6
CB
1936 err = shiftfs_super_check_flags(sb->s_flags, *flags);
1937 if (err)
1938 return err;
1939
3ded6d6b
CB
1940 /* Mark mount option cannot be changed. */
1941 if (info->mark || (info->mark != new.mark))
1942 return -EPERM;
1943
1944 if (info->passthrough != new.passthrough) {
1945 /* Don't allow exceeding passthrough options of mark mount. */
14ba9a56 1946 if (!passthrough_is_subset(info->passthrough_mark,
3ded6d6b
CB
1947 info->passthrough))
1948 return -EPERM;
1949
1950 info->passthrough = new.passthrough;
1951 }
1952
1953 return 0;
1954}
aa269008 1955
3ded6d6b
CB
1956static const struct super_operations shiftfs_super_ops = {
1957 .put_super = shiftfs_put_super,
1958 .show_options = shiftfs_show_options,
1959 .statfs = shiftfs_statfs,
1960 .remount_fs = shiftfs_remount,
1961 .evict_inode = shiftfs_evict_inode,
1962};
1963
/*
 * Arguments handed from shiftfs_mount() to shiftfs_fill_super().
 * Field order matters: shiftfs_mount() may initialise this positionally.
 */
struct shiftfs_data {
	void *data;		/* raw mount option data */
	const char *path;	/* dev_name: path of the directory to shift */
};
1968
40b373d6
CB
1969static void shiftfs_super_force_flags(struct super_block *sb,
1970 unsigned long lower_flags)
1971{
1972 sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1973 SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1974
1975 if (!(lower_flags & SB_POSIXACL))
1976 sb->s_flags &= ~SB_POSIXACL;
1977}
1978
3ded6d6b
CB
1979static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1980 int silent)
1981{
1982 int err;
1983 struct path path = {};
1984 struct shiftfs_super_info *sbinfo_mp;
1985 char *name = NULL;
1986 struct inode *inode = NULL;
1987 struct dentry *dentry = NULL;
1988 struct shiftfs_data *data = raw_data;
1989 struct shiftfs_super_info *sbinfo = NULL;
1990
1991 if (!data->path)
1992 return -EINVAL;
1993
1994 sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1995 if (!sb->s_fs_info)
1996 return -ENOMEM;
1997 sbinfo = sb->s_fs_info;
1998
1999 err = shiftfs_parse_mount_options(sbinfo, data->data);
2000 if (err)
2001 return err;
2002
2003 /* to mount a mark, must be userns admin */
2004 if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
2005 return -EPERM;
2006
2007 name = kstrdup(data->path, GFP_KERNEL);
2008 if (!name)
2009 return -ENOMEM;
2010
2011 err = kern_path(name, LOOKUP_FOLLOW, &path);
2012 if (err)
2013 goto out_free_name;
aa269008
JB
2014
2015 if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
2016 err = -ENOTDIR;
3ded6d6b 2017 goto out_put_path;
aa269008
JB
2018 }
2019
40b373d6
CB
2020 sb->s_flags |= SB_POSIXACL;
2021
3ded6d6b 2022 if (sbinfo->mark) {
9f4c882a 2023 struct cred *cred_tmp;
3ded6d6b
CB
2024 struct super_block *lower_sb = path.mnt->mnt_sb;
2025
2026 /* to mark a mount point, must root wrt lower s_user_ns */
2027 if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
2028 err = -EPERM;
2029 goto out_put_path;
2030 }
aa269008 2031
aa269008
JB
2032 /*
2033 * this part is visible unshifted, so make sure no
2034 * executables that could be used to give suid
2035 * privileges
2036 */
2037 sb->s_iflags = SB_I_NOEXEC;
aa269008 2038
40b373d6
CB
2039 shiftfs_super_force_flags(sb, lower_sb->s_flags);
2040
aa269008 2041 /*
3ded6d6b
CB
2042 * Handle nesting of shiftfs mounts by referring this mark
2043 * mount back to the original mark mount. This is more
2044 * efficient and alleviates concerns about stack depth.
aa269008 2045 */
3ded6d6b
CB
2046 if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2047 sbinfo_mp = lower_sb->s_fs_info;
2048
2049 /* Doesn't make sense to mark a mark mount */
2050 if (sbinfo_mp->mark) {
2051 err = -EINVAL;
2052 goto out_put_path;
2053 }
2054
2055 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2056 sbinfo->passthrough)) {
2057 err = -EPERM;
2058 goto out_put_path;
2059 }
2060
2061 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2062 dentry = dget(path.dentry->d_fsdata);
14ba9a56
CB
2063 /*
2064 * Copy up the passthrough mount options from the
2065 * parent mark mountpoint.
2066 */
2067 sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
b91d316e 2068 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
3ded6d6b
CB
2069 } else {
2070 sbinfo->mnt = mntget(path.mnt);
2071 dentry = dget(path.dentry);
14ba9a56
CB
2072 /*
2073 * For a new mark passthrough_mark and passthrough
2074 * are identical.
2075 */
2076 sbinfo->passthrough_mark = sbinfo->passthrough;
3ded6d6b 2077
b91d316e
CB
2078 cred_tmp = prepare_creds();
2079 if (!cred_tmp) {
2080 err = -ENOMEM;
2081 goto out_put_path;
2082 }
2083 /* Don't override disk quota limits or use reserved space. */
2084 cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2085 sbinfo->creator_cred = cred_tmp;
3ded6d6b
CB
2086 }
2087 } else {
2088 /*
2089 * This leg executes if we're admin capable in the namespace,
2090 * so be very careful.
2091 */
2092 err = -EPERM;
aa269008 2093 if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
3ded6d6b
CB
2094 goto out_put_path;
2095
2096 sbinfo_mp = path.dentry->d_sb->s_fs_info;
2097 if (!sbinfo_mp->mark)
2098 goto out_put_path;
2099
2100 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2101 sbinfo->passthrough))
2102 goto out_put_path;
2103
2104 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2105 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
aa269008 2106 dentry = dget(path.dentry->d_fsdata);
14ba9a56
CB
2107 /*
2108 * Copy up passthrough settings from mark mountpoint so we can
2109 * verify when the overlay wants to remount with different
2110 * passthrough settings.
2111 */
2112 sbinfo->passthrough_mark = sbinfo_mp->passthrough;
40b373d6 2113 shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
3ded6d6b
CB
2114 }
2115
2116 sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2117 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2118 printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2119 err = -EINVAL;
2120 goto out_put_path;
2121 }
2122
2123 inode = new_inode(sb);
2124 if (!inode) {
2125 err = -ENOMEM;
2126 goto out_put_path;
aa269008 2127 }
3ded6d6b
CB
2128 shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2129
2130 ihold(dentry->d_inode);
2131 inode->i_private = dentry->d_inode;
2132
aa269008 2133 sb->s_magic = SHIFTFS_MAGIC;
ed1034ae 2134 sb->s_maxbytes = MAX_LFS_FILESIZE;
aa269008
JB
2135 sb->s_op = &shiftfs_super_ops;
2136 sb->s_xattr = shiftfs_xattr_handlers;
2137 sb->s_d_op = &shiftfs_dentry_ops;
3ded6d6b
CB
2138 sb->s_root = d_make_root(inode);
2139 if (!sb->s_root) {
2140 err = -ENOMEM;
2141 goto out_put_path;
2142 }
2143
aa269008 2144 sb->s_root->d_fsdata = dentry;
3ded6d6b
CB
2145 sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2146 shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
aa269008 2147
3ded6d6b
CB
2148 dentry = NULL;
2149 err = 0;
aa269008 2150
3ded6d6b 2151out_put_path:
aa269008 2152 path_put(&path);
3ded6d6b
CB
2153
2154out_free_name:
aa269008 2155 kfree(name);
3ded6d6b
CB
2156
2157 dput(dentry);
2158
aa269008
JB
2159 return err;
2160}
2161
2162static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2163 int flags, const char *dev_name, void *data)
2164{
2165 struct shiftfs_data d = { data, dev_name };
2166
2167 return mount_nodev(fs_type, flags, &d, shiftfs_fill_super);
2168}
2169
2170static struct file_system_type shiftfs_type = {
2171 .owner = THIS_MODULE,
2172 .name = "shiftfs",
2173 .mount = shiftfs_mount,
2174 .kill_sb = kill_anon_super,
2175 .fs_flags = FS_USERNS_MOUNT,
2176};
2177
2178static int __init shiftfs_init(void)
2179{
2180 return register_filesystem(&shiftfs_type);
2181}
2182
2183static void __exit shiftfs_exit(void)
2184{
2185 unregister_filesystem(&shiftfs_type);
2186}
2187
2188MODULE_ALIAS_FS("shiftfs");
2189MODULE_AUTHOR("James Bottomley");
3ded6d6b
CB
2190MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>");
2191MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>");
2192MODULE_DESCRIPTION("id shifting filesystem");
aa269008
JB
2193MODULE_LICENSE("GPL v2");
2194module_init(shiftfs_init)
2195module_exit(shiftfs_exit)