]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blob - fs/shiftfs.c
UBUNTU: SAUCE: shiftfs: fix passing of attrs to underlay for setattr
[mirror_ubuntu-hirsute-kernel.git] / fs / shiftfs.c
1 #include <linux/btrfs.h>
2 #include <linux/capability.h>
3 #include <linux/cred.h>
4 #include <linux/mount.h>
5 #include <linux/fdtable.h>
6 #include <linux/file.h>
7 #include <linux/fs.h>
8 #include <linux/namei.h>
9 #include <linux/module.h>
10 #include <linux/kernel.h>
11 #include <linux/magic.h>
12 #include <linux/parser.h>
13 #include <linux/security.h>
14 #include <linux/seq_file.h>
15 #include <linux/statfs.h>
16 #include <linux/slab.h>
17 #include <linux/user_namespace.h>
18 #include <linux/uidgid.h>
19 #include <linux/xattr.h>
20 #include <linux/posix_acl.h>
21 #include <linux/posix_acl_xattr.h>
22 #include <linux/uio.h>
23
24 struct shiftfs_super_info {
25 struct vfsmount *mnt;
26 struct user_namespace *userns;
27 /* creds of process who created the super block */
28 const struct cred *creator_cred;
29 bool mark;
30 unsigned int passthrough;
31 struct shiftfs_super_info *info_mark;
32 };
33
34 struct shiftfs_file_info {
35 struct path realpath;
36 struct file *realfile;
37 };
38
39 struct kmem_cache *shiftfs_file_info_cache;
40
41 static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
42 umode_t mode, dev_t dev, struct dentry *dentry);
43
44 #define SHIFTFS_PASSTHROUGH_NONE 0
45 #define SHIFTFS_PASSTHROUGH_STAT 1
46 #define SHIFTFS_PASSTHROUGH_IOCTL 2
47 #define SHIFTFS_PASSTHROUGH_ALL \
48 (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
49
50 static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
51 {
52 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
53 return false;
54
55 if (info->info_mark &&
56 !(info->info_mark->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
57 return false;
58
59 return true;
60 }
61
62 static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
63 {
64 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
65 return false;
66
67 if (info->info_mark &&
68 !(info->info_mark->passthrough & SHIFTFS_PASSTHROUGH_STAT))
69 return false;
70
71 return true;
72 }
73
/* Token ids returned by match_token() for the mount option table. */
enum {
	OPT_MARK,		/* "mark" */
	OPT_PASSTHROUGH,	/* "passthrough=%u" */
	OPT_LAST,		/* table terminator */
};
79
80 /* global filesystem options */
81 static const match_table_t tokens = {
82 { OPT_MARK, "mark" },
83 { OPT_PASSTHROUGH, "passthrough=%u" },
84 { OPT_LAST, NULL }
85 };
86
87 static const struct cred *shiftfs_override_creds(const struct super_block *sb)
88 {
89 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
90
91 return override_creds(sbinfo->creator_cred);
92 }
93
/*
 * Undo shiftfs_override_object_creds(): reinstate @oldcred, then drop the
 * reference on the temporary @newcred.
 */
static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
					       struct cred *newcred)
{
	revert_creds(oldcred);

	put_cred(newcred);
}
100
101 static int shiftfs_override_object_creds(const struct super_block *sb,
102 const struct cred **oldcred,
103 struct cred **newcred,
104 struct dentry *dentry, umode_t mode,
105 bool hardlink)
106 {
107 kuid_t fsuid = current_fsuid();
108 kgid_t fsgid = current_fsgid();
109
110 *oldcred = shiftfs_override_creds(sb);
111
112 *newcred = prepare_creds();
113 if (!*newcred) {
114 revert_creds(*oldcred);
115 return -ENOMEM;
116 }
117
118 (*newcred)->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, fsuid));
119 (*newcred)->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, fsgid));
120
121 if (!hardlink) {
122 int err = security_dentry_create_files_as(dentry, mode,
123 &dentry->d_name,
124 *oldcred, *newcred);
125 if (err) {
126 shiftfs_revert_object_creds(*oldcred, *newcred);
127 return err;
128 }
129 }
130
131 put_cred(override_creds(*newcred));
132 return 0;
133 }
134
135 static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
136 kuid_t kuid)
137 {
138 uid_t uid = from_kuid(from, kuid);
139 return make_kuid(to, uid);
140 }
141
142 static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
143 kgid_t kgid)
144 {
145 gid_t gid = from_kgid(from, kgid);
146 return make_kgid(to, gid);
147 }
148
149 static void shiftfs_copyattr(struct inode *from, struct inode *to)
150 {
151 struct user_namespace *from_ns = from->i_sb->s_user_ns;
152 struct user_namespace *to_ns = to->i_sb->s_user_ns;
153
154 to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
155 to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
156 to->i_mode = from->i_mode;
157 to->i_atime = from->i_atime;
158 to->i_mtime = from->i_mtime;
159 to->i_ctime = from->i_ctime;
160 i_size_write(to, i_size_read(from));
161 }
162
163 static void shiftfs_copyflags(struct inode *from, struct inode *to)
164 {
165 unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
166
167 inode_set_flags(to, from->i_flags & mask, mask);
168 }
169
170 static void shiftfs_file_accessed(struct file *file)
171 {
172 struct inode *upperi, *loweri;
173
174 if (file->f_flags & O_NOATIME)
175 return;
176
177 upperi = file_inode(file);
178 loweri = upperi->i_private;
179
180 if (!loweri)
181 return;
182
183 upperi->i_mtime = loweri->i_mtime;
184 upperi->i_ctime = loweri->i_ctime;
185
186 touch_atime(&file->f_path);
187 }
188
189 static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
190 char *options)
191 {
192 char *p;
193 substring_t args[MAX_OPT_ARGS];
194
195 sbinfo->mark = false;
196 sbinfo->passthrough = 0;
197
198 while ((p = strsep(&options, ",")) != NULL) {
199 int err, intarg, token;
200
201 if (!*p)
202 continue;
203
204 token = match_token(p, tokens, args);
205 switch (token) {
206 case OPT_MARK:
207 sbinfo->mark = true;
208 break;
209 case OPT_PASSTHROUGH:
210 err = match_int(&args[0], &intarg);
211 if (err)
212 return err;
213
214 if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
215 return -EINVAL;
216
217 sbinfo->passthrough = intarg;
218 break;
219 default:
220 return -EINVAL;
221 }
222 }
223
224 return 0;
225 }
226
227 static void shiftfs_d_release(struct dentry *dentry)
228 {
229 struct dentry *lowerd = dentry->d_fsdata;
230
231 if (lowerd)
232 dput(lowerd);
233 }
234
235 static struct dentry *shiftfs_d_real(struct dentry *dentry,
236 const struct inode *inode)
237 {
238 struct dentry *lowerd = dentry->d_fsdata;
239
240 if (inode && d_inode(dentry) == inode)
241 return dentry;
242
243 lowerd = d_real(lowerd, inode);
244 if (lowerd && (!inode || inode == d_inode(lowerd)))
245 return lowerd;
246
247 WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
248 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
249 return dentry;
250 }
251
252 static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
253 {
254 int err = 1;
255 struct dentry *lowerd = dentry->d_fsdata;
256
257 if (d_is_negative(lowerd) != d_is_negative(dentry))
258 return 0;
259
260 if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
261 err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
262
263 if (d_really_is_positive(dentry)) {
264 struct inode *inode = d_inode(dentry);
265 struct inode *loweri = d_inode(lowerd);
266
267 shiftfs_copyattr(loweri, inode);
268 if (!inode->i_nlink)
269 err = 0;
270 }
271
272 return err;
273 }
274
275 static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
276 {
277 int err = 1;
278 struct dentry *lowerd = dentry->d_fsdata;
279
280 if (d_unhashed(lowerd) ||
281 ((d_is_negative(lowerd) != d_is_negative(dentry))))
282 return 0;
283
284 if (flags & LOOKUP_RCU)
285 return -ECHILD;
286
287 if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
288 err = lowerd->d_op->d_revalidate(lowerd, flags);
289
290 if (d_really_is_positive(dentry)) {
291 struct inode *inode = d_inode(dentry);
292 struct inode *loweri = d_inode(lowerd);
293
294 shiftfs_copyattr(loweri, inode);
295 if (!inode->i_nlink)
296 err = 0;
297 }
298
299 return err;
300 }
301
302 static const struct dentry_operations shiftfs_dentry_ops = {
303 .d_release = shiftfs_d_release,
304 .d_real = shiftfs_d_real,
305 .d_revalidate = shiftfs_d_revalidate,
306 .d_weak_revalidate = shiftfs_d_weak_revalidate,
307 };
308
309 static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
310 struct delayed_call *done)
311 {
312 const char *p;
313 const struct cred *oldcred;
314 struct dentry *lowerd;
315
316 /* RCU lookup not supported */
317 if (!dentry)
318 return ERR_PTR(-ECHILD);
319
320 lowerd = dentry->d_fsdata;
321 oldcred = shiftfs_override_creds(dentry->d_sb);
322 p = vfs_get_link(lowerd, done);
323 revert_creds(oldcred);
324
325 return p;
326 }
327
328 static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
329 const char *name, const void *value,
330 size_t size, int flags)
331 {
332 struct dentry *lowerd = dentry->d_fsdata;
333 int err;
334 const struct cred *oldcred;
335
336 oldcred = shiftfs_override_creds(dentry->d_sb);
337 err = vfs_setxattr(lowerd, name, value, size, flags);
338 revert_creds(oldcred);
339
340 shiftfs_copyattr(lowerd->d_inode, inode);
341
342 return err;
343 }
344
345 static int shiftfs_xattr_get(const struct xattr_handler *handler,
346 struct dentry *dentry, struct inode *inode,
347 const char *name, void *value, size_t size)
348 {
349 struct dentry *lowerd = dentry->d_fsdata;
350 int err;
351 const struct cred *oldcred;
352
353 oldcred = shiftfs_override_creds(dentry->d_sb);
354 err = vfs_getxattr(lowerd, name, value, size);
355 revert_creds(oldcred);
356
357 return err;
358 }
359
360 static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
361 size_t size)
362 {
363 struct dentry *lowerd = dentry->d_fsdata;
364 int err;
365 const struct cred *oldcred;
366
367 oldcred = shiftfs_override_creds(dentry->d_sb);
368 err = vfs_listxattr(lowerd, list, size);
369 revert_creds(oldcred);
370
371 return err;
372 }
373
374 static int shiftfs_removexattr(struct dentry *dentry, const char *name)
375 {
376 struct dentry *lowerd = dentry->d_fsdata;
377 int err;
378 const struct cred *oldcred;
379
380 oldcred = shiftfs_override_creds(dentry->d_sb);
381 err = vfs_removexattr(lowerd, name);
382 revert_creds(oldcred);
383
384 /* update c/mtime */
385 shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
386
387 return err;
388 }
389
390 static int shiftfs_xattr_set(const struct xattr_handler *handler,
391 struct dentry *dentry, struct inode *inode,
392 const char *name, const void *value, size_t size,
393 int flags)
394 {
395 if (!value)
396 return shiftfs_removexattr(dentry, name);
397 return shiftfs_setxattr(dentry, inode, name, value, size, flags);
398 }
399
400 static int shiftfs_inode_test(struct inode *inode, void *data)
401 {
402 return inode->i_private == data;
403 }
404
405 static int shiftfs_inode_set(struct inode *inode, void *data)
406 {
407 inode->i_private = data;
408 return 0;
409 }
410
411 static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
412 umode_t mode, const char *symlink,
413 struct dentry *hardlink, bool excl)
414 {
415 int err;
416 const struct cred *oldcred;
417 struct cred *newcred;
418 void *loweri_iop_ptr = NULL;
419 umode_t modei = mode;
420 struct super_block *dir_sb = diri->i_sb;
421 struct dentry *lowerd_new = dentry->d_fsdata;
422 struct inode *inode = NULL, *loweri_dir = diri->i_private;
423 const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
424 struct dentry *lowerd_link = NULL;
425
426 if (hardlink) {
427 loweri_iop_ptr = loweri_dir_iop->link;
428 } else {
429 switch (mode & S_IFMT) {
430 case S_IFDIR:
431 loweri_iop_ptr = loweri_dir_iop->mkdir;
432 break;
433 case S_IFREG:
434 loweri_iop_ptr = loweri_dir_iop->create;
435 break;
436 case S_IFLNK:
437 loweri_iop_ptr = loweri_dir_iop->symlink;
438 break;
439 case S_IFSOCK:
440 /* fall through */
441 case S_IFIFO:
442 loweri_iop_ptr = loweri_dir_iop->mknod;
443 break;
444 }
445 }
446 if (!loweri_iop_ptr) {
447 err = -EINVAL;
448 goto out_iput;
449 }
450
451 inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
452
453 if (!hardlink) {
454 inode = new_inode(dir_sb);
455 if (!inode) {
456 err = -ENOMEM;
457 goto out_iput;
458 }
459
460 /*
461 * new_inode() will have added the new inode to the super
462 * block's list of inodes. Further below we will call
463 * inode_insert5() Which would perform the same operation again
464 * thereby corrupting the list. To avoid this raise I_CREATING
465 * in i_state which will cause inode_insert5() to skip this
466 * step. I_CREATING will be cleared by d_instantiate_new()
467 * below.
468 */
469 spin_lock(&inode->i_lock);
470 inode->i_state |= I_CREATING;
471 spin_unlock(&inode->i_lock);
472
473 inode_init_owner(inode, diri, mode);
474 modei = inode->i_mode;
475 }
476
477 err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
478 dentry, modei, hardlink != NULL);
479 if (err)
480 goto out_iput;
481
482 if (hardlink) {
483 lowerd_link = hardlink->d_fsdata;
484 err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
485 } else {
486 switch (modei & S_IFMT) {
487 case S_IFDIR:
488 err = vfs_mkdir(loweri_dir, lowerd_new, modei);
489 break;
490 case S_IFREG:
491 err = vfs_create(loweri_dir, lowerd_new, modei, excl);
492 break;
493 case S_IFLNK:
494 err = vfs_symlink(loweri_dir, lowerd_new, symlink);
495 break;
496 case S_IFSOCK:
497 /* fall through */
498 case S_IFIFO:
499 err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
500 break;
501 default:
502 err = -EINVAL;
503 break;
504 }
505 }
506
507 shiftfs_revert_object_creds(oldcred, newcred);
508
509 if (!err && WARN_ON(!lowerd_new->d_inode))
510 err = -EIO;
511 if (err)
512 goto out_iput;
513
514 if (hardlink) {
515 inode = d_inode(hardlink);
516 ihold(inode);
517
518 /* copy up times from lower inode */
519 shiftfs_copyattr(d_inode(lowerd_link), inode);
520 set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
521 d_instantiate(dentry, inode);
522 } else {
523 struct inode *inode_tmp;
524 struct inode *loweri_new = d_inode(lowerd_new);
525
526 inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
527 shiftfs_inode_test, shiftfs_inode_set,
528 loweri_new);
529 if (unlikely(inode_tmp != inode)) {
530 pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
531 iput(inode_tmp);
532 err = -EINVAL;
533 goto out_iput;
534 }
535
536 ihold(loweri_new);
537 shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
538 0, lowerd_new);
539 d_instantiate_new(dentry, inode);
540 }
541
542 shiftfs_copyattr(loweri_dir, diri);
543 if (loweri_iop_ptr == loweri_dir_iop->mkdir)
544 set_nlink(diri, loweri_dir->i_nlink);
545
546 inode = NULL;
547
548 out_iput:
549 iput(inode);
550 inode_unlock(loweri_dir);
551
552 return err;
553 }
554
555 static int shiftfs_create(struct inode *dir, struct dentry *dentry,
556 umode_t mode, bool excl)
557 {
558 mode |= S_IFREG;
559
560 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
561 }
562
563 static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
564 umode_t mode)
565 {
566 mode |= S_IFDIR;
567
568 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
569 }
570
571 static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
572 struct dentry *dentry)
573 {
574 return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
575 }
576
577 static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
578 dev_t rdev)
579 {
580 if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
581 return -EPERM;
582
583 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
584 }
585
586 static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
587 const char *symlink)
588 {
589 return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
590 }
591
592 static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
593 {
594 struct dentry *lowerd = dentry->d_fsdata;
595 struct inode *loweri = dir->i_private;
596 int err;
597 const struct cred *oldcred;
598
599 oldcred = shiftfs_override_creds(dentry->d_sb);
600 inode_lock_nested(loweri, I_MUTEX_PARENT);
601 if (rmdir)
602 err = vfs_rmdir(loweri, lowerd);
603 else
604 err = vfs_unlink(loweri, lowerd, NULL);
605 inode_unlock(loweri);
606 revert_creds(oldcred);
607
608 shiftfs_copyattr(loweri, dir);
609 set_nlink(d_inode(dentry), loweri->i_nlink);
610 if (!err)
611 d_drop(dentry);
612
613 set_nlink(dir, loweri->i_nlink);
614
615 return err;
616 }
617
618 static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
619 {
620 return shiftfs_rm(dir, dentry, false);
621 }
622
623 static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
624 {
625 return shiftfs_rm(dir, dentry, true);
626 }
627
628 static int shiftfs_rename(struct inode *olddir, struct dentry *old,
629 struct inode *newdir, struct dentry *new,
630 unsigned int flags)
631 {
632 struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
633 *lowerd_dir_new = new->d_parent->d_fsdata,
634 *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
635 *trapd;
636 struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
637 *loweri_dir_new = lowerd_dir_new->d_inode;
638 int err = -EINVAL;
639 const struct cred *oldcred;
640
641 trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
642
643 if (trapd == lowerd_old || trapd == lowerd_new)
644 goto out_unlock;
645
646 oldcred = shiftfs_override_creds(old->d_sb);
647 err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
648 NULL, flags);
649 revert_creds(oldcred);
650
651 shiftfs_copyattr(loweri_dir_old, olddir);
652 shiftfs_copyattr(loweri_dir_new, newdir);
653
654 out_unlock:
655 unlock_rename(lowerd_dir_new, lowerd_dir_old);
656
657 return err;
658 }
659
660 static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
661 unsigned int flags)
662 {
663 struct dentry *new;
664 struct inode *newi;
665 const struct cred *oldcred;
666 struct dentry *lowerd = dentry->d_parent->d_fsdata;
667 struct inode *inode = NULL, *loweri = lowerd->d_inode;
668
669 inode_lock(loweri);
670 oldcred = shiftfs_override_creds(dentry->d_sb);
671 new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
672 revert_creds(oldcred);
673 inode_unlock(loweri);
674
675 if (IS_ERR(new))
676 return new;
677
678 dentry->d_fsdata = new;
679
680 newi = new->d_inode;
681 if (!newi)
682 goto out;
683
684 inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
685 shiftfs_inode_test, shiftfs_inode_set, newi);
686 if (!inode) {
687 dput(new);
688 return ERR_PTR(-ENOMEM);
689 }
690 if (inode->i_state & I_NEW) {
691 /*
692 * inode->i_private set by shiftfs_inode_set(), but we still
693 * need to take a reference
694 */
695 ihold(newi);
696 shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
697 unlock_new_inode(inode);
698 }
699
700 out:
701 return d_splice_alias(inode, dentry);
702 }
703
704 static int shiftfs_permission(struct inode *inode, int mask)
705 {
706 int err;
707 const struct cred *oldcred;
708 struct inode *loweri = inode->i_private;
709
710 if (!loweri) {
711 WARN_ON(!(mask & MAY_NOT_BLOCK));
712 return -ECHILD;
713 }
714
715 err = generic_permission(inode, mask);
716 if (err)
717 return err;
718
719 oldcred = shiftfs_override_creds(inode->i_sb);
720 err = inode_permission(loweri, mask);
721 revert_creds(oldcred);
722
723 return err;
724 }
725
726 static int shiftfs_fiemap(struct inode *inode,
727 struct fiemap_extent_info *fieinfo, u64 start,
728 u64 len)
729 {
730 int err;
731 const struct cred *oldcred;
732 struct inode *loweri = inode->i_private;
733
734 if (!loweri->i_op->fiemap)
735 return -EOPNOTSUPP;
736
737 oldcred = shiftfs_override_creds(inode->i_sb);
738 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
739 filemap_write_and_wait(loweri->i_mapping);
740 err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
741 revert_creds(oldcred);
742
743 return err;
744 }
745
746 static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
747 umode_t mode)
748 {
749 int err;
750 const struct cred *oldcred;
751 struct dentry *lowerd = dentry->d_fsdata;
752 struct inode *loweri = dir->i_private;
753
754 if (!loweri->i_op->tmpfile)
755 return -EOPNOTSUPP;
756
757 oldcred = shiftfs_override_creds(dir->i_sb);
758 err = loweri->i_op->tmpfile(loweri, lowerd, mode);
759 revert_creds(oldcred);
760
761 return err;
762 }
763
764 static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
765 {
766 struct dentry *lowerd = dentry->d_fsdata;
767 struct inode *loweri = lowerd->d_inode;
768 struct iattr newattr;
769 const struct cred *oldcred;
770 struct super_block *sb = dentry->d_sb;
771 int err;
772
773 err = setattr_prepare(dentry, attr);
774 if (err)
775 return err;
776
777 newattr = *attr;
778 newattr.ia_uid = KUIDT_INIT(from_kuid(sb->s_user_ns, attr->ia_uid));
779 newattr.ia_gid = KGIDT_INIT(from_kgid(sb->s_user_ns, attr->ia_gid));
780
781 /*
782 * mode change is for clearing setuid/setgid bits. Allow lower fs
783 * to interpret this in its own way.
784 */
785 if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
786 newattr.ia_valid &= ~ATTR_MODE;
787
788 inode_lock(loweri);
789 oldcred = shiftfs_override_creds(dentry->d_sb);
790 err = notify_change(lowerd, &newattr, NULL);
791 revert_creds(oldcred);
792 inode_unlock(loweri);
793
794 shiftfs_copyattr(loweri, d_inode(dentry));
795
796 return err;
797 }
798
799 static int shiftfs_getattr(const struct path *path, struct kstat *stat,
800 u32 request_mask, unsigned int query_flags)
801 {
802 struct inode *inode = path->dentry->d_inode;
803 struct dentry *lowerd = path->dentry->d_fsdata;
804 struct inode *loweri = lowerd->d_inode;
805 struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
806 struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
807 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
808 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
809 const struct cred *oldcred;
810 int err;
811
812 oldcred = shiftfs_override_creds(inode->i_sb);
813 err = vfs_getattr(&newpath, stat, request_mask, query_flags);
814 revert_creds(oldcred);
815
816 if (err)
817 return err;
818
819 /* transform the underlying id */
820 stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
821 stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
822 return 0;
823 }
824
825 #ifdef CONFIG_SHIFT_FS_POSIX_ACL
826
827 static int
828 shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
829 struct posix_acl *acl)
830 {
831 int i;
832
833 for (i = 0; i < acl->a_count; i++) {
834 struct posix_acl_entry *e = &acl->a_entries[i];
835 switch(e->e_tag) {
836 case ACL_USER:
837 e->e_uid = shift_kuid(from, to, e->e_uid);
838 if (!uid_valid(e->e_uid))
839 return -EOVERFLOW;
840 break;
841 case ACL_GROUP:
842 e->e_gid = shift_kgid(from, to, e->e_gid);
843 if (!gid_valid(e->e_gid))
844 return -EOVERFLOW;
845 break;
846 }
847 }
848 return 0;
849 }
850
851 static void
852 shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
853 void *value, size_t size)
854 {
855 struct posix_acl_xattr_header *header = value;
856 struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
857 int count;
858 kuid_t kuid;
859 kgid_t kgid;
860
861 if (!value)
862 return;
863 if (size < sizeof(struct posix_acl_xattr_header))
864 return;
865 if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
866 return;
867
868 count = posix_acl_xattr_count(size);
869 if (count < 0)
870 return;
871 if (count == 0)
872 return;
873
874 for (end = entry + count; entry != end; entry++) {
875 switch(le16_to_cpu(entry->e_tag)) {
876 case ACL_USER:
877 kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
878 kuid = shift_kuid(from, to, kuid);
879 entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
880 break;
881 case ACL_GROUP:
882 kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
883 kgid = shift_kgid(from, to, kgid);
884 entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
885 break;
886 default:
887 break;
888 }
889 }
890 }
891
892 static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
893 {
894 struct inode *loweri = inode->i_private;
895 const struct cred *oldcred;
896 struct posix_acl *lower_acl, *acl = NULL;
897 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
898 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
899 int size;
900 int err;
901
902 if (!IS_POSIXACL(loweri))
903 return NULL;
904
905 oldcred = shiftfs_override_creds(inode->i_sb);
906 lower_acl = get_acl(loweri, type);
907 revert_creds(oldcred);
908
909 if (lower_acl && !IS_ERR(lower_acl)) {
910 /* XXX: export posix_acl_clone? */
911 size = sizeof(struct posix_acl) +
912 lower_acl->a_count * sizeof(struct posix_acl_entry);
913 acl = kmemdup(lower_acl, size, GFP_KERNEL);
914 posix_acl_release(lower_acl);
915
916 if (!acl)
917 return ERR_PTR(-ENOMEM);
918
919 refcount_set(&acl->a_refcount, 1);
920
921 err = shift_acl_ids(from_ns, to_ns, acl);
922 if (err) {
923 kfree(acl);
924 return ERR_PTR(err);
925 }
926 }
927
928 return acl;
929 }
930
931 static int
932 shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
933 struct dentry *dentry, struct inode *inode,
934 const char *name, void *buffer, size_t size)
935 {
936 struct inode *loweri = inode->i_private;
937 int ret;
938
939 ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
940 buffer, size);
941 if (ret < 0)
942 return ret;
943
944 inode_lock(loweri);
945 shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
946 buffer, size);
947 inode_unlock(loweri);
948 return ret;
949 }
950
951 static int
952 shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
953 struct dentry *dentry, struct inode *inode,
954 const char *name, const void *value,
955 size_t size, int flags)
956 {
957 struct inode *loweri = inode->i_private;
958 int err;
959
960 if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
961 return -EOPNOTSUPP;
962 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
963 return value ? -EACCES : 0;
964 if (!inode_owner_or_capable(inode))
965 return -EPERM;
966
967 if (value) {
968 shift_acl_xattr_ids(inode->i_sb->s_user_ns,
969 loweri->i_sb->s_user_ns,
970 (void *)value, size);
971 err = shiftfs_setxattr(dentry, inode, handler->name, value,
972 size, flags);
973 } else {
974 err = shiftfs_removexattr(dentry, handler->name);
975 }
976
977 if (!err)
978 shiftfs_copyattr(loweri, inode);
979
980 return err;
981 }
982
983 static const struct xattr_handler
984 shiftfs_posix_acl_access_xattr_handler = {
985 .name = XATTR_NAME_POSIX_ACL_ACCESS,
986 .flags = ACL_TYPE_ACCESS,
987 .get = shiftfs_posix_acl_xattr_get,
988 .set = shiftfs_posix_acl_xattr_set,
989 };
990
991 static const struct xattr_handler
992 shiftfs_posix_acl_default_xattr_handler = {
993 .name = XATTR_NAME_POSIX_ACL_DEFAULT,
994 .flags = ACL_TYPE_DEFAULT,
995 .get = shiftfs_posix_acl_xattr_get,
996 .set = shiftfs_posix_acl_xattr_set,
997 };
998
999 #else /* !CONFIG_SHIFT_FS_POSIX_ACL */
1000
1001 #define shiftfs_get_acl NULL
1002
1003 #endif /* CONFIG_SHIFT_FS_POSIX_ACL */
1004
1005 static const struct inode_operations shiftfs_dir_inode_operations = {
1006 .lookup = shiftfs_lookup,
1007 .mkdir = shiftfs_mkdir,
1008 .symlink = shiftfs_symlink,
1009 .unlink = shiftfs_unlink,
1010 .rmdir = shiftfs_rmdir,
1011 .rename = shiftfs_rename,
1012 .link = shiftfs_link,
1013 .setattr = shiftfs_setattr,
1014 .create = shiftfs_create,
1015 .mknod = shiftfs_mknod,
1016 .permission = shiftfs_permission,
1017 .getattr = shiftfs_getattr,
1018 .listxattr = shiftfs_listxattr,
1019 .get_acl = shiftfs_get_acl,
1020 };
1021
1022 static const struct inode_operations shiftfs_file_inode_operations = {
1023 .fiemap = shiftfs_fiemap,
1024 .getattr = shiftfs_getattr,
1025 .get_acl = shiftfs_get_acl,
1026 .listxattr = shiftfs_listxattr,
1027 .permission = shiftfs_permission,
1028 .setattr = shiftfs_setattr,
1029 .tmpfile = shiftfs_tmpfile,
1030 };
1031
1032 static const struct inode_operations shiftfs_special_inode_operations = {
1033 .getattr = shiftfs_getattr,
1034 .get_acl = shiftfs_get_acl,
1035 .listxattr = shiftfs_listxattr,
1036 .permission = shiftfs_permission,
1037 .setattr = shiftfs_setattr,
1038 };
1039
1040 static const struct inode_operations shiftfs_symlink_inode_operations = {
1041 .getattr = shiftfs_getattr,
1042 .get_link = shiftfs_get_link,
1043 .listxattr = shiftfs_listxattr,
1044 .setattr = shiftfs_setattr,
1045 };
1046
1047 static struct file *shiftfs_open_realfile(const struct file *file,
1048 struct path *realpath)
1049 {
1050 struct file *lowerf;
1051 const struct cred *oldcred;
1052 struct inode *inode = file_inode(file);
1053 struct inode *loweri = realpath->dentry->d_inode;
1054 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1055
1056 oldcred = shiftfs_override_creds(inode->i_sb);
1057 /* XXX: open_with_fake_path() not gauranteed to stay around, if
1058 * removed use dentry_open() */
1059 lowerf = open_with_fake_path(realpath, file->f_flags, loweri, info->creator_cred);
1060 revert_creds(oldcred);
1061
1062 return lowerf;
1063 }
1064
1065 #define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1066
1067 static int shiftfs_change_flags(struct file *file, unsigned int flags)
1068 {
1069 struct inode *inode = file_inode(file);
1070 int err;
1071
1072 /* if some flag changed that cannot be changed then something's amiss */
1073 if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1074 return -EIO;
1075
1076 flags &= SHIFTFS_SETFL_MASK;
1077
1078 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1079 return -EPERM;
1080
1081 if (flags & O_DIRECT) {
1082 if (!file->f_mapping->a_ops ||
1083 !file->f_mapping->a_ops->direct_IO)
1084 return -EINVAL;
1085 }
1086
1087 if (file->f_op->check_flags) {
1088 err = file->f_op->check_flags(flags);
1089 if (err)
1090 return err;
1091 }
1092
1093 spin_lock(&file->f_lock);
1094 file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1095 spin_unlock(&file->f_lock);
1096
1097 return 0;
1098 }
1099
1100 static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1101 {
1102 struct shiftfs_file_info *file_info = file->private_data;
1103 struct file *realfile = file_info->realfile;
1104
1105 lowerfd->flags = 0;
1106 lowerfd->file = realfile;
1107
1108 /* Did the flags change since open? */
1109 if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1110 return shiftfs_change_flags(lowerfd->file, file->f_flags);
1111
1112 return 0;
1113 }
1114
1115 static int shiftfs_open(struct inode *inode, struct file *file)
1116 {
1117 struct shiftfs_super_info *ssi = inode->i_sb->s_fs_info;
1118 struct shiftfs_file_info *file_info;
1119 struct file *realfile;
1120 struct path *realpath;
1121
1122 file_info = kmem_cache_zalloc(shiftfs_file_info_cache, GFP_KERNEL);
1123 if (!file_info)
1124 return -ENOMEM;
1125
1126 realpath = &file_info->realpath;
1127 realpath->mnt = ssi->mnt;
1128 realpath->dentry = file->f_path.dentry->d_fsdata;
1129
1130 realfile = shiftfs_open_realfile(file, realpath);
1131 if (IS_ERR(realfile)) {
1132 kmem_cache_free(shiftfs_file_info_cache, file_info);
1133 return PTR_ERR(realfile);
1134 }
1135
1136 file->private_data = file_info;
1137 file_info->realfile = realfile;
1138 return 0;
1139 }
1140
1141 static int shiftfs_release(struct inode *inode, struct file *file)
1142 {
1143 struct shiftfs_file_info *file_info = file->private_data;
1144
1145 if (file_info) {
1146 if (file_info->realfile)
1147 fput(file_info->realfile);
1148
1149 kmem_cache_free(shiftfs_file_info_cache, file_info);
1150 }
1151
1152 return 0;
1153 }
1154
1155 static loff_t shiftfs_llseek(struct file *file, loff_t offset, int whence)
1156 {
1157 struct inode *realinode = file_inode(file)->i_private;
1158
1159 return generic_file_llseek_size(file, offset, whence,
1160 realinode->i_sb->s_maxbytes,
1161 i_size_read(realinode));
1162 }
1163
1164 /* XXX: Need to figure out what to do about atime updates, maybe other
1165 * timestamps too ... ref. ovl_file_accessed() */
1166
1167 static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1168 {
1169 int ifl = iocb->ki_flags;
1170 rwf_t flags = 0;
1171
1172 if (ifl & IOCB_NOWAIT)
1173 flags |= RWF_NOWAIT;
1174 if (ifl & IOCB_HIPRI)
1175 flags |= RWF_HIPRI;
1176 if (ifl & IOCB_DSYNC)
1177 flags |= RWF_DSYNC;
1178 if (ifl & IOCB_SYNC)
1179 flags |= RWF_SYNC;
1180
1181 return flags;
1182 }
1183
1184 static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1185 {
1186 struct file *file = iocb->ki_filp;
1187 struct fd lowerfd;
1188 const struct cred *oldcred;
1189 ssize_t ret;
1190
1191 if (!iov_iter_count(iter))
1192 return 0;
1193
1194 ret = shiftfs_real_fdget(file, &lowerfd);
1195 if (ret)
1196 return ret;
1197
1198 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1199 ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1200 shiftfs_iocb_to_rwf(iocb));
1201 revert_creds(oldcred);
1202
1203 shiftfs_file_accessed(file);
1204
1205 fdput(lowerfd);
1206 return ret;
1207 }
1208
1209 static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1210 {
1211 struct file *file = iocb->ki_filp;
1212 struct inode *inode = file_inode(file);
1213 struct fd lowerfd;
1214 const struct cred *oldcred;
1215 ssize_t ret;
1216
1217 if (!iov_iter_count(iter))
1218 return 0;
1219
1220 inode_lock(inode);
1221 /* Update mode */
1222 shiftfs_copyattr(inode->i_private, inode);
1223 ret = file_remove_privs(file);
1224 if (ret)
1225 goto out_unlock;
1226
1227 ret = shiftfs_real_fdget(file, &lowerfd);
1228 if (ret)
1229 goto out_unlock;
1230
1231 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1232 file_start_write(lowerfd.file);
1233 ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1234 shiftfs_iocb_to_rwf(iocb));
1235 file_end_write(lowerfd.file);
1236 revert_creds(oldcred);
1237
1238 /* Update size */
1239 shiftfs_copyattr(inode->i_private, inode);
1240
1241 fdput(lowerfd);
1242
1243 out_unlock:
1244 inode_unlock(inode);
1245 return ret;
1246 }
1247
1248 static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1249 int datasync)
1250 {
1251 struct fd lowerfd;
1252 const struct cred *oldcred;
1253 int ret;
1254
1255 ret = shiftfs_real_fdget(file, &lowerfd);
1256 if (ret)
1257 return ret;
1258
1259 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1260 ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1261 revert_creds(oldcred);
1262
1263 fdput(lowerfd);
1264 return ret;
1265 }
1266
1267 static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1268 {
1269 struct shiftfs_file_info *file_info = file->private_data;
1270 struct file *realfile = file_info->realfile;
1271 const struct cred *oldcred;
1272 int ret;
1273
1274 if (!realfile->f_op->mmap)
1275 return -ENODEV;
1276
1277 if (WARN_ON(file != vma->vm_file))
1278 return -EIO;
1279
1280 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1281 vma->vm_file = get_file(realfile);
1282 ret = call_mmap(vma->vm_file, vma);
1283 revert_creds(oldcred);
1284
1285 shiftfs_file_accessed(file);
1286
1287 if (ret)
1288 fput(realfile); /* Drop refcount from new vm_file value */
1289 else
1290 fput(file); /* Drop refcount from previous vm_file value */
1291
1292 return ret;
1293 }
1294
1295 static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1296 loff_t len)
1297 {
1298 struct inode *inode = file_inode(file);
1299 struct inode *loweri = inode->i_private;
1300 struct fd lowerfd;
1301 const struct cred *oldcred;
1302 int ret;
1303
1304 ret = shiftfs_real_fdget(file, &lowerfd);
1305 if (ret)
1306 return ret;
1307
1308 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1309 ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1310 revert_creds(oldcred);
1311
1312 /* Update size */
1313 shiftfs_copyattr(loweri, inode);
1314
1315 fdput(lowerfd);
1316 return ret;
1317 }
1318
1319 static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1320 int advice)
1321 {
1322 struct fd lowerfd;
1323 const struct cred *oldcred;
1324 int ret;
1325
1326 ret = shiftfs_real_fdget(file, &lowerfd);
1327 if (ret)
1328 return ret;
1329
1330 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1331 ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1332 revert_creds(oldcred);
1333
1334 fdput(lowerfd);
1335 return ret;
1336 }
1337
1338 static int shiftfs_override_ioctl_creds(const struct super_block *sb,
1339 const struct cred **oldcred,
1340 struct cred **newcred)
1341 {
1342 kuid_t fsuid = current_fsuid();
1343 kgid_t fsgid = current_fsgid();
1344
1345 *oldcred = shiftfs_override_creds(sb);
1346
1347 *newcred = prepare_creds();
1348 if (!*newcred) {
1349 revert_creds(*oldcred);
1350 return -ENOMEM;
1351 }
1352
1353 (*newcred)->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, fsuid));
1354 (*newcred)->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, fsgid));
1355
1356 /* clear all caps to prevent bypassing capable() checks */
1357 cap_clear((*newcred)->cap_bset);
1358 cap_clear((*newcred)->cap_effective);
1359 cap_clear((*newcred)->cap_inheritable);
1360 cap_clear((*newcred)->cap_permitted);
1361
1362 put_cred(override_creds(*newcred));
1363 return 0;
1364 }
1365
/*
 * Counterpart of shiftfs_override_ioctl_creds(); simply delegates to
 * shiftfs_revert_object_creds().
 */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1371
1372 static inline bool is_btrfs_snap_ioctl(int cmd)
1373 {
1374 if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1375 return true;
1376
1377 return false;
1378 }
1379
1380 static int shiftfs_btrfs_ioctl_fd_restore(int cmd, struct fd lfd, int fd,
1381 void __user *arg,
1382 struct btrfs_ioctl_vol_args *v1,
1383 struct btrfs_ioctl_vol_args_v2 *v2)
1384 {
1385 int ret;
1386
1387 if (!is_btrfs_snap_ioctl(cmd))
1388 return 0;
1389
1390 if (cmd == BTRFS_IOC_SNAP_CREATE)
1391 ret = copy_to_user(arg, v1, sizeof(*v1));
1392 else
1393 ret = copy_to_user(arg, v2, sizeof(*v2));
1394
1395 fdput(lfd);
1396 __close_fd(current->files, fd);
1397 kfree(v1);
1398 kfree(v2);
1399
1400 return ret;
1401 }
1402
1403 static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1404 struct btrfs_ioctl_vol_args **b1,
1405 struct btrfs_ioctl_vol_args_v2 **b2,
1406 struct fd *lfd,
1407 int *newfd)
1408 {
1409 int oldfd, ret;
1410 struct fd src;
1411 struct btrfs_ioctl_vol_args *v1 = NULL;
1412 struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1413
1414 if (!is_btrfs_snap_ioctl(cmd))
1415 return 0;
1416
1417 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1418 v1 = memdup_user(arg, sizeof(*v1));
1419 if (IS_ERR(v1))
1420 return PTR_ERR(v1);
1421 oldfd = v1->fd;
1422 *b1 = v1;
1423 } else {
1424 v2 = memdup_user(arg, sizeof(*v2));
1425 if (IS_ERR(v2))
1426 return PTR_ERR(v2);
1427 oldfd = v2->fd;
1428 *b2 = v2;
1429 }
1430
1431 src = fdget(oldfd);
1432 if (!src.file)
1433 return -EINVAL;
1434
1435 ret = shiftfs_real_fdget(src.file, lfd);
1436 fdput(src);
1437 if (ret)
1438 return ret;
1439
1440 *newfd = get_unused_fd_flags(lfd->file->f_flags);
1441 if (*newfd < 0) {
1442 fdput(*lfd);
1443 return *newfd;
1444 }
1445
1446 fd_install(*newfd, lfd->file);
1447
1448 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1449 v1->fd = *newfd;
1450 ret = copy_to_user(arg, v1, sizeof(*v1));
1451 v1->fd = oldfd;
1452 } else {
1453 v2->fd = *newfd;
1454 ret = copy_to_user(arg, v2, sizeof(*v2));
1455 v2->fd = oldfd;
1456 }
1457
1458 if (ret)
1459 shiftfs_btrfs_ioctl_fd_restore(cmd, *lfd, *newfd, arg, v1, v2);
1460
1461 return ret;
1462 }
1463
1464 static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1465 unsigned long arg)
1466 {
1467 struct fd lowerfd;
1468 struct cred *newcred;
1469 const struct cred *oldcred;
1470 int newfd = -EBADF;
1471 long err = 0, ret = 0;
1472 void __user *argp = (void __user *)arg;
1473 struct fd btrfs_lfd = {};
1474 struct super_block *sb = file->f_path.dentry->d_sb;
1475 struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1476 struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1477
1478 ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
1479 &btrfs_lfd, &newfd);
1480 if (ret < 0)
1481 return ret;
1482
1483 ret = shiftfs_real_fdget(file, &lowerfd);
1484 if (ret)
1485 goto out_restore;
1486
1487 ret = shiftfs_override_ioctl_creds(sb, &oldcred, &newcred);
1488 if (ret)
1489 goto out_fdput;
1490
1491 ret = vfs_ioctl(lowerfd.file, cmd, arg);
1492
1493 shiftfs_revert_ioctl_creds(oldcred, newcred);
1494
1495 shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1496 shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1497
1498 out_fdput:
1499 fdput(lowerfd);
1500
1501 out_restore:
1502 err = shiftfs_btrfs_ioctl_fd_restore(cmd, btrfs_lfd, newfd, argp,
1503 btrfs_v1, btrfs_v2);
1504 if (!ret)
1505 ret = err;
1506
1507 return ret;
1508 }
1509
1510 static bool in_ioctl_whitelist(int flag)
1511 {
1512 switch (flag) {
1513 case BTRFS_IOC_SNAP_CREATE:
1514 return true;
1515 case BTRFS_IOC_SNAP_CREATE_V2:
1516 return true;
1517 case BTRFS_IOC_SUBVOL_CREATE:
1518 return true;
1519 case BTRFS_IOC_SUBVOL_CREATE_V2:
1520 return true;
1521 case BTRFS_IOC_SNAP_DESTROY:
1522 return true;
1523 }
1524
1525 return false;
1526 }
1527
1528 static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1529 unsigned long arg)
1530 {
1531 switch (cmd) {
1532 case FS_IOC_GETVERSION:
1533 /* fall through */
1534 case FS_IOC_GETFLAGS:
1535 /* fall through */
1536 case FS_IOC_SETFLAGS:
1537 break;
1538 default:
1539 if (!in_ioctl_whitelist(cmd) ||
1540 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1541 return -ENOTTY;
1542 }
1543
1544 return shiftfs_real_ioctl(file, cmd, arg);
1545 }
1546
1547 static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1548 unsigned long arg)
1549 {
1550 switch (cmd) {
1551 case FS_IOC32_GETVERSION:
1552 /* fall through */
1553 case FS_IOC32_GETFLAGS:
1554 /* fall through */
1555 case FS_IOC32_SETFLAGS:
1556 break;
1557 default:
1558 if (!in_ioctl_whitelist(cmd) ||
1559 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1560 return -ENOIOCTLCMD;
1561 }
1562
1563 return shiftfs_real_ioctl(file, cmd, arg);
1564 }
1565
/* Selects which lower-filesystem operation shiftfs_copyfile() performs. */
enum shiftfs_copyop {
	SHIFTFS_COPY,	/* vfs_copy_file_range() */
	SHIFTFS_CLONE,	/* vfs_clone_file_range() */
	SHIFTFS_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
1571
1572 static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1573 struct file *file_out, loff_t pos_out, u64 len,
1574 unsigned int flags, enum shiftfs_copyop op)
1575 {
1576 ssize_t ret;
1577 struct fd real_in, real_out;
1578 const struct cred *oldcred;
1579 struct inode *inode_out = file_inode(file_out);
1580 struct inode *loweri = inode_out->i_private;
1581
1582 ret = shiftfs_real_fdget(file_out, &real_out);
1583 if (ret)
1584 return ret;
1585
1586 ret = shiftfs_real_fdget(file_in, &real_in);
1587 if (ret) {
1588 fdput(real_out);
1589 return ret;
1590 }
1591
1592 oldcred = shiftfs_override_creds(inode_out->i_sb);
1593 switch (op) {
1594 case SHIFTFS_COPY:
1595 ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1596 pos_out, len, flags);
1597 break;
1598
1599 case SHIFTFS_CLONE:
1600 ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1601 pos_out, len, flags);
1602 break;
1603
1604 case SHIFTFS_DEDUPE:
1605 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1606 real_out.file, pos_out, len,
1607 flags);
1608 break;
1609 }
1610 revert_creds(oldcred);
1611
1612 /* Update size */
1613 shiftfs_copyattr(loweri, inode_out);
1614
1615 fdput(real_in);
1616 fdput(real_out);
1617
1618 return ret;
1619 }
1620
1621 static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1622 struct file *file_out, loff_t pos_out,
1623 size_t len, unsigned int flags)
1624 {
1625 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1626 SHIFTFS_COPY);
1627 }
1628
1629 static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1630 struct file *file_out, loff_t pos_out,
1631 loff_t len, unsigned int remap_flags)
1632 {
1633 enum shiftfs_copyop op;
1634
1635 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1636 return -EINVAL;
1637
1638 if (remap_flags & REMAP_FILE_DEDUP)
1639 op = SHIFTFS_DEDUPE;
1640 else
1641 op = SHIFTFS_CLONE;
1642
1643 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1644 remap_flags, op);
1645 }
1646
1647 static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1648 {
1649 const struct cred *oldcred;
1650 int err = -ENOTDIR;
1651 struct shiftfs_file_info *file_info = file->private_data;
1652 struct file *realfile = file_info->realfile;
1653
1654 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1655 err = iterate_dir(realfile, ctx);
1656 revert_creds(oldcred);
1657
1658 return err;
1659 }
1660
1661 const struct file_operations shiftfs_file_operations = {
1662 .open = shiftfs_open,
1663 .release = shiftfs_release,
1664 .llseek = shiftfs_llseek,
1665 .read_iter = shiftfs_read_iter,
1666 .write_iter = shiftfs_write_iter,
1667 .fsync = shiftfs_fsync,
1668 .mmap = shiftfs_mmap,
1669 .fallocate = shiftfs_fallocate,
1670 .fadvise = shiftfs_fadvise,
1671 .unlocked_ioctl = shiftfs_ioctl,
1672 .compat_ioctl = shiftfs_compat_ioctl,
1673 .copy_file_range = shiftfs_copy_file_range,
1674 .remap_file_range = shiftfs_remap_file_range,
1675 };
1676
1677 const struct file_operations shiftfs_dir_operations = {
1678 .compat_ioctl = shiftfs_compat_ioctl,
1679 .fsync = shiftfs_fsync,
1680 .iterate_shared = shiftfs_iterate_shared,
1681 .llseek = shiftfs_llseek,
1682 .open = shiftfs_open,
1683 .read = generic_read_dir,
1684 .release = shiftfs_release,
1685 .unlocked_ioctl = shiftfs_ioctl,
1686 };
1687
/* Address-space operations installed on regular files by shiftfs_fill_inode(). */
static const struct address_space_operations shiftfs_aops = {
	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
	.direct_IO	= noop_direct_IO,
};
1692
1693 static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1694 umode_t mode, dev_t dev, struct dentry *dentry)
1695 {
1696 struct inode *loweri;
1697
1698 inode->i_ino = ino;
1699 inode->i_flags |= S_NOCMTIME;
1700
1701 mode &= S_IFMT;
1702 inode->i_mode = mode;
1703 switch (mode & S_IFMT) {
1704 case S_IFDIR:
1705 inode->i_op = &shiftfs_dir_inode_operations;
1706 inode->i_fop = &shiftfs_dir_operations;
1707 break;
1708 case S_IFLNK:
1709 inode->i_op = &shiftfs_symlink_inode_operations;
1710 break;
1711 case S_IFREG:
1712 inode->i_op = &shiftfs_file_inode_operations;
1713 inode->i_fop = &shiftfs_file_operations;
1714 inode->i_mapping->a_ops = &shiftfs_aops;
1715 break;
1716 default:
1717 inode->i_op = &shiftfs_special_inode_operations;
1718 init_special_inode(inode, mode, dev);
1719 break;
1720 }
1721
1722 if (!dentry)
1723 return;
1724
1725 loweri = dentry->d_inode;
1726 if (!loweri->i_op->get_link)
1727 inode->i_opflags |= IOP_NOFOLLOW;
1728
1729 shiftfs_copyattr(loweri, inode);
1730 shiftfs_copyflags(loweri, inode);
1731 set_nlink(inode, loweri->i_nlink);
1732 }
1733
1734 static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1735 {
1736 struct super_block *sb = dentry->d_sb;
1737 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1738
1739 if (sbinfo->mark)
1740 seq_show_option(m, "mark", NULL);
1741
1742 if (sbinfo->passthrough)
1743 seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1744
1745 return 0;
1746 }
1747
1748 static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1749 {
1750 struct super_block *sb = dentry->d_sb;
1751 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1752 struct dentry *root = sb->s_root;
1753 struct dentry *realroot = root->d_fsdata;
1754 struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1755 int err;
1756
1757 err = vfs_statfs(&realpath, buf);
1758 if (err)
1759 return err;
1760
1761 if (!shiftfs_passthrough_statfs(sbinfo))
1762 buf->f_type = sb->s_magic;
1763
1764 return 0;
1765 }
1766
1767 static void shiftfs_evict_inode(struct inode *inode)
1768 {
1769 struct inode *loweri = inode->i_private;
1770
1771 clear_inode(inode);
1772
1773 if (loweri)
1774 iput(loweri);
1775 }
1776
1777 static void shiftfs_put_super(struct super_block *sb)
1778 {
1779 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1780
1781 if (sbinfo) {
1782 mntput(sbinfo->mnt);
1783 put_cred(sbinfo->creator_cred);
1784 kfree(sbinfo);
1785 }
1786 }
1787
1788 static const struct xattr_handler shiftfs_xattr_handler = {
1789 .prefix = "",
1790 .get = shiftfs_xattr_get,
1791 .set = shiftfs_xattr_set,
1792 };
1793
/*
 * NULL-terminated handler list installed as sb->s_xattr.  The POSIX ACL
 * handlers, when configured in, are listed ahead of the catch-all handler.
 */
const struct xattr_handler *shiftfs_xattr_handlers[] = {
#ifdef CONFIG_SHIFT_FS_POSIX_ACL
	&shiftfs_posix_acl_access_xattr_handler,
	&shiftfs_posix_acl_default_xattr_handler,
#endif
	&shiftfs_xattr_handler,
	NULL
};
1802
/*
 * True when every bit set in @new_flags is also set in @old_flags, i.e.
 * @new_flags requests nothing beyond what @old_flags allows.
 */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	int extra_bits = new_flags & ~old_flags;

	return extra_bits == 0;
}
1810
1811 static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1812 {
1813 int err;
1814 struct shiftfs_super_info new = {};
1815 struct shiftfs_super_info *info = sb->s_fs_info;
1816
1817 err = shiftfs_parse_mount_options(&new, data);
1818 if (err)
1819 return err;
1820
1821 /* Mark mount option cannot be changed. */
1822 if (info->mark || (info->mark != new.mark))
1823 return -EPERM;
1824
1825 if (info->passthrough != new.passthrough) {
1826 /* Don't allow exceeding passthrough options of mark mount. */
1827 if (!passthrough_is_subset(info->info_mark->passthrough,
1828 info->passthrough))
1829 return -EPERM;
1830
1831 info->passthrough = new.passthrough;
1832 }
1833
1834 return 0;
1835 }
1836
1837 static const struct super_operations shiftfs_super_ops = {
1838 .put_super = shiftfs_put_super,
1839 .show_options = shiftfs_show_options,
1840 .statfs = shiftfs_statfs,
1841 .remount_fs = shiftfs_remount,
1842 .evict_inode = shiftfs_evict_inode,
1843 };
1844
/* Mount arguments bundled by shiftfs_mount() for shiftfs_fill_super(). */
struct shiftfs_data {
	void *data;		/* raw mount option data */
	const char *path;	/* device name, i.e. the path to mount over */
};
1849
1850 static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1851 int silent)
1852 {
1853 int err;
1854 struct path path = {};
1855 struct shiftfs_super_info *sbinfo_mp;
1856 char *name = NULL;
1857 struct inode *inode = NULL;
1858 struct dentry *dentry = NULL;
1859 struct shiftfs_data *data = raw_data;
1860 struct shiftfs_super_info *sbinfo = NULL;
1861
1862 if (!data->path)
1863 return -EINVAL;
1864
1865 sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1866 if (!sb->s_fs_info)
1867 return -ENOMEM;
1868 sbinfo = sb->s_fs_info;
1869
1870 err = shiftfs_parse_mount_options(sbinfo, data->data);
1871 if (err)
1872 return err;
1873
1874 /* to mount a mark, must be userns admin */
1875 if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
1876 return -EPERM;
1877
1878 name = kstrdup(data->path, GFP_KERNEL);
1879 if (!name)
1880 return -ENOMEM;
1881
1882 err = kern_path(name, LOOKUP_FOLLOW, &path);
1883 if (err)
1884 goto out_free_name;
1885
1886 if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
1887 err = -ENOTDIR;
1888 goto out_put_path;
1889 }
1890
1891 if (sbinfo->mark) {
1892 struct super_block *lower_sb = path.mnt->mnt_sb;
1893
1894 /* to mark a mount point, must root wrt lower s_user_ns */
1895 if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
1896 err = -EPERM;
1897 goto out_put_path;
1898 }
1899
1900 /*
1901 * this part is visible unshifted, so make sure no
1902 * executables that could be used to give suid
1903 * privileges
1904 */
1905 sb->s_iflags = SB_I_NOEXEC;
1906
1907 /*
1908 * Handle nesting of shiftfs mounts by referring this mark
1909 * mount back to the original mark mount. This is more
1910 * efficient and alleviates concerns about stack depth.
1911 */
1912 if (lower_sb->s_magic == SHIFTFS_MAGIC) {
1913 sbinfo_mp = lower_sb->s_fs_info;
1914
1915 /* Doesn't make sense to mark a mark mount */
1916 if (sbinfo_mp->mark) {
1917 err = -EINVAL;
1918 goto out_put_path;
1919 }
1920
1921 if (!passthrough_is_subset(sbinfo_mp->passthrough,
1922 sbinfo->passthrough)) {
1923 err = -EPERM;
1924 goto out_put_path;
1925 }
1926
1927 sbinfo->mnt = mntget(sbinfo_mp->mnt);
1928 dentry = dget(path.dentry->d_fsdata);
1929 } else {
1930 sbinfo->mnt = mntget(path.mnt);
1931 dentry = dget(path.dentry);
1932 }
1933
1934 sbinfo->creator_cred = prepare_creds();
1935 if (!sbinfo->creator_cred) {
1936 err = -ENOMEM;
1937 goto out_put_path;
1938 }
1939 } else {
1940 /*
1941 * This leg executes if we're admin capable in the namespace,
1942 * so be very careful.
1943 */
1944 err = -EPERM;
1945 if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
1946 goto out_put_path;
1947
1948 sbinfo_mp = path.dentry->d_sb->s_fs_info;
1949 if (!sbinfo_mp->mark)
1950 goto out_put_path;
1951
1952 if (!passthrough_is_subset(sbinfo_mp->passthrough,
1953 sbinfo->passthrough))
1954 goto out_put_path;
1955
1956 sbinfo->mnt = mntget(sbinfo_mp->mnt);
1957 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
1958 dentry = dget(path.dentry->d_fsdata);
1959 sbinfo->info_mark = sbinfo_mp;
1960 }
1961
1962 sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
1963 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1964 printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
1965 err = -EINVAL;
1966 goto out_put_path;
1967 }
1968
1969 inode = new_inode(sb);
1970 if (!inode) {
1971 err = -ENOMEM;
1972 goto out_put_path;
1973 }
1974 shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
1975
1976 ihold(dentry->d_inode);
1977 inode->i_private = dentry->d_inode;
1978
1979 sb->s_magic = SHIFTFS_MAGIC;
1980 sb->s_op = &shiftfs_super_ops;
1981 sb->s_xattr = shiftfs_xattr_handlers;
1982 sb->s_d_op = &shiftfs_dentry_ops;
1983 sb->s_flags |= SB_POSIXACL;
1984 sb->s_root = d_make_root(inode);
1985 if (!sb->s_root) {
1986 err = -ENOMEM;
1987 goto out_put_path;
1988 }
1989
1990 sb->s_root->d_fsdata = dentry;
1991 sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
1992 shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
1993
1994 dentry = NULL;
1995 err = 0;
1996
1997 out_put_path:
1998 path_put(&path);
1999
2000 out_free_name:
2001 kfree(name);
2002
2003 dput(dentry);
2004
2005 return err;
2006 }
2007
2008 static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2009 int flags, const char *dev_name, void *data)
2010 {
2011 struct shiftfs_data d = { data, dev_name };
2012
2013 return mount_nodev(fs_type, flags, &d, shiftfs_fill_super);
2014 }
2015
2016 static struct file_system_type shiftfs_type = {
2017 .owner = THIS_MODULE,
2018 .name = "shiftfs",
2019 .mount = shiftfs_mount,
2020 .kill_sb = kill_anon_super,
2021 .fs_flags = FS_USERNS_MOUNT,
2022 };
2023
2024 static int __init shiftfs_init(void)
2025 {
2026 shiftfs_file_info_cache = kmem_cache_create(
2027 "shiftfs_file_info_cache", sizeof(struct shiftfs_file_info), 0,
2028 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | SLAB_MEM_SPREAD, NULL);
2029 if (!shiftfs_file_info_cache)
2030 return -ENOMEM;
2031
2032 return register_filesystem(&shiftfs_type);
2033 }
2034
/*
 * Module exit: unregister the filesystem type, then destroy the slab cache
 * created by shiftfs_init().
 */
static void __exit shiftfs_exit(void)
{
	unregister_filesystem(&shiftfs_type);
	kmem_cache_destroy(shiftfs_file_info_cache);
}
2040
/* Module metadata and registration of the init/exit entry points. */
MODULE_ALIAS_FS("shiftfs");
MODULE_AUTHOR("James Bottomley");
MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>");
MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>");
MODULE_DESCRIPTION("id shifting filesystem");
MODULE_LICENSE("GPL v2");
module_init(shiftfs_init)
module_exit(shiftfs_exit)