]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blob - fs/shiftfs.c
UBUNTU: SAUCE: shiftfs: rework how shiftfs opens files
[mirror_ubuntu-hirsute-kernel.git] / fs / shiftfs.c
1 #include <linux/btrfs.h>
2 #include <linux/capability.h>
3 #include <linux/cred.h>
4 #include <linux/mount.h>
5 #include <linux/fdtable.h>
6 #include <linux/file.h>
7 #include <linux/fs.h>
8 #include <linux/namei.h>
9 #include <linux/module.h>
10 #include <linux/kernel.h>
11 #include <linux/magic.h>
12 #include <linux/parser.h>
13 #include <linux/security.h>
14 #include <linux/seq_file.h>
15 #include <linux/statfs.h>
16 #include <linux/slab.h>
17 #include <linux/user_namespace.h>
18 #include <linux/uidgid.h>
19 #include <linux/xattr.h>
20 #include <linux/posix_acl.h>
21 #include <linux/posix_acl_xattr.h>
22 #include <linux/uio.h>
23
24 struct shiftfs_super_info {
25 struct vfsmount *mnt;
26 struct user_namespace *userns;
27 /* creds of process who created the super block */
28 const struct cred *creator_cred;
29 bool mark;
30 unsigned int passthrough;
31 unsigned int passthrough_mark;
32 };
33
34 static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
35 umode_t mode, dev_t dev, struct dentry *dentry);
36
37 #define SHIFTFS_PASSTHROUGH_NONE 0
38 #define SHIFTFS_PASSTHROUGH_STAT 1
39 #define SHIFTFS_PASSTHROUGH_IOCTL 2
40 #define SHIFTFS_PASSTHROUGH_ALL \
41 (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
42
43 static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
44 {
45 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
46 return false;
47
48 return true;
49 }
50
51 static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
52 {
53 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
54 return false;
55
56 return true;
57 }
58
/* Token ids returned by match_token() for the mount option table. */
enum {
	OPT_MARK = 0,
	OPT_PASSTHROUGH,
	OPT_LAST,
};
64
65 /* global filesystem options */
66 static const match_table_t tokens = {
67 { OPT_MARK, "mark" },
68 { OPT_PASSTHROUGH, "passthrough=%u" },
69 { OPT_LAST, NULL }
70 };
71
72 static const struct cred *shiftfs_override_creds(const struct super_block *sb)
73 {
74 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
75
76 return override_creds(sbinfo->creator_cred);
77 }
78
/*
 * Undo shiftfs_override_object_creds(): restore @oldcred and drop the
 * reference held on @newcred.
 */
static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
					       struct cred *newcred)
{
	revert_creds(oldcred);
	put_cred(newcred);
}
85
86 static int shiftfs_override_object_creds(const struct super_block *sb,
87 const struct cred **oldcred,
88 struct cred **newcred,
89 struct dentry *dentry, umode_t mode,
90 bool hardlink)
91 {
92 kuid_t fsuid = current_fsuid();
93 kgid_t fsgid = current_fsgid();
94
95 *oldcred = shiftfs_override_creds(sb);
96
97 *newcred = prepare_creds();
98 if (!*newcred) {
99 revert_creds(*oldcred);
100 return -ENOMEM;
101 }
102
103 (*newcred)->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, fsuid));
104 (*newcred)->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, fsgid));
105
106 if (!hardlink) {
107 int err = security_dentry_create_files_as(dentry, mode,
108 &dentry->d_name,
109 *oldcred, *newcred);
110 if (err) {
111 shiftfs_revert_object_creds(*oldcred, *newcred);
112 return err;
113 }
114 }
115
116 put_cred(override_creds(*newcred));
117 return 0;
118 }
119
120 static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
121 kuid_t kuid)
122 {
123 uid_t uid = from_kuid(from, kuid);
124 return make_kuid(to, uid);
125 }
126
127 static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
128 kgid_t kgid)
129 {
130 gid_t gid = from_kgid(from, kgid);
131 return make_kgid(to, gid);
132 }
133
134 static void shiftfs_copyattr(struct inode *from, struct inode *to)
135 {
136 struct user_namespace *from_ns = from->i_sb->s_user_ns;
137 struct user_namespace *to_ns = to->i_sb->s_user_ns;
138
139 to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
140 to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
141 to->i_mode = from->i_mode;
142 to->i_atime = from->i_atime;
143 to->i_mtime = from->i_mtime;
144 to->i_ctime = from->i_ctime;
145 i_size_write(to, i_size_read(from));
146 }
147
148 static void shiftfs_copyflags(struct inode *from, struct inode *to)
149 {
150 unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
151
152 inode_set_flags(to, from->i_flags & mask, mask);
153 }
154
155 static void shiftfs_file_accessed(struct file *file)
156 {
157 struct inode *upperi, *loweri;
158
159 if (file->f_flags & O_NOATIME)
160 return;
161
162 upperi = file_inode(file);
163 loweri = upperi->i_private;
164
165 if (!loweri)
166 return;
167
168 upperi->i_mtime = loweri->i_mtime;
169 upperi->i_ctime = loweri->i_ctime;
170
171 touch_atime(&file->f_path);
172 }
173
174 static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
175 char *options)
176 {
177 char *p;
178 substring_t args[MAX_OPT_ARGS];
179
180 sbinfo->mark = false;
181 sbinfo->passthrough = 0;
182
183 while ((p = strsep(&options, ",")) != NULL) {
184 int err, intarg, token;
185
186 if (!*p)
187 continue;
188
189 token = match_token(p, tokens, args);
190 switch (token) {
191 case OPT_MARK:
192 sbinfo->mark = true;
193 break;
194 case OPT_PASSTHROUGH:
195 err = match_int(&args[0], &intarg);
196 if (err)
197 return err;
198
199 if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
200 return -EINVAL;
201
202 sbinfo->passthrough = intarg;
203 break;
204 default:
205 return -EINVAL;
206 }
207 }
208
209 return 0;
210 }
211
212 static void shiftfs_d_release(struct dentry *dentry)
213 {
214 struct dentry *lowerd = dentry->d_fsdata;
215
216 if (lowerd)
217 dput(lowerd);
218 }
219
220 static struct dentry *shiftfs_d_real(struct dentry *dentry,
221 const struct inode *inode)
222 {
223 struct dentry *lowerd = dentry->d_fsdata;
224
225 if (inode && d_inode(dentry) == inode)
226 return dentry;
227
228 lowerd = d_real(lowerd, inode);
229 if (lowerd && (!inode || inode == d_inode(lowerd)))
230 return lowerd;
231
232 WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
233 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
234 return dentry;
235 }
236
237 static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
238 {
239 int err = 1;
240 struct dentry *lowerd = dentry->d_fsdata;
241
242 if (d_is_negative(lowerd) != d_is_negative(dentry))
243 return 0;
244
245 if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
246 err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
247
248 if (d_really_is_positive(dentry)) {
249 struct inode *inode = d_inode(dentry);
250 struct inode *loweri = d_inode(lowerd);
251
252 shiftfs_copyattr(loweri, inode);
253 if (!inode->i_nlink)
254 err = 0;
255 }
256
257 return err;
258 }
259
260 static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
261 {
262 int err = 1;
263 struct dentry *lowerd = dentry->d_fsdata;
264
265 if (d_unhashed(lowerd) ||
266 ((d_is_negative(lowerd) != d_is_negative(dentry))))
267 return 0;
268
269 if (flags & LOOKUP_RCU)
270 return -ECHILD;
271
272 if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
273 err = lowerd->d_op->d_revalidate(lowerd, flags);
274
275 if (d_really_is_positive(dentry)) {
276 struct inode *inode = d_inode(dentry);
277 struct inode *loweri = d_inode(lowerd);
278
279 shiftfs_copyattr(loweri, inode);
280 if (!inode->i_nlink)
281 err = 0;
282 }
283
284 return err;
285 }
286
287 static const struct dentry_operations shiftfs_dentry_ops = {
288 .d_release = shiftfs_d_release,
289 .d_real = shiftfs_d_real,
290 .d_revalidate = shiftfs_d_revalidate,
291 .d_weak_revalidate = shiftfs_d_weak_revalidate,
292 };
293
294 static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
295 struct delayed_call *done)
296 {
297 const char *p;
298 const struct cred *oldcred;
299 struct dentry *lowerd;
300
301 /* RCU lookup not supported */
302 if (!dentry)
303 return ERR_PTR(-ECHILD);
304
305 lowerd = dentry->d_fsdata;
306 oldcred = shiftfs_override_creds(dentry->d_sb);
307 p = vfs_get_link(lowerd, done);
308 revert_creds(oldcred);
309
310 return p;
311 }
312
313 static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
314 const char *name, const void *value,
315 size_t size, int flags)
316 {
317 struct dentry *lowerd = dentry->d_fsdata;
318 int err;
319 const struct cred *oldcred;
320
321 oldcred = shiftfs_override_creds(dentry->d_sb);
322 err = vfs_setxattr(lowerd, name, value, size, flags);
323 revert_creds(oldcred);
324
325 shiftfs_copyattr(lowerd->d_inode, inode);
326
327 return err;
328 }
329
330 static int shiftfs_xattr_get(const struct xattr_handler *handler,
331 struct dentry *dentry, struct inode *inode,
332 const char *name, void *value, size_t size)
333 {
334 struct dentry *lowerd = dentry->d_fsdata;
335 int err;
336 const struct cred *oldcred;
337
338 oldcred = shiftfs_override_creds(dentry->d_sb);
339 err = vfs_getxattr(lowerd, name, value, size);
340 revert_creds(oldcred);
341
342 return err;
343 }
344
345 static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
346 size_t size)
347 {
348 struct dentry *lowerd = dentry->d_fsdata;
349 int err;
350 const struct cred *oldcred;
351
352 oldcred = shiftfs_override_creds(dentry->d_sb);
353 err = vfs_listxattr(lowerd, list, size);
354 revert_creds(oldcred);
355
356 return err;
357 }
358
359 static int shiftfs_removexattr(struct dentry *dentry, const char *name)
360 {
361 struct dentry *lowerd = dentry->d_fsdata;
362 int err;
363 const struct cred *oldcred;
364
365 oldcred = shiftfs_override_creds(dentry->d_sb);
366 err = vfs_removexattr(lowerd, name);
367 revert_creds(oldcred);
368
369 /* update c/mtime */
370 shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
371
372 return err;
373 }
374
375 static int shiftfs_xattr_set(const struct xattr_handler *handler,
376 struct dentry *dentry, struct inode *inode,
377 const char *name, const void *value, size_t size,
378 int flags)
379 {
380 if (!value)
381 return shiftfs_removexattr(dentry, name);
382 return shiftfs_setxattr(dentry, inode, name, value, size, flags);
383 }
384
385 static int shiftfs_inode_test(struct inode *inode, void *data)
386 {
387 return inode->i_private == data;
388 }
389
390 static int shiftfs_inode_set(struct inode *inode, void *data)
391 {
392 inode->i_private = data;
393 return 0;
394 }
395
396 static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
397 umode_t mode, const char *symlink,
398 struct dentry *hardlink, bool excl)
399 {
400 int err;
401 const struct cred *oldcred;
402 struct cred *newcred;
403 void *loweri_iop_ptr = NULL;
404 umode_t modei = mode;
405 struct super_block *dir_sb = diri->i_sb;
406 struct dentry *lowerd_new = dentry->d_fsdata;
407 struct inode *inode = NULL, *loweri_dir = diri->i_private;
408 const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
409 struct dentry *lowerd_link = NULL;
410
411 if (hardlink) {
412 loweri_iop_ptr = loweri_dir_iop->link;
413 } else {
414 switch (mode & S_IFMT) {
415 case S_IFDIR:
416 loweri_iop_ptr = loweri_dir_iop->mkdir;
417 break;
418 case S_IFREG:
419 loweri_iop_ptr = loweri_dir_iop->create;
420 break;
421 case S_IFLNK:
422 loweri_iop_ptr = loweri_dir_iop->symlink;
423 break;
424 case S_IFSOCK:
425 /* fall through */
426 case S_IFIFO:
427 loweri_iop_ptr = loweri_dir_iop->mknod;
428 break;
429 }
430 }
431 if (!loweri_iop_ptr) {
432 err = -EINVAL;
433 goto out_iput;
434 }
435
436 inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
437
438 if (!hardlink) {
439 inode = new_inode(dir_sb);
440 if (!inode) {
441 err = -ENOMEM;
442 goto out_iput;
443 }
444
445 /*
446 * new_inode() will have added the new inode to the super
447 * block's list of inodes. Further below we will call
448 * inode_insert5() Which would perform the same operation again
449 * thereby corrupting the list. To avoid this raise I_CREATING
450 * in i_state which will cause inode_insert5() to skip this
451 * step. I_CREATING will be cleared by d_instantiate_new()
452 * below.
453 */
454 spin_lock(&inode->i_lock);
455 inode->i_state |= I_CREATING;
456 spin_unlock(&inode->i_lock);
457
458 inode_init_owner(inode, diri, mode);
459 modei = inode->i_mode;
460 }
461
462 err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
463 dentry, modei, hardlink != NULL);
464 if (err)
465 goto out_iput;
466
467 if (hardlink) {
468 lowerd_link = hardlink->d_fsdata;
469 err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
470 } else {
471 switch (modei & S_IFMT) {
472 case S_IFDIR:
473 err = vfs_mkdir(loweri_dir, lowerd_new, modei);
474 break;
475 case S_IFREG:
476 err = vfs_create(loweri_dir, lowerd_new, modei, excl);
477 break;
478 case S_IFLNK:
479 err = vfs_symlink(loweri_dir, lowerd_new, symlink);
480 break;
481 case S_IFSOCK:
482 /* fall through */
483 case S_IFIFO:
484 err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
485 break;
486 default:
487 err = -EINVAL;
488 break;
489 }
490 }
491
492 shiftfs_revert_object_creds(oldcred, newcred);
493
494 if (!err && WARN_ON(!lowerd_new->d_inode))
495 err = -EIO;
496 if (err)
497 goto out_iput;
498
499 if (hardlink) {
500 inode = d_inode(hardlink);
501 ihold(inode);
502
503 /* copy up times from lower inode */
504 shiftfs_copyattr(d_inode(lowerd_link), inode);
505 set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
506 d_instantiate(dentry, inode);
507 } else {
508 struct inode *inode_tmp;
509 struct inode *loweri_new = d_inode(lowerd_new);
510
511 inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
512 shiftfs_inode_test, shiftfs_inode_set,
513 loweri_new);
514 if (unlikely(inode_tmp != inode)) {
515 pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
516 iput(inode_tmp);
517 err = -EINVAL;
518 goto out_iput;
519 }
520
521 ihold(loweri_new);
522 shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
523 0, lowerd_new);
524 d_instantiate_new(dentry, inode);
525 }
526
527 shiftfs_copyattr(loweri_dir, diri);
528 if (loweri_iop_ptr == loweri_dir_iop->mkdir)
529 set_nlink(diri, loweri_dir->i_nlink);
530
531 inode = NULL;
532
533 out_iput:
534 iput(inode);
535 inode_unlock(loweri_dir);
536
537 return err;
538 }
539
540 static int shiftfs_create(struct inode *dir, struct dentry *dentry,
541 umode_t mode, bool excl)
542 {
543 mode |= S_IFREG;
544
545 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
546 }
547
548 static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
549 umode_t mode)
550 {
551 mode |= S_IFDIR;
552
553 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
554 }
555
556 static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
557 struct dentry *dentry)
558 {
559 return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
560 }
561
562 static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
563 dev_t rdev)
564 {
565 if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
566 return -EPERM;
567
568 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
569 }
570
571 static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
572 const char *symlink)
573 {
574 return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
575 }
576
577 static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
578 {
579 struct dentry *lowerd = dentry->d_fsdata;
580 struct inode *loweri = dir->i_private;
581 struct inode *inode = d_inode(dentry);
582 int err;
583 const struct cred *oldcred;
584
585 oldcred = shiftfs_override_creds(dentry->d_sb);
586 inode_lock_nested(loweri, I_MUTEX_PARENT);
587 if (rmdir)
588 err = vfs_rmdir(loweri, lowerd);
589 else
590 err = vfs_unlink(loweri, lowerd, NULL);
591 revert_creds(oldcred);
592
593 if (!err) {
594 d_drop(dentry);
595
596 if (rmdir)
597 clear_nlink(inode);
598 else
599 drop_nlink(inode);
600 }
601 inode_unlock(loweri);
602
603 shiftfs_copyattr(loweri, dir);
604
605 return err;
606 }
607
608 static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
609 {
610 return shiftfs_rm(dir, dentry, false);
611 }
612
613 static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
614 {
615 return shiftfs_rm(dir, dentry, true);
616 }
617
618 static int shiftfs_rename(struct inode *olddir, struct dentry *old,
619 struct inode *newdir, struct dentry *new,
620 unsigned int flags)
621 {
622 struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
623 *lowerd_dir_new = new->d_parent->d_fsdata,
624 *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
625 *trapd;
626 struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
627 *loweri_dir_new = lowerd_dir_new->d_inode;
628 int err = -EINVAL;
629 const struct cred *oldcred;
630
631 trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
632
633 if (trapd == lowerd_old || trapd == lowerd_new)
634 goto out_unlock;
635
636 oldcred = shiftfs_override_creds(old->d_sb);
637 err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
638 NULL, flags);
639 revert_creds(oldcred);
640
641 shiftfs_copyattr(loweri_dir_old, olddir);
642 shiftfs_copyattr(loweri_dir_new, newdir);
643
644 out_unlock:
645 unlock_rename(lowerd_dir_new, lowerd_dir_old);
646
647 return err;
648 }
649
650 static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
651 unsigned int flags)
652 {
653 struct dentry *new;
654 struct inode *newi;
655 const struct cred *oldcred;
656 struct dentry *lowerd = dentry->d_parent->d_fsdata;
657 struct inode *inode = NULL, *loweri = lowerd->d_inode;
658
659 inode_lock(loweri);
660 oldcred = shiftfs_override_creds(dentry->d_sb);
661 new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
662 revert_creds(oldcred);
663 inode_unlock(loweri);
664
665 if (IS_ERR(new))
666 return new;
667
668 dentry->d_fsdata = new;
669
670 newi = new->d_inode;
671 if (!newi)
672 goto out;
673
674 inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
675 shiftfs_inode_test, shiftfs_inode_set, newi);
676 if (!inode) {
677 dput(new);
678 return ERR_PTR(-ENOMEM);
679 }
680 if (inode->i_state & I_NEW) {
681 /*
682 * inode->i_private set by shiftfs_inode_set(), but we still
683 * need to take a reference
684 */
685 ihold(newi);
686 shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
687 unlock_new_inode(inode);
688 }
689
690 out:
691 return d_splice_alias(inode, dentry);
692 }
693
694 static int shiftfs_permission(struct inode *inode, int mask)
695 {
696 int err;
697 const struct cred *oldcred;
698 struct inode *loweri = inode->i_private;
699
700 if (!loweri) {
701 WARN_ON(!(mask & MAY_NOT_BLOCK));
702 return -ECHILD;
703 }
704
705 err = generic_permission(inode, mask);
706 if (err)
707 return err;
708
709 oldcred = shiftfs_override_creds(inode->i_sb);
710 err = inode_permission(loweri, mask);
711 revert_creds(oldcred);
712
713 return err;
714 }
715
716 static int shiftfs_fiemap(struct inode *inode,
717 struct fiemap_extent_info *fieinfo, u64 start,
718 u64 len)
719 {
720 int err;
721 const struct cred *oldcred;
722 struct inode *loweri = inode->i_private;
723
724 if (!loweri->i_op->fiemap)
725 return -EOPNOTSUPP;
726
727 oldcred = shiftfs_override_creds(inode->i_sb);
728 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
729 filemap_write_and_wait(loweri->i_mapping);
730 err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
731 revert_creds(oldcred);
732
733 return err;
734 }
735
736 static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
737 umode_t mode)
738 {
739 int err;
740 const struct cred *oldcred;
741 struct dentry *lowerd = dentry->d_fsdata;
742 struct inode *loweri = dir->i_private;
743
744 if (!loweri->i_op->tmpfile)
745 return -EOPNOTSUPP;
746
747 oldcred = shiftfs_override_creds(dir->i_sb);
748 err = loweri->i_op->tmpfile(loweri, lowerd, mode);
749 revert_creds(oldcred);
750
751 return err;
752 }
753
754 static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
755 {
756 struct dentry *lowerd = dentry->d_fsdata;
757 struct inode *loweri = lowerd->d_inode;
758 struct iattr newattr;
759 const struct cred *oldcred;
760 struct super_block *sb = dentry->d_sb;
761 int err;
762
763 err = setattr_prepare(dentry, attr);
764 if (err)
765 return err;
766
767 newattr = *attr;
768 newattr.ia_uid = KUIDT_INIT(from_kuid(sb->s_user_ns, attr->ia_uid));
769 newattr.ia_gid = KGIDT_INIT(from_kgid(sb->s_user_ns, attr->ia_gid));
770
771 /*
772 * mode change is for clearing setuid/setgid bits. Allow lower fs
773 * to interpret this in its own way.
774 */
775 if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
776 newattr.ia_valid &= ~ATTR_MODE;
777
778 inode_lock(loweri);
779 oldcred = shiftfs_override_creds(dentry->d_sb);
780 err = notify_change(lowerd, &newattr, NULL);
781 revert_creds(oldcred);
782 inode_unlock(loweri);
783
784 shiftfs_copyattr(loweri, d_inode(dentry));
785
786 return err;
787 }
788
789 static int shiftfs_getattr(const struct path *path, struct kstat *stat,
790 u32 request_mask, unsigned int query_flags)
791 {
792 struct inode *inode = path->dentry->d_inode;
793 struct dentry *lowerd = path->dentry->d_fsdata;
794 struct inode *loweri = lowerd->d_inode;
795 struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
796 struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
797 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
798 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
799 const struct cred *oldcred;
800 int err;
801
802 oldcred = shiftfs_override_creds(inode->i_sb);
803 err = vfs_getattr(&newpath, stat, request_mask, query_flags);
804 revert_creds(oldcred);
805
806 if (err)
807 return err;
808
809 /* transform the underlying id */
810 stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
811 stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
812 return 0;
813 }
814
815 #ifdef CONFIG_SHIFT_FS_POSIX_ACL
816
817 static int
818 shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
819 struct posix_acl *acl)
820 {
821 int i;
822
823 for (i = 0; i < acl->a_count; i++) {
824 struct posix_acl_entry *e = &acl->a_entries[i];
825 switch(e->e_tag) {
826 case ACL_USER:
827 e->e_uid = shift_kuid(from, to, e->e_uid);
828 if (!uid_valid(e->e_uid))
829 return -EOVERFLOW;
830 break;
831 case ACL_GROUP:
832 e->e_gid = shift_kgid(from, to, e->e_gid);
833 if (!gid_valid(e->e_gid))
834 return -EOVERFLOW;
835 break;
836 }
837 }
838 return 0;
839 }
840
841 static void
842 shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
843 void *value, size_t size)
844 {
845 struct posix_acl_xattr_header *header = value;
846 struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
847 int count;
848 kuid_t kuid;
849 kgid_t kgid;
850
851 if (!value)
852 return;
853 if (size < sizeof(struct posix_acl_xattr_header))
854 return;
855 if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
856 return;
857
858 count = posix_acl_xattr_count(size);
859 if (count < 0)
860 return;
861 if (count == 0)
862 return;
863
864 for (end = entry + count; entry != end; entry++) {
865 switch(le16_to_cpu(entry->e_tag)) {
866 case ACL_USER:
867 kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
868 kuid = shift_kuid(from, to, kuid);
869 entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
870 break;
871 case ACL_GROUP:
872 kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
873 kgid = shift_kgid(from, to, kgid);
874 entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
875 break;
876 default:
877 break;
878 }
879 }
880 }
881
882 static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
883 {
884 struct inode *loweri = inode->i_private;
885 const struct cred *oldcred;
886 struct posix_acl *lower_acl, *acl = NULL;
887 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
888 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
889 int size;
890 int err;
891
892 if (!IS_POSIXACL(loweri))
893 return NULL;
894
895 oldcred = shiftfs_override_creds(inode->i_sb);
896 lower_acl = get_acl(loweri, type);
897 revert_creds(oldcred);
898
899 if (lower_acl && !IS_ERR(lower_acl)) {
900 /* XXX: export posix_acl_clone? */
901 size = sizeof(struct posix_acl) +
902 lower_acl->a_count * sizeof(struct posix_acl_entry);
903 acl = kmemdup(lower_acl, size, GFP_KERNEL);
904 posix_acl_release(lower_acl);
905
906 if (!acl)
907 return ERR_PTR(-ENOMEM);
908
909 refcount_set(&acl->a_refcount, 1);
910
911 err = shift_acl_ids(from_ns, to_ns, acl);
912 if (err) {
913 kfree(acl);
914 return ERR_PTR(err);
915 }
916 }
917
918 return acl;
919 }
920
921 static int
922 shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
923 struct dentry *dentry, struct inode *inode,
924 const char *name, void *buffer, size_t size)
925 {
926 struct inode *loweri = inode->i_private;
927 int ret;
928
929 ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
930 buffer, size);
931 if (ret < 0)
932 return ret;
933
934 inode_lock(loweri);
935 shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
936 buffer, size);
937 inode_unlock(loweri);
938 return ret;
939 }
940
941 static int
942 shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
943 struct dentry *dentry, struct inode *inode,
944 const char *name, const void *value,
945 size_t size, int flags)
946 {
947 struct inode *loweri = inode->i_private;
948 int err;
949
950 if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
951 return -EOPNOTSUPP;
952 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
953 return value ? -EACCES : 0;
954 if (!inode_owner_or_capable(inode))
955 return -EPERM;
956
957 if (value) {
958 shift_acl_xattr_ids(inode->i_sb->s_user_ns,
959 loweri->i_sb->s_user_ns,
960 (void *)value, size);
961 err = shiftfs_setxattr(dentry, inode, handler->name, value,
962 size, flags);
963 } else {
964 err = shiftfs_removexattr(dentry, handler->name);
965 }
966
967 if (!err)
968 shiftfs_copyattr(loweri, inode);
969
970 return err;
971 }
972
973 static const struct xattr_handler
974 shiftfs_posix_acl_access_xattr_handler = {
975 .name = XATTR_NAME_POSIX_ACL_ACCESS,
976 .flags = ACL_TYPE_ACCESS,
977 .get = shiftfs_posix_acl_xattr_get,
978 .set = shiftfs_posix_acl_xattr_set,
979 };
980
981 static const struct xattr_handler
982 shiftfs_posix_acl_default_xattr_handler = {
983 .name = XATTR_NAME_POSIX_ACL_DEFAULT,
984 .flags = ACL_TYPE_DEFAULT,
985 .get = shiftfs_posix_acl_xattr_get,
986 .set = shiftfs_posix_acl_xattr_set,
987 };
988
989 #else /* !CONFIG_SHIFT_FS_POSIX_ACL */
990
/* Without POSIX ACL support the inode operation tables get no ->get_acl. */
#define shiftfs_get_acl NULL
992
993 #endif /* CONFIG_SHIFT_FS_POSIX_ACL */
994
995 static const struct inode_operations shiftfs_dir_inode_operations = {
996 .lookup = shiftfs_lookup,
997 .mkdir = shiftfs_mkdir,
998 .symlink = shiftfs_symlink,
999 .unlink = shiftfs_unlink,
1000 .rmdir = shiftfs_rmdir,
1001 .rename = shiftfs_rename,
1002 .link = shiftfs_link,
1003 .setattr = shiftfs_setattr,
1004 .create = shiftfs_create,
1005 .mknod = shiftfs_mknod,
1006 .permission = shiftfs_permission,
1007 .getattr = shiftfs_getattr,
1008 .listxattr = shiftfs_listxattr,
1009 .get_acl = shiftfs_get_acl,
1010 };
1011
1012 static const struct inode_operations shiftfs_file_inode_operations = {
1013 .fiemap = shiftfs_fiemap,
1014 .getattr = shiftfs_getattr,
1015 .get_acl = shiftfs_get_acl,
1016 .listxattr = shiftfs_listxattr,
1017 .permission = shiftfs_permission,
1018 .setattr = shiftfs_setattr,
1019 .tmpfile = shiftfs_tmpfile,
1020 };
1021
1022 static const struct inode_operations shiftfs_special_inode_operations = {
1023 .getattr = shiftfs_getattr,
1024 .get_acl = shiftfs_get_acl,
1025 .listxattr = shiftfs_listxattr,
1026 .permission = shiftfs_permission,
1027 .setattr = shiftfs_setattr,
1028 };
1029
1030 static const struct inode_operations shiftfs_symlink_inode_operations = {
1031 .getattr = shiftfs_getattr,
1032 .get_link = shiftfs_get_link,
1033 .listxattr = shiftfs_listxattr,
1034 .setattr = shiftfs_setattr,
1035 };
1036
1037 static struct file *shiftfs_open_realfile(const struct file *file,
1038 struct inode *realinode)
1039 {
1040 struct file *realfile;
1041 const struct cred *old_cred;
1042 struct inode *inode = file_inode(file);
1043 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1044 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1045 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1046
1047 old_cred = shiftfs_override_creds(inode->i_sb);
1048 realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1049 info->creator_cred);
1050 revert_creds(old_cred);
1051
1052 return realfile;
1053 }
1054
1055 #define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1056
1057 static int shiftfs_change_flags(struct file *file, unsigned int flags)
1058 {
1059 struct inode *inode = file_inode(file);
1060 int err;
1061
1062 /* if some flag changed that cannot be changed then something's amiss */
1063 if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1064 return -EIO;
1065
1066 flags &= SHIFTFS_SETFL_MASK;
1067
1068 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1069 return -EPERM;
1070
1071 if (flags & O_DIRECT) {
1072 if (!file->f_mapping->a_ops ||
1073 !file->f_mapping->a_ops->direct_IO)
1074 return -EINVAL;
1075 }
1076
1077 if (file->f_op->check_flags) {
1078 err = file->f_op->check_flags(flags);
1079 if (err)
1080 return err;
1081 }
1082
1083 spin_lock(&file->f_lock);
1084 file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1085 spin_unlock(&file->f_lock);
1086
1087 return 0;
1088 }
1089
1090 static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1091 {
1092 struct file *realfile = file->private_data;
1093
1094 lowerfd->flags = 0;
1095 lowerfd->file = realfile;
1096
1097 /* Did the flags change since open? */
1098 if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1099 return shiftfs_change_flags(lowerfd->file, file->f_flags);
1100
1101 return 0;
1102 }
1103
1104 static int shiftfs_open(struct inode *inode, struct file *file)
1105 {
1106 struct file *realfile;
1107
1108 realfile = shiftfs_open_realfile(file, inode->i_private);
1109 if (IS_ERR(realfile))
1110 return PTR_ERR(realfile);
1111
1112 file->private_data = realfile;
1113 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1114 file->f_mapping = realfile->f_mapping;
1115
1116 return 0;
1117 }
1118
1119 static int shiftfs_dir_open(struct inode *inode, struct file *file)
1120 {
1121 struct file *realfile;
1122 const struct cred *oldcred;
1123 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1124 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1125 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1126
1127 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1128 realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1129 info->creator_cred);
1130 revert_creds(oldcred);
1131 if (IS_ERR(realfile))
1132 return PTR_ERR(realfile);
1133
1134 file->private_data = realfile;
1135
1136 return 0;
1137 }
1138
1139 static int shiftfs_release(struct inode *inode, struct file *file)
1140 {
1141 struct file *realfile = file->private_data;
1142
1143 if (realfile)
1144 fput(realfile);
1145
1146 return 0;
1147 }
1148
/* ->release for directories: identical to the regular file path. */
static int shiftfs_dir_release(struct inode *inode, struct file *file)
{
	int ret = shiftfs_release(inode, file);

	return ret;
}
1153
1154 static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1155 {
1156 struct file *realfile = file->private_data;
1157
1158 return vfs_llseek(realfile, offset, whence);
1159 }
1160
1161 static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
1162 {
1163 struct inode *realinode = file_inode(file)->i_private;
1164
1165 return generic_file_llseek_size(file, offset, whence,
1166 realinode->i_sb->s_maxbytes,
1167 i_size_read(realinode));
1168 }
1169
1170 /* XXX: Need to figure out what to do about atime updates, maybe other
1171 * timestamps too ... ref. ovl_file_accessed() */
1172
1173 static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1174 {
1175 int ifl = iocb->ki_flags;
1176 rwf_t flags = 0;
1177
1178 if (ifl & IOCB_NOWAIT)
1179 flags |= RWF_NOWAIT;
1180 if (ifl & IOCB_HIPRI)
1181 flags |= RWF_HIPRI;
1182 if (ifl & IOCB_DSYNC)
1183 flags |= RWF_DSYNC;
1184 if (ifl & IOCB_SYNC)
1185 flags |= RWF_SYNC;
1186
1187 return flags;
1188 }
1189
1190 static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1191 {
1192 struct file *file = iocb->ki_filp;
1193 struct fd lowerfd;
1194 const struct cred *oldcred;
1195 ssize_t ret;
1196
1197 if (!iov_iter_count(iter))
1198 return 0;
1199
1200 ret = shiftfs_real_fdget(file, &lowerfd);
1201 if (ret)
1202 return ret;
1203
1204 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1205 ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1206 shiftfs_iocb_to_rwf(iocb));
1207 revert_creds(oldcred);
1208
1209 shiftfs_file_accessed(file);
1210
1211 fdput(lowerfd);
1212 return ret;
1213 }
1214
1215 static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1216 {
1217 struct file *file = iocb->ki_filp;
1218 struct inode *inode = file_inode(file);
1219 struct fd lowerfd;
1220 const struct cred *oldcred;
1221 ssize_t ret;
1222
1223 if (!iov_iter_count(iter))
1224 return 0;
1225
1226 inode_lock(inode);
1227 /* Update mode */
1228 shiftfs_copyattr(inode->i_private, inode);
1229 ret = file_remove_privs(file);
1230 if (ret)
1231 goto out_unlock;
1232
1233 ret = shiftfs_real_fdget(file, &lowerfd);
1234 if (ret)
1235 goto out_unlock;
1236
1237 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1238 file_start_write(lowerfd.file);
1239 ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1240 shiftfs_iocb_to_rwf(iocb));
1241 file_end_write(lowerfd.file);
1242 revert_creds(oldcred);
1243
1244 /* Update size */
1245 shiftfs_copyattr(inode->i_private, inode);
1246
1247 fdput(lowerfd);
1248
1249 out_unlock:
1250 inode_unlock(inode);
1251 return ret;
1252 }
1253
1254 static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1255 int datasync)
1256 {
1257 struct fd lowerfd;
1258 const struct cred *oldcred;
1259 int ret;
1260
1261 ret = shiftfs_real_fdget(file, &lowerfd);
1262 if (ret)
1263 return ret;
1264
1265 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1266 ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1267 revert_creds(oldcred);
1268
1269 fdput(lowerfd);
1270 return ret;
1271 }
1272
1273 static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1274 {
1275 struct file *realfile = file->private_data;
1276 const struct cred *oldcred;
1277 int ret;
1278
1279 if (!realfile->f_op->mmap)
1280 return -ENODEV;
1281
1282 if (WARN_ON(file != vma->vm_file))
1283 return -EIO;
1284
1285 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1286 vma->vm_file = get_file(realfile);
1287 ret = call_mmap(vma->vm_file, vma);
1288 revert_creds(oldcred);
1289
1290 shiftfs_file_accessed(file);
1291
1292 if (ret)
1293 fput(realfile); /* Drop refcount from new vm_file value */
1294 else
1295 fput(file); /* Drop refcount from previous vm_file value */
1296
1297 return ret;
1298 }
1299
1300 static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1301 loff_t len)
1302 {
1303 struct inode *inode = file_inode(file);
1304 struct inode *loweri = inode->i_private;
1305 struct fd lowerfd;
1306 const struct cred *oldcred;
1307 int ret;
1308
1309 ret = shiftfs_real_fdget(file, &lowerfd);
1310 if (ret)
1311 return ret;
1312
1313 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1314 ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1315 revert_creds(oldcred);
1316
1317 /* Update size */
1318 shiftfs_copyattr(loweri, inode);
1319
1320 fdput(lowerfd);
1321 return ret;
1322 }
1323
1324 static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1325 int advice)
1326 {
1327 struct fd lowerfd;
1328 const struct cred *oldcred;
1329 int ret;
1330
1331 ret = shiftfs_real_fdget(file, &lowerfd);
1332 if (ret)
1333 return ret;
1334
1335 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1336 ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1337 revert_creds(oldcred);
1338
1339 fdput(lowerfd);
1340 return ret;
1341 }
1342
1343 static int shiftfs_override_ioctl_creds(const struct super_block *sb,
1344 const struct cred **oldcred,
1345 struct cred **newcred)
1346 {
1347 kuid_t fsuid = current_fsuid();
1348 kgid_t fsgid = current_fsgid();
1349
1350 *oldcred = shiftfs_override_creds(sb);
1351
1352 *newcred = prepare_creds();
1353 if (!*newcred) {
1354 revert_creds(*oldcred);
1355 return -ENOMEM;
1356 }
1357
1358 (*newcred)->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, fsuid));
1359 (*newcred)->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, fsgid));
1360
1361 /* clear all caps to prevent bypassing capable() checks */
1362 cap_clear((*newcred)->cap_bset);
1363 cap_clear((*newcred)->cap_effective);
1364 cap_clear((*newcred)->cap_inheritable);
1365 cap_clear((*newcred)->cap_permitted);
1366
1367 put_cred(override_creds(*newcred));
1368 return 0;
1369 }
1370
/*
 * Undo shiftfs_override_ioctl_creds(): hands both credential pointers to
 * shiftfs_revert_object_creds().
 *
 * This function returns void, so the helper is called as a plain statement
 * rather than through "return <expr>;", which ISO C does not permit in a
 * void function.
 */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1376
1377 static inline bool is_btrfs_snap_ioctl(int cmd)
1378 {
1379 if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1380 return true;
1381
1382 return false;
1383 }
1384
1385 static int shiftfs_btrfs_ioctl_fd_restore(int cmd, struct fd lfd, int fd,
1386 void __user *arg,
1387 struct btrfs_ioctl_vol_args *v1,
1388 struct btrfs_ioctl_vol_args_v2 *v2)
1389 {
1390 int ret;
1391
1392 if (!is_btrfs_snap_ioctl(cmd))
1393 return 0;
1394
1395 if (cmd == BTRFS_IOC_SNAP_CREATE)
1396 ret = copy_to_user(arg, v1, sizeof(*v1));
1397 else
1398 ret = copy_to_user(arg, v2, sizeof(*v2));
1399
1400 fdput(lfd);
1401 __close_fd(current->files, fd);
1402 kfree(v1);
1403 kfree(v2);
1404
1405 return ret;
1406 }
1407
1408 static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1409 struct btrfs_ioctl_vol_args **b1,
1410 struct btrfs_ioctl_vol_args_v2 **b2,
1411 struct fd *lfd,
1412 int *newfd)
1413 {
1414 int oldfd, ret;
1415 struct fd src;
1416 struct btrfs_ioctl_vol_args *v1 = NULL;
1417 struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1418
1419 if (!is_btrfs_snap_ioctl(cmd))
1420 return 0;
1421
1422 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1423 v1 = memdup_user(arg, sizeof(*v1));
1424 if (IS_ERR(v1))
1425 return PTR_ERR(v1);
1426 oldfd = v1->fd;
1427 *b1 = v1;
1428 } else {
1429 v2 = memdup_user(arg, sizeof(*v2));
1430 if (IS_ERR(v2))
1431 return PTR_ERR(v2);
1432 oldfd = v2->fd;
1433 *b2 = v2;
1434 }
1435
1436 src = fdget(oldfd);
1437 if (!src.file)
1438 return -EINVAL;
1439
1440 ret = shiftfs_real_fdget(src.file, lfd);
1441 fdput(src);
1442 if (ret)
1443 return ret;
1444
1445 *newfd = get_unused_fd_flags(lfd->file->f_flags);
1446 if (*newfd < 0) {
1447 fdput(*lfd);
1448 return *newfd;
1449 }
1450
1451 fd_install(*newfd, lfd->file);
1452
1453 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1454 v1->fd = *newfd;
1455 ret = copy_to_user(arg, v1, sizeof(*v1));
1456 v1->fd = oldfd;
1457 } else {
1458 v2->fd = *newfd;
1459 ret = copy_to_user(arg, v2, sizeof(*v2));
1460 v2->fd = oldfd;
1461 }
1462
1463 if (ret)
1464 shiftfs_btrfs_ioctl_fd_restore(cmd, *lfd, *newfd, arg, v1, v2);
1465
1466 return ret;
1467 }
1468
1469 static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1470 unsigned long arg)
1471 {
1472 struct fd lowerfd;
1473 struct cred *newcred;
1474 const struct cred *oldcred;
1475 int newfd = -EBADF;
1476 long err = 0, ret = 0;
1477 void __user *argp = (void __user *)arg;
1478 struct fd btrfs_lfd = {};
1479 struct super_block *sb = file->f_path.dentry->d_sb;
1480 struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1481 struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1482
1483 ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
1484 &btrfs_lfd, &newfd);
1485 if (ret < 0)
1486 return ret;
1487
1488 ret = shiftfs_real_fdget(file, &lowerfd);
1489 if (ret)
1490 goto out_restore;
1491
1492 ret = shiftfs_override_ioctl_creds(sb, &oldcred, &newcred);
1493 if (ret)
1494 goto out_fdput;
1495
1496 ret = vfs_ioctl(lowerfd.file, cmd, arg);
1497
1498 shiftfs_revert_ioctl_creds(oldcred, newcred);
1499
1500 shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1501 shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1502
1503 out_fdput:
1504 fdput(lowerfd);
1505
1506 out_restore:
1507 err = shiftfs_btrfs_ioctl_fd_restore(cmd, btrfs_lfd, newfd, argp,
1508 btrfs_v1, btrfs_v2);
1509 if (!ret)
1510 ret = err;
1511
1512 return ret;
1513 }
1514
1515 static bool in_ioctl_whitelist(int flag, unsigned long arg)
1516 {
1517 void __user *argp = (void __user *)arg;
1518 u64 flags = 0;
1519
1520 switch (flag) {
1521 case BTRFS_IOC_FS_INFO:
1522 return true;
1523 case BTRFS_IOC_SNAP_CREATE:
1524 return true;
1525 case BTRFS_IOC_SNAP_CREATE_V2:
1526 return true;
1527 case BTRFS_IOC_SUBVOL_CREATE:
1528 return true;
1529 case BTRFS_IOC_SUBVOL_CREATE_V2:
1530 return true;
1531 case BTRFS_IOC_SUBVOL_GETFLAGS:
1532 return true;
1533 case BTRFS_IOC_SUBVOL_SETFLAGS:
1534 if (copy_from_user(&flags, argp, sizeof(flags)))
1535 return false;
1536
1537 if (flags & ~BTRFS_SUBVOL_RDONLY)
1538 return false;
1539
1540 return true;
1541 case BTRFS_IOC_SNAP_DESTROY:
1542 return true;
1543 }
1544
1545 return false;
1546 }
1547
1548 static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1549 unsigned long arg)
1550 {
1551 switch (cmd) {
1552 case FS_IOC_GETVERSION:
1553 /* fall through */
1554 case FS_IOC_GETFLAGS:
1555 /* fall through */
1556 case FS_IOC_SETFLAGS:
1557 break;
1558 default:
1559 if (!in_ioctl_whitelist(cmd, arg) ||
1560 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1561 return -ENOTTY;
1562 }
1563
1564 return shiftfs_real_ioctl(file, cmd, arg);
1565 }
1566
1567 static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1568 unsigned long arg)
1569 {
1570 switch (cmd) {
1571 case FS_IOC32_GETVERSION:
1572 /* fall through */
1573 case FS_IOC32_GETFLAGS:
1574 /* fall through */
1575 case FS_IOC32_SETFLAGS:
1576 break;
1577 default:
1578 if (!in_ioctl_whitelist(cmd, arg) ||
1579 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1580 return -ENOIOCTLCMD;
1581 }
1582
1583 return shiftfs_real_ioctl(file, cmd, arg);
1584 }
1585
/* Operation selector for shiftfs_copyfile(). */
enum shiftfs_copyop {
	SHIFTFS_COPY,	/* vfs_copy_file_range() */
	SHIFTFS_CLONE,	/* vfs_clone_file_range() */
	SHIFTFS_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
1591
1592 static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1593 struct file *file_out, loff_t pos_out, u64 len,
1594 unsigned int flags, enum shiftfs_copyop op)
1595 {
1596 ssize_t ret;
1597 struct fd real_in, real_out;
1598 const struct cred *oldcred;
1599 struct inode *inode_out = file_inode(file_out);
1600 struct inode *loweri = inode_out->i_private;
1601
1602 ret = shiftfs_real_fdget(file_out, &real_out);
1603 if (ret)
1604 return ret;
1605
1606 ret = shiftfs_real_fdget(file_in, &real_in);
1607 if (ret) {
1608 fdput(real_out);
1609 return ret;
1610 }
1611
1612 oldcred = shiftfs_override_creds(inode_out->i_sb);
1613 switch (op) {
1614 case SHIFTFS_COPY:
1615 ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1616 pos_out, len, flags);
1617 break;
1618
1619 case SHIFTFS_CLONE:
1620 ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1621 pos_out, len, flags);
1622 break;
1623
1624 case SHIFTFS_DEDUPE:
1625 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1626 real_out.file, pos_out, len,
1627 flags);
1628 break;
1629 }
1630 revert_creds(oldcred);
1631
1632 /* Update size */
1633 shiftfs_copyattr(loweri, inode_out);
1634
1635 fdput(real_in);
1636 fdput(real_out);
1637
1638 return ret;
1639 }
1640
1641 static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1642 struct file *file_out, loff_t pos_out,
1643 size_t len, unsigned int flags)
1644 {
1645 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1646 SHIFTFS_COPY);
1647 }
1648
1649 static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1650 struct file *file_out, loff_t pos_out,
1651 loff_t len, unsigned int remap_flags)
1652 {
1653 enum shiftfs_copyop op;
1654
1655 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1656 return -EINVAL;
1657
1658 if (remap_flags & REMAP_FILE_DEDUP)
1659 op = SHIFTFS_DEDUPE;
1660 else
1661 op = SHIFTFS_CLONE;
1662
1663 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1664 remap_flags, op);
1665 }
1666
1667 static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1668 {
1669 const struct cred *oldcred;
1670 int err = -ENOTDIR;
1671 struct file *realfile = file->private_data;
1672
1673 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1674 err = iterate_dir(realfile, ctx);
1675 revert_creds(oldcred);
1676
1677 return err;
1678 }
1679
1680 const struct file_operations shiftfs_file_operations = {
1681 .open = shiftfs_open,
1682 .release = shiftfs_release,
1683 .llseek = shiftfs_file_llseek,
1684 .read_iter = shiftfs_read_iter,
1685 .write_iter = shiftfs_write_iter,
1686 .fsync = shiftfs_fsync,
1687 .mmap = shiftfs_mmap,
1688 .fallocate = shiftfs_fallocate,
1689 .fadvise = shiftfs_fadvise,
1690 .unlocked_ioctl = shiftfs_ioctl,
1691 .compat_ioctl = shiftfs_compat_ioctl,
1692 .copy_file_range = shiftfs_copy_file_range,
1693 .remap_file_range = shiftfs_remap_file_range,
1694 };
1695
1696 const struct file_operations shiftfs_dir_operations = {
1697 .open = shiftfs_dir_open,
1698 .release = shiftfs_dir_release,
1699 .compat_ioctl = shiftfs_compat_ioctl,
1700 .fsync = shiftfs_fsync,
1701 .iterate_shared = shiftfs_iterate_shared,
1702 .llseek = shiftfs_dir_llseek,
1703 .read = generic_read_dir,
1704 .unlocked_ioctl = shiftfs_ioctl,
1705 };
1706
1707 static const struct address_space_operations shiftfs_aops = {
1708 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1709 .direct_IO = noop_direct_IO,
1710 };
1711
1712 static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1713 umode_t mode, dev_t dev, struct dentry *dentry)
1714 {
1715 struct inode *loweri;
1716
1717 inode->i_ino = ino;
1718 inode->i_flags |= S_NOCMTIME;
1719
1720 mode &= S_IFMT;
1721 inode->i_mode = mode;
1722 switch (mode & S_IFMT) {
1723 case S_IFDIR:
1724 inode->i_op = &shiftfs_dir_inode_operations;
1725 inode->i_fop = &shiftfs_dir_operations;
1726 break;
1727 case S_IFLNK:
1728 inode->i_op = &shiftfs_symlink_inode_operations;
1729 break;
1730 case S_IFREG:
1731 inode->i_op = &shiftfs_file_inode_operations;
1732 inode->i_fop = &shiftfs_file_operations;
1733 inode->i_mapping->a_ops = &shiftfs_aops;
1734 break;
1735 default:
1736 inode->i_op = &shiftfs_special_inode_operations;
1737 init_special_inode(inode, mode, dev);
1738 break;
1739 }
1740
1741 if (!dentry)
1742 return;
1743
1744 loweri = dentry->d_inode;
1745 if (!loweri->i_op->get_link)
1746 inode->i_opflags |= IOP_NOFOLLOW;
1747
1748 shiftfs_copyattr(loweri, inode);
1749 shiftfs_copyflags(loweri, inode);
1750 set_nlink(inode, loweri->i_nlink);
1751 }
1752
1753 static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1754 {
1755 struct super_block *sb = dentry->d_sb;
1756 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1757
1758 if (sbinfo->mark)
1759 seq_show_option(m, "mark", NULL);
1760
1761 if (sbinfo->passthrough)
1762 seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1763
1764 return 0;
1765 }
1766
1767 static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1768 {
1769 struct super_block *sb = dentry->d_sb;
1770 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1771 struct dentry *root = sb->s_root;
1772 struct dentry *realroot = root->d_fsdata;
1773 struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1774 int err;
1775
1776 err = vfs_statfs(&realpath, buf);
1777 if (err)
1778 return err;
1779
1780 if (!shiftfs_passthrough_statfs(sbinfo))
1781 buf->f_type = sb->s_magic;
1782
1783 return 0;
1784 }
1785
1786 static void shiftfs_evict_inode(struct inode *inode)
1787 {
1788 struct inode *loweri = inode->i_private;
1789
1790 clear_inode(inode);
1791
1792 if (loweri)
1793 iput(loweri);
1794 }
1795
1796 static void shiftfs_put_super(struct super_block *sb)
1797 {
1798 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1799
1800 if (sbinfo) {
1801 mntput(sbinfo->mnt);
1802 put_cred(sbinfo->creator_cred);
1803 kfree(sbinfo);
1804 }
1805 }
1806
1807 static const struct xattr_handler shiftfs_xattr_handler = {
1808 .prefix = "",
1809 .get = shiftfs_xattr_get,
1810 .set = shiftfs_xattr_set,
1811 };
1812
1813 const struct xattr_handler *shiftfs_xattr_handlers[] = {
1814 #ifdef CONFIG_SHIFT_FS_POSIX_ACL
1815 &shiftfs_posix_acl_access_xattr_handler,
1816 &shiftfs_posix_acl_default_xattr_handler,
1817 #endif
1818 &shiftfs_xattr_handler,
1819 NULL
1820 };
1821
/*
 * Returns true when every bit set in @new_flags is also set in @old_flags,
 * i.e. @new_flags requests nothing beyond @old_flags.
 */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	return (new_flags & old_flags) == new_flags;
}
1829
1830 static int shiftfs_super_check_flags(unsigned long old_flags,
1831 unsigned long new_flags)
1832 {
1833 if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1834 return -EPERM;
1835
1836 if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1837 return -EPERM;
1838
1839 if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1840 return -EPERM;
1841
1842 if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1843 return -EPERM;
1844
1845 if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1846 return -EPERM;
1847
1848 if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1849 return -EPERM;
1850
1851 if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1852 return -EPERM;
1853
1854 return 0;
1855 }
1856
1857 static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1858 {
1859 int err;
1860 struct shiftfs_super_info new = {};
1861 struct shiftfs_super_info *info = sb->s_fs_info;
1862
1863 err = shiftfs_parse_mount_options(&new, data);
1864 if (err)
1865 return err;
1866
1867 err = shiftfs_super_check_flags(sb->s_flags, *flags);
1868 if (err)
1869 return err;
1870
1871 /* Mark mount option cannot be changed. */
1872 if (info->mark || (info->mark != new.mark))
1873 return -EPERM;
1874
1875 if (info->passthrough != new.passthrough) {
1876 /* Don't allow exceeding passthrough options of mark mount. */
1877 if (!passthrough_is_subset(info->passthrough_mark,
1878 info->passthrough))
1879 return -EPERM;
1880
1881 info->passthrough = new.passthrough;
1882 }
1883
1884 return 0;
1885 }
1886
1887 static const struct super_operations shiftfs_super_ops = {
1888 .put_super = shiftfs_put_super,
1889 .show_options = shiftfs_show_options,
1890 .statfs = shiftfs_statfs,
1891 .remount_fs = shiftfs_remount,
1892 .evict_inode = shiftfs_evict_inode,
1893 };
1894
/* Arguments handed from shiftfs_mount() to shiftfs_fill_super(). */
struct shiftfs_data {
	void *data;		/* mount option data */
	const char *path;	/* dev_name given to mount: path to look up */
};
1899
1900 static void shiftfs_super_force_flags(struct super_block *sb,
1901 unsigned long lower_flags)
1902 {
1903 sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1904 SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1905
1906 if (!(lower_flags & SB_POSIXACL))
1907 sb->s_flags &= ~SB_POSIXACL;
1908 }
1909
1910 static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1911 int silent)
1912 {
1913 int err;
1914 struct path path = {};
1915 struct shiftfs_super_info *sbinfo_mp;
1916 char *name = NULL;
1917 struct inode *inode = NULL;
1918 struct dentry *dentry = NULL;
1919 struct shiftfs_data *data = raw_data;
1920 struct shiftfs_super_info *sbinfo = NULL;
1921
1922 if (!data->path)
1923 return -EINVAL;
1924
1925 sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1926 if (!sb->s_fs_info)
1927 return -ENOMEM;
1928 sbinfo = sb->s_fs_info;
1929
1930 err = shiftfs_parse_mount_options(sbinfo, data->data);
1931 if (err)
1932 return err;
1933
1934 /* to mount a mark, must be userns admin */
1935 if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
1936 return -EPERM;
1937
1938 name = kstrdup(data->path, GFP_KERNEL);
1939 if (!name)
1940 return -ENOMEM;
1941
1942 err = kern_path(name, LOOKUP_FOLLOW, &path);
1943 if (err)
1944 goto out_free_name;
1945
1946 if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
1947 err = -ENOTDIR;
1948 goto out_put_path;
1949 }
1950
1951 sb->s_flags |= SB_POSIXACL;
1952
1953 if (sbinfo->mark) {
1954 struct super_block *lower_sb = path.mnt->mnt_sb;
1955
1956 /* to mark a mount point, must root wrt lower s_user_ns */
1957 if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
1958 err = -EPERM;
1959 goto out_put_path;
1960 }
1961
1962 /*
1963 * this part is visible unshifted, so make sure no
1964 * executables that could be used to give suid
1965 * privileges
1966 */
1967 sb->s_iflags = SB_I_NOEXEC;
1968
1969 shiftfs_super_force_flags(sb, lower_sb->s_flags);
1970
1971 /*
1972 * Handle nesting of shiftfs mounts by referring this mark
1973 * mount back to the original mark mount. This is more
1974 * efficient and alleviates concerns about stack depth.
1975 */
1976 if (lower_sb->s_magic == SHIFTFS_MAGIC) {
1977 sbinfo_mp = lower_sb->s_fs_info;
1978
1979 /* Doesn't make sense to mark a mark mount */
1980 if (sbinfo_mp->mark) {
1981 err = -EINVAL;
1982 goto out_put_path;
1983 }
1984
1985 if (!passthrough_is_subset(sbinfo_mp->passthrough,
1986 sbinfo->passthrough)) {
1987 err = -EPERM;
1988 goto out_put_path;
1989 }
1990
1991 sbinfo->mnt = mntget(sbinfo_mp->mnt);
1992 dentry = dget(path.dentry->d_fsdata);
1993 /*
1994 * Copy up the passthrough mount options from the
1995 * parent mark mountpoint.
1996 */
1997 sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
1998 } else {
1999 sbinfo->mnt = mntget(path.mnt);
2000 dentry = dget(path.dentry);
2001 /*
2002 * For a new mark passthrough_mark and passthrough
2003 * are identical.
2004 */
2005 sbinfo->passthrough_mark = sbinfo->passthrough;
2006 }
2007
2008 sbinfo->creator_cred = prepare_creds();
2009 if (!sbinfo->creator_cred) {
2010 err = -ENOMEM;
2011 goto out_put_path;
2012 }
2013 } else {
2014 /*
2015 * This leg executes if we're admin capable in the namespace,
2016 * so be very careful.
2017 */
2018 err = -EPERM;
2019 if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
2020 goto out_put_path;
2021
2022 sbinfo_mp = path.dentry->d_sb->s_fs_info;
2023 if (!sbinfo_mp->mark)
2024 goto out_put_path;
2025
2026 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2027 sbinfo->passthrough))
2028 goto out_put_path;
2029
2030 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2031 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2032 dentry = dget(path.dentry->d_fsdata);
2033 /*
2034 * Copy up passthrough settings from mark mountpoint so we can
2035 * verify when the overlay wants to remount with different
2036 * passthrough settings.
2037 */
2038 sbinfo->passthrough_mark = sbinfo_mp->passthrough;
2039 shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
2040 }
2041
2042 sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2043 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2044 printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2045 err = -EINVAL;
2046 goto out_put_path;
2047 }
2048
2049 inode = new_inode(sb);
2050 if (!inode) {
2051 err = -ENOMEM;
2052 goto out_put_path;
2053 }
2054 shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2055
2056 ihold(dentry->d_inode);
2057 inode->i_private = dentry->d_inode;
2058
2059 sb->s_magic = SHIFTFS_MAGIC;
2060 sb->s_op = &shiftfs_super_ops;
2061 sb->s_xattr = shiftfs_xattr_handlers;
2062 sb->s_d_op = &shiftfs_dentry_ops;
2063 sb->s_root = d_make_root(inode);
2064 if (!sb->s_root) {
2065 err = -ENOMEM;
2066 goto out_put_path;
2067 }
2068
2069 sb->s_root->d_fsdata = dentry;
2070 sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2071 shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
2072
2073 dentry = NULL;
2074 err = 0;
2075
2076 out_put_path:
2077 path_put(&path);
2078
2079 out_free_name:
2080 kfree(name);
2081
2082 dput(dentry);
2083
2084 return err;
2085 }
2086
2087 static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2088 int flags, const char *dev_name, void *data)
2089 {
2090 struct shiftfs_data d = { data, dev_name };
2091
2092 return mount_nodev(fs_type, flags, &d, shiftfs_fill_super);
2093 }
2094
2095 static struct file_system_type shiftfs_type = {
2096 .owner = THIS_MODULE,
2097 .name = "shiftfs",
2098 .mount = shiftfs_mount,
2099 .kill_sb = kill_anon_super,
2100 .fs_flags = FS_USERNS_MOUNT,
2101 };
2102
2103 static int __init shiftfs_init(void)
2104 {
2105 return register_filesystem(&shiftfs_type);
2106 }
2107
2108 static void __exit shiftfs_exit(void)
2109 {
2110 unregister_filesystem(&shiftfs_type);
2111 }
2112
2113 MODULE_ALIAS_FS("shiftfs");
2114 MODULE_AUTHOR("James Bottomley");
2115 MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>");
2116 MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>");
2117 MODULE_DESCRIPTION("id shifting filesystem");
2118 MODULE_LICENSE("GPL v2");
2119 module_init(shiftfs_init)
2120 module_exit(shiftfs_exit)