]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - fs/shiftfs.c
bnxt_en: fix ternary sign extension bug in bnxt_show_temp()
[mirror_ubuntu-hirsute-kernel.git] / fs / shiftfs.c
CommitLineData
1c1ca807 1#include <linux/btrfs.h>
45bd5d34 2#include <linux/capability.h>
8bde1547
JB
3#include <linux/cred.h>
4#include <linux/mount.h>
1c1ca807 5#include <linux/fdtable.h>
8bde1547
JB
6#include <linux/file.h>
7#include <linux/fs.h>
8#include <linux/namei.h>
9#include <linux/module.h>
10#include <linux/kernel.h>
11#include <linux/magic.h>
12#include <linux/parser.h>
45bd5d34 13#include <linux/security.h>
8bde1547
JB
14#include <linux/seq_file.h>
15#include <linux/statfs.h>
16#include <linux/slab.h>
17#include <linux/user_namespace.h>
18#include <linux/uidgid.h>
19#include <linux/xattr.h>
45bd5d34
CB
20#include <linux/posix_acl.h>
21#include <linux/posix_acl_xattr.h>
22#include <linux/uio.h>
60d66969 23#include <linux/fiemap.h>
8bde1547
JB
24
25struct shiftfs_super_info {
26 struct vfsmount *mnt;
27 struct user_namespace *userns;
45bd5d34
CB
28 /* creds of process who created the super block */
29 const struct cred *creator_cred;
8bde1547 30 bool mark;
45bd5d34 31 unsigned int passthrough;
d6d053b7 32 unsigned int passthrough_mark;
8bde1547
JB
33};
34
45bd5d34
CB
35static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
36 umode_t mode, dev_t dev, struct dentry *dentry);
37
38#define SHIFTFS_PASSTHROUGH_NONE 0
39#define SHIFTFS_PASSTHROUGH_STAT 1
1c1ca807
CB
40#define SHIFTFS_PASSTHROUGH_IOCTL 2
41#define SHIFTFS_PASSTHROUGH_ALL \
42 (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
43
44static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
45{
46 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
47 return false;
48
1c1ca807
CB
49 return true;
50}
45bd5d34
CB
51
52static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
53{
54 if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
55 return false;
56
45bd5d34
CB
57 return true;
58}
8bde1547
JB
59
/* Token ids returned by match_token() for the mount option table below. */
enum {
	OPT_MARK = 0,
	OPT_PASSTHROUGH = 1,
	/* table terminator; also what unrecognised options map to */
	OPT_LAST = 2,
};
65
66/* global filesystem options */
67static const match_table_t tokens = {
68 { OPT_MARK, "mark" },
45bd5d34 69 { OPT_PASSTHROUGH, "passthrough=%u" },
8bde1547
JB
70 { OPT_LAST, NULL }
71};
72
45bd5d34 73static const struct cred *shiftfs_override_creds(const struct super_block *sb)
8bde1547 74{
45bd5d34 75 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
8bde1547 76
45bd5d34
CB
77 return override_creds(sbinfo->creator_cred);
78}
79
/*
 * Undo shiftfs_override_object_creds(): restore @oldcred as the active
 * credentials, then drop the reference held on @newcred.
 */
static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
					       struct cred *newcred)
{
	/* Restore first so @newcred is no longer current when it is put. */
	revert_creds(oldcred);
	put_cred(newcred);
}
86
70aba758
SF
87static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
88 kuid_t kuid)
89{
90 uid_t uid = from_kuid(from, kuid);
91 return make_kuid(to, uid);
92}
93
94static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
95 kgid_t kgid)
96{
97 gid_t gid = from_kgid(from, kgid);
98 return make_kgid(to, gid);
99}
100
45bd5d34
CB
101static int shiftfs_override_object_creds(const struct super_block *sb,
102 const struct cred **oldcred,
103 struct cred **newcred,
104 struct dentry *dentry, umode_t mode,
105 bool hardlink)
106{
70aba758 107 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
45bd5d34
CB
108 kuid_t fsuid = current_fsuid();
109 kgid_t fsgid = current_fsgid();
110
111 *oldcred = shiftfs_override_creds(sb);
112
113 *newcred = prepare_creds();
114 if (!*newcred) {
115 revert_creds(*oldcred);
116 return -ENOMEM;
117 }
118
70aba758
SF
119 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
120 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
45bd5d34
CB
121
122 if (!hardlink) {
123 int err = security_dentry_create_files_as(dentry, mode,
124 &dentry->d_name,
125 *oldcred, *newcred);
126 if (err) {
127 shiftfs_revert_object_creds(*oldcred, *newcred);
128 return err;
129 }
130 }
8bde1547 131
45bd5d34
CB
132 put_cred(override_creds(*newcred));
133 return 0;
134}
8bde1547 135
45bd5d34
CB
136static void shiftfs_copyattr(struct inode *from, struct inode *to)
137{
138 struct user_namespace *from_ns = from->i_sb->s_user_ns;
139 struct user_namespace *to_ns = to->i_sb->s_user_ns;
140
141 to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
142 to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
143 to->i_mode = from->i_mode;
144 to->i_atime = from->i_atime;
145 to->i_mtime = from->i_mtime;
146 to->i_ctime = from->i_ctime;
147 i_size_write(to, i_size_read(from));
148}
8bde1547 149
45bd5d34
CB
150static void shiftfs_copyflags(struct inode *from, struct inode *to)
151{
152 unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
8bde1547 153
45bd5d34 154 inode_set_flags(to, from->i_flags & mask, mask);
8bde1547
JB
155}
156
45bd5d34 157static void shiftfs_file_accessed(struct file *file)
8bde1547 158{
45bd5d34
CB
159 struct inode *upperi, *loweri;
160
161 if (file->f_flags & O_NOATIME)
8bde1547
JB
162 return;
163
45bd5d34
CB
164 upperi = file_inode(file);
165 loweri = upperi->i_private;
166
167 if (!loweri)
168 return;
169
170 upperi->i_mtime = loweri->i_mtime;
171 upperi->i_ctime = loweri->i_ctime;
172
173 touch_atime(&file->f_path);
8bde1547
JB
174}
175
45bd5d34
CB
176static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
177 char *options)
8bde1547
JB
178{
179 char *p;
180 substring_t args[MAX_OPT_ARGS];
181
45bd5d34
CB
182 sbinfo->mark = false;
183 sbinfo->passthrough = 0;
8bde1547
JB
184
185 while ((p = strsep(&options, ",")) != NULL) {
45bd5d34 186 int err, intarg, token;
8bde1547
JB
187
188 if (!*p)
189 continue;
190
191 token = match_token(p, tokens, args);
192 switch (token) {
193 case OPT_MARK:
45bd5d34
CB
194 sbinfo->mark = true;
195 break;
196 case OPT_PASSTHROUGH:
197 err = match_int(&args[0], &intarg);
198 if (err)
199 return err;
200
201 if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
202 return -EINVAL;
203
204 sbinfo->passthrough = intarg;
8bde1547
JB
205 break;
206 default:
207 return -EINVAL;
208 }
209 }
45bd5d34 210
8bde1547
JB
211 return 0;
212}
213
214static void shiftfs_d_release(struct dentry *dentry)
215{
45bd5d34 216 struct dentry *lowerd = dentry->d_fsdata;
8bde1547 217
45bd5d34
CB
218 if (lowerd)
219 dput(lowerd);
8bde1547
JB
220}
221
222static struct dentry *shiftfs_d_real(struct dentry *dentry,
223 const struct inode *inode)
224{
45bd5d34
CB
225 struct dentry *lowerd = dentry->d_fsdata;
226
227 if (inode && d_inode(dentry) == inode)
228 return dentry;
8bde1547 229
45bd5d34
CB
230 lowerd = d_real(lowerd, inode);
231 if (lowerd && (!inode || inode == d_inode(lowerd)))
232 return lowerd;
8bde1547 233
45bd5d34
CB
234 WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
235 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
236 return dentry;
8bde1547
JB
237}
238
239static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
240{
45bd5d34
CB
241 int err = 1;
242 struct dentry *lowerd = dentry->d_fsdata;
8bde1547 243
45bd5d34 244 if (d_is_negative(lowerd) != d_is_negative(dentry))
8bde1547
JB
245 return 0;
246
45bd5d34
CB
247 if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
248 err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
8bde1547 249
45bd5d34
CB
250 if (d_really_is_positive(dentry)) {
251 struct inode *inode = d_inode(dentry);
252 struct inode *loweri = d_inode(lowerd);
253
254 shiftfs_copyattr(loweri, inode);
45bd5d34
CB
255 }
256
257 return err;
8bde1547
JB
258}
259
260static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
261{
45bd5d34
CB
262 int err = 1;
263 struct dentry *lowerd = dentry->d_fsdata;
8bde1547 264
45bd5d34
CB
265 if (d_unhashed(lowerd) ||
266 ((d_is_negative(lowerd) != d_is_negative(dentry))))
8bde1547
JB
267 return 0;
268
45bd5d34
CB
269 if (flags & LOOKUP_RCU)
270 return -ECHILD;
8bde1547 271
45bd5d34
CB
272 if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
273 err = lowerd->d_op->d_revalidate(lowerd, flags);
8bde1547 274
45bd5d34
CB
275 if (d_really_is_positive(dentry)) {
276 struct inode *inode = d_inode(dentry);
277 struct inode *loweri = d_inode(lowerd);
8bde1547 278
45bd5d34 279 shiftfs_copyattr(loweri, inode);
45bd5d34 280 }
8bde1547 281
45bd5d34 282 return err;
8bde1547
JB
283}
284
285static const struct dentry_operations shiftfs_dentry_ops = {
45bd5d34
CB
286 .d_release = shiftfs_d_release,
287 .d_real = shiftfs_d_real,
288 .d_revalidate = shiftfs_d_revalidate,
8bde1547
JB
289 .d_weak_revalidate = shiftfs_d_weak_revalidate,
290};
291
8bde1547
JB
292static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
293 struct delayed_call *done)
294{
45bd5d34
CB
295 const char *p;
296 const struct cred *oldcred;
297 struct dentry *lowerd;
8bde1547 298
45bd5d34
CB
299 /* RCU lookup not supported */
300 if (!dentry)
8bde1547 301 return ERR_PTR(-ECHILD);
45bd5d34
CB
302
303 lowerd = dentry->d_fsdata;
304 oldcred = shiftfs_override_creds(dentry->d_sb);
305 p = vfs_get_link(lowerd, done);
306 revert_creds(oldcred);
307
308 return p;
8bde1547
JB
309}
310
311static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
312 const char *name, const void *value,
313 size_t size, int flags)
314{
45bd5d34
CB
315 struct dentry *lowerd = dentry->d_fsdata;
316 int err;
317 const struct cred *oldcred;
318
319 oldcred = shiftfs_override_creds(dentry->d_sb);
320 err = vfs_setxattr(lowerd, name, value, size, flags);
321 revert_creds(oldcred);
8bde1547 322
45bd5d34 323 shiftfs_copyattr(lowerd->d_inode, inode);
8bde1547
JB
324
325 return err;
326}
327
328static int shiftfs_xattr_get(const struct xattr_handler *handler,
329 struct dentry *dentry, struct inode *inode,
330 const char *name, void *value, size_t size)
331{
45bd5d34 332 struct dentry *lowerd = dentry->d_fsdata;
8bde1547 333 int err;
45bd5d34 334 const struct cred *oldcred;
8bde1547 335
45bd5d34
CB
336 oldcred = shiftfs_override_creds(dentry->d_sb);
337 err = vfs_getxattr(lowerd, name, value, size);
338 revert_creds(oldcred);
8bde1547
JB
339
340 return err;
341}
342
343static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
344 size_t size)
345{
45bd5d34 346 struct dentry *lowerd = dentry->d_fsdata;
8bde1547 347 int err;
45bd5d34 348 const struct cred *oldcred;
8bde1547 349
45bd5d34
CB
350 oldcred = shiftfs_override_creds(dentry->d_sb);
351 err = vfs_listxattr(lowerd, list, size);
352 revert_creds(oldcred);
8bde1547
JB
353
354 return err;
355}
356
357static int shiftfs_removexattr(struct dentry *dentry, const char *name)
358{
45bd5d34 359 struct dentry *lowerd = dentry->d_fsdata;
8bde1547 360 int err;
45bd5d34
CB
361 const struct cred *oldcred;
362
363 oldcred = shiftfs_override_creds(dentry->d_sb);
364 err = vfs_removexattr(lowerd, name);
365 revert_creds(oldcred);
8bde1547 366
45bd5d34
CB
367 /* update c/mtime */
368 shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
8bde1547
JB
369
370 return err;
371}
372
373static int shiftfs_xattr_set(const struct xattr_handler *handler,
374 struct dentry *dentry, struct inode *inode,
375 const char *name, const void *value, size_t size,
376 int flags)
377{
378 if (!value)
379 return shiftfs_removexattr(dentry, name);
380 return shiftfs_setxattr(dentry, inode, name, value, size, flags);
381}
382
45bd5d34 383static int shiftfs_inode_test(struct inode *inode, void *data)
8bde1547 384{
45bd5d34
CB
385 return inode->i_private == data;
386}
8bde1547 387
45bd5d34
CB
388static int shiftfs_inode_set(struct inode *inode, void *data)
389{
390 inode->i_private = data;
391 return 0;
8bde1547
JB
392}
393
45bd5d34
CB
394static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
395 umode_t mode, const char *symlink,
396 struct dentry *hardlink, bool excl)
8bde1547 397{
8bde1547 398 int err;
45bd5d34
CB
399 const struct cred *oldcred;
400 struct cred *newcred;
401 void *loweri_iop_ptr = NULL;
402 umode_t modei = mode;
403 struct super_block *dir_sb = diri->i_sb;
404 struct dentry *lowerd_new = dentry->d_fsdata;
405 struct inode *inode = NULL, *loweri_dir = diri->i_private;
406 const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
407 struct dentry *lowerd_link = NULL;
8bde1547
JB
408
409 if (hardlink) {
45bd5d34 410 loweri_iop_ptr = loweri_dir_iop->link;
8bde1547
JB
411 } else {
412 switch (mode & S_IFMT) {
413 case S_IFDIR:
45bd5d34 414 loweri_iop_ptr = loweri_dir_iop->mkdir;
8bde1547
JB
415 break;
416 case S_IFREG:
45bd5d34 417 loweri_iop_ptr = loweri_dir_iop->create;
8bde1547
JB
418 break;
419 case S_IFLNK:
45bd5d34
CB
420 loweri_iop_ptr = loweri_dir_iop->symlink;
421 break;
422 case S_IFSOCK:
423 /* fall through */
424 case S_IFIFO:
425 loweri_iop_ptr = loweri_dir_iop->mknod;
426 break;
8bde1547
JB
427 }
428 }
45bd5d34
CB
429 if (!loweri_iop_ptr) {
430 err = -EINVAL;
431 goto out_iput;
432 }
8bde1547 433
45bd5d34 434 inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
8bde1547 435
45bd5d34
CB
436 if (!hardlink) {
437 inode = new_inode(dir_sb);
438 if (!inode) {
439 err = -ENOMEM;
440 goto out_iput;
441 }
442
443 /*
444 * new_inode() will have added the new inode to the super
445 * block's list of inodes. Further below we will call
446 * inode_insert5() Which would perform the same operation again
447 * thereby corrupting the list. To avoid this raise I_CREATING
448 * in i_state which will cause inode_insert5() to skip this
449 * step. I_CREATING will be cleared by d_instantiate_new()
450 * below.
451 */
452 spin_lock(&inode->i_lock);
453 inode->i_state |= I_CREATING;
454 spin_unlock(&inode->i_lock);
8bde1547 455
45bd5d34
CB
456 inode_init_owner(inode, diri, mode);
457 modei = inode->i_mode;
458 }
8bde1547 459
45bd5d34
CB
460 err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
461 dentry, modei, hardlink != NULL);
462 if (err)
463 goto out_iput;
8bde1547 464
8bde1547 465 if (hardlink) {
45bd5d34
CB
466 lowerd_link = hardlink->d_fsdata;
467 err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
8bde1547 468 } else {
45bd5d34 469 switch (modei & S_IFMT) {
8bde1547 470 case S_IFDIR:
45bd5d34 471 err = vfs_mkdir(loweri_dir, lowerd_new, modei);
8bde1547
JB
472 break;
473 case S_IFREG:
45bd5d34 474 err = vfs_create(loweri_dir, lowerd_new, modei, excl);
8bde1547
JB
475 break;
476 case S_IFLNK:
45bd5d34
CB
477 err = vfs_symlink(loweri_dir, lowerd_new, symlink);
478 break;
479 case S_IFSOCK:
480 /* fall through */
481 case S_IFIFO:
482 err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
483 break;
484 default:
485 err = -EINVAL;
486 break;
8bde1547
JB
487 }
488 }
489
45bd5d34 490 shiftfs_revert_object_creds(oldcred, newcred);
8bde1547 491
45bd5d34
CB
492 if (!err && WARN_ON(!lowerd_new->d_inode))
493 err = -EIO;
8bde1547 494 if (err)
45bd5d34
CB
495 goto out_iput;
496
497 if (hardlink) {
498 inode = d_inode(hardlink);
499 ihold(inode);
500
501 /* copy up times from lower inode */
502 shiftfs_copyattr(d_inode(lowerd_link), inode);
503 set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
504 d_instantiate(dentry, inode);
505 } else {
506 struct inode *inode_tmp;
507 struct inode *loweri_new = d_inode(lowerd_new);
508
509 inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
510 shiftfs_inode_test, shiftfs_inode_set,
511 loweri_new);
512 if (unlikely(inode_tmp != inode)) {
513 pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
514 iput(inode_tmp);
515 err = -EINVAL;
516 goto out_iput;
517 }
8bde1547 518
45bd5d34
CB
519 ihold(loweri_new);
520 shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
521 0, lowerd_new);
522 d_instantiate_new(dentry, inode);
523 }
8bde1547 524
45bd5d34
CB
525 shiftfs_copyattr(loweri_dir, diri);
526 if (loweri_iop_ptr == loweri_dir_iop->mkdir)
527 set_nlink(diri, loweri_dir->i_nlink);
8bde1547 528
45bd5d34 529 inode = NULL;
8bde1547 530
45bd5d34
CB
531out_iput:
532 iput(inode);
533 inode_unlock(loweri_dir);
8bde1547
JB
534
535 return err;
536}
537
538static int shiftfs_create(struct inode *dir, struct dentry *dentry,
539 umode_t mode, bool excl)
540{
541 mode |= S_IFREG;
542
45bd5d34 543 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
8bde1547
JB
544}
545
546static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
547 umode_t mode)
548{
549 mode |= S_IFDIR;
550
45bd5d34 551 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
8bde1547
JB
552}
553
554static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
555 struct dentry *dentry)
556{
45bd5d34
CB
557 return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
558}
559
560static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
561 dev_t rdev)
562{
563 if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
564 return -EPERM;
565
566 return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
8bde1547
JB
567}
568
569static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
570 const char *symlink)
571{
45bd5d34 572 return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
8bde1547
JB
573}
574
575static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
576{
45bd5d34
CB
577 struct dentry *lowerd = dentry->d_fsdata;
578 struct inode *loweri = dir->i_private;
0161f2e5 579 struct inode *inode = d_inode(dentry);
8bde1547 580 int err;
45bd5d34 581 const struct cred *oldcred;
8bde1547 582
28017416 583 dget(lowerd);
45bd5d34
CB
584 oldcred = shiftfs_override_creds(dentry->d_sb);
585 inode_lock_nested(loweri, I_MUTEX_PARENT);
8bde1547 586 if (rmdir)
45bd5d34 587 err = vfs_rmdir(loweri, lowerd);
8bde1547 588 else
45bd5d34 589 err = vfs_unlink(loweri, lowerd, NULL);
45bd5d34 590 revert_creds(oldcred);
8bde1547 591
0161f2e5 592 if (!err) {
45bd5d34
CB
593 d_drop(dentry);
594
0161f2e5
CB
595 if (rmdir)
596 clear_nlink(inode);
597 else
598 drop_nlink(inode);
599 }
600 inode_unlock(loweri);
601
602 shiftfs_copyattr(loweri, dir);
28017416 603 dput(lowerd);
8bde1547
JB
604
605 return err;
606}
607
608static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
609{
610 return shiftfs_rm(dir, dentry, false);
611}
612
613static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
614{
615 return shiftfs_rm(dir, dentry, true);
616}
617
618static int shiftfs_rename(struct inode *olddir, struct dentry *old,
619 struct inode *newdir, struct dentry *new,
620 unsigned int flags)
621{
45bd5d34
CB
622 struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
623 *lowerd_dir_new = new->d_parent->d_fsdata,
624 *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
625 *trapd;
626 struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
627 *loweri_dir_new = lowerd_dir_new->d_inode;
8bde1547 628 int err = -EINVAL;
45bd5d34 629 const struct cred *oldcred;
8bde1547 630
45bd5d34 631 trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
8bde1547 632
45bd5d34 633 if (trapd == lowerd_old || trapd == lowerd_new)
8bde1547
JB
634 goto out_unlock;
635
45bd5d34
CB
636 oldcred = shiftfs_override_creds(old->d_sb);
637 err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
638 NULL, flags);
639 revert_creds(oldcred);
8bde1547 640
45bd5d34
CB
641 shiftfs_copyattr(loweri_dir_old, olddir);
642 shiftfs_copyattr(loweri_dir_new, newdir);
8bde1547 643
45bd5d34
CB
644out_unlock:
645 unlock_rename(lowerd_dir_new, lowerd_dir_old);
8bde1547
JB
646
647 return err;
648}
649
650static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
651 unsigned int flags)
652{
45bd5d34
CB
653 struct dentry *new;
654 struct inode *newi;
655 const struct cred *oldcred;
656 struct dentry *lowerd = dentry->d_parent->d_fsdata;
657 struct inode *inode = NULL, *loweri = lowerd->d_inode;
658
659 inode_lock(loweri);
660 oldcred = shiftfs_override_creds(dentry->d_sb);
661 new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
662 revert_creds(oldcred);
663 inode_unlock(loweri);
8bde1547
JB
664
665 if (IS_ERR(new))
666 return new;
667
668 dentry->d_fsdata = new;
669
45bd5d34
CB
670 newi = new->d_inode;
671 if (!newi)
8bde1547
JB
672 goto out;
673
45bd5d34
CB
674 inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
675 shiftfs_inode_test, shiftfs_inode_set, newi);
676 if (!inode) {
8bde1547
JB
677 dput(new);
678 return ERR_PTR(-ENOMEM);
679 }
45bd5d34
CB
680 if (inode->i_state & I_NEW) {
681 /*
682 * inode->i_private set by shiftfs_inode_set(), but we still
683 * need to take a reference
684 */
685 ihold(newi);
686 shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
687 unlock_new_inode(inode);
688 }
8bde1547 689
45bd5d34
CB
690out:
691 return d_splice_alias(inode, dentry);
8bde1547
JB
692}
693
694static int shiftfs_permission(struct inode *inode, int mask)
695{
8bde1547 696 int err;
45bd5d34
CB
697 const struct cred *oldcred;
698 struct inode *loweri = inode->i_private;
8bde1547 699
45bd5d34
CB
700 if (!loweri) {
701 WARN_ON(!(mask & MAY_NOT_BLOCK));
8bde1547 702 return -ECHILD;
45bd5d34 703 }
8bde1547 704
45bd5d34
CB
705 err = generic_permission(inode, mask);
706 if (err)
707 return err;
708
709 oldcred = shiftfs_override_creds(inode->i_sb);
710 err = inode_permission(loweri, mask);
711 revert_creds(oldcred);
712
713 return err;
714}
715
716static int shiftfs_fiemap(struct inode *inode,
717 struct fiemap_extent_info *fieinfo, u64 start,
718 u64 len)
719{
720 int err;
721 const struct cred *oldcred;
722 struct inode *loweri = inode->i_private;
723
724 if (!loweri->i_op->fiemap)
725 return -EOPNOTSUPP;
726
727 oldcred = shiftfs_override_creds(inode->i_sb);
728 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
729 filemap_write_and_wait(loweri->i_mapping);
730 err = loweri->i_op->fiemap(loweri, fieinfo, start, len);
731 revert_creds(oldcred);
732
733 return err;
734}
735
736static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
737 umode_t mode)
738{
739 int err;
740 const struct cred *oldcred;
741 struct dentry *lowerd = dentry->d_fsdata;
742 struct inode *loweri = dir->i_private;
743
744 if (!loweri->i_op->tmpfile)
745 return -EOPNOTSUPP;
746
747 oldcred = shiftfs_override_creds(dir->i_sb);
748 err = loweri->i_op->tmpfile(loweri, lowerd, mode);
749 revert_creds(oldcred);
8bde1547
JB
750
751 return err;
752}
753
754static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
755{
45bd5d34
CB
756 struct dentry *lowerd = dentry->d_fsdata;
757 struct inode *loweri = lowerd->d_inode;
8feb2474 758 struct iattr newattr;
45bd5d34 759 const struct cred *oldcred;
8bde1547 760 struct super_block *sb = dentry->d_sb;
70aba758 761 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
8bde1547
JB
762 int err;
763
45bd5d34
CB
764 err = setattr_prepare(dentry, attr);
765 if (err)
766 return err;
767
8feb2474 768 newattr = *attr;
70aba758
SF
769 newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
770 newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
8bde1547 771
8feb2474
SF
772 /*
773 * mode change is for clearing setuid/setgid bits. Allow lower fs
774 * to interpret this in its own way.
775 */
776 if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
777 newattr.ia_valid &= ~ATTR_MODE;
778
45bd5d34
CB
779 inode_lock(loweri);
780 oldcred = shiftfs_override_creds(dentry->d_sb);
ce4b8676 781 err = notify_change(lowerd, &newattr, NULL);
45bd5d34
CB
782 revert_creds(oldcred);
783 inode_unlock(loweri);
8bde1547 784
45bd5d34 785 shiftfs_copyattr(loweri, d_inode(dentry));
8bde1547 786
45bd5d34 787 return err;
8bde1547
JB
788}
789
790static int shiftfs_getattr(const struct path *path, struct kstat *stat,
791 u32 request_mask, unsigned int query_flags)
792{
793 struct inode *inode = path->dentry->d_inode;
45bd5d34
CB
794 struct dentry *lowerd = path->dentry->d_fsdata;
795 struct inode *loweri = lowerd->d_inode;
796 struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
797 struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
798 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
799 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
800 const struct cred *oldcred;
801 int err;
802
803 oldcred = shiftfs_override_creds(inode->i_sb);
804 err = vfs_getattr(&newpath, stat, request_mask, query_flags);
805 revert_creds(oldcred);
8bde1547
JB
806
807 if (err)
808 return err;
809
810 /* transform the underlying id */
45bd5d34
CB
811 stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
812 stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
8bde1547
JB
813 return 0;
814}
815
45bd5d34 816#ifdef CONFIG_SHIFT_FS_POSIX_ACL
8bde1547 817
45bd5d34
CB
818static int
819shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
820 struct posix_acl *acl)
8bde1547 821{
45bd5d34
CB
822 int i;
823
824 for (i = 0; i < acl->a_count; i++) {
825 struct posix_acl_entry *e = &acl->a_entries[i];
826 switch(e->e_tag) {
827 case ACL_USER:
828 e->e_uid = shift_kuid(from, to, e->e_uid);
829 if (!uid_valid(e->e_uid))
830 return -EOVERFLOW;
831 break;
832 case ACL_GROUP:
833 e->e_gid = shift_kgid(from, to, e->e_gid);
834 if (!gid_valid(e->e_gid))
835 return -EOVERFLOW;
836 break;
837 }
838 }
839 return 0;
840}
8bde1547 841
45bd5d34
CB
842static void
843shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
844 void *value, size_t size)
845{
846 struct posix_acl_xattr_header *header = value;
847 struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
848 int count;
849 kuid_t kuid;
850 kgid_t kgid;
8bde1547 851
45bd5d34
CB
852 if (!value)
853 return;
854 if (size < sizeof(struct posix_acl_xattr_header))
855 return;
856 if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
857 return;
8bde1547 858
45bd5d34
CB
859 count = posix_acl_xattr_count(size);
860 if (count < 0)
861 return;
862 if (count == 0)
863 return;
8bde1547 864
45bd5d34
CB
865 for (end = entry + count; entry != end; entry++) {
866 switch(le16_to_cpu(entry->e_tag)) {
867 case ACL_USER:
868 kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
869 kuid = shift_kuid(from, to, kuid);
870 entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
871 break;
872 case ACL_GROUP:
873 kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
874 kgid = shift_kgid(from, to, kgid);
875 entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
876 break;
877 default:
878 break;
879 }
880 }
8bde1547
JB
881}
882
45bd5d34 883static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
8bde1547 884{
45bd5d34
CB
885 struct inode *loweri = inode->i_private;
886 const struct cred *oldcred;
887 struct posix_acl *lower_acl, *acl = NULL;
888 struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
889 struct user_namespace *to_ns = inode->i_sb->s_user_ns;
890 int size;
891 int err;
8bde1547 892
45bd5d34
CB
893 if (!IS_POSIXACL(loweri))
894 return NULL;
8bde1547 895
45bd5d34
CB
896 oldcred = shiftfs_override_creds(inode->i_sb);
897 lower_acl = get_acl(loweri, type);
898 revert_creds(oldcred);
8bde1547 899
45bd5d34
CB
900 if (lower_acl && !IS_ERR(lower_acl)) {
901 /* XXX: export posix_acl_clone? */
902 size = sizeof(struct posix_acl) +
903 lower_acl->a_count * sizeof(struct posix_acl_entry);
904 acl = kmemdup(lower_acl, size, GFP_KERNEL);
905 posix_acl_release(lower_acl);
8bde1547 906
45bd5d34
CB
907 if (!acl)
908 return ERR_PTR(-ENOMEM);
8bde1547 909
45bd5d34 910 refcount_set(&acl->a_refcount, 1);
8bde1547 911
45bd5d34
CB
912 err = shift_acl_ids(from_ns, to_ns, acl);
913 if (err) {
914 kfree(acl);
915 return ERR_PTR(err);
916 }
917 }
918
919 return acl;
8bde1547
JB
920}
921
45bd5d34
CB
922static int
923shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
924 struct dentry *dentry, struct inode *inode,
925 const char *name, void *buffer, size_t size)
8bde1547 926{
45bd5d34
CB
927 struct inode *loweri = inode->i_private;
928 int ret;
929
930 ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
931 buffer, size);
932 if (ret < 0)
933 return ret;
8bde1547 934
45bd5d34
CB
935 inode_lock(loweri);
936 shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
937 buffer, size);
938 inode_unlock(loweri);
939 return ret;
8bde1547
JB
940}
941
45bd5d34
CB
942static int
943shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
944 struct dentry *dentry, struct inode *inode,
945 const char *name, const void *value,
946 size_t size, int flags)
947{
948 struct inode *loweri = inode->i_private;
949 int err;
8bde1547 950
45bd5d34
CB
951 if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
952 return -EOPNOTSUPP;
953 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
954 return value ? -EACCES : 0;
955 if (!inode_owner_or_capable(inode))
956 return -EPERM;
957
958 if (value) {
959 shift_acl_xattr_ids(inode->i_sb->s_user_ns,
960 loweri->i_sb->s_user_ns,
961 (void *)value, size);
962 err = shiftfs_setxattr(dentry, inode, handler->name, value,
963 size, flags);
964 } else {
965 err = shiftfs_removexattr(dentry, handler->name);
966 }
8bde1547 967
45bd5d34
CB
968 if (!err)
969 shiftfs_copyattr(loweri, inode);
970
971 return err;
972}
973
974static const struct xattr_handler
975shiftfs_posix_acl_access_xattr_handler = {
976 .name = XATTR_NAME_POSIX_ACL_ACCESS,
977 .flags = ACL_TYPE_ACCESS,
978 .get = shiftfs_posix_acl_xattr_get,
979 .set = shiftfs_posix_acl_xattr_set,
8bde1547
JB
980};
981
45bd5d34
CB
982static const struct xattr_handler
983shiftfs_posix_acl_default_xattr_handler = {
984 .name = XATTR_NAME_POSIX_ACL_DEFAULT,
985 .flags = ACL_TYPE_DEFAULT,
986 .get = shiftfs_posix_acl_xattr_get,
987 .set = shiftfs_posix_acl_xattr_set,
8bde1547
JB
988};
989
45bd5d34 990#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
8bde1547 991
45bd5d34 992#define shiftfs_get_acl NULL
8bde1547 993
45bd5d34 994#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
8bde1547 995
45bd5d34
CB
996static const struct inode_operations shiftfs_dir_inode_operations = {
997 .lookup = shiftfs_lookup,
998 .mkdir = shiftfs_mkdir,
999 .symlink = shiftfs_symlink,
1000 .unlink = shiftfs_unlink,
1001 .rmdir = shiftfs_rmdir,
1002 .rename = shiftfs_rename,
1003 .link = shiftfs_link,
1004 .setattr = shiftfs_setattr,
1005 .create = shiftfs_create,
1006 .mknod = shiftfs_mknod,
1007 .permission = shiftfs_permission,
1008 .getattr = shiftfs_getattr,
1009 .listxattr = shiftfs_listxattr,
1010 .get_acl = shiftfs_get_acl,
1011};
1012
1013static const struct inode_operations shiftfs_file_inode_operations = {
1014 .fiemap = shiftfs_fiemap,
1015 .getattr = shiftfs_getattr,
1016 .get_acl = shiftfs_get_acl,
1017 .listxattr = shiftfs_listxattr,
1018 .permission = shiftfs_permission,
1019 .setattr = shiftfs_setattr,
1020 .tmpfile = shiftfs_tmpfile,
1021};
1022
1023static const struct inode_operations shiftfs_special_inode_operations = {
1024 .getattr = shiftfs_getattr,
1025 .get_acl = shiftfs_get_acl,
1026 .listxattr = shiftfs_listxattr,
1027 .permission = shiftfs_permission,
1028 .setattr = shiftfs_setattr,
1029};
1030
1031static const struct inode_operations shiftfs_symlink_inode_operations = {
1032 .getattr = shiftfs_getattr,
1033 .get_link = shiftfs_get_link,
1034 .listxattr = shiftfs_listxattr,
1035 .setattr = shiftfs_setattr,
1036};
1037
1038static struct file *shiftfs_open_realfile(const struct file *file,
159ec080 1039 struct inode *realinode)
45bd5d34 1040{
159ec080
CB
1041 struct file *realfile;
1042 const struct cred *old_cred;
45bd5d34 1043 struct inode *inode = file_inode(file);
159ec080 1044 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
45bd5d34 1045 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
159ec080 1046 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
45bd5d34 1047
159ec080
CB
1048 old_cred = shiftfs_override_creds(inode->i_sb);
1049 realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1050 info->creator_cred);
1051 revert_creds(old_cred);
45bd5d34 1052
159ec080 1053 return realfile;
45bd5d34
CB
1054}
1055
/* The only open flags shiftfs_change_flags() may alter on an open file. */
#define SHIFTFS_SETFL_MASK (O_APPEND | O_DIRECT | O_NDELAY | O_NONBLOCK)
1057
1058static int shiftfs_change_flags(struct file *file, unsigned int flags)
1059{
1060 struct inode *inode = file_inode(file);
1061 int err;
1062
1063 /* if some flag changed that cannot be changed then something's amiss */
1064 if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1065 return -EIO;
1066
1067 flags &= SHIFTFS_SETFL_MASK;
1068
1069 if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1070 return -EPERM;
1071
1072 if (flags & O_DIRECT) {
1073 if (!file->f_mapping->a_ops ||
1074 !file->f_mapping->a_ops->direct_IO)
1075 return -EINVAL;
1076 }
1077
1078 if (file->f_op->check_flags) {
1079 err = file->f_op->check_flags(flags);
1080 if (err)
1081 return err;
1082 }
1083
1084 spin_lock(&file->f_lock);
1085 file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1086 spin_unlock(&file->f_lock);
1087
1088 return 0;
1089}
1090
45bd5d34
CB
1091static int shiftfs_open(struct inode *inode, struct file *file)
1092{
45bd5d34 1093 struct file *realfile;
45bd5d34 1094
159ec080
CB
1095 realfile = shiftfs_open_realfile(file, inode->i_private);
1096 if (IS_ERR(realfile))
45bd5d34 1097 return PTR_ERR(realfile);
45bd5d34 1098
159ec080 1099 file->private_data = realfile;
2813574e
CB
1100 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1101 file->f_mapping = realfile->f_mapping;
1102
45bd5d34
CB
1103 return 0;
1104}
1105
159ec080 1106static int shiftfs_dir_open(struct inode *inode, struct file *file)
45bd5d34 1107{
159ec080
CB
1108 struct file *realfile;
1109 const struct cred *oldcred;
1110 struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1111 struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1112 struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1113
1114 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1115 realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1116 info->creator_cred);
1117 revert_creds(oldcred);
1118 if (IS_ERR(realfile))
1119 return PTR_ERR(realfile);
45bd5d34 1120
159ec080 1121 file->private_data = realfile;
45bd5d34 1122
159ec080
CB
1123 return 0;
1124}
1125
1126static int shiftfs_release(struct inode *inode, struct file *file)
1127{
1128 struct file *realfile = file->private_data;
1129
1130 if (realfile)
1131 fput(realfile);
45bd5d34
CB
1132
1133 return 0;
1134}
1135
159ec080
CB
/* ->release for directories: identical to the regular-file release path. */
static int shiftfs_dir_release(struct inode *inode, struct file *file)
{
	int ret = shiftfs_release(inode, file);

	return ret;
}
1140
f3365a72
CB
1141static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
1142{
159ec080 1143 struct file *realfile = file->private_data;
f3365a72
CB
1144
1145 return vfs_llseek(realfile, offset, whence);
1146}
1147
1148static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
45bd5d34
CB
1149{
1150 struct inode *realinode = file_inode(file)->i_private;
1151
1152 return generic_file_llseek_size(file, offset, whence,
1153 realinode->i_sb->s_maxbytes,
1154 i_size_read(realinode));
1155}
1156
/* XXX: Need to figure out what to do about atime updates, maybe other
 * timestamps too ... ref. ovl_file_accessed() */
1159
1160static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1161{
1162 int ifl = iocb->ki_flags;
1163 rwf_t flags = 0;
1164
1165 if (ifl & IOCB_NOWAIT)
1166 flags |= RWF_NOWAIT;
1167 if (ifl & IOCB_HIPRI)
1168 flags |= RWF_HIPRI;
1169 if (ifl & IOCB_DSYNC)
1170 flags |= RWF_DSYNC;
1171 if (ifl & IOCB_SYNC)
1172 flags |= RWF_SYNC;
1173
1174 return flags;
1175}
1176
294b3b78
CB
1177static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1178{
1179 struct file *realfile;
1180
1181 if (file->f_op->open != shiftfs_open &&
1182 file->f_op->open != shiftfs_dir_open)
1183 return -EINVAL;
1184
1185 realfile = file->private_data;
1186 lowerfd->flags = 0;
1187 lowerfd->file = realfile;
1188
1189 /* Did the flags change since open? */
1190 if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1191 return shiftfs_change_flags(lowerfd->file, file->f_flags);
1192
1193 return 0;
1194}
1195
45bd5d34
CB
1196static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1197{
1198 struct file *file = iocb->ki_filp;
1199 struct fd lowerfd;
1200 const struct cred *oldcred;
1201 ssize_t ret;
1202
1203 if (!iov_iter_count(iter))
1204 return 0;
1205
1206 ret = shiftfs_real_fdget(file, &lowerfd);
1207 if (ret)
1208 return ret;
1209
1210 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1211 ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1212 shiftfs_iocb_to_rwf(iocb));
1213 revert_creds(oldcred);
1214
1215 shiftfs_file_accessed(file);
1216
1217 fdput(lowerfd);
1218 return ret;
1219}
1220
1221static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1222{
1223 struct file *file = iocb->ki_filp;
1224 struct inode *inode = file_inode(file);
1225 struct fd lowerfd;
1226 const struct cred *oldcred;
1227 ssize_t ret;
1228
1229 if (!iov_iter_count(iter))
1230 return 0;
1231
1232 inode_lock(inode);
1233 /* Update mode */
1234 shiftfs_copyattr(inode->i_private, inode);
1235 ret = file_remove_privs(file);
1236 if (ret)
1237 goto out_unlock;
1238
1239 ret = shiftfs_real_fdget(file, &lowerfd);
1240 if (ret)
1241 goto out_unlock;
1242
1243 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1244 file_start_write(lowerfd.file);
1245 ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1246 shiftfs_iocb_to_rwf(iocb));
1247 file_end_write(lowerfd.file);
1248 revert_creds(oldcred);
1249
1250 /* Update size */
1251 shiftfs_copyattr(inode->i_private, inode);
1252
1253 fdput(lowerfd);
1254
1255out_unlock:
1256 inode_unlock(inode);
1257 return ret;
1258}
1259
1260static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
1261 int datasync)
1262{
1263 struct fd lowerfd;
1264 const struct cred *oldcred;
1265 int ret;
1266
1267 ret = shiftfs_real_fdget(file, &lowerfd);
1268 if (ret)
1269 return ret;
1270
1271 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1272 ret = vfs_fsync_range(lowerfd.file, start, end, datasync);
1273 revert_creds(oldcred);
1274
1275 fdput(lowerfd);
1276 return ret;
1277}
1278
1279static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1280{
159ec080 1281 struct file *realfile = file->private_data;
45bd5d34
CB
1282 const struct cred *oldcred;
1283 int ret;
1284
1285 if (!realfile->f_op->mmap)
1286 return -ENODEV;
1287
1288 if (WARN_ON(file != vma->vm_file))
1289 return -EIO;
1290
1291 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1292 vma->vm_file = get_file(realfile);
1293 ret = call_mmap(vma->vm_file, vma);
1294 revert_creds(oldcred);
1295
1296 shiftfs_file_accessed(file);
1297
4341d573
SF
1298 if (ret) {
1299 /*
1300 * Drop refcount from new vm_file value and restore original
1301 * vm_file value
1302 */
1303 vma->vm_file = file;
1304 fput(realfile);
1305 } else {
1306 /* Drop refcount from previous vm_file value */
1307 fput(file);
1308 }
45bd5d34
CB
1309
1310 return ret;
1311}
1312
1313static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
1314 loff_t len)
1315{
1316 struct inode *inode = file_inode(file);
1317 struct inode *loweri = inode->i_private;
1318 struct fd lowerfd;
1319 const struct cred *oldcred;
1320 int ret;
1321
1322 ret = shiftfs_real_fdget(file, &lowerfd);
1323 if (ret)
1324 return ret;
1325
1326 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1327 ret = vfs_fallocate(lowerfd.file, mode, offset, len);
1328 revert_creds(oldcred);
1329
1330 /* Update size */
1331 shiftfs_copyattr(loweri, inode);
1332
1333 fdput(lowerfd);
1334 return ret;
1335}
1336
1337static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
1338 int advice)
1339{
1340 struct fd lowerfd;
1341 const struct cred *oldcred;
1342 int ret;
1343
1344 ret = shiftfs_real_fdget(file, &lowerfd);
1345 if (ret)
1346 return ret;
1347
1348 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1349 ret = vfs_fadvise(lowerfd.file, offset, len, advice);
1350 revert_creds(oldcred);
1351
1352 fdput(lowerfd);
1353 return ret;
1354}
1355
f91b73d4 1356static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
45bd5d34
CB
1357 const struct cred **oldcred,
1358 struct cred **newcred)
1359{
70aba758 1360 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
45bd5d34
CB
1361 kuid_t fsuid = current_fsuid();
1362 kgid_t fsgid = current_fsgid();
1363
1364 *oldcred = shiftfs_override_creds(sb);
1365
1366 *newcred = prepare_creds();
1367 if (!*newcred) {
1368 revert_creds(*oldcred);
1369 return -ENOMEM;
1370 }
1371
70aba758
SF
1372 (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1373 (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
45bd5d34
CB
1374
1375 /* clear all caps to prevent bypassing capable() checks */
1376 cap_clear((*newcred)->cap_bset);
1377 cap_clear((*newcred)->cap_effective);
1378 cap_clear((*newcred)->cap_inheritable);
1379 cap_clear((*newcred)->cap_permitted);
1380
f91b73d4
CB
1381 if (cmd == BTRFS_IOC_SNAP_DESTROY) {
1382 kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
1383 /*
1384 * Allow the root user in the container to remove subvolumes
1385 * from other users.
1386 */
1387 if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
1388 cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
1389 }
1390
45bd5d34
CB
1391 put_cred(override_creds(*newcred));
1392 return 0;
1393}
1394
/*
 * Undo shiftfs_override_ioctl_creds(); simply defers to
 * shiftfs_revert_object_creds().
 */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1400
1c1ca807
CB
1401static inline bool is_btrfs_snap_ioctl(int cmd)
1402{
1403 if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1404 return true;
1405
1406 return false;
1407}
1408
f7e373a5 1409static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
1c1ca807
CB
1410 struct btrfs_ioctl_vol_args *v1,
1411 struct btrfs_ioctl_vol_args_v2 *v2)
1412{
1413 int ret;
1414
1415 if (!is_btrfs_snap_ioctl(cmd))
1416 return 0;
1417
1418 if (cmd == BTRFS_IOC_SNAP_CREATE)
1419 ret = copy_to_user(arg, v1, sizeof(*v1));
1420 else
1421 ret = copy_to_user(arg, v2, sizeof(*v2));
1422
c6843acc 1423 close_fd(fd);
1c1ca807
CB
1424 kfree(v1);
1425 kfree(v2);
1426
e11c9078 1427 return ret ? -EFAULT: 0;
1c1ca807
CB
1428}
1429
1430static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
1431 struct btrfs_ioctl_vol_args **b1,
1432 struct btrfs_ioctl_vol_args_v2 **b2,
1c1ca807
CB
1433 int *newfd)
1434{
1435 int oldfd, ret;
1436 struct fd src;
f7e373a5 1437 struct fd lfd = {};
1c1ca807
CB
1438 struct btrfs_ioctl_vol_args *v1 = NULL;
1439 struct btrfs_ioctl_vol_args_v2 *v2 = NULL;
1440
eb00293d
SF
1441 *b1 = NULL;
1442 *b2 = NULL;
1443
1c1ca807
CB
1444 if (!is_btrfs_snap_ioctl(cmd))
1445 return 0;
1446
1447 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1448 v1 = memdup_user(arg, sizeof(*v1));
1449 if (IS_ERR(v1))
1450 return PTR_ERR(v1);
1451 oldfd = v1->fd;
1c1ca807
CB
1452 } else {
1453 v2 = memdup_user(arg, sizeof(*v2));
1454 if (IS_ERR(v2))
1455 return PTR_ERR(v2);
1456 oldfd = v2->fd;
1c1ca807
CB
1457 }
1458
1459 src = fdget(oldfd);
eb00293d
SF
1460 if (!src.file) {
1461 ret = -EINVAL;
1462 goto err_free;
1463 }
1c1ca807 1464
f7e373a5
SF
1465 ret = shiftfs_real_fdget(src.file, &lfd);
1466 if (ret) {
1467 fdput(src);
eb00293d 1468 goto err_free;
f7e373a5
SF
1469 }
1470
1471 /*
1472 * shiftfs_real_fdget() does not take a reference to lfd.file, so
1473 * take a reference here to offset the one which will be put by
c6843acc 1474 * close_fd(), and make sure that reference is put on fdput(lfd).
f7e373a5
SF
1475 */
1476 get_file(lfd.file);
1477 lfd.flags |= FDPUT_FPUT;
1478 fdput(src);
1c1ca807 1479
f7e373a5 1480 *newfd = get_unused_fd_flags(lfd.file->f_flags);
1c1ca807 1481 if (*newfd < 0) {
f7e373a5 1482 fdput(lfd);
eb00293d
SF
1483 ret = *newfd;
1484 goto err_free;
1c1ca807
CB
1485 }
1486
f7e373a5 1487 fd_install(*newfd, lfd.file);
1c1ca807
CB
1488
1489 if (cmd == BTRFS_IOC_SNAP_CREATE) {
1490 v1->fd = *newfd;
1491 ret = copy_to_user(arg, v1, sizeof(*v1));
1492 v1->fd = oldfd;
1493 } else {
1494 v2->fd = *newfd;
1495 ret = copy_to_user(arg, v2, sizeof(*v2));
1496 v2->fd = oldfd;
1497 }
1498
eb00293d
SF
1499 if (!ret) {
1500 *b1 = v1;
1501 *b2 = v2;
1502 } else {
f7e373a5 1503 shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
e11c9078 1504 ret = -EFAULT;
eb00293d
SF
1505 }
1506
1507 return ret;
1508
1509err_free:
1510 kfree(v1);
1511 kfree(v2);
1c1ca807
CB
1512
1513 return ret;
1514}
1515
45bd5d34
CB
1516static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1517 unsigned long arg)
1518{
45bd5d34
CB
1519 struct fd lowerfd;
1520 struct cred *newcred;
1521 const struct cred *oldcred;
1c1ca807
CB
1522 int newfd = -EBADF;
1523 long err = 0, ret = 0;
1524 void __user *argp = (void __user *)arg;
45bd5d34 1525 struct super_block *sb = file->f_path.dentry->d_sb;
1c1ca807
CB
1526 struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1527 struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1528
1529 ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
f7e373a5 1530 &newfd);
1c1ca807
CB
1531 if (ret < 0)
1532 return ret;
45bd5d34
CB
1533
1534 ret = shiftfs_real_fdget(file, &lowerfd);
1535 if (ret)
1c1ca807 1536 goto out_restore;
45bd5d34 1537
f91b73d4 1538 ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
45bd5d34
CB
1539 if (ret)
1540 goto out_fdput;
1541
1542 ret = vfs_ioctl(lowerfd.file, cmd, arg);
1543
1544 shiftfs_revert_ioctl_creds(oldcred, newcred);
1545
1546 shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1547 shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1548
1549out_fdput:
1550 fdput(lowerfd);
1551
1c1ca807 1552out_restore:
f7e373a5 1553 err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
1c1ca807
CB
1554 btrfs_v1, btrfs_v2);
1555 if (!ret)
1556 ret = err;
1557
45bd5d34
CB
1558 return ret;
1559}
1560
d1aed12c 1561static bool in_ioctl_whitelist(int flag, unsigned long arg)
1c1ca807 1562{
d1aed12c
CB
1563 void __user *argp = (void __user *)arg;
1564 u64 flags = 0;
1565
1c1ca807 1566 switch (flag) {
d1aed12c
CB
1567 case BTRFS_IOC_FS_INFO:
1568 return true;
1c1ca807
CB
1569 case BTRFS_IOC_SNAP_CREATE:
1570 return true;
1571 case BTRFS_IOC_SNAP_CREATE_V2:
1572 return true;
1573 case BTRFS_IOC_SUBVOL_CREATE:
1574 return true;
1575 case BTRFS_IOC_SUBVOL_CREATE_V2:
d1aed12c
CB
1576 return true;
1577 case BTRFS_IOC_SUBVOL_GETFLAGS:
1578 return true;
1579 case BTRFS_IOC_SUBVOL_SETFLAGS:
2af2b593 1580 if (copy_from_user(&flags, argp, sizeof(flags)))
d1aed12c
CB
1581 return false;
1582
1583 if (flags & ~BTRFS_SUBVOL_RDONLY)
1584 return false;
1585
1c1ca807
CB
1586 return true;
1587 case BTRFS_IOC_SNAP_DESTROY:
1588 return true;
1589 }
1590
1591 return false;
1592}
1593
45bd5d34
CB
1594static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1595 unsigned long arg)
1596{
1597 switch (cmd) {
1598 case FS_IOC_GETVERSION:
1599 /* fall through */
1600 case FS_IOC_GETFLAGS:
1601 /* fall through */
1602 case FS_IOC_SETFLAGS:
1603 break;
1604 default:
d1aed12c 1605 if (!in_ioctl_whitelist(cmd, arg) ||
1c1ca807
CB
1606 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1607 return -ENOTTY;
45bd5d34
CB
1608 }
1609
1610 return shiftfs_real_ioctl(file, cmd, arg);
1611}
1612
1613static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1614 unsigned long arg)
1615{
1616 switch (cmd) {
1617 case FS_IOC32_GETVERSION:
1618 /* fall through */
1619 case FS_IOC32_GETFLAGS:
1620 /* fall through */
1621 case FS_IOC32_SETFLAGS:
1622 break;
1623 default:
d1aed12c 1624 if (!in_ioctl_whitelist(cmd, arg) ||
1c1ca807
CB
1625 !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1626 return -ENOIOCTLCMD;
45bd5d34
CB
1627 }
1628
1629 return shiftfs_real_ioctl(file, cmd, arg);
1630}
1631
/* Selects which VFS range operation shiftfs_copyfile() performs. */
enum shiftfs_copyop {
	SHIFTFS_COPY,	/* vfs_copy_file_range() */
	SHIFTFS_CLONE,	/* vfs_clone_file_range() */
	SHIFTFS_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
1637
1638static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
1639 struct file *file_out, loff_t pos_out, u64 len,
1640 unsigned int flags, enum shiftfs_copyop op)
1641{
1642 ssize_t ret;
1643 struct fd real_in, real_out;
1644 const struct cred *oldcred;
1645 struct inode *inode_out = file_inode(file_out);
1646 struct inode *loweri = inode_out->i_private;
1647
1648 ret = shiftfs_real_fdget(file_out, &real_out);
1649 if (ret)
1650 return ret;
1651
1652 ret = shiftfs_real_fdget(file_in, &real_in);
1653 if (ret) {
1654 fdput(real_out);
1655 return ret;
1656 }
1657
1658 oldcred = shiftfs_override_creds(inode_out->i_sb);
1659 switch (op) {
1660 case SHIFTFS_COPY:
1661 ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file,
1662 pos_out, len, flags);
1663 break;
1664
1665 case SHIFTFS_CLONE:
1666 ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file,
1667 pos_out, len, flags);
1668 break;
1669
1670 case SHIFTFS_DEDUPE:
1671 ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
1672 real_out.file, pos_out, len,
1673 flags);
1674 break;
1675 }
1676 revert_creds(oldcred);
1677
1678 /* Update size */
1679 shiftfs_copyattr(loweri, inode_out);
1680
1681 fdput(real_in);
1682 fdput(real_out);
1683
1684 return ret;
1685}
1686
1687static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1688 struct file *file_out, loff_t pos_out,
1689 size_t len, unsigned int flags)
1690{
1691 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1692 SHIFTFS_COPY);
1693}
1694
1695static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
1696 struct file *file_out, loff_t pos_out,
1697 loff_t len, unsigned int remap_flags)
1698{
1699 enum shiftfs_copyop op;
1700
1701 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1702 return -EINVAL;
1703
1704 if (remap_flags & REMAP_FILE_DEDUP)
1705 op = SHIFTFS_DEDUPE;
1706 else
1707 op = SHIFTFS_CLONE;
1708
1709 return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
1710 remap_flags, op);
1711}
1712
1713static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1714{
1715 const struct cred *oldcred;
1716 int err = -ENOTDIR;
159ec080 1717 struct file *realfile = file->private_data;
45bd5d34
CB
1718
1719 oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1720 err = iterate_dir(realfile, ctx);
1721 revert_creds(oldcred);
1722
1723 return err;
1724}
1725
1726const struct file_operations shiftfs_file_operations = {
1727 .open = shiftfs_open,
1728 .release = shiftfs_release,
f3365a72 1729 .llseek = shiftfs_file_llseek,
45bd5d34
CB
1730 .read_iter = shiftfs_read_iter,
1731 .write_iter = shiftfs_write_iter,
1732 .fsync = shiftfs_fsync,
1733 .mmap = shiftfs_mmap,
1734 .fallocate = shiftfs_fallocate,
1735 .fadvise = shiftfs_fadvise,
1736 .unlocked_ioctl = shiftfs_ioctl,
1737 .compat_ioctl = shiftfs_compat_ioctl,
1738 .copy_file_range = shiftfs_copy_file_range,
1739 .remap_file_range = shiftfs_remap_file_range,
1740};
1741
1742const struct file_operations shiftfs_dir_operations = {
159ec080
CB
1743 .open = shiftfs_dir_open,
1744 .release = shiftfs_dir_release,
45bd5d34
CB
1745 .compat_ioctl = shiftfs_compat_ioctl,
1746 .fsync = shiftfs_fsync,
1747 .iterate_shared = shiftfs_iterate_shared,
f3365a72 1748 .llseek = shiftfs_dir_llseek,
45bd5d34 1749 .read = generic_read_dir,
45bd5d34
CB
1750 .unlocked_ioctl = shiftfs_ioctl,
1751};
1752
1753static const struct address_space_operations shiftfs_aops = {
1754 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1755 .direct_IO = noop_direct_IO,
1756};
1757
1758static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1759 umode_t mode, dev_t dev, struct dentry *dentry)
1760{
1761 struct inode *loweri;
1762
1763 inode->i_ino = ino;
1764 inode->i_flags |= S_NOCMTIME;
1765
1766 mode &= S_IFMT;
1767 inode->i_mode = mode;
1768 switch (mode & S_IFMT) {
1769 case S_IFDIR:
1770 inode->i_op = &shiftfs_dir_inode_operations;
1771 inode->i_fop = &shiftfs_dir_operations;
1772 break;
1773 case S_IFLNK:
1774 inode->i_op = &shiftfs_symlink_inode_operations;
1775 break;
1776 case S_IFREG:
1777 inode->i_op = &shiftfs_file_inode_operations;
1778 inode->i_fop = &shiftfs_file_operations;
1779 inode->i_mapping->a_ops = &shiftfs_aops;
1780 break;
1781 default:
1782 inode->i_op = &shiftfs_special_inode_operations;
1783 init_special_inode(inode, mode, dev);
1784 break;
1785 }
1786
1787 if (!dentry)
1788 return;
1789
1790 loweri = dentry->d_inode;
1791 if (!loweri->i_op->get_link)
1792 inode->i_opflags |= IOP_NOFOLLOW;
1793
1794 shiftfs_copyattr(loweri, inode);
1795 shiftfs_copyflags(loweri, inode);
1796 set_nlink(inode, loweri->i_nlink);
1797}
1798
1799static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1800{
1801 struct super_block *sb = dentry->d_sb;
1802 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1803
1804 if (sbinfo->mark)
1805 seq_show_option(m, "mark", NULL);
1806
1807 if (sbinfo->passthrough)
1808 seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1809
1810 return 0;
1811}
1812
1813static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1814{
1815 struct super_block *sb = dentry->d_sb;
1816 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1817 struct dentry *root = sb->s_root;
1818 struct dentry *realroot = root->d_fsdata;
1819 struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1820 int err;
1821
1822 err = vfs_statfs(&realpath, buf);
8bde1547 1823 if (err)
45bd5d34 1824 return err;
8bde1547 1825
45bd5d34
CB
1826 if (!shiftfs_passthrough_statfs(sbinfo))
1827 buf->f_type = sb->s_magic;
8bde1547 1828
45bd5d34
CB
1829 return 0;
1830}
8bde1547 1831
45bd5d34
CB
1832static void shiftfs_evict_inode(struct inode *inode)
1833{
1834 struct inode *loweri = inode->i_private;
1835
1836 clear_inode(inode);
1837
1838 if (loweri)
1839 iput(loweri);
1840}
1841
1842static void shiftfs_put_super(struct super_block *sb)
1843{
1844 struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1845
1846 if (sbinfo) {
1847 mntput(sbinfo->mnt);
1848 put_cred(sbinfo->creator_cred);
1849 kfree(sbinfo);
1850 }
1851}
1852
1853static const struct xattr_handler shiftfs_xattr_handler = {
1854 .prefix = "",
1855 .get = shiftfs_xattr_get,
1856 .set = shiftfs_xattr_set,
1857};
1858
1859const struct xattr_handler *shiftfs_xattr_handlers[] = {
1860#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1861 &shiftfs_posix_acl_access_xattr_handler,
1862 &shiftfs_posix_acl_default_xattr_handler,
1863#endif
1864 &shiftfs_xattr_handler,
1865 NULL
1866};
1867
/*
 * Return true when every bit set in @new_flags is also set in @old_flags,
 * i.e. the new passthrough options do not exceed the old ones.
 */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	const int extra_bits = new_flags & ~old_flags;

	return extra_bits == 0;
}
1875
e36a1be6
CB
1876static int shiftfs_super_check_flags(unsigned long old_flags,
1877 unsigned long new_flags)
1878{
1879 if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1880 return -EPERM;
1881
1882 if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1883 return -EPERM;
1884
1885 if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1886 return -EPERM;
1887
1888 if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1889 return -EPERM;
1890
1891 if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1892 return -EPERM;
1893
1894 if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1895 return -EPERM;
1896
1897 if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1898 return -EPERM;
1899
1900 return 0;
1901}
1902
45bd5d34
CB
1903static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1904{
1905 int err;
1906 struct shiftfs_super_info new = {};
1907 struct shiftfs_super_info *info = sb->s_fs_info;
1908
1909 err = shiftfs_parse_mount_options(&new, data);
8bde1547 1910 if (err)
45bd5d34
CB
1911 return err;
1912
e36a1be6
CB
1913 err = shiftfs_super_check_flags(sb->s_flags, *flags);
1914 if (err)
1915 return err;
1916
45bd5d34
CB
1917 /* Mark mount option cannot be changed. */
1918 if (info->mark || (info->mark != new.mark))
1919 return -EPERM;
1920
1921 if (info->passthrough != new.passthrough) {
1922 /* Don't allow exceeding passthrough options of mark mount. */
d6d053b7 1923 if (!passthrough_is_subset(info->passthrough_mark,
45bd5d34
CB
1924 info->passthrough))
1925 return -EPERM;
1926
1927 info->passthrough = new.passthrough;
1928 }
1929
1930 return 0;
1931}
8bde1547 1932
45bd5d34
CB
1933static const struct super_operations shiftfs_super_ops = {
1934 .put_super = shiftfs_put_super,
1935 .show_options = shiftfs_show_options,
1936 .statfs = shiftfs_statfs,
1937 .remount_fs = shiftfs_remount,
1938 .evict_inode = shiftfs_evict_inode,
1939};
1940
/* Bundle handed from shiftfs_mount() to shiftfs_fill_super(). */
struct shiftfs_data {
	/* raw mount data, given to shiftfs_parse_mount_options() */
	void *data;
	/* the mount's dev_name; resolved with kern_path() as the lower path */
	const char *path;
};
1945
e36a1be6
CB
1946static void shiftfs_super_force_flags(struct super_block *sb,
1947 unsigned long lower_flags)
1948{
1949 sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1950 SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1951
1952 if (!(lower_flags & SB_POSIXACL))
1953 sb->s_flags &= ~SB_POSIXACL;
1954}
1955
45bd5d34
CB
1956static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1957 int silent)
1958{
1959 int err;
1960 struct path path = {};
1961 struct shiftfs_super_info *sbinfo_mp;
1962 char *name = NULL;
1963 struct inode *inode = NULL;
1964 struct dentry *dentry = NULL;
1965 struct shiftfs_data *data = raw_data;
1966 struct shiftfs_super_info *sbinfo = NULL;
1967
1968 if (!data->path)
1969 return -EINVAL;
1970
1971 sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1972 if (!sb->s_fs_info)
1973 return -ENOMEM;
1974 sbinfo = sb->s_fs_info;
1975
1976 err = shiftfs_parse_mount_options(sbinfo, data->data);
1977 if (err)
1978 return err;
1979
1980 /* to mount a mark, must be userns admin */
1981 if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
1982 return -EPERM;
1983
1984 name = kstrdup(data->path, GFP_KERNEL);
1985 if (!name)
1986 return -ENOMEM;
1987
1988 err = kern_path(name, LOOKUP_FOLLOW, &path);
1989 if (err)
1990 goto out_free_name;
8bde1547
JB
1991
1992 if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
1993 err = -ENOTDIR;
45bd5d34 1994 goto out_put_path;
8bde1547
JB
1995 }
1996
e36a1be6
CB
1997 sb->s_flags |= SB_POSIXACL;
1998
45bd5d34 1999 if (sbinfo->mark) {
4f28efed 2000 struct cred *cred_tmp;
45bd5d34
CB
2001 struct super_block *lower_sb = path.mnt->mnt_sb;
2002
2003 /* to mark a mount point, must root wrt lower s_user_ns */
2004 if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
2005 err = -EPERM;
2006 goto out_put_path;
2007 }
8bde1547 2008
8bde1547
JB
2009 /*
2010 * this part is visible unshifted, so make sure no
2011 * executables that could be used to give suid
2012 * privileges
2013 */
2014 sb->s_iflags = SB_I_NOEXEC;
8bde1547 2015
e36a1be6
CB
2016 shiftfs_super_force_flags(sb, lower_sb->s_flags);
2017
8bde1547 2018 /*
45bd5d34
CB
2019 * Handle nesting of shiftfs mounts by referring this mark
2020 * mount back to the original mark mount. This is more
2021 * efficient and alleviates concerns about stack depth.
8bde1547 2022 */
45bd5d34
CB
2023 if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2024 sbinfo_mp = lower_sb->s_fs_info;
2025
2026 /* Doesn't make sense to mark a mark mount */
2027 if (sbinfo_mp->mark) {
2028 err = -EINVAL;
2029 goto out_put_path;
2030 }
2031
2032 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2033 sbinfo->passthrough)) {
2034 err = -EPERM;
2035 goto out_put_path;
2036 }
2037
2038 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2039 dentry = dget(path.dentry->d_fsdata);
d6d053b7
CB
2040 /*
2041 * Copy up the passthrough mount options from the
2042 * parent mark mountpoint.
2043 */
2044 sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
e33fe991 2045 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
45bd5d34
CB
2046 } else {
2047 sbinfo->mnt = mntget(path.mnt);
2048 dentry = dget(path.dentry);
d6d053b7
CB
2049 /*
2050 * For a new mark passthrough_mark and passthrough
2051 * are identical.
2052 */
2053 sbinfo->passthrough_mark = sbinfo->passthrough;
45bd5d34 2054
e33fe991
CB
2055 cred_tmp = prepare_creds();
2056 if (!cred_tmp) {
2057 err = -ENOMEM;
2058 goto out_put_path;
2059 }
2060 /* Don't override disk quota limits or use reserved space. */
2061 cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2062 sbinfo->creator_cred = cred_tmp;
45bd5d34
CB
2063 }
2064 } else {
2065 /*
2066 * This leg executes if we're admin capable in the namespace,
2067 * so be very careful.
2068 */
2069 err = -EPERM;
8bde1547 2070 if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
45bd5d34
CB
2071 goto out_put_path;
2072
2073 sbinfo_mp = path.dentry->d_sb->s_fs_info;
2074 if (!sbinfo_mp->mark)
2075 goto out_put_path;
2076
2077 if (!passthrough_is_subset(sbinfo_mp->passthrough,
2078 sbinfo->passthrough))
2079 goto out_put_path;
2080
2081 sbinfo->mnt = mntget(sbinfo_mp->mnt);
2082 sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
8bde1547 2083 dentry = dget(path.dentry->d_fsdata);
d6d053b7
CB
2084 /*
2085 * Copy up passthrough settings from mark mountpoint so we can
2086 * verify when the overlay wants to remount with different
2087 * passthrough settings.
2088 */
2089 sbinfo->passthrough_mark = sbinfo_mp->passthrough;
e36a1be6 2090 shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
45bd5d34
CB
2091 }
2092
2093 sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2094 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2095 printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2096 err = -EINVAL;
2097 goto out_put_path;
2098 }
2099
2100 inode = new_inode(sb);
2101 if (!inode) {
2102 err = -ENOMEM;
2103 goto out_put_path;
8bde1547 2104 }
45bd5d34
CB
2105 shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2106
2107 ihold(dentry->d_inode);
2108 inode->i_private = dentry->d_inode;
2109
8bde1547 2110 sb->s_magic = SHIFTFS_MAGIC;
f9619714 2111 sb->s_maxbytes = MAX_LFS_FILESIZE;
8bde1547
JB
2112 sb->s_op = &shiftfs_super_ops;
2113 sb->s_xattr = shiftfs_xattr_handlers;
2114 sb->s_d_op = &shiftfs_dentry_ops;
45bd5d34
CB
2115 sb->s_root = d_make_root(inode);
2116 if (!sb->s_root) {
2117 err = -ENOMEM;
2118 goto out_put_path;
2119 }
2120
8bde1547 2121 sb->s_root->d_fsdata = dentry;
45bd5d34
CB
2122 sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2123 shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
8bde1547 2124
45bd5d34
CB
2125 dentry = NULL;
2126 err = 0;
8bde1547 2127
45bd5d34 2128out_put_path:
8bde1547 2129 path_put(&path);
45bd5d34
CB
2130
2131out_free_name:
8bde1547 2132 kfree(name);
45bd5d34
CB
2133
2134 dput(dentry);
2135
8bde1547
JB
2136 return err;
2137}
2138
2139static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2140 int flags, const char *dev_name, void *data)
2141{
2142 struct shiftfs_data d = { data, dev_name };
2143
2144 return mount_nodev(fs_type, flags, &d, shiftfs_fill_super);
2145}
2146
2147static struct file_system_type shiftfs_type = {
2148 .owner = THIS_MODULE,
2149 .name = "shiftfs",
2150 .mount = shiftfs_mount,
2151 .kill_sb = kill_anon_super,
2152 .fs_flags = FS_USERNS_MOUNT,
2153};
2154
2155static int __init shiftfs_init(void)
2156{
2157 return register_filesystem(&shiftfs_type);
2158}
2159
2160static void __exit shiftfs_exit(void)
2161{
2162 unregister_filesystem(&shiftfs_type);
2163}
2164
2165MODULE_ALIAS_FS("shiftfs");
2166MODULE_AUTHOR("James Bottomley");
45bd5d34
CB
2167MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>");
2168MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>");
2169MODULE_DESCRIPTION("id shifting filesystem");
8bde1547
JB
2170MODULE_LICENSE("GPL v2");
2171module_init(shiftfs_init)
2172module_exit(shiftfs_exit)